/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
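
/* As an illustrative sketch (not part of the original comments), consider
   a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   whose body recomputes the address a + i * sizeof (*a) on every
   iteration.  Strength reduction may instead select a single pointer
   induction variable, giving the equivalent of

     for (p = a; p < a + n; p++)
       sum += *p;

   so that the per-iteration multiplication is replaced by a cheap
   pointer increment.  */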

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

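/* The default estimate of the number of iterations of a loop, used by
   avg_loop_niter below when no profile-based estimate is available; the
   LOOP argument is unused.  */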
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object that the induction variable points to.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier of that use, if have_use_for is set.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   the complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
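
/* As a hypothetical illustration (the actual values are target-specific),
   a load using an addressing mode like [base + 4*index] might have the
   same runtime cost as a plain [base] load but would be given a larger
   complexity value, so that among candidate sets of equal runtime cost
   the one using simpler addressing modes is preferred.  */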

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, lest we
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
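
/* For example (an illustrative sketch), for &a[i] + 4 the recursion above
   strips the POINTER_PLUS_EXPR and the ADDR_EXPR, finds the declaration a
   as the base address, and returns its address converted to ptr_type_node,
   while an integer constant such as (int *) 0 yields NULL_TREE.  */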

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
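
/* E.g. (an illustrative sketch) &a[0] is considered complex because the
   operand of the ADDR_EXPR is an ARRAY_REF rather than a declaration, and
   so is &a[0] + 16, where the ADDR_EXPR appears under a POINTER_PLUS_EXPR;
   plain &a, whose operand is the declaration itself, is not.  */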

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower the address expression in BASE, except for those whose operand
     is a DECL_P.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
   does not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at every ssa var, but since that
	 is expensive and such chains are unusual, we only check the first
	 one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at every ssa var, but since that
     is expensive and such chains are unusual, we only check the first
     one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop that EXPR is obviously invariant in,
   relative to the loop LOOP, i.e. such that all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
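
/* E.g. (an illustrative sketch) for TOP = i * 8 + 4 and BOT = i the
   recursion yields 8 for the MULT_EXPR operand but fails on the constant
   4, which is not a multiple of i, so the call returns false; for
   TOP = i * 8 + i * 4 and BOT = i it succeeds with *MUL = 12.  */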

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
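
/* For instance (an illustrative sketch), a 4-byte-aligned int access with
   a step of 2 bytes has tree_ctz (step) == 1, and (1U << 1) * BITS_PER_UNIT
   == 16 is smaller than the 32-bit alignment, so successive references may
   straddle the required alignment and the function returns true.  */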

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
1839 
1840 /* Finds addresses in *OP_P inside STMT.  */
1841 
1842 static void
1843 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1844 {
1845   tree base = *op_p, step = size_zero_node;
1846   struct iv *civ;
1847   struct ifs_ivopts_data ifs_ivopts_data;
1848 
1849   /* Do not play with volatile memory references.  A bit too conservative,
1850      perhaps, but safe.  */
1851   if (gimple_has_volatile_ops (stmt))
1852     goto fail;
1853 
1854   /* Ignore bitfields for now.  Not really something terribly complicated
1855      to handle.  TODO.  */
1856   if (TREE_CODE (base) == BIT_FIELD_REF)
1857     goto fail;
1858 
1859   base = unshare_expr (base);
1860 
1861   if (TREE_CODE (base) == TARGET_MEM_REF)
1862     {
1863       tree type = build_pointer_type (TREE_TYPE (base));
1864       tree astep;
1865 
1866       if (TMR_BASE (base)
1867 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1868 	{
1869 	  civ = get_iv (data, TMR_BASE (base));
1870 	  if (!civ)
1871 	    goto fail;
1872 
1873 	  TMR_BASE (base) = civ->base;
1874 	  step = civ->step;
1875 	}
1876       if (TMR_INDEX2 (base)
1877 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1878 	{
1879 	  civ = get_iv (data, TMR_INDEX2 (base));
1880 	  if (!civ)
1881 	    goto fail;
1882 
1883 	  TMR_INDEX2 (base) = civ->base;
1884 	  step = civ->step;
1885 	}
1886       if (TMR_INDEX (base)
1887 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1888 	{
1889 	  civ = get_iv (data, TMR_INDEX (base));
1890 	  if (!civ)
1891 	    goto fail;
1892 
1893 	  TMR_INDEX (base) = civ->base;
1894 	  astep = civ->step;
1895 
1896 	  if (astep)
1897 	    {
1898 	      if (TMR_STEP (base))
1899 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1900 
1901 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
1902 	    }
1903 	}
1904 
1905       if (integer_zerop (step))
1906 	goto fail;
1907       base = tree_mem_ref_addr (type, base);
1908     }
1909   else
1910     {
1911       ifs_ivopts_data.ivopts_data = data;
1912       ifs_ivopts_data.stmt = stmt;
1913       ifs_ivopts_data.step = size_zero_node;
1914       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1915 	  || integer_zerop (ifs_ivopts_data.step))
1916 	goto fail;
1917       step = ifs_ivopts_data.step;
1918 
1919       /* Check that the base expression is addressable.  This needs
1920 	 to be done after substituting bases of IVs into it.  */
1921       if (may_be_nonaddressable_p (base))
1922 	goto fail;
1923 
1924       /* Moreover, on strict alignment platforms, check that it is
1925 	 sufficiently aligned.  */
1926       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1927 	goto fail;
1928 
1929       base = build_fold_addr_expr (base);
1930 
1931       /* Substituting bases of IVs into the base expression might
1932 	 have created folding opportunities.  */
1933       if (TREE_CODE (base) == ADDR_EXPR)
1934 	{
1935 	  tree *ref = &TREE_OPERAND (base, 0);
1936 	  while (handled_component_p (*ref))
1937 	    ref = &TREE_OPERAND (*ref, 0);
1938 	  if (TREE_CODE (*ref) == MEM_REF)
1939 	    {
1940 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1941 				      TREE_OPERAND (*ref, 0),
1942 				      TREE_OPERAND (*ref, 1));
1943 	      if (tem)
1944 		*ref = tem;
1945 	    }
1946 	}
1947     }
1948 
1949   civ = alloc_iv (base, step);
1950   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1951   return;
1952 
1953 fail:
1954   for_each_index (op_p, idx_record_use, data);
1955 }
1956 
1957 /* Finds and records invariants used in STMT.  */
1958 
1959 static void
1960 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1961 {
1962   ssa_op_iter iter;
1963   use_operand_p use_p;
1964   tree op;
1965 
1966   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1967     {
1968       op = USE_FROM_PTR (use_p);
1969       record_invariant (data, op, false);
1970     }
1971 }
1972 
1973 /* Finds interesting uses of induction variables in the statement STMT.  */
1974 
1975 static void
1976 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1977 {
1978   struct iv *iv;
1979   tree op, *lhs, *rhs;
1980   ssa_op_iter iter;
1981   use_operand_p use_p;
1982   enum tree_code code;
1983 
1984   find_invariants_stmt (data, stmt);
1985 
1986   if (gimple_code (stmt) == GIMPLE_COND)
1987     {
1988       find_interesting_uses_cond (data, stmt);
1989       return;
1990     }
1991 
1992   if (is_gimple_assign (stmt))
1993     {
1994       lhs = gimple_assign_lhs_ptr (stmt);
1995       rhs = gimple_assign_rhs1_ptr (stmt);
1996 
1997       if (TREE_CODE (*lhs) == SSA_NAME)
1998 	{
1999 	  /* If the statement defines an induction variable, the uses are not
2000 	     interesting by themselves.  */
2001 
2002 	  iv = get_iv (data, *lhs);
2003 
2004 	  if (iv && !integer_zerop (iv->step))
2005 	    return;
2006 	}
2007 
2008       code = gimple_assign_rhs_code (stmt);
2009       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2010 	  && (REFERENCE_CLASS_P (*rhs)
2011 	      || is_gimple_val (*rhs)))
2012 	{
2013 	  if (REFERENCE_CLASS_P (*rhs))
2014 	    find_interesting_uses_address (data, stmt, rhs);
2015 	  else
2016 	    find_interesting_uses_op (data, *rhs);
2017 
2018 	  if (REFERENCE_CLASS_P (*lhs))
2019 	    find_interesting_uses_address (data, stmt, lhs);
2020 	  return;
2021 	}
2022       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2023 	{
2024 	  find_interesting_uses_cond (data, stmt);
2025 	  return;
2026 	}
2027 
2028       /* TODO -- we should also handle address uses of type
2029 
2030 	 memory = call (whatever);
2031 
2032 	 and
2033 
2034 	 call (memory).  */
2035     }
2036 
2037   if (gimple_code (stmt) == GIMPLE_PHI
2038       && gimple_bb (stmt) == data->current_loop->header)
2039     {
2040       iv = get_iv (data, PHI_RESULT (stmt));
2041 
2042       if (iv && !integer_zerop (iv->step))
2043 	return;
2044     }
2045 
2046   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2047     {
2048       op = USE_FROM_PTR (use_p);
2049 
2050       if (TREE_CODE (op) != SSA_NAME)
2051 	continue;
2052 
2053       iv = get_iv (data, op);
2054       if (!iv)
2055 	continue;
2056 
2057       find_interesting_uses_op (data, op);
2058     }
2059 }
2060 
2061 /* Finds interesting uses of induction variables outside of loops
2062    on loop exit edge EXIT.  */
2063 
2064 static void
2065 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2066 {
2067   gphi *phi;
2068   gphi_iterator psi;
2069   tree def;
2070 
2071   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2072     {
2073       phi = psi.phi ();
2074       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2075       if (!virtual_operand_p (def))
2076         find_interesting_uses_op (data, def);
2077     }
2078 }
2079 
2080 /* Finds uses of the induction variables that are interesting.  */
2081 
2082 static void
2083 find_interesting_uses (struct ivopts_data *data)
2084 {
2085   basic_block bb;
2086   gimple_stmt_iterator bsi;
2087   basic_block *body = get_loop_body (data->current_loop);
2088   unsigned i;
2089   struct version_info *info;
2090   edge e;
2091 
2092   if (dump_file && (dump_flags & TDF_DETAILS))
2093     fprintf (dump_file, "Uses:\n\n");
2094 
2095   for (i = 0; i < data->current_loop->num_nodes; i++)
2096     {
2097       edge_iterator ei;
2098       bb = body[i];
2099 
2100       FOR_EACH_EDGE (e, ei, bb->succs)
2101 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2102 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2103 	  find_interesting_uses_outside (data, e);
2104 
2105       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2106 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2107       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2108 	if (!is_gimple_debug (gsi_stmt (bsi)))
2109 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2110     }
2111 
2112   if (dump_file && (dump_flags & TDF_DETAILS))
2113     {
2114       bitmap_iterator bi;
2115 
2116       fprintf (dump_file, "\n");
2117 
2118       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2119 	{
2120 	  info = ver_info (data, i);
2121 	  if (info->inv_id)
2122 	    {
2123 	      fprintf (dump_file, "  ");
2124 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
2125 	      fprintf (dump_file, " is invariant (%d)%s\n",
2126 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2127 	    }
2128 	}
2129 
2130       fprintf (dump_file, "\n");
2131     }
2132 
2133   free (body);
2134 }
2135 
2136 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2137    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2138    we are at the top-level of the processed address.  */
2139 
2140 static tree
2141 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2142 		HOST_WIDE_INT *offset)
2143 {
2144   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2145   enum tree_code code;
2146   tree type, orig_type = TREE_TYPE (expr);
2147   HOST_WIDE_INT off0, off1, st;
2148   tree orig_expr = expr;
2149 
2150   STRIP_NOPS (expr);
2151 
2152   type = TREE_TYPE (expr);
2153   code = TREE_CODE (expr);
2154   *offset = 0;
2155 
2156   switch (code)
2157     {
2158     case INTEGER_CST:
2159       if (!cst_and_fits_in_hwi (expr)
2160 	  || integer_zerop (expr))
2161 	return orig_expr;
2162 
2163       *offset = int_cst_value (expr);
2164       return build_int_cst (orig_type, 0);
2165 
2166     case POINTER_PLUS_EXPR:
2167     case PLUS_EXPR:
2168     case MINUS_EXPR:
2169       op0 = TREE_OPERAND (expr, 0);
2170       op1 = TREE_OPERAND (expr, 1);
2171 
2172       op0 = strip_offset_1 (op0, false, false, &off0);
2173       op1 = strip_offset_1 (op1, false, false, &off1);
2174 
2175       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2176       if (op0 == TREE_OPERAND (expr, 0)
2177 	  && op1 == TREE_OPERAND (expr, 1))
2178 	return orig_expr;
2179 
2180       if (integer_zerop (op1))
2181 	expr = op0;
2182       else if (integer_zerop (op0))
2183 	{
2184 	  if (code == MINUS_EXPR)
2185 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2186 	  else
2187 	    expr = op1;
2188 	}
2189       else
2190 	expr = fold_build2 (code, type, op0, op1);
2191 
2192       return fold_convert (orig_type, expr);
2193 
2194     case MULT_EXPR:
2195       op1 = TREE_OPERAND (expr, 1);
2196       if (!cst_and_fits_in_hwi (op1))
2197 	return orig_expr;
2198 
2199       op0 = TREE_OPERAND (expr, 0);
2200       op0 = strip_offset_1 (op0, false, false, &off0);
2201       if (op0 == TREE_OPERAND (expr, 0))
2202 	return orig_expr;
2203 
2204       *offset = off0 * int_cst_value (op1);
2205       if (integer_zerop (op0))
2206 	expr = op0;
2207       else
2208 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2209 
2210       return fold_convert (orig_type, expr);
2211 
2212     case ARRAY_REF:
2213     case ARRAY_RANGE_REF:
2214       if (!inside_addr)
2215 	return orig_expr;
2216 
2217       step = array_ref_element_size (expr);
2218       if (!cst_and_fits_in_hwi (step))
2219 	break;
2220 
2221       st = int_cst_value (step);
2222       op1 = TREE_OPERAND (expr, 1);
2223       op1 = strip_offset_1 (op1, false, false, &off1);
2224       *offset = off1 * st;
2225 
2226       if (top_compref
2227 	  && integer_zerop (op1))
2228 	{
2229 	  /* Strip the component reference completely.  */
2230 	  op0 = TREE_OPERAND (expr, 0);
2231 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2232 	  *offset += off0;
2233 	  return op0;
2234 	}
2235       break;
2236 
2237     case COMPONENT_REF:
2238       {
2239 	tree field;
2240 
2241 	if (!inside_addr)
2242 	  return orig_expr;
2243 
2244 	tmp = component_ref_field_offset (expr);
2245 	field = TREE_OPERAND (expr, 1);
2246 	if (top_compref
2247 	    && cst_and_fits_in_hwi (tmp)
2248 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2249 	  {
2250 	    HOST_WIDE_INT boffset, abs_off;
2251 
2252 	    /* Strip the component reference completely.  */
2253 	    op0 = TREE_OPERAND (expr, 0);
2254 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2255 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2256 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2257 	    if (boffset < 0)
2258 	      abs_off = -abs_off;
2259 
2260 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2261 	    return op0;
2262 	  }
2263       }
2264       break;
2265 
2266     case ADDR_EXPR:
2267       op0 = TREE_OPERAND (expr, 0);
2268       op0 = strip_offset_1 (op0, true, true, &off0);
2269       *offset += off0;
2270 
2271       if (op0 == TREE_OPERAND (expr, 0))
2272 	return orig_expr;
2273 
2274       expr = build_fold_addr_expr (op0);
2275       return fold_convert (orig_type, expr);
2276 
2277     case MEM_REF:
2278       /* ???  Offset operand?  */
2279       inside_addr = false;
2280       break;
2281 
2282     default:
2283       return orig_expr;
2284     }
2285 
2286   /* Default handling of expressions for which we want to recurse into
2287      the first operand.  */
2288   op0 = TREE_OPERAND (expr, 0);
2289   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2290   *offset += off0;
2291 
2292   if (op0 == TREE_OPERAND (expr, 0)
2293       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2294     return orig_expr;
2295 
2296   expr = copy_node (expr);
2297   TREE_OPERAND (expr, 0) = op0;
2298   if (op1)
2299     TREE_OPERAND (expr, 1) = op1;
2300 
2301   /* Inside an address, we might strip the top-level component references,
2302      thus changing the type of the expression.  The handling of ADDR_EXPR
2303      will fix that.  */
2304   expr = fold_convert (orig_type, expr);
2305 
2306   return expr;
2307 }
2308 
2309 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2310 
2311 static tree
2312 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2313 {
2314   HOST_WIDE_INT off;
2315   tree core = strip_offset_1 (expr, false, false, &off);
2316   *offset = off;
2317   return core;
2318 }
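
     /* For example, for the address &a[i + 2], where "a" is an array of
        4-byte integers, strip_offset returns the core expression &a[i]
        and stores 8 (two elements of 4 bytes) to *OFFSET.  */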
2319 
2320 /* Returns a variant of TYPE that can be used as a base for different uses.
2321    We return an unsigned type with the same precision, which avoids problems
2322    with overflows.  */
2323 
2324 static tree
2325 generic_type_for (tree type)
2326 {
2327   if (POINTER_TYPE_P (type))
2328     return unsigned_type_for (type);
2329 
2330   if (TYPE_UNSIGNED (type))
2331     return type;
2332 
2333   return unsigned_type_for (type);
2334 }
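
     /* For example, "int" becomes "unsigned int", and a pointer type becomes
        an unsigned integer type of the same precision; types that are
        already unsigned are returned unchanged.  Arithmetic in the returned
        type wraps instead of invoking undefined behavior on overflow.  */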
2335 
2336 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2337    the bitmap in which we should store them.  */
2338 
2339 static struct ivopts_data *fd_ivopts_data;
2340 static tree
2341 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2342 {
2343   bitmap *depends_on = (bitmap *) data;
2344   struct version_info *info;
2345 
2346   if (TREE_CODE (*expr_p) != SSA_NAME)
2347     return NULL_TREE;
2348   info = name_info (fd_ivopts_data, *expr_p);
2349 
2350   if (!info->inv_id || info->has_nonlin_use)
2351     return NULL_TREE;
2352 
2353   if (!*depends_on)
2354     *depends_on = BITMAP_ALLOC (NULL);
2355   bitmap_set_bit (*depends_on, info->inv_id);
2356 
2357   return NULL_TREE;
2358 }
2359 
2360 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2361    position to POS.  If USE is not NULL, the candidate is set as related to
2362    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2363    replacement of the final value of the iv by a direct computation.  */
2364 
2365 static struct iv_cand *
2366 add_candidate_1 (struct ivopts_data *data,
2367 		 tree base, tree step, bool important, enum iv_position pos,
2368 		 struct iv_use *use, gimple incremented_at)
2369 {
2370   unsigned i;
2371   struct iv_cand *cand = NULL;
2372   tree type, orig_type;
2373 
2374   /* For non-original variables, make sure their values are computed in a type
2375      that does not invoke undefined behavior on overflows (since in general,
2376      we cannot prove that these induction variables are non-wrapping).  */
2377   if (pos != IP_ORIGINAL)
2378     {
2379       orig_type = TREE_TYPE (base);
2380       type = generic_type_for (orig_type);
2381       if (type != orig_type)
2382 	{
2383 	  base = fold_convert (type, base);
2384 	  step = fold_convert (type, step);
2385 	}
2386     }
2387 
2388   for (i = 0; i < n_iv_cands (data); i++)
2389     {
2390       cand = iv_cand (data, i);
2391 
2392       if (cand->pos != pos)
2393 	continue;
2394 
2395       if (cand->incremented_at != incremented_at
2396 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2397 	      && cand->ainc_use != use))
2398 	continue;
2399 
2400       if (!cand->iv)
2401 	{
2402 	  if (!base && !step)
2403 	    break;
2404 
2405 	  continue;
2406 	}
2407 
2408       if (!base && !step)
2409 	continue;
2410 
2411       if (operand_equal_p (base, cand->iv->base, 0)
2412 	  && operand_equal_p (step, cand->iv->step, 0)
2413           && (TYPE_PRECISION (TREE_TYPE (base))
2414               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2415 	break;
2416     }
2417 
2418   if (i == n_iv_cands (data))
2419     {
2420       cand = XCNEW (struct iv_cand);
2421       cand->id = i;
2422 
2423       if (!base && !step)
2424 	cand->iv = NULL;
2425       else
2426 	cand->iv = alloc_iv (base, step);
2427 
2428       cand->pos = pos;
2429       if (pos != IP_ORIGINAL && cand->iv)
2430 	{
2431 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2432 	  cand->var_after = cand->var_before;
2433 	}
2434       cand->important = important;
2435       cand->incremented_at = incremented_at;
2436       data->iv_candidates.safe_push (cand);
2437 
2438       if (step
2439 	  && TREE_CODE (step) != INTEGER_CST)
2440 	{
2441 	  fd_ivopts_data = data;
2442 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2443 	}
2444 
2445       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2446 	cand->ainc_use = use;
2447       else
2448 	cand->ainc_use = NULL;
2449 
2450       if (dump_file && (dump_flags & TDF_DETAILS))
2451 	dump_cand (dump_file, cand);
2452     }
2453 
2454   if (important && !cand->important)
2455     {
2456       cand->important = true;
2457       if (dump_file && (dump_flags & TDF_DETAILS))
2458 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2459     }
2460 
2461   if (use)
2462     {
2463       bitmap_set_bit (use->related_cands, i);
2464       if (dump_file && (dump_flags & TDF_DETAILS))
2465 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2466 		 cand->id, use->id);
2467     }
2468 
2469   return cand;
2470 }
2471 
2472 /* Returns true if incrementing the induction variable at the end of the LOOP
2473    is allowed.
2474 
2475    The purpose is to avoid splitting the latch edge with a biv increment,
2476    thus creating a jump, possibly confusing other optimization passes and
2477    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2478    IP_NORMAL_POS is not available (so we do not have a better alternative),
2479    or if the latch edge is already nonempty.  */
2480 
2481 static bool
2482 allow_ip_end_pos_p (struct loop *loop)
2483 {
2484   if (!ip_normal_pos (loop))
2485     return true;
2486 
2487   if (!empty_block_p (ip_end_pos (loop)))
2488     return true;
2489 
2490   return false;
2491 }
2492 
2493 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2494    Important field is set to IMPORTANT.  */
2495 
2496 static void
2497 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2498 			bool important, struct iv_use *use)
2499 {
2500   basic_block use_bb = gimple_bb (use->stmt);
2501   machine_mode mem_mode;
2502   unsigned HOST_WIDE_INT cstepi;
2503 
2504   /* If we insert the increment in any position other than the standard
2505      ones, we must ensure that it is incremented once per iteration.
2506      It must not be in an inner nested loop, or on one side of an if
2507      statement.  */
2508   if (use_bb->loop_father != data->current_loop
2509       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2510       || stmt_could_throw_p (use->stmt)
2511       || !cst_and_fits_in_hwi (step))
2512     return;
2513 
2514   cstepi = int_cst_value (step);
2515 
2516   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2517   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2518 	|| USE_STORE_PRE_INCREMENT (mem_mode))
2519        && GET_MODE_SIZE (mem_mode) == cstepi)
2520       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2521 	   || USE_STORE_PRE_DECREMENT (mem_mode))
2522 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2523     {
2524       enum tree_code code = MINUS_EXPR;
2525       tree new_base;
2526       tree new_step = step;
2527 
2528       if (POINTER_TYPE_P (TREE_TYPE (base)))
2529 	{
2530 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2531 	  code = POINTER_PLUS_EXPR;
2532 	}
2533       else
2534 	new_step = fold_convert (TREE_TYPE (base), new_step);
2535       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2536       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2537 		       use->stmt);
2538     }
2539   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2540 	|| USE_STORE_POST_INCREMENT (mem_mode))
2541        && GET_MODE_SIZE (mem_mode) == cstepi)
2542       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2543 	   || USE_STORE_POST_DECREMENT (mem_mode))
2544 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2545     {
2546       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2547 		       use->stmt);
2548     }
2549 }
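
     /* For example, on a target with post-increment addressing for SImode
        and a use *p inside the loop whose step is 4 (the mode size), the
        second test above adds an IP_AFTER_USE candidate, so that the access
        and the increment can later be combined into a single "*p++" style
        instruction.  */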
2550 
2551 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2552    position to POS.  If USE is not NULL, the candidate is set as related to
2553    it.  The candidate computation is scheduled on all available positions.  */
2554 
2555 static void
2556 add_candidate (struct ivopts_data *data,
2557 	       tree base, tree step, bool important, struct iv_use *use)
2558 {
2559   if (ip_normal_pos (data->current_loop))
2560     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2561   if (ip_end_pos (data->current_loop)
2562       && allow_ip_end_pos_p (data->current_loop))
2563     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2564 
2565   if (use != NULL && use->type == USE_ADDRESS)
2566     add_autoinc_candidates (data, base, step, important, use);
2567 }
2568 
2569 /* Adds standard iv candidates.  */
2570 
2571 static void
2572 add_standard_iv_candidates (struct ivopts_data *data)
2573 {
2574   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2575 
2576   /* The same for the "long" type, if it is wider and still fast enough.  */
2577   if (TYPE_PRECISION
2578         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2579       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2580     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2581 		   build_int_cst (long_integer_type_node, 1), true, NULL);
2582 
2583   /* And again for "long long", if it is wider still and fast enough.  */
2584   if (TYPE_PRECISION
2585         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2586       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2587     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2588 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
2589 }
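
     /* For example, after this runs, the candidate set always contains the
        counter 0 + 1 * i of type "int", and also of types "long" and
        "long long" when those are wider yet still no wider than a machine
        word.  */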
2590 
2591 
2592 /* Adds candidates based on the old induction variable IV.  */
2593 
2594 static void
2595 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2596 {
2597   gimple phi;
2598   tree def;
2599   struct iv_cand *cand;
2600 
2601   add_candidate (data, iv->base, iv->step, true, NULL);
2602 
2603   /* The same, but with initial value zero.  */
2604   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2605     add_candidate (data, size_int (0), iv->step, true, NULL);
2606   else
2607     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2608 		   iv->step, true, NULL);
2609 
2610   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2611   if (gimple_code (phi) == GIMPLE_PHI)
2612     {
2613       /* Additionally record the possibility of leaving the original iv
2614 	 untouched.  */
2615       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2616       /* Don't add candidate if it's from another PHI node because
2617 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
2618       phi = SSA_NAME_DEF_STMT (def);
2619       if (gimple_code (phi) != GIMPLE_PHI)
2620 	{
2621 	  cand = add_candidate_1 (data,
2622 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
2623 				  SSA_NAME_DEF_STMT (def));
2624 	  cand->var_before = iv->ssa_name;
2625 	  cand->var_after = def;
2626 	}
2627       else
2628 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
2629     }
2630 }
2631 
2632 /* Adds candidates based on the old induction variables.  */
2633 
2634 static void
2635 add_old_ivs_candidates (struct ivopts_data *data)
2636 {
2637   unsigned i;
2638   struct iv *iv;
2639   bitmap_iterator bi;
2640 
2641   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2642     {
2643       iv = ver_info (data, i)->iv;
2644       if (iv && iv->biv_p && !integer_zerop (iv->step))
2645 	add_old_iv_candidates (data, iv);
2646     }
2647 }
2648 
2649 /* Adds candidates based on the value of the induction variable IV and USE.  */
2650 
2651 static void
2652 add_iv_value_candidates (struct ivopts_data *data,
2653 			 struct iv *iv, struct iv_use *use)
2654 {
2655   unsigned HOST_WIDE_INT offset;
2656   tree base;
2657   tree basetype;
2658 
2659   add_candidate (data, iv->base, iv->step, false, use);
2660 
2661   /* The same, but with initial value zero.  Make such a variable important,
2662      since it is generic enough that possibly many uses may be based
2663      on it.  */
2664   basetype = TREE_TYPE (iv->base);
2665   if (POINTER_TYPE_P (basetype))
2666     basetype = sizetype;
2667   add_candidate (data, build_int_cst (basetype, 0),
2668 		 iv->step, true, use);
2669 
2670   /* Third, try removing the constant offset.  Make sure to even
2671      add a candidate for &a[0] vs. (T *)&a.  */
2672   base = strip_offset (iv->base, &offset);
2673   if (offset
2674       || base != iv->base)
2675     add_candidate (data, base, iv->step, false, use);
2676 }
2677 
2678 /* Adds candidates based on the uses.  */
2679 
2680 static void
2681 add_derived_ivs_candidates (struct ivopts_data *data)
2682 {
2683   unsigned i;
2684 
2685   for (i = 0; i < n_iv_uses (data); i++)
2686     {
2687       struct iv_use *use = iv_use (data, i);
2688 
2689       if (!use)
2690 	continue;
2691 
2692       switch (use->type)
2693 	{
2694 	case USE_NONLINEAR_EXPR:
2695 	case USE_COMPARE:
2696 	case USE_ADDRESS:
2697 	  /* Just add the ivs based on the value of the iv used here.  */
2698 	  add_iv_value_candidates (data, use->iv, use);
2699 	  break;
2700 
2701 	default:
2702 	  gcc_unreachable ();
2703 	}
2704     }
2705 }
2706 
2707 /* Record important candidates and add them to related_cands bitmaps
2708    if needed.  */
2709 
2710 static void
2711 record_important_candidates (struct ivopts_data *data)
2712 {
2713   unsigned i;
2714   struct iv_use *use;
2715 
2716   for (i = 0; i < n_iv_cands (data); i++)
2717     {
2718       struct iv_cand *cand = iv_cand (data, i);
2719 
2720       if (cand->important)
2721 	bitmap_set_bit (data->important_candidates, i);
2722     }
2723 
2724   data->consider_all_candidates = (n_iv_cands (data)
2725 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2726 
2727   if (data->consider_all_candidates)
2728     {
2729       /* We will not need "related_cands" bitmaps in this case,
2730 	 so release them to decrease peak memory consumption.  */
2731       for (i = 0; i < n_iv_uses (data); i++)
2732 	{
2733 	  use = iv_use (data, i);
2734 	  BITMAP_FREE (use->related_cands);
2735 	}
2736     }
2737   else
2738     {
2739       /* Add important candidates to the related_cands bitmaps.  */
2740       for (i = 0; i < n_iv_uses (data); i++)
2741 	bitmap_ior_into (iv_use (data, i)->related_cands,
2742 			 data->important_candidates);
2743     }
2744 }
2745 
2746 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2747    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2748    we allocate a simple list for every use.  */
2749 
2750 static void
2751 alloc_use_cost_map (struct ivopts_data *data)
2752 {
2753   unsigned i, size, s;
2754 
2755   for (i = 0; i < n_iv_uses (data); i++)
2756     {
2757       struct iv_use *use = iv_use (data, i);
2758 
2759       if (data->consider_all_candidates)
2760 	size = n_iv_cands (data);
2761       else
2762 	{
2763 	  s = bitmap_count_bits (use->related_cands);
2764 
2765 	  /* Round up to a power of two, so that taking the modulo by it is fast.  */
2766 	  size = s ? (1 << ceil_log2 (s)) : 1;
2767 	}
2768 
2769       use->n_map_members = size;
2770       use->cost_map = XCNEWVEC (struct cost_pair, size);
2771     }
2772 }
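
     /* For example, a use with 5 related candidates gets a table of size 8,
        so that set_use_iv_cost and get_use_iv_cost below can reduce a
        candidate id with a cheap "id & 7" instead of a division.  */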
2773 
2774 /* Returns a description of the computation cost of an expression whose
2775    runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY.  */
2776 
2777 static comp_cost
2778 new_cost (unsigned runtime, unsigned complexity)
2779 {
2780   comp_cost cost;
2781 
2782   cost.cost = runtime;
2783   cost.complexity = complexity;
2784 
2785   return cost;
2786 }
2787 
2788 /* Adds costs COST1 and COST2.  */
2789 
2790 static comp_cost
2791 add_costs (comp_cost cost1, comp_cost cost2)
2792 {
2793   cost1.cost += cost2.cost;
2794   cost1.complexity += cost2.complexity;
2795 
2796   return cost1;
2797 }
2798 /* Subtracts COST2 from COST1.  */
2799 
2800 static comp_cost
2801 sub_costs (comp_cost cost1, comp_cost cost2)
2802 {
2803   cost1.cost -= cost2.cost;
2804   cost1.complexity -= cost2.complexity;
2805 
2806   return cost1;
2807 }
2808 
2809 /* Returns a negative number if COST1 < COST2, a positive number if
2810    COST1 > COST2, and 0 if COST1 = COST2.  */
2811 
2812 static int
2813 compare_costs (comp_cost cost1, comp_cost cost2)
2814 {
2815   if (cost1.cost == cost2.cost)
2816     return cost1.complexity - cost2.complexity;
2817 
2818   return cost1.cost - cost2.cost;
2819 }
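
     /* For example, {cost 4, complexity 1} compares less than {4, 2}, which
        in turn compares less than {5, 0}: the runtime cost dominates and the
        complexity only breaks ties.  */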
2820 
2821 /* Returns true if COST is infinite.  */
2822 
2823 static bool
2824 infinite_cost_p (comp_cost cost)
2825 {
2826   return cost.cost == INFTY;
2827 }
2828 
2829 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2830    depends on the invariants DEPENDS_ON, that the value used in expressing
2831    it is VALUE, and, in case of iv elimination, that the comparison operator is COMP.  */
2832 
2833 static void
2834 set_use_iv_cost (struct ivopts_data *data,
2835 		 struct iv_use *use, struct iv_cand *cand,
2836 		 comp_cost cost, bitmap depends_on, tree value,
2837 		 enum tree_code comp, int inv_expr_id)
2838 {
2839   unsigned i, s;
2840 
2841   if (infinite_cost_p (cost))
2842     {
2843       BITMAP_FREE (depends_on);
2844       return;
2845     }
2846 
2847   if (data->consider_all_candidates)
2848     {
2849       use->cost_map[cand->id].cand = cand;
2850       use->cost_map[cand->id].cost = cost;
2851       use->cost_map[cand->id].depends_on = depends_on;
2852       use->cost_map[cand->id].value = value;
2853       use->cost_map[cand->id].comp = comp;
2854       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2855       return;
2856     }
2857 
2858   /* n_map_members is a power of two, so this computes modulo.  */
2859   s = cand->id & (use->n_map_members - 1);
2860   for (i = s; i < use->n_map_members; i++)
2861     if (!use->cost_map[i].cand)
2862       goto found;
2863   for (i = 0; i < s; i++)
2864     if (!use->cost_map[i].cand)
2865       goto found;
2866 
2867   gcc_unreachable ();
2868 
2869 found:
2870   use->cost_map[i].cand = cand;
2871   use->cost_map[i].cost = cost;
2872   use->cost_map[i].depends_on = depends_on;
2873   use->cost_map[i].value = value;
2874   use->cost_map[i].comp = comp;
2875   use->cost_map[i].inv_expr_id = inv_expr_id;
2876 }
2877 
2878 /* Gets cost of (USE, CANDIDATE) pair.  */
2879 
2880 static struct cost_pair *
2881 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2882 		 struct iv_cand *cand)
2883 {
2884   unsigned i, s;
2885   struct cost_pair *ret;
2886 
2887   if (!cand)
2888     return NULL;
2889 
2890   if (data->consider_all_candidates)
2891     {
2892       ret = use->cost_map + cand->id;
2893       if (!ret->cand)
2894 	return NULL;
2895 
2896       return ret;
2897     }
2898 
2899   /* n_map_members is a power of two, so this computes modulo.  */
2900   s = cand->id & (use->n_map_members - 1);
2901   for (i = s; i < use->n_map_members; i++)
2902     if (use->cost_map[i].cand == cand)
2903       return use->cost_map + i;
2904     else if (use->cost_map[i].cand == NULL)
2905       return NULL;
2906   for (i = 0; i < s; i++)
2907     if (use->cost_map[i].cand == cand)
2908       return use->cost_map + i;
2909     else if (use->cost_map[i].cand == NULL)
2910       return NULL;
2911 
2912   return NULL;
2913 }
2914 
2915 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2916 static rtx
2917 produce_memory_decl_rtl (tree obj, int *regno)
2918 {
2919   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2920   machine_mode address_mode = targetm.addr_space.address_mode (as);
2921   rtx x;
2922 
2923   gcc_assert (obj);
2924   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2925     {
2926       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2927       x = gen_rtx_SYMBOL_REF (address_mode, name);
2928       SET_SYMBOL_REF_DECL (x, obj);
2929       x = gen_rtx_MEM (DECL_MODE (obj), x);
2930       set_mem_addr_space (x, as);
2931       targetm.encode_section_info (obj, x, true);
2932     }
2933   else
2934     {
2935       x = gen_raw_REG (address_mode, (*regno)++);
2936       x = gen_rtx_MEM (DECL_MODE (obj), x);
2937       set_mem_addr_space (x, as);
2938     }
2939 
2940   return x;
2941 }
2942 
2943 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2944    walk_tree.  DATA contains the actual fake register number.  */
2945 
2946 static tree
2947 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2948 {
2949   tree obj = NULL_TREE;
2950   rtx x = NULL_RTX;
2951   int *regno = (int *) data;
2952 
2953   switch (TREE_CODE (*expr_p))
2954     {
2955     case ADDR_EXPR:
2956       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2957 	   handled_component_p (*expr_p);
2958 	   expr_p = &TREE_OPERAND (*expr_p, 0))
2959 	continue;
2960       obj = *expr_p;
2961       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2962         x = produce_memory_decl_rtl (obj, regno);
2963       break;
2964 
2965     case SSA_NAME:
2966       *ws = 0;
2967       obj = SSA_NAME_VAR (*expr_p);
2968       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2969       if (!obj)
2970 	return NULL_TREE;
2971       if (!DECL_RTL_SET_P (obj))
2972 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2973       break;
2974 
2975     case VAR_DECL:
2976     case PARM_DECL:
2977     case RESULT_DECL:
2978       *ws = 0;
2979       obj = *expr_p;
2980 
2981       if (DECL_RTL_SET_P (obj))
2982 	break;
2983 
2984       if (DECL_MODE (obj) == BLKmode)
2985 	x = produce_memory_decl_rtl (obj, regno);
2986       else
2987 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2988 
2989       break;
2990 
2991     default:
2992       break;
2993     }
2994 
2995   if (x)
2996     {
2997       decl_rtl_to_reset.safe_push (obj);
2998       SET_DECL_RTL (obj, x);
2999     }
3000 
3001   return NULL_TREE;
3002 }
3003 
3004 /* Determines cost of the computation of EXPR.  */
3005 
3006 static unsigned
3007 computation_cost (tree expr, bool speed)
3008 {
3009   rtx_insn *seq;
3010   rtx rslt;
3011   tree type = TREE_TYPE (expr);
3012   unsigned cost;
3013   /* Avoid using hard regs in ways which may be unsupported.  */
3014   int regno = LAST_VIRTUAL_REGISTER + 1;
3015   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3016   enum node_frequency real_frequency = node->frequency;
3017 
3018   node->frequency = NODE_FREQUENCY_NORMAL;
3019   crtl->maybe_hot_insn_p = speed;
3020   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3021   start_sequence ();
3022   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3023   seq = get_insns ();
3024   end_sequence ();
3025   default_rtl_profile ();
3026   node->frequency = real_frequency;
3027 
3028   cost = seq_cost (seq, speed);
3029   if (MEM_P (rslt))
3030     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3031 			  TYPE_ADDR_SPACE (type), speed);
3032   else if (!REG_P (rslt))
3033     cost += set_src_cost (rslt, speed);
3034 
3035   return cost;
3036 }
3037 
3038 /* Returns variable containing the value of candidate CAND at statement AT.  */
3039 
3040 static tree
3041 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3042 {
3043   if (stmt_after_increment (loop, cand, stmt))
3044     return cand->var_after;
3045   else
3046     return cand->var_before;
3047 }
3048 
3049 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3050    same precision that is at least as wide as the precision of TYPE, stores
3051    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3052    type of A and B.  */
3053 
3054 static tree
3055 determine_common_wider_type (tree *a, tree *b)
3056 {
3057   tree wider_type = NULL;
3058   tree suba, subb;
3059   tree atype = TREE_TYPE (*a);
3060 
3061   if (CONVERT_EXPR_P (*a))
3062     {
3063       suba = TREE_OPERAND (*a, 0);
3064       wider_type = TREE_TYPE (suba);
3065       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3066 	return atype;
3067     }
3068   else
3069     return atype;
3070 
3071   if (CONVERT_EXPR_P (*b))
3072     {
3073       subb = TREE_OPERAND (*b, 0);
3074       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3075 	return atype;
3076     }
3077   else
3078     return atype;
3079 
3080   *a = suba;
3081   *b = subb;
3082   return wider_type;
3083 }
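
     /* For example, if *A is (int) X and *B is (int) Y with X and Y of type
        "long", the conversions are stripped, X and Y are stored to *A and *B,
        and "long" is returned, so that the difference can be computed in the
        wider type.  */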
3084 
3085 /* Determines the expression by which USE is expressed from induction variable
3086    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3087    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3088 
3089 static bool
3090 get_computation_aff (struct loop *loop,
3091 		     struct iv_use *use, struct iv_cand *cand, gimple at,
3092 		     struct aff_tree *aff)
3093 {
3094   tree ubase = use->iv->base;
3095   tree ustep = use->iv->step;
3096   tree cbase = cand->iv->base;
3097   tree cstep = cand->iv->step, cstep_common;
3098   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3099   tree common_type, var;
3100   tree uutype;
3101   aff_tree cbase_aff, var_aff;
3102   widest_int rat;
3103 
3104   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3105     {
3106       /* We do not have enough precision to express the values of the use.  */
3107       return false;
3108     }
3109 
3110   var = var_at_stmt (loop, cand, at);
3111   uutype = unsigned_type_for (utype);
3112 
3113   /* If the conversion is not noop, perform it.  */
3114   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3115     {
3116       cstep = fold_convert (uutype, cstep);
3117       cbase = fold_convert (uutype, cbase);
3118       var = fold_convert (uutype, var);
3119     }
3120 
3121   /* Ratio is 1 when computing the value of biv cand by itself.
3122      We can't rely on constant_multiple_of in this case because the
3123      use is created after the original biv is selected.  The call
3124      could fail because of inconsistent fold behavior.  See PR68021
3125      for more information.  */
3126   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3127     {
3128       gcc_assert (is_gimple_assign (use->stmt));
3129       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3130       rat = 1;
3131     }
3132   else if (!constant_multiple_of (ustep, cstep, &rat))
3133     return false;
3134 
3135   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3136      type, we achieve better folding by computing their difference in this
3137      wider type, and casting the result to UUTYPE.  We do not need to worry
3138      about overflows, as all the arithmetic will in the end be performed in UUTYPE
3139      anyway.  */
3140   common_type = determine_common_wider_type (&ubase, &cbase);
3141 
3142   /* use = ubase - ratio * cbase + ratio * var.  */
3143   tree_to_aff_combination (ubase, common_type, aff);
3144   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3145   tree_to_aff_combination (var, uutype, &var_aff);
3146 
3147   /* We need to shift the value if we are after the increment.  */
3148   if (stmt_after_increment (loop, cand, at))
3149     {
3150       aff_tree cstep_aff;
3151 
3152       if (common_type != uutype)
3153 	cstep_common = fold_convert (common_type, cstep);
3154       else
3155 	cstep_common = cstep;
3156 
3157       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3158       aff_combination_add (&cbase_aff, &cstep_aff);
3159     }
3160 
3161   aff_combination_scale (&cbase_aff, -rat);
3162   aff_combination_add (aff, &cbase_aff);
3163   if (common_type != uutype)
3164     aff_combination_convert (aff, uutype);
3165 
3166   aff_combination_scale (&var_aff, rat);
3167   aff_combination_add (aff, &var_aff);
3168 
3169   return true;
3170 }
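
     /* For example, if the use is {0, +, 4} and the candidate is {0, +, 1},
        then RAT is 4 and the resulting affine form of the use is simply
        4 * var, where VAR holds the value of the candidate at AT.  */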
3171 
3172 /* Return the type of USE.  */
3173 
3174 static tree
3175 get_use_type (struct iv_use *use)
3176 {
3177   tree base_type = TREE_TYPE (use->iv->base);
3178   tree type;
3179 
3180   if (use->type == USE_ADDRESS)
3181     {
3182       /* The base_type may be a void pointer.  Create a pointer type based on
3183 	 the mem_ref instead.  */
3184       type = build_pointer_type (TREE_TYPE (*use->op_p));
3185       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3186 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3187     }
3188   else
3189     type = base_type;
3190 
3191   return type;
3192 }
3193 
3194 /* Determines the expression by which USE is expressed from induction variable
3195    CAND at statement AT in LOOP.  The computation is unshared.  */
3196 
3197 static tree
3198 get_computation_at (struct loop *loop,
3199 		    struct iv_use *use, struct iv_cand *cand, gimple at)
3200 {
3201   aff_tree aff;
3202   tree type = get_use_type (use);
3203 
3204   if (!get_computation_aff (loop, use, cand, at, &aff))
3205     return NULL_TREE;
3206   unshare_aff_combination (&aff);
3207   return fold_convert (type, aff_combination_to_tree (&aff));
3208 }
3209 
3210 /* Determines the expression by which USE is expressed from induction variable
3211    CAND in LOOP.  The computation is unshared.  */
3212 
3213 static tree
3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 {
3216   return get_computation_at (loop, use, cand, use->stmt);
3217 }
3218 
3219 /* Adjust the cost COST for being in loop setup rather than loop body.
3220    If we're optimizing for space, the loop setup overhead is constant;
3221    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3222 static unsigned
3223 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3224 {
3225   if (cost == INFTY)
3226     return cost;
3227   else if (optimize_loop_for_speed_p (data->current_loop))
3228     return cost / avg_loop_niter (data->current_loop);
3229   else
3230     return cost;
3231 }
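
     /* For example, a setup cost of 10 in a loop expected to iterate 5 times
        on average is accounted as 2 per iteration when optimizing for speed,
        and as the full 10 when optimizing for size.  */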
3232 
3233 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3234    validity for a memory reference accessing memory of mode MODE in
3235    address space AS.  */
3236 
3237 
3238 bool
3239 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3240 				 addr_space_t as)
3241 {
3242 #define MAX_RATIO 128
3243   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3244   static vec<sbitmap> valid_mult_list;
3245   sbitmap valid_mult;
3246 
3247   if (data_index >= valid_mult_list.length ())
3248     valid_mult_list.safe_grow_cleared (data_index + 1);
3249 
3250   valid_mult = valid_mult_list[data_index];
3251   if (!valid_mult)
3252     {
3253       machine_mode address_mode = targetm.addr_space.address_mode (as);
3254       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3255       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3256       rtx addr, scaled;
3257       HOST_WIDE_INT i;
3258 
3259       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3260       bitmap_clear (valid_mult);
3261       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3262       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3263       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3264 	{
3265 	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3266 	  if (memory_address_addr_space_p (mode, addr, as)
3267 	      || memory_address_addr_space_p (mode, scaled, as))
3268 	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3269 	}
3270 
3271       if (dump_file && (dump_flags & TDF_DETAILS))
3272 	{
3273 	  fprintf (dump_file, "  allowed multipliers:");
3274 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275 	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3276 	      fprintf (dump_file, " %d", (int) i);
3277 	  fprintf (dump_file, "\n");
3278 	  fprintf (dump_file, "\n");
3279 	}
3280 
3281       valid_mult_list[data_index] = valid_mult;
3282     }
3283 
3284   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3285     return false;
3286 
3287   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3288 }
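
     /* For example, on x86 the index of an address may be scaled by 1, 2, 4
        or 8, so for most modes only those ratios would be recorded as
        valid.  */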
3289 
3290 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3291    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3292    variable is omitted.  Compute the cost for a memory reference that accesses
3293    a memory location of mode MEM_MODE in address space AS.
3294 
3295    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3296    size of MEM_MODE / RATIO) is available.  To make this determination, we
3297    look at the size of the increment to be made, which is given in CSTEP.
3298    CSTEP may be zero if the step is unknown.
3299    STMT_AFTER_INC is true iff the statement we're looking at is after the
3300    increment of the original biv.
3301 
3302    TODO -- there must be some better way.  This all is quite crude.  */
3303 
3304 enum ainc_type
3305 {
3306   AINC_PRE_INC,		/* Pre increment.  */
3307   AINC_PRE_DEC,		/* Pre decrement.  */
3308   AINC_POST_INC,	/* Post increment.  */
3309   AINC_POST_DEC,	/* Post decrement.  */
3310   AINC_NONE		/* Also the number of auto increment types.  */
3311 };
3312 
3313 typedef struct address_cost_data_s
3314 {
3315   HOST_WIDE_INT min_offset, max_offset;
3316   unsigned costs[2][2][2][2];
3317   unsigned ainc_costs[AINC_NONE];
3318 } *address_cost_data;
3319 
3320 
3321 static comp_cost
3322 get_address_cost (bool symbol_present, bool var_present,
3323 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3324 		  HOST_WIDE_INT cstep, machine_mode mem_mode,
3325 		  addr_space_t as, bool speed,
3326 		  bool stmt_after_inc, bool *may_autoinc)
3327 {
3328   machine_mode address_mode = targetm.addr_space.address_mode (as);
3329   static vec<address_cost_data> address_cost_data_list;
3330   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3331   address_cost_data data;
3332   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3333   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3334   unsigned cost, acost, complexity;
3335   enum ainc_type autoinc_type;
3336   bool offset_p, ratio_p, autoinc;
3337   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3338   unsigned HOST_WIDE_INT mask;
3339   unsigned bits;
3340 
3341   if (data_index >= address_cost_data_list.length ())
3342     address_cost_data_list.safe_grow_cleared (data_index + 1);
3343 
3344   data = address_cost_data_list[data_index];
3345   if (!data)
3346     {
3347       HOST_WIDE_INT i;
3348       HOST_WIDE_INT rat, off = 0;
3349       int old_cse_not_expected, width;
3350       unsigned sym_p, var_p, off_p, rat_p, add_c;
3351       rtx_insn *seq;
3352       rtx addr, base;
3353       rtx reg0, reg1;
3354 
3355       data = (address_cost_data) xcalloc (1, sizeof (*data));
3356 
3357       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3358 
3359       width = GET_MODE_BITSIZE (address_mode) - 1;
3360       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3361 	width = HOST_BITS_PER_WIDE_INT - 1;
3362       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3363 
3364       for (i = width; i >= 0; i--)
3365 	{
3366 	  off = -((unsigned HOST_WIDE_INT) 1 << i);
3367 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3368 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3369 	    break;
3370 	}
3371       data->min_offset = (i == -1? 0 : off);
3372 
3373       for (i = width; i >= 0; i--)
3374 	{
3375 	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3376 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3377 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3378 	    break;
3379 	  /* For some strict-alignment targets, the offset must be naturally
3380 	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
3381 	  off = mem_mode != QImode
3382 		? ((unsigned HOST_WIDE_INT) 1 << i)
3383 		    - GET_MODE_SIZE (mem_mode)
3384 		: 0;
3385 	  if (off > 0)
3386 	    {
3387 	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
3388 	      if (memory_address_addr_space_p (mem_mode, addr, as))
3389 		break;
3390 	    }
3391 	}
3392       if (i == -1)
3393         off = 0;
3394       data->max_offset = off;
3395 
3396       if (dump_file && (dump_flags & TDF_DETAILS))
3397 	{
3398 	  fprintf (dump_file, "get_address_cost:\n");
3399 	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3400 		   GET_MODE_NAME (mem_mode),
3401 		   data->min_offset);
3402 	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3403 		   GET_MODE_NAME (mem_mode),
3404 		   data->max_offset);
3405 	}
3406 
3407       rat = 1;
3408       for (i = 2; i <= MAX_RATIO; i++)
3409 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
3410 	  {
3411 	    rat = i;
3412 	    break;
3413 	  }
3414 
3415       /* Compute the cost of various addressing modes.  */
3416       acost = 0;
3417       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3418       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3419 
3420       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3421 	  || USE_STORE_PRE_DECREMENT (mem_mode))
3422 	{
3423 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
3424 	  has_predec[mem_mode]
3425 	    = memory_address_addr_space_p (mem_mode, addr, as);
3426 
3427 	  if (has_predec[mem_mode])
3428 	    data->ainc_costs[AINC_PRE_DEC]
3429 	      = address_cost (addr, mem_mode, as, speed);
3430 	}
3431       if (USE_LOAD_POST_DECREMENT (mem_mode)
3432 	  || USE_STORE_POST_DECREMENT (mem_mode))
3433 	{
3434 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
3435 	  has_postdec[mem_mode]
3436 	    = memory_address_addr_space_p (mem_mode, addr, as);
3437 
3438 	  if (has_postdec[mem_mode])
3439 	    data->ainc_costs[AINC_POST_DEC]
3440 	      = address_cost (addr, mem_mode, as, speed);
3441 	}
3442       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3443 	  || USE_STORE_PRE_INCREMENT (mem_mode))
3444 	{
3445 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
3446 	  has_preinc[mem_mode]
3447 	    = memory_address_addr_space_p (mem_mode, addr, as);
3448 
3449 	  if (has_preinc[mem_mode])
3450 	    data->ainc_costs[AINC_PRE_INC]
3451 	      = address_cost (addr, mem_mode, as, speed);
3452 	}
3453       if (USE_LOAD_POST_INCREMENT (mem_mode)
3454 	  || USE_STORE_POST_INCREMENT (mem_mode))
3455 	{
3456 	  addr = gen_rtx_POST_INC (address_mode, reg0);
3457 	  has_postinc[mem_mode]
3458 	    = memory_address_addr_space_p (mem_mode, addr, as);
3459 
3460 	  if (has_postinc[mem_mode])
3461 	    data->ainc_costs[AINC_POST_INC]
3462 	      = address_cost (addr, mem_mode, as, speed);
3463 	}
3464       for (i = 0; i < 16; i++)
3465 	{
3466 	  sym_p = i & 1;
3467 	  var_p = (i >> 1) & 1;
3468 	  off_p = (i >> 2) & 1;
3469 	  rat_p = (i >> 3) & 1;
3470 
3471 	  addr = reg0;
3472 	  if (rat_p)
3473 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3474 				   gen_int_mode (rat, address_mode));
3475 
3476 	  if (var_p)
3477 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3478 
3479 	  if (sym_p)
3480 	    {
3481 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3482 	      /* ??? We can run into trouble with some backends by presenting
3483 		 it with symbols which haven't been properly passed through
3484 		 targetm.encode_section_info.  By setting the local bit, we
3485 		 enhance the probability of things working.  */
3486 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3487 
3488 	      if (off_p)
3489 		base = gen_rtx_fmt_e (CONST, address_mode,
3490 				      gen_rtx_fmt_ee
3491 					(PLUS, address_mode, base,
3492 					 gen_int_mode (off, address_mode)));
3493 	    }
3494 	  else if (off_p)
3495 	    base = gen_int_mode (off, address_mode);
3496 	  else
3497 	    base = NULL_RTX;
3498 
3499 	  if (base)
3500 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3501 
3502 	  start_sequence ();
3503 	  /* To avoid splitting addressing modes, pretend that no cse will
3504 	     follow.  */
3505 	  old_cse_not_expected = cse_not_expected;
3506 	  cse_not_expected = true;
3507 	  addr = memory_address_addr_space (mem_mode, addr, as);
3508 	  cse_not_expected = old_cse_not_expected;
3509 	  seq = get_insns ();
3510 	  end_sequence ();
3511 
3512 	  acost = seq_cost (seq, speed);
3513 	  acost += address_cost (addr, mem_mode, as, speed);
3514 
3515 	  if (!acost)
3516 	    acost = 1;
3517 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
3518 	}
3519 
3520       /* On some targets, it is quite expensive to load a symbol to a register,
3521 	 which makes addresses that contain symbols look much more expensive.
3522 	 However, the symbol will have to be loaded in any case before the
3523 	 loop (and quite likely we have it in a register already), so it does
3524 	 not make much sense to penalize them too heavily.  So make some final
3525 	 tweaks for the SYMBOL_PRESENT modes:
3526 
3527 	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3528 	 var is cheaper, use this mode with a small penalty.
3529 	 If VAR_PRESENT is true, try whether the mode with
3530 	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3531 	 and if this is the case, use it.  */
3532       add_c = add_cost (speed, address_mode);
3533       for (i = 0; i < 8; i++)
3534 	{
3535 	  var_p = i & 1;
3536 	  off_p = (i >> 1) & 1;
3537 	  rat_p = (i >> 2) & 1;
3538 
3539 	  acost = data->costs[0][1][off_p][rat_p] + 1;
3540 	  if (var_p)
3541 	    acost += add_c;
3542 
3543 	  if (acost < data->costs[1][var_p][off_p][rat_p])
3544 	    data->costs[1][var_p][off_p][rat_p] = acost;
3545 	}
3546 
3547       if (dump_file && (dump_flags & TDF_DETAILS))
3548 	{
3549 	  fprintf (dump_file, "Address costs:\n");
3550 
3551 	  for (i = 0; i < 16; i++)
3552 	    {
3553 	      sym_p = i & 1;
3554 	      var_p = (i >> 1) & 1;
3555 	      off_p = (i >> 2) & 1;
3556 	      rat_p = (i >> 3) & 1;
3557 
3558 	      fprintf (dump_file, "  ");
3559 	      if (sym_p)
3560 		fprintf (dump_file, "sym + ");
3561 	      if (var_p)
3562 		fprintf (dump_file, "var + ");
3563 	      if (off_p)
3564 		fprintf (dump_file, "cst + ");
3565 	      if (rat_p)
3566 		fprintf (dump_file, "rat * ");
3567 
3568 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
3569 	      fprintf (dump_file, "index costs %d\n", acost);
3570 	    }
3571 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
3572 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
3573 	    fprintf (dump_file, "  May include autoinc/dec\n");
3574 	  fprintf (dump_file, "\n");
3575 	}
3576 
3577       address_cost_data_list[data_index] = data;
3578     }
3579 
3580   bits = GET_MODE_BITSIZE (address_mode);
3581   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3582   offset &= mask;
3583   if ((offset >> (bits - 1) & 1))
3584     offset |= ~mask;
3585   s_offset = offset;
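       /* At this point OFFSET has been reduced modulo the width of the
          address mode and sign-extended; e.g. with a 32-bit address mode,
          an offset of 0xfffffffc is treated as -4.  */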
3586 
3587   autoinc = false;
3588   autoinc_type = AINC_NONE;
3589   msize = GET_MODE_SIZE (mem_mode);
3590   autoinc_offset = offset;
3591   if (stmt_after_inc)
3592     autoinc_offset += ratio * cstep;
3593   if (symbol_present || var_present || ratio != 1)
3594     autoinc = false;
3595   else
3596     {
3597       if (has_postinc[mem_mode] && autoinc_offset == 0
3598 	  && msize == cstep)
3599 	autoinc_type = AINC_POST_INC;
3600       else if (has_postdec[mem_mode] && autoinc_offset == 0
3601 	       && msize == -cstep)
3602 	autoinc_type = AINC_POST_DEC;
3603       else if (has_preinc[mem_mode] && autoinc_offset == msize
3604 	       && msize == cstep)
3605 	autoinc_type = AINC_PRE_INC;
3606       else if (has_predec[mem_mode] && autoinc_offset == -msize
3607 	       && msize == -cstep)
3608 	autoinc_type = AINC_PRE_DEC;
3609 
3610       if (autoinc_type != AINC_NONE)
3611 	autoinc = true;
3612     }
3613 
3614   cost = 0;
3615   offset_p = (s_offset != 0
3616 	      && data->min_offset <= s_offset
3617 	      && s_offset <= data->max_offset);
3618   ratio_p = (ratio != 1
3619 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3620 
3621   if (ratio != 1 && !ratio_p)
3622     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3623 
3624   if (s_offset && !offset_p && !symbol_present)
3625     cost += add_cost (speed, address_mode);
3626 
3627   if (may_autoinc)
3628     *may_autoinc = autoinc;
3629   if (autoinc)
3630     acost = data->ainc_costs[autoinc_type];
3631   else
3632     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3633   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3634   return new_cost (cost + acost, complexity);
3635 }
3636 
3637  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
3638     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3639     calculating the operands of EXPR.  Returns true if successful, and
3640     returns the cost in COST.  */
3641 
3642 static bool
3643 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3644                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3645 {
3646   comp_cost res;
3647   tree op1 = TREE_OPERAND (expr, 1);
3648   tree cst = TREE_OPERAND (mult, 1);
3649   tree multop = TREE_OPERAND (mult, 0);
3650   int m = exact_log2 (int_cst_value (cst));
3651   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3652   int as_cost, sa_cost;
3653   bool mult_in_op1;
3654 
3655   if (!(m >= 0 && m < maxm))
3656     return false;
3657 
3658   mult_in_op1 = operand_equal_p (op1, mult, 0);
3659 
3660   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3661 
3662   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3663      use that in preference to a shift insn followed by an add insn.  */
3664   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3665              ? shiftadd_cost (speed, mode, m)
3666              : (mult_in_op1
3667                 ? shiftsub1_cost (speed, mode, m)
3668                 : shiftsub0_cost (speed, mode, m)));
3669 
3670   res = new_cost (MIN (as_cost, sa_cost), 0);
3671   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3672 
3673   STRIP_NOPS (multop);
3674   if (!is_gimple_val (multop))
3675     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3676 
3677   *cost = res;
3678   return true;
3679 }
3680 
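/* A small worked example of the shift-add costing above, under assumed
   target costs: for EXPR = y + x * 8 we have CST == 8, so
   m == exact_log2 (8) == 3, and the choice is between

     as_cost = add_cost + shift_cost (3)   -- separate (x << 3) and add
     sa_cost = shiftadd_cost (3)           -- one shift-and-add insn

   On targets where a scaled add (e.g. an x86 "lea" with scale 8) is as
   cheap as a plain add, sa_cost wins and the multiplication is folded
   into the addition for free.  */
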
3681 /* Estimates cost of forcing expression EXPR into a variable.  */
3682 
3683 static comp_cost
3684 force_expr_to_var_cost (tree expr, bool speed)
3685 {
3686   static bool costs_initialized = false;
3687   static unsigned integer_cost [2];
3688   static unsigned symbol_cost [2];
3689   static unsigned address_cost [2];
3690   tree op0, op1;
3691   comp_cost cost0, cost1, cost;
3692   machine_mode mode;
3693 
3694   if (!costs_initialized)
3695     {
3696       tree type = build_pointer_type (integer_type_node);
3697       tree var, addr;
3698       rtx x;
3699       int i;
3700 
3701       var = create_tmp_var_raw (integer_type_node, "test_var");
3702       TREE_STATIC (var) = 1;
3703       x = produce_memory_decl_rtl (var, NULL);
3704       SET_DECL_RTL (var, x);
3705 
3706       addr = build1 (ADDR_EXPR, type, var);
3707 
3708 
3709       for (i = 0; i < 2; i++)
3710 	{
3711 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3712 							     2000), i);
3713 
3714 	  symbol_cost[i] = computation_cost (addr, i) + 1;
3715 
3716 	  address_cost[i]
3717 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3718 	  if (dump_file && (dump_flags & TDF_DETAILS))
3719 	    {
3720 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3721 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3722 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3723 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3724 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3725 	      fprintf (dump_file, "\n");
3726 	    }
3727 	}
3728 
3729       costs_initialized = true;
3730     }
3731 
3732   STRIP_NOPS (expr);
3733 
3734   if (SSA_VAR_P (expr))
3735     return no_cost;
3736 
3737   if (is_gimple_min_invariant (expr))
3738     {
3739       if (TREE_CODE (expr) == INTEGER_CST)
3740 	return new_cost (integer_cost [speed], 0);
3741 
3742       if (TREE_CODE (expr) == ADDR_EXPR)
3743 	{
3744 	  tree obj = TREE_OPERAND (expr, 0);
3745 
3746 	  if (TREE_CODE (obj) == VAR_DECL
3747 	      || TREE_CODE (obj) == PARM_DECL
3748 	      || TREE_CODE (obj) == RESULT_DECL)
3749 	    return new_cost (symbol_cost [speed], 0);
3750 	}
3751 
3752       return new_cost (address_cost [speed], 0);
3753     }
3754 
3755   switch (TREE_CODE (expr))
3756     {
3757     case POINTER_PLUS_EXPR:
3758     case PLUS_EXPR:
3759     case MINUS_EXPR:
3760     case MULT_EXPR:
3761       op0 = TREE_OPERAND (expr, 0);
3762       op1 = TREE_OPERAND (expr, 1);
3763       STRIP_NOPS (op0);
3764       STRIP_NOPS (op1);
3765       break;
3766 
3767     CASE_CONVERT:
3768     case NEGATE_EXPR:
3769       op0 = TREE_OPERAND (expr, 0);
3770       STRIP_NOPS (op0);
3771       op1 = NULL_TREE;
3772       break;
3773 
3774     default:
3775       /* Just an arbitrary value, FIXME.  */
3776       return new_cost (target_spill_cost[speed], 0);
3777     }
3778 
3779   if (op0 == NULL_TREE
3780       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3781     cost0 = no_cost;
3782   else
3783     cost0 = force_expr_to_var_cost (op0, speed);
3784 
3785   if (op1 == NULL_TREE
3786       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3787     cost1 = no_cost;
3788   else
3789     cost1 = force_expr_to_var_cost (op1, speed);
3790 
3791   mode = TYPE_MODE (TREE_TYPE (expr));
3792   switch (TREE_CODE (expr))
3793     {
3794     case POINTER_PLUS_EXPR:
3795     case PLUS_EXPR:
3796     case MINUS_EXPR:
3797     case NEGATE_EXPR:
3798       cost = new_cost (add_cost (speed, mode), 0);
3799       if (TREE_CODE (expr) != NEGATE_EXPR)
3800         {
3801           tree mult = NULL_TREE;
3802           comp_cost sa_cost;
3803           if (TREE_CODE (op1) == MULT_EXPR)
3804             mult = op1;
3805           else if (TREE_CODE (op0) == MULT_EXPR)
3806             mult = op0;
3807 
3808           if (mult != NULL_TREE
3809               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3810               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3811                                     speed, &sa_cost))
3812             return sa_cost;
3813         }
3814       break;
3815 
3816     CASE_CONVERT:
3817       {
3818 	tree inner_mode, outer_mode;
3819 	outer_mode = TREE_TYPE (expr);
3820 	inner_mode = TREE_TYPE (op0);
3821 	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3822 				       TYPE_MODE (inner_mode), speed), 0);
3823       }
3824       break;
3825 
3826     case MULT_EXPR:
3827       if (cst_and_fits_in_hwi (op0))
3828 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3829 					     mode, speed), 0);
3830       else if (cst_and_fits_in_hwi (op1))
3831 	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3832 					     mode, speed), 0);
3833       else
3834 	return new_cost (target_spill_cost [speed], 0);
3835       break;
3836 
3837     default:
3838       gcc_unreachable ();
3839     }
3840 
3841   cost = add_costs (cost, cost0);
3842   cost = add_costs (cost, cost1);
3843 
3844   /* Bound the cost by target_spill_cost.  The parts of complicated
3845      computations often are either loop invariant or at least can
3846      be shared between several iv uses, so letting this grow without
3847      limits would not give reasonable results.  */
3848   if (cost.cost > (int) target_spill_cost [speed])
3849     cost.cost = target_spill_cost [speed];
3850 
3851   return cost;
3852 }
3853 
3854 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3855    invariants the computation depends on.  */
3856 
3857 static comp_cost
3858 force_var_cost (struct ivopts_data *data,
3859 		tree expr, bitmap *depends_on)
3860 {
3861   if (depends_on)
3862     {
3863       fd_ivopts_data = data;
3864       walk_tree (&expr, find_depends, depends_on, NULL);
3865     }
3866 
3867   return force_expr_to_var_cost (expr, data->speed);
3868 }
3869 
3870 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
3871    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3872    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3873    invariants the computation depends on.  */
3874 
3875 static comp_cost
3876 split_address_cost (struct ivopts_data *data,
3877 		    tree addr, bool *symbol_present, bool *var_present,
3878 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3879 {
3880   tree core;
3881   HOST_WIDE_INT bitsize;
3882   HOST_WIDE_INT bitpos;
3883   tree toffset;
3884   machine_mode mode;
3885   int unsignedp, volatilep;
3886 
3887   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3888 			      &unsignedp, &volatilep, false);
3889 
3890   if (toffset != 0
3891       || bitpos % BITS_PER_UNIT != 0
3892       || TREE_CODE (core) != VAR_DECL)
3893     {
3894       *symbol_present = false;
3895       *var_present = true;
3896       fd_ivopts_data = data;
3897       walk_tree (&addr, find_depends, depends_on, NULL);
3898       return new_cost (target_spill_cost[data->speed], 0);
3899     }
3900 
3901   *offset += bitpos / BITS_PER_UNIT;
3902   if (TREE_STATIC (core)
3903       || DECL_EXTERNAL (core))
3904     {
3905       *symbol_present = true;
3906       *var_present = false;
3907       return no_cost;
3908     }
3909 
3910   *symbol_present = false;
3911   *var_present = true;
3912   return no_cost;
3913 }
3914 
3915 /* Estimates cost of expressing difference of addresses E1 - E2 as
3916    var + symbol + offset.  The value of offset is added to OFFSET,
3917    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3918    part is missing.  DEPENDS_ON is a set of the invariants the computation
3919    depends on.  */
3920 
3921 static comp_cost
3922 ptr_difference_cost (struct ivopts_data *data,
3923 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3924 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3925 {
3926   HOST_WIDE_INT diff = 0;
3927   aff_tree aff_e1, aff_e2;
3928   tree type;
3929 
3930   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3931 
3932   if (ptr_difference_const (e1, e2, &diff))
3933     {
3934       *offset += diff;
3935       *symbol_present = false;
3936       *var_present = false;
3937       return no_cost;
3938     }
3939 
3940   if (integer_zerop (e2))
3941     return split_address_cost (data, TREE_OPERAND (e1, 0),
3942 			       symbol_present, var_present, offset, depends_on);
3943 
3944   *symbol_present = false;
3945   *var_present = true;
3946 
3947   type = signed_type_for (TREE_TYPE (e1));
3948   tree_to_aff_combination (e1, type, &aff_e1);
3949   tree_to_aff_combination (e2, type, &aff_e2);
3950   aff_combination_scale (&aff_e2, -1);
3951   aff_combination_add (&aff_e1, &aff_e2);
3952 
3953   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3954 }
3955 
3956 /* Estimates cost of expressing difference E1 - E2 as
3957    var + symbol + offset.  The value of offset is added to OFFSET,
3958    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3959    part is missing.  DEPENDS_ON is a set of the invariants the computation
3960    depends on.  */
3961 
3962 static comp_cost
3963 difference_cost (struct ivopts_data *data,
3964 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3965 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3966 {
3967   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3968   unsigned HOST_WIDE_INT off1, off2;
3969   aff_tree aff_e1, aff_e2;
3970   tree type;
3971 
3972   e1 = strip_offset (e1, &off1);
3973   e2 = strip_offset (e2, &off2);
3974   *offset += off1 - off2;
3975 
3976   STRIP_NOPS (e1);
3977   STRIP_NOPS (e2);
3978 
3979   if (TREE_CODE (e1) == ADDR_EXPR)
3980     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3981 				offset, depends_on);
3982   *symbol_present = false;
3983 
3984   if (operand_equal_p (e1, e2, 0))
3985     {
3986       *var_present = false;
3987       return no_cost;
3988     }
3989 
3990   *var_present = true;
3991 
3992   if (integer_zerop (e2))
3993     return force_var_cost (data, e1, depends_on);
3994 
3995   if (integer_zerop (e1))
3996     {
3997       comp_cost cost = force_var_cost (data, e2, depends_on);
3998       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3999       return cost;
4000     }
4001 
4002   type = signed_type_for (TREE_TYPE (e1));
4003   tree_to_aff_combination (e1, type, &aff_e1);
4004   tree_to_aff_combination (e2, type, &aff_e2);
4005   aff_combination_scale (&aff_e2, -1);
4006   aff_combination_add (&aff_e1, &aff_e2);
4007 
4008   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4009 }
4010 
4011 /* Returns true if AFF1 and AFF2 are identical.  */
4012 
4013 static bool
4014 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4015 {
4016   unsigned i;
4017 
4018   if (aff1->n != aff2->n)
4019     return false;
4020 
4021   for (i = 0; i < aff1->n; i++)
4022     {
4023       if (aff1->elts[i].coef != aff2->elts[i].coef)
4024         return false;
4025 
4026       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4027         return false;
4028     }
4029   return true;
4030 }
4031 
4032 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4033 
4034 static int
4035 get_expr_id (struct ivopts_data *data, tree expr)
4036 {
4037   struct iv_inv_expr_ent ent;
4038   struct iv_inv_expr_ent **slot;
4039 
4040   ent.expr = expr;
4041   ent.hash = iterative_hash_expr (expr, 0);
4042   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4043   if (*slot)
4044     return (*slot)->id;
4045 
4046   *slot = XNEW (struct iv_inv_expr_ent);
4047   (*slot)->expr = expr;
4048   (*slot)->hash = ent.hash;
4049   (*slot)->id = data->inv_expr_id++;
4050   return (*slot)->id;
4051 }
4052 
4053 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4054    requires a new compiler-generated temporary.  Returns -1 otherwise.
4055    ADDRESS_P is a flag indicating if the expression is for address
4056    computation.  */
4057 
4058 static int
4059 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4060                             tree cbase, HOST_WIDE_INT ratio,
4061                             bool address_p)
4062 {
4063   aff_tree ubase_aff, cbase_aff;
4064   tree expr, ub, cb;
4065 
4066   STRIP_NOPS (ubase);
4067   STRIP_NOPS (cbase);
4068   ub = ubase;
4069   cb = cbase;
4070 
4071   if ((TREE_CODE (ubase) == INTEGER_CST)
4072       && (TREE_CODE (cbase) == INTEGER_CST))
4073     return -1;
4074 
4075   /* Strip the constant part.  */
4076   if (TREE_CODE (ubase) == PLUS_EXPR
4077       || TREE_CODE (ubase) == MINUS_EXPR
4078       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4079     {
4080       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4081         ubase = TREE_OPERAND (ubase, 0);
4082     }
4083 
4084   /* Strips the constant part. */
4085   /* Strip the constant part.  */
4086       || TREE_CODE (cbase) == MINUS_EXPR
4087       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4088     {
4089       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4090         cbase = TREE_OPERAND (cbase, 0);
4091     }
4092 
4093   if (address_p)
4094     {
4095       if (((TREE_CODE (ubase) == SSA_NAME)
4096            || (TREE_CODE (ubase) == ADDR_EXPR
4097                && is_gimple_min_invariant (ubase)))
4098           && (TREE_CODE (cbase) == INTEGER_CST))
4099         return -1;
4100 
4101       if (((TREE_CODE (cbase) == SSA_NAME)
4102            || (TREE_CODE (cbase) == ADDR_EXPR
4103                && is_gimple_min_invariant (cbase)))
4104           && (TREE_CODE (ubase) == INTEGER_CST))
4105         return -1;
4106     }
4107 
4108   if (ratio == 1)
4109     {
4110       if (operand_equal_p (ubase, cbase, 0))
4111         return -1;
4112 
4113       if (TREE_CODE (ubase) == ADDR_EXPR
4114           && TREE_CODE (cbase) == ADDR_EXPR)
4115         {
4116           tree usym, csym;
4117 
4118           usym = TREE_OPERAND (ubase, 0);
4119           csym = TREE_OPERAND (cbase, 0);
4120           if (TREE_CODE (usym) == ARRAY_REF)
4121             {
4122               tree ind = TREE_OPERAND (usym, 1);
4123               if (TREE_CODE (ind) == INTEGER_CST
4124                   && tree_fits_shwi_p (ind)
4125                   && tree_to_shwi (ind) == 0)
4126                 usym = TREE_OPERAND (usym, 0);
4127             }
4128           if (TREE_CODE (csym) == ARRAY_REF)
4129             {
4130               tree ind = TREE_OPERAND (csym, 1);
4131               if (TREE_CODE (ind) == INTEGER_CST
4132                   && tree_fits_shwi_p (ind)
4133                   && tree_to_shwi (ind) == 0)
4134                 csym = TREE_OPERAND (csym, 0);
4135             }
4136           if (operand_equal_p (usym, csym, 0))
4137             return -1;
4138         }
4139       /* Now do a more complex comparison.  */
4140       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4141       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4142       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4143         return -1;
4144     }
4145 
4146   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4147   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4148 
4149   aff_combination_scale (&cbase_aff, -1 * ratio);
4150   aff_combination_add (&ubase_aff, &cbase_aff);
4151   expr = aff_combination_to_tree (&ubase_aff);
4152   return get_expr_id (data, expr);
4153 }
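
/* For example, with ubase = x + 4, cbase = x and ratio == 1, stripping
   the constant parts leaves x on both sides, so no compiler-generated
   temporary is needed and -1 is returned.  With ubase = x, cbase = y the
   invariant expression x - y results and is entered into the table.  */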
4154 
4155 
4156 
4157 /* Determines the cost of the computation by which USE is expressed
4158    from induction variable CAND.  If ADDRESS_P is true, we just need
4159    to create an address from it, otherwise we want to get it into a
4160    register.  A set of invariants we depend on is stored in
4161    DEPENDS_ON.  AT is the statement at which the value is computed.
4162    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4163    addressing is likely.  */
4164 
4165 static comp_cost
4166 get_computation_cost_at (struct ivopts_data *data,
4167 			 struct iv_use *use, struct iv_cand *cand,
4168 			 bool address_p, bitmap *depends_on, gimple at,
4169 			 bool *can_autoinc,
4170                          int *inv_expr_id)
4171 {
4172   tree ubase = use->iv->base, ustep = use->iv->step;
4173   tree cbase, cstep;
4174   tree utype = TREE_TYPE (ubase), ctype;
4175   unsigned HOST_WIDE_INT cstepi, offset = 0;
4176   HOST_WIDE_INT ratio, aratio;
4177   bool var_present, symbol_present, stmt_is_after_inc;
4178   comp_cost cost;
4179   widest_int rat;
4180   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4181   machine_mode mem_mode = (address_p
4182 				? TYPE_MODE (TREE_TYPE (*use->op_p))
4183 				: VOIDmode);
4184 
4185   *depends_on = NULL;
4186 
4187   /* Only consider real candidates.  */
4188   if (!cand->iv)
4189     return infinite_cost;
4190 
4191   cbase = cand->iv->base;
4192   cstep = cand->iv->step;
4193   ctype = TREE_TYPE (cbase);
4194 
4195   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4196     {
4197       /* We do not have a precision to express the values of use.  */
4198       return infinite_cost;
4199     }
4200 
4201   if (address_p
4202       || (use->iv->base_object
4203 	  && cand->iv->base_object
4204 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4205 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4206     {
4207       /* Do not try to express address of an object with computation based
4208 	 on address of a different object.  This may cause problems in rtl
4209 	 level alias analysis (that does not expect this to be happening,
4210 	 as this is illegal in C), and would be unlikely to be useful
4211 	 anyway.  */
4212       if (use->iv->base_object
4213 	  && cand->iv->base_object
4214 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4215 	return infinite_cost;
4216     }
4217 
4218   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4219     {
4220       /* TODO -- add direct handling of this case.  */
4221       goto fallback;
4222     }
4223 
4224   /* CSTEPI is removed from the offset in case the statement is after the
4225      increment.  If the step is not constant, we use zero instead.
4226      This is a bit imprecise (there is the extra addition), but
4227      redundancy elimination is likely to transform the code so that
4228      it uses the value of the variable before the increment anyway,
4229      so this is not all that unrealistic.  */
4230   if (cst_and_fits_in_hwi (cstep))
4231     cstepi = int_cst_value (cstep);
4232   else
4233     cstepi = 0;
4234 
4235   if (!constant_multiple_of (ustep, cstep, &rat))
4236     return infinite_cost;
4237 
4238   if (wi::fits_shwi_p (rat))
4239     ratio = rat.to_shwi ();
4240   else
4241     return infinite_cost;
4242 
4243   STRIP_NOPS (cbase);
4244   ctype = TREE_TYPE (cbase);
4245 
4246   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4247 
4248   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4249      or ratio == 1, it is better to handle this like
4250 
4251      ubase - ratio * cbase + ratio * var
4252 
4253      (this also holds in the case ratio == -1, TODO).  */
4254 
4255   if (cst_and_fits_in_hwi (cbase))
4256     {
4257       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4258       cost = difference_cost (data,
4259 			      ubase, build_int_cst (utype, 0),
4260 			      &symbol_present, &var_present, &offset,
4261 			      depends_on);
4262       cost.cost /= avg_loop_niter (data->current_loop);
4263     }
4264   else if (ratio == 1)
4265     {
4266       tree real_cbase = cbase;
4267 
4268       /* Check to see if any adjustment is needed.  */
4269       if (cstepi == 0 && stmt_is_after_inc)
4270         {
4271           aff_tree real_cbase_aff;
4272           aff_tree cstep_aff;
4273 
4274           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4275                                    &real_cbase_aff);
4276           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4277 
4278           aff_combination_add (&real_cbase_aff, &cstep_aff);
4279           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4280         }
4281 
4282       cost = difference_cost (data,
4283 			      ubase, real_cbase,
4284 			      &symbol_present, &var_present, &offset,
4285 			      depends_on);
4286       cost.cost /= avg_loop_niter (data->current_loop);
4287     }
4288   else if (address_p
4289 	   && !POINTER_TYPE_P (ctype)
4290 	   && multiplier_allowed_in_address_p
4291 		(ratio, mem_mode,
4292 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4293     {
4294       cbase
4295 	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4296       cost = difference_cost (data,
4297 			      ubase, cbase,
4298 			      &symbol_present, &var_present, &offset,
4299 			      depends_on);
4300       cost.cost /= avg_loop_niter (data->current_loop);
4301     }
4302   else
4303     {
4304       cost = force_var_cost (data, cbase, depends_on);
4305       cost = add_costs (cost,
4306 			difference_cost (data,
4307 					 ubase, build_int_cst (utype, 0),
4308 					 &symbol_present, &var_present,
4309 					 &offset, depends_on));
4310       cost.cost /= avg_loop_niter (data->current_loop);
4311       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4312     }
4313 
4314   if (inv_expr_id)
4315     {
4316       *inv_expr_id =
4317           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4318       /* Clear depends on.  */
4319       if (*inv_expr_id != -1 && depends_on && *depends_on)
4320         bitmap_clear (*depends_on);
4321     }
4322 
4323   /* If we are after the increment, the value of the candidate is higher by
4324      one iteration.  */
4325   if (stmt_is_after_inc)
4326     offset -= ratio * cstepi;
4327 
4328   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4329      (symbol/var1/const parts may be omitted).  If we are looking for an
4330      address, find the cost of addressing this.  */
4331   if (address_p)
4332     return add_costs (cost,
4333 		      get_address_cost (symbol_present, var_present,
4334 					offset, ratio, cstepi,
4335 					mem_mode,
4336 					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4337 					speed, stmt_is_after_inc,
4338 					can_autoinc));
4339 
4340   /* Otherwise estimate the costs for computing the expression.  */
4341   if (!symbol_present && !var_present && !offset)
4342     {
4343       if (ratio != 1)
4344 	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4345       return cost;
4346     }
4347 
4348   /* Symbol + offset should be compile-time computable, so consider that
4349      they are added once to the variable, if present.  */
4350   if (var_present && (symbol_present || offset))
4351     cost.cost += adjust_setup_cost (data,
4352 				    add_cost (speed, TYPE_MODE (ctype)));
4353 
4354   /* Having offset does not affect runtime cost in case it is added to
4355      symbol, but it increases complexity.  */
4356   if (offset)
4357     cost.complexity++;
4358 
4359   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4360 
4361   aratio = ratio > 0 ? ratio : -ratio;
4362   if (aratio != 1)
4363     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4364   return cost;
4365 
4366 fallback:
4367   if (can_autoinc)
4368     *can_autoinc = false;
4369 
4370   {
4371     /* Just get the expression, expand it and measure the cost.  */
4372     tree comp = get_computation_at (data->current_loop, use, cand, at);
4373 
4374     if (!comp)
4375       return infinite_cost;
4376 
4377     if (address_p)
4378       comp = build_simple_mem_ref (comp);
4379 
4380     return new_cost (computation_cost (comp, speed), 0);
4381   }
4382 }
4383 
4384 /* Determines the cost of the computation by which USE is expressed
4385    from induction variable CAND.  If ADDRESS_P is true, we just need
4386    to create an address from it, otherwise we want to get it into a
4387    register.  A set of invariants we depend on is stored in
4388    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4389    autoinc addressing is likely.  */
4390 
4391 static comp_cost
4392 get_computation_cost (struct ivopts_data *data,
4393 		      struct iv_use *use, struct iv_cand *cand,
4394 		      bool address_p, bitmap *depends_on,
4395                       bool *can_autoinc, int *inv_expr_id)
4396 {
4397   return get_computation_cost_at (data,
4398 				  use, cand, address_p, depends_on, use->stmt,
4399 				  can_autoinc, inv_expr_id);
4400 }
4401 
4402 /* Determines cost of basing replacement of USE on CAND in a generic
4403    expression.  */
4404 
4405 static bool
4406 determine_use_iv_cost_generic (struct ivopts_data *data,
4407 			       struct iv_use *use, struct iv_cand *cand)
4408 {
4409   bitmap depends_on;
4410   comp_cost cost;
4411   int inv_expr_id = -1;
4412 
4413   /* The simple case first -- if we need to express value of the preserved
4414      original biv, the cost is 0.  This also prevents us from counting the
4415      cost of increment twice -- once at this use and once in the cost of
4416      the candidate.  */
4417   if (cand->pos == IP_ORIGINAL
4418       && cand->incremented_at == use->stmt)
4419     {
4420       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4421                        ERROR_MARK, -1);
4422       return true;
4423     }
4424 
4425   cost = get_computation_cost (data, use, cand, false, &depends_on,
4426                                NULL, &inv_expr_id);
4427 
4428   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4429                    inv_expr_id);
4430 
4431   return !infinite_cost_p (cost);
4432 }
4433 
4434 /* Determines cost of basing replacement of USE on CAND in an address.  */
4435 
4436 static bool
4437 determine_use_iv_cost_address (struct ivopts_data *data,
4438 			       struct iv_use *use, struct iv_cand *cand)
4439 {
4440   bitmap depends_on;
4441   bool can_autoinc;
4442   int inv_expr_id = -1;
4443   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4444 					 &can_autoinc, &inv_expr_id);
4445 
4446   if (cand->ainc_use == use)
4447     {
4448       if (can_autoinc)
4449 	cost.cost -= cand->cost_step;
4450       /* If we generated the candidate solely for exploiting autoincrement
4451 	 opportunities, and it turns out it can't be used, set the cost to
4452 	 infinity to make sure we ignore it.  */
4453       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4454 	cost = infinite_cost;
4455     }
4456   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4457                    inv_expr_id);
4458 
4459   return !infinite_cost_p (cost);
4460 }
4461 
4462 /* Computes value of candidate CAND at position AT in iteration NITER, and
4463    stores it to VAL.  */
4464 
4465 static void
4466 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4467 	       aff_tree *val)
4468 {
4469   aff_tree step, delta, nit;
4470   struct iv *iv = cand->iv;
4471   tree type = TREE_TYPE (iv->base);
4472   /* Compute in the unsigned variant of TYPE; unsigned_type_for also
4473      handles pointer types, returning an unsigned integer type of the
4474      same precision, so no separate pointer case is needed.  */
4475   tree steptype = unsigned_type_for (type);
4476 
4477   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4478   aff_combination_convert (&step, steptype);
4479   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4480   aff_combination_convert (&nit, steptype);
4481   aff_combination_mult (&nit, &step, &delta);
4482   if (stmt_after_increment (loop, cand, at))
4483     aff_combination_add (&delta, &step);
4484 
4485   tree_to_aff_combination (iv->base, type, val);
4486   if (!POINTER_TYPE_P (type))
4487     aff_combination_convert (val, steptype);
4488   aff_combination_add (val, &delta);
4489 }
4490 
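/* In affine form the function above computes

     val = base + (niter + after) * step

   where "after" is 1 if AT is after the candidate's increment and 0
   otherwise, i.e. one extra step is counted past the increment.  */
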
4491 /* Returns period of induction variable iv.  */
4492 
4493 static tree
4494 iv_period (struct iv *iv)
4495 {
4496   tree step = iv->step, period, type;
4497   tree pow2div;
4498 
4499   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4500 
4501   type = unsigned_type_for (TREE_TYPE (step));
4502   /* The period of the iv is lcm (step, type_range) / step - 1,
4503      i.e., N * type_range / step - 1.  Since the type range is a power
4504      of two, N == step >> num_of_ending_zeros_binary (step), so the
4505      final result is
4506 
4507        (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4510   pow2div = num_ending_zeros (step);
4511 
4512   period = build_low_bits_mask (type,
4513                                 (TYPE_PRECISION (type)
4514                                  - tree_to_uhwi (pow2div)));
4515 
4516   return period;
4517 }
4518 
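/* A worked instance of iv_period: for a 32-bit iv with step 4,
   num_ending_zeros (4) == 2 and the period is

     (2^32 >> 2) - 1 == 0x3fffffff,

   matching the 2^30 distinct values the iv takes before wrapping back
   to its initial value.  */
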
4519 /* Returns the comparison operator used when eliminating the iv USE.  */
4520 
4521 static enum tree_code
4522 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4523 {
4524   struct loop *loop = data->current_loop;
4525   basic_block ex_bb;
4526   edge exit;
4527 
4528   ex_bb = gimple_bb (use->stmt);
4529   exit = EDGE_SUCC (ex_bb, 0);
4530   if (flow_bb_inside_loop_p (loop, exit->dest))
4531     exit = EDGE_SUCC (ex_bb, 1);
4532 
4533   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4534 }
4535 
4536 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For
4537    now, we only detect the situation where BASE = SOMETHING + OFFSET, with
4538    the calculation performed in a non-wrapping type.
4539 
4540    TODO: More generally, we could test for the situation that
4541 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4542 	 This would require knowing the sign of OFFSET.  */
4543 
4544 static bool
4545 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4546 {
4547   enum tree_code code;
4548   tree e1, e2;
4549   aff_tree aff_e1, aff_e2, aff_offset;
4550 
4551   if (!nowrap_type_p (TREE_TYPE (base)))
4552     return false;
4553 
4554   base = expand_simple_operations (base);
4555 
4556   if (TREE_CODE (base) == SSA_NAME)
4557     {
4558       gimple stmt = SSA_NAME_DEF_STMT (base);
4559 
4560       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4561 	return false;
4562 
4563       code = gimple_assign_rhs_code (stmt);
4564       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4565 	return false;
4566 
4567       e1 = gimple_assign_rhs1 (stmt);
4568       e2 = gimple_assign_rhs2 (stmt);
4569     }
4570   else
4571     {
4572       code = TREE_CODE (base);
4573       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4574 	return false;
4575       e1 = TREE_OPERAND (base, 0);
4576       e2 = TREE_OPERAND (base, 1);
4577     }
4578 
4579   /* Use affine expansion as deeper inspection to prove the equality.  */
4580   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4581 				  &aff_e2, &data->name_expansion_cache);
4582   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4583 				  &aff_offset, &data->name_expansion_cache);
4584   aff_combination_scale (&aff_offset, -1);
4585   switch (code)
4586     {
4587     case PLUS_EXPR:
4588       aff_combination_add (&aff_e2, &aff_offset);
4589       if (aff_combination_zero_p (&aff_e2))
4590 	return true;
4591 
4592       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4593 				      &aff_e1, &data->name_expansion_cache);
4594       aff_combination_add (&aff_e1, &aff_offset);
4595       return aff_combination_zero_p (&aff_e1);
4596 
4597     case POINTER_PLUS_EXPR:
4598       aff_combination_add (&aff_e2, &aff_offset);
4599       return aff_combination_zero_p (&aff_e2);
4600 
4601     default:
4602       return false;
4603     }
4604 }
4605 
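/* The typical case proved by the function above is BASE defined as
   "something + OFFSET" (or its POINTER_PLUS_EXPR form): the affine
   expansions of the addend and of OFFSET cancel exactly, so
   BASE - OFFSET == something, which was computed in a non-wrapping type
   and therefore did not overflow.  */
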
4606 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4607    comparison with CAND.  NITER describes the number of iterations of
4608    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4609 
4610    We aim to handle the following situation:
4611 
4612    sometype *base, *p;
4613    int a, b, i;
4614 
4615    i = a;
4616    p = p_0 = base + a;
4617 
4618    do
4619      {
4620        bla (*p);
4621        p++;
4622        i++;
4623      }
4624    while (i < b);
4625 
4626    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4627    We aim to optimize this to
4628 
4629    p = p_0 = base + a;
4630    do
4631      {
4632        bla (*p);
4633        p++;
4634      }
4635    while (p < p_0 - a + b);
4636 
4637    This preserves correctness, since the pointer arithmetic does not
4638    overflow.  More precisely:
4639 
4640    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4641       overflow in computing it or the values of p.
4642    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4643       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4644 
4645 static bool
4646 iv_elimination_compare_lt (struct ivopts_data *data,
4647                            struct iv_cand *cand, enum tree_code *comp_p,
4648 			   struct tree_niter_desc *niter)
4649 {
4650   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4651   struct aff_tree nit, tmpa, tmpb;
4652   enum tree_code comp;
4653   HOST_WIDE_INT step;
4654 
4655   /* We need to know that the candidate induction variable does not overflow.
4656      While more complex analysis may be used to prove this, for now just
4657      check that the variable appears in the original program and that it
4658      is computed in a type that guarantees no overflows.  */
4659   cand_type = TREE_TYPE (cand->iv->base);
4660   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4661     return false;
4662 
4663   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4664      the calculation of the BOUND could overflow, making the comparison
4665      invalid.  */
4666   if (!data->loop_single_exit_p)
4667     return false;
4668 
4669   /* We need to be able to decide whether candidate is increasing or decreasing
4670      in order to choose the right comparison operator.  */
4671   if (!cst_and_fits_in_hwi (cand->iv->step))
4672     return false;
4673   step = int_cst_value (cand->iv->step);
4674 
4675   /* Check that the number of iterations matches the expected pattern:
4676      a + 1 > b ? 0 : b - a - 1.  */
4677   mbz = niter->may_be_zero;
4678   if (TREE_CODE (mbz) == GT_EXPR)
4679     {
4680       /* Handle a + 1 > b.  */
4681       tree op0 = TREE_OPERAND (mbz, 0);
4682       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4683 	{
4684 	  a = TREE_OPERAND (op0, 0);
4685 	  b = TREE_OPERAND (mbz, 1);
4686 	}
4687       else
4688 	return false;
4689     }
4690   else if (TREE_CODE (mbz) == LT_EXPR)
4691     {
4692       tree op1 = TREE_OPERAND (mbz, 1);
4693 
4694       /* Handle b < a + 1.  */
4695       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4696         {
4697           a = TREE_OPERAND (op1, 0);
4698           b = TREE_OPERAND (mbz, 0);
4699         }
4700       else
4701 	return false;
4702     }
4703   else
4704     return false;
4705 
4706   /* Expected number of iterations is B - A - 1.  Check that it matches
4707      the actual number, i.e., that B - A - NITER = 1.  */
4708   tree_to_aff_combination (niter->niter, nit_type, &nit);
4709   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4710   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4711   aff_combination_scale (&nit, -1);
4712   aff_combination_scale (&tmpa, -1);
4713   aff_combination_add (&tmpb, &tmpa);
4714   aff_combination_add (&tmpb, &nit);
4715   if (tmpb.n != 0 || tmpb.offset != 1)
4716     return false;
4717 
4718   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4719      overflow.  */
4720   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4721 			cand->iv->step,
4722 			fold_convert (TREE_TYPE (cand->iv->step), a));
4723   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4724     return false;
4725 
4726   /* Determine the new comparison operator.  */
4727   comp = step < 0 ? GT_EXPR : LT_EXPR;
4728   if (*comp_p == NE_EXPR)
4729     *comp_p = comp;
4730   else if (*comp_p == EQ_EXPR)
4731     *comp_p = invert_tree_comparison (comp, false);
4732   else
4733     gcc_unreachable ();
4734 
4735   return true;
4736 }
4737 
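/* A numeric instance of the pattern handled above: with a = 2 and b = 5
   the body runs for i = 2, 3, 4, so the latch is taken b - a - 1 == 2
   times and may_be_zero ("a + 1 > b") is false; the rewritten exit
   "p < p_0 - a + b" stops p exactly at base + 5.  */
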
4738 /* Check whether it is possible to express the condition in USE by comparison
4739    of candidate CAND.  If so, store the value compared with to BOUND, and the
4740    comparison operator to COMP.  */
4741 
4742 static bool
4743 may_eliminate_iv (struct ivopts_data *data,
4744 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
4745 		  enum tree_code *comp)
4746 {
4747   basic_block ex_bb;
4748   edge exit;
4749   tree period;
4750   struct loop *loop = data->current_loop;
4751   aff_tree bnd;
4752   struct tree_niter_desc *desc = NULL;
4753 
4754   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4755     return false;
4756 
4757   /* For now works only for exits that dominate the loop latch.
4758      TODO: extend to other conditions inside loop body.  */
4759   ex_bb = gimple_bb (use->stmt);
4760   if (use->stmt != last_stmt (ex_bb)
4761       || gimple_code (use->stmt) != GIMPLE_COND
4762       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4763     return false;
4764 
4765   exit = EDGE_SUCC (ex_bb, 0);
4766   if (flow_bb_inside_loop_p (loop, exit->dest))
4767     exit = EDGE_SUCC (ex_bb, 1);
4768   if (flow_bb_inside_loop_p (loop, exit->dest))
4769     return false;
4770 
4771   desc = niter_for_exit (data, exit);
4772   if (!desc)
4773     return false;
4774 
4775   /* Determine whether we can use the variable to test the exit condition.
4776      This is the case iff the period of the induction variable is greater
4777      than the number of iterations for which the exit condition is true.  */
4778   period = iv_period (cand->iv);
4779 
4780   /* If the number of iterations is constant, compare against it directly.  */
4781   if (TREE_CODE (desc->niter) == INTEGER_CST)
4782     {
4783       /* See cand_value_at.  */
4784       if (stmt_after_increment (loop, cand, use->stmt))
4785         {
4786           if (!tree_int_cst_lt (desc->niter, period))
4787             return false;
4788         }
4789       else
4790         {
4791           if (tree_int_cst_lt (period, desc->niter))
4792             return false;
4793         }
4794     }
4795 
4796   /* If not, and if this is the only possible exit of the loop, see whether
4797      we can get a conservative estimate on the number of iterations of the
4798      entire loop and compare against that instead.  */
4799   else
4800     {
4801       widest_int period_value, max_niter;
4802 
4803       max_niter = desc->max;
4804       if (stmt_after_increment (loop, cand, use->stmt))
4805         max_niter += 1;
4806       period_value = wi::to_widest (period);
4807       if (wi::gtu_p (max_niter, period_value))
4808         {
4809           /* See if we can take advantage of inferred loop bound information.  */
4810           if (data->loop_single_exit_p)
4811             {
4812               if (!max_loop_iterations (loop, &max_niter))
4813                 return false;
4814               /* The loop bound is already adjusted by adding 1.  */
4815               if (wi::gtu_p (max_niter, period_value))
4816                 return false;
4817             }
4818           else
4819             return false;
4820         }
4821     }
4822 
4823   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4824 
4825   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4826 			 aff_combination_to_tree (&bnd));
4827   *comp = iv_elimination_compare (data, use);
4828 
4829   /* It is unlikely that computing the number of iterations using division
4830      would be more profitable than keeping the original induction variable.  */
4831   if (expression_expensive_p (*bound))
4832     return false;
4833 
4834   /* Sometimes it is possible to handle the situation that the number of
4835      iterations may be zero unless additional assumptions hold, by using
4836      < instead of != in the exit condition.
4837 
4838      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4839 	   base the exit condition on it.  However, that is often too
4840 	   expensive.  */
4841   if (!integer_zerop (desc->may_be_zero))
4842     return iv_elimination_compare_lt (data, cand, comp, desc);
4843 
4844   return true;
4845 }
4846 
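/* An example of the period test in may_eliminate_iv: an 8-bit iv with
   step 1 has period 255, so it cannot replace the exit test of a loop
   that iterates 300 times -- the iv would revisit every value before
   the exit condition first became true.  */
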
4847  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4848     be copied, if it is used in the loop body and DATA->body_includes_call.  */
4849 
4850 static int
4851 parm_decl_cost (struct ivopts_data *data, tree bound)
4852 {
4853   tree sbound = bound;
4854   STRIP_NOPS (sbound);
4855 
4856   if (TREE_CODE (sbound) == SSA_NAME
4857       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4858       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4859       && data->body_includes_call)
4860     return COSTS_N_INSNS (1);
4861 
4862   return 0;
4863 }
4864 
4865 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4866 
4867 static bool
4868 determine_use_iv_cost_condition (struct ivopts_data *data,
4869 				 struct iv_use *use, struct iv_cand *cand)
4870 {
4871   tree bound = NULL_TREE;
4872   struct iv *cmp_iv;
4873   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4874   comp_cost elim_cost, express_cost, cost, bound_cost;
4875   bool ok;
4876   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4877   tree *control_var, *bound_cst;
4878   enum tree_code comp = ERROR_MARK;
4879 
4880   /* Only consider real candidates.  */
4881   if (!cand->iv)
4882     {
4883       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4884 		       ERROR_MARK, -1);
4885       return false;
4886     }
4887 
4888   /* Try iv elimination.  */
4889   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4890     {
4891       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4892       if (elim_cost.cost == 0)
4893         elim_cost.cost = parm_decl_cost (data, bound);
4894       else if (TREE_CODE (bound) == INTEGER_CST)
4895         elim_cost.cost = 0;
4896       /* If we replace a loop condition 'i < n' with 'p < base + n',
4897 	 depends_on_elim will have 'base' and 'n' set, which implies
4898 	 that both 'base' and 'n' will be live during the loop.	 More likely,
4899 	 'base + n' will be loop invariant, resulting in only one live value
4900 	 during the loop.  So in that case we clear depends_on_elim and set
4901         elim_inv_expr_id instead.  */
4902       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4903 	{
4904 	  elim_inv_expr_id = get_expr_id (data, bound);
4905 	  bitmap_clear (depends_on_elim);
4906 	}
4907       /* The bound is a loop invariant, so it will be only computed
4908 	 once.  */
4909       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4910     }
4911   else
4912     elim_cost = infinite_cost;
4913 
4914   /* Try expressing the original giv.  If it is compared with an invariant,
4915      note that we cannot get rid of it.  */
4916   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4917 			      NULL, &cmp_iv);
4918   gcc_assert (ok);
4919 
4920   /* When the condition is a comparison of the candidate IV against
4921      zero, prefer this IV.
4922 
4923      TODO: The constant that we're subtracting from the cost should
4924      be target-dependent.  This information should be added to the
4925      target costs for each backend.  */
4926   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4927       && integer_zerop (*bound_cst)
4928       && (operand_equal_p (*control_var, cand->var_after, 0)
4929 	  || operand_equal_p (*control_var, cand->var_before, 0)))
4930     elim_cost.cost -= 1;
4931 
4932   express_cost = get_computation_cost (data, use, cand, false,
4933 				       &depends_on_express, NULL,
4934                                        &express_inv_expr_id);
4935   fd_ivopts_data = data;
4936   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4937 
4938   /* Count the cost of the original bound as well.  */
4939   bound_cost = force_var_cost (data, *bound_cst, NULL);
4940   if (bound_cost.cost == 0)
4941     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4942   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4943     bound_cost.cost = 0;
4944   express_cost.cost += bound_cost.cost;
4945 
4946   /* Choose the better approach, preferring the eliminated IV. */
4947   if (compare_costs (elim_cost, express_cost) <= 0)
4948     {
4949       cost = elim_cost;
4950       depends_on = depends_on_elim;
4951       depends_on_elim = NULL;
4952       inv_expr_id = elim_inv_expr_id;
4953     }
4954   else
4955     {
4956       cost = express_cost;
4957       depends_on = depends_on_express;
4958       depends_on_express = NULL;
4959       bound = NULL_TREE;
4960       comp = ERROR_MARK;
4961       inv_expr_id = express_inv_expr_id;
4962     }
4963 
4964   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4965 
4966   if (depends_on_elim)
4967     BITMAP_FREE (depends_on_elim);
4968   if (depends_on_express)
4969     BITMAP_FREE (depends_on_express);
4970 
4971   return !infinite_cost_p (cost);
4972 }
4973 
4974 /* Determines cost of basing replacement of USE on CAND.  Returns false
4975    if USE cannot be based on CAND.  */
4976 
4977 static bool
4978 determine_use_iv_cost (struct ivopts_data *data,
4979 		       struct iv_use *use, struct iv_cand *cand)
4980 {
4981   switch (use->type)
4982     {
4983     case USE_NONLINEAR_EXPR:
4984       return determine_use_iv_cost_generic (data, use, cand);
4985 
4986     case USE_ADDRESS:
4987       return determine_use_iv_cost_address (data, use, cand);
4988 
4989     case USE_COMPARE:
4990       return determine_use_iv_cost_condition (data, use, cand);
4991 
4992     default:
4993       gcc_unreachable ();
4994     }
4995 }
4996 
4997 /* Return true if get_computation_cost indicates that autoincrement is
4998    a possibility for the pair of USE and CAND, false otherwise.  */
4999 
5000 static bool
5001 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5002 			   struct iv_cand *cand)
5003 {
5004   bitmap depends_on;
5005   bool can_autoinc;
5006   comp_cost cost;
5007 
5008   if (use->type != USE_ADDRESS)
5009     return false;
5010 
5011   cost = get_computation_cost (data, use, cand, true, &depends_on,
5012 			       &can_autoinc, NULL);
5013 
5014   BITMAP_FREE (depends_on);
5015 
5016   return !infinite_cost_p (cost) && can_autoinc;
5017 }
5018 
5019 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5020    use that allows autoincrement, and set their AINC_USE if possible.  */
5021 
5022 static void
5023 set_autoinc_for_original_candidates (struct ivopts_data *data)
5024 {
5025   unsigned i, j;
5026 
5027   for (i = 0; i < n_iv_cands (data); i++)
5028     {
5029       struct iv_cand *cand = iv_cand (data, i);
5030       struct iv_use *closest_before = NULL;
5031       struct iv_use *closest_after = NULL;
5032       if (cand->pos != IP_ORIGINAL)
5033 	continue;
5034 
5035       for (j = 0; j < n_iv_uses (data); j++)
5036 	{
5037 	  struct iv_use *use = iv_use (data, j);
5038 	  unsigned uid = gimple_uid (use->stmt);
5039 
5040 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5041 	    continue;
5042 
5043 	  if (uid < gimple_uid (cand->incremented_at)
5044 	      && (closest_before == NULL
5045 		  || uid > gimple_uid (closest_before->stmt)))
5046 	    closest_before = use;
5047 
5048 	  if (uid > gimple_uid (cand->incremented_at)
5049 	      && (closest_after == NULL
5050 		  || uid < gimple_uid (closest_after->stmt)))
5051 	    closest_after = use;
5052 	}
5053 
5054       if (closest_before != NULL
5055 	  && autoinc_possible_for_pair (data, closest_before, cand))
5056 	cand->ainc_use = closest_before;
5057       else if (closest_after != NULL
5058 	       && autoinc_possible_for_pair (data, closest_after, cand))
5059 	cand->ainc_use = closest_after;
5060     }
5061 }
5062 
5063 /* Finds the candidates for the induction variables.  */
5064 
5065 static void
5066 find_iv_candidates (struct ivopts_data *data)
5067 {
5068   /* Add commonly used ivs.  */
5069   add_standard_iv_candidates (data);
5070 
5071   /* Add old induction variables.  */
5072   add_old_ivs_candidates (data);
5073 
5074   /* Add induction variables derived from uses.  */
5075   add_derived_ivs_candidates (data);
5076 
5077   set_autoinc_for_original_candidates (data);
5078 
5079   /* Record the important candidates.  */
5080   record_important_candidates (data);
5081 }
5082 
5083 /* Determines costs of basing the use of the iv on an iv candidate.  */
5084 
5085 static void
5086 determine_use_iv_costs (struct ivopts_data *data)
5087 {
5088   unsigned i, j;
5089   struct iv_use *use;
5090   struct iv_cand *cand;
5091   bitmap to_clear = BITMAP_ALLOC (NULL);
5092 
5093   alloc_use_cost_map (data);
5094 
5095   for (i = 0; i < n_iv_uses (data); i++)
5096     {
5097       use = iv_use (data, i);
5098 
5099       if (data->consider_all_candidates)
5100 	{
5101 	  for (j = 0; j < n_iv_cands (data); j++)
5102 	    {
5103 	      cand = iv_cand (data, j);
5104 	      determine_use_iv_cost (data, use, cand);
5105 	    }
5106 	}
5107       else
5108 	{
5109 	  bitmap_iterator bi;
5110 
5111 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5112 	    {
5113 	      cand = iv_cand (data, j);
5114 	      if (!determine_use_iv_cost (data, use, cand))
5115 		bitmap_set_bit (to_clear, j);
5116 	    }
5117 
5118 	  /* Remove the candidates for which the cost is infinite from
5119 	     the list of related candidates.  */
5120 	  bitmap_and_compl_into (use->related_cands, to_clear);
5121 	  bitmap_clear (to_clear);
5122 	}
5123     }
5124 
5125   BITMAP_FREE (to_clear);
5126 
5127   if (dump_file && (dump_flags & TDF_DETAILS))
5128     {
5129       fprintf (dump_file, "Use-candidate costs:\n");
5130 
5131       for (i = 0; i < n_iv_uses (data); i++)
5132 	{
5133 	  use = iv_use (data, i);
5134 
5135 	  fprintf (dump_file, "Use %d:\n", i);
5136 	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5137 	  for (j = 0; j < use->n_map_members; j++)
5138 	    {
5139 	      if (!use->cost_map[j].cand
5140 		  || infinite_cost_p (use->cost_map[j].cost))
5141 		continue;
5142 
5143 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5144 		       use->cost_map[j].cand->id,
5145 		       use->cost_map[j].cost.cost,
5146 		       use->cost_map[j].cost.complexity);
5147 	      if (use->cost_map[j].depends_on)
5148 		bitmap_print (dump_file,
5149 			      use->cost_map[j].depends_on, "","");
5150               if (use->cost_map[j].inv_expr_id != -1)
5151                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5152 	      fprintf (dump_file, "\n");
5153 	    }
5154 
5155 	  fprintf (dump_file, "\n");
5156 	}
5157       fprintf (dump_file, "\n");
5158     }
5159 }
5160 
5161 /* Determines cost of the candidate CAND.  */
5162 
5163 static void
5164 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5165 {
5166   comp_cost cost_base;
5167   unsigned cost, cost_step;
5168   tree base;
5169 
5170   if (!cand->iv)
5171     {
5172       cand->cost = 0;
5173       return;
5174     }
5175 
5176   /* There are two costs associated with the candidate -- its increment
5177      and its initialization.  The second is almost negligible for any loop
5178      that rolls enough, so we give it only very little weight.  */
5179 
5180   base = cand->iv->base;
5181   cost_base = force_var_cost (data, base, NULL);
5182   /* Only exceptionally will the iv register happen to be initialized with
5183      the proper value at no cost.  In general, there will at least be a
5184      regcopy or a constant set.  */
5185   if (cost_base.cost == 0)
5186     cost_base.cost = COSTS_N_INSNS (1);
5187   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5188 
5189   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5190 
5191   /* Prefer the original ivs unless we may gain something by replacing
5192      them; the reason is to make debugging simpler.  This is not relevant
5193      for artificial ivs created by other optimization passes.  */
5194   if (cand->pos != IP_ORIGINAL
5195       || !SSA_NAME_VAR (cand->var_before)
5196       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5197     cost++;
5198 
5199   /* Prefer not to insert statements into latch unless there are some
5200      already (so that we do not create unnecessary jumps).  */
5201   if (cand->pos == IP_END
5202       && empty_block_p (ip_end_pos (data->current_loop)))
5203     cost++;
5204 
5205   cand->cost = cost;
5206   cand->cost_step = cost_step;
5207 }
5208 
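/* E.g. a candidate whose base needs one addition to set up, in a loop
   expected to roll ten times, costs roughly cost_step + add_cost / 10
   when optimizing for speed: the setup is amortized over the iterations
   (see adjust_setup_cost), while the per-iteration increment is paid in
   full.  */
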
5209 /* Determines costs of computation of the candidates.  */
5210 
5211 static void
5212 determine_iv_costs (struct ivopts_data *data)
5213 {
5214   unsigned i;
5215 
5216   if (dump_file && (dump_flags & TDF_DETAILS))
5217     {
5218       fprintf (dump_file, "Candidate costs:\n");
5219       fprintf (dump_file, "  cand\tcost\n");
5220     }
5221 
5222   for (i = 0; i < n_iv_cands (data); i++)
5223     {
5224       struct iv_cand *cand = iv_cand (data, i);
5225 
5226       determine_iv_cost (data, cand);
5227 
5228       if (dump_file && (dump_flags & TDF_DETAILS))
5229 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5230     }
5231 
5232   if (dump_file && (dump_flags & TDF_DETAILS))
5233     fprintf (dump_file, "\n");
5234 }
5235 
5236 /* Calculates cost for having SIZE induction variables.  */
5237 
5238 static unsigned
5239 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5240 {
5241   /* We add size to the cost, so that we prefer eliminating ivs
5242      if possible.  */
5243   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5244 					    data->body_includes_call);
5245 }
5246 
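/* Roughly speaking, the register-pressure component from
   estimate_reg_pressure_cost stays small while SIZE plus the registers
   already live in the loop fits in the target's available registers,
   and then grows with the cost of the implied spills; hence the "cost
   for size" table dumped below is flat at first and climbs past that
   point.  */
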
5247 /* For each size of the induction variable set determine the penalty.  */
5248 
5249 static void
5250 determine_set_costs (struct ivopts_data *data)
5251 {
5252   unsigned j, n;
5253   gphi *phi;
5254   gphi_iterator psi;
5255   tree op;
5256   struct loop *loop = data->current_loop;
5257   bitmap_iterator bi;
5258 
5259   if (dump_file && (dump_flags & TDF_DETAILS))
5260     {
5261       fprintf (dump_file, "Global costs:\n");
5262       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5263       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5264       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5265       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5266     }
5267 
5268   n = 0;
5269   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5270     {
5271       phi = psi.phi ();
5272       op = PHI_RESULT (phi);
5273 
5274       if (virtual_operand_p (op))
5275 	continue;
5276 
5277       if (get_iv (data, op))
5278 	continue;
5279 
5280       n++;
5281     }
5282 
5283   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5284     {
5285       struct version_info *info = ver_info (data, j);
5286 
5287       if (info->inv_id && info->has_nonlin_use)
5288 	n++;
5289     }
5290 
5291   data->regs_used = n;
5292   if (dump_file && (dump_flags & TDF_DETAILS))
5293     fprintf (dump_file, "  regs_used %d\n", n);
5294 
5295   if (dump_file && (dump_flags & TDF_DETAILS))
5296     {
5297       fprintf (dump_file, "  cost for size:\n");
5298       fprintf (dump_file, "  ivs\tcost\n");
5299       for (j = 0; j <= 2 * target_avail_regs; j++)
5300 	fprintf (dump_file, "  %d\t%d\n", j,
5301 		 ivopts_global_cost_for_size (data, j));
5302       fprintf (dump_file, "\n");
5303     }
5304 }
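
/* For example, in a hypothetical loop whose header has two non-iv PHI
   nodes (say an accumulator and a flag) and that uses one invariant in a
   nonlinear expression, the code above computes regs_used == 3.  These
   registers are live regardless of the chosen iv set and are passed to
   estimate_reg_pressure_cost as the baseline register pressure.  */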
5305 
5306 /* Returns true if A is a cheaper cost pair than B.  */
5307 
5308 static bool
5309 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5310 {
5311   int cmp;
5312 
5313   if (!a)
5314     return false;
5315 
5316   if (!b)
5317     return true;
5318 
5319   cmp = compare_costs (a->cost, b->cost);
5320   if (cmp < 0)
5321     return true;
5322 
5323   if (cmp > 0)
5324     return false;
5325 
5326   /* In case the costs are the same, prefer the cheaper candidate.  */
5327   if (a->cand->cost < b->cand->cost)
5328     return true;
5329 
5330   return false;
5331 }
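
/* An example of the ordering above, with hypothetical numbers: for
   A = {cost 4, cand cost 5} and B = {cost 4, cand cost 7}, the use costs
   compare equal, so A wins on the cheaper candidate; a NULL pair is
   treated as infinitely expensive, so any non-NULL pair beats it.  */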
5332 
5333 
5334 /* Returns the cost pair of the candidate by which USE is expressed in IVS.  */
5335 
5336 static struct cost_pair *
5337 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5338 {
5339   return ivs->cand_for_use[use->id];
5340 }
5341 
5342 /* Computes the cost field of the IVS structure.  */
5343 
5344 static void
5345 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5346 {
5347   comp_cost cost = ivs->cand_use_cost;
5348 
5349   cost.cost += ivs->cand_cost;
5350 
5351   cost.cost += ivopts_global_cost_for_size (data,
5352                                             ivs->n_regs + ivs->num_used_inv_expr);
5353 
5354   ivs->cost = cost;
5355 }
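
/* In other words, the total is (a sketch with hypothetical numbers)

     ivs->cost = cand_use_cost                e.g. 12
                 + cand_cost                  e.g.  9
                 + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   so live invariant expressions are charged against register pressure in
   the same way as the candidate registers themselves.  */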
5356 
5357 /* Removes the invariants in set INVS from set IVS.  */
5358 
5359 static void
5360 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5361 {
5362   bitmap_iterator bi;
5363   unsigned iid;
5364 
5365   if (!invs)
5366     return;
5367 
5368   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5369     {
5370       ivs->n_invariant_uses[iid]--;
5371       if (ivs->n_invariant_uses[iid] == 0)
5372         ivs->n_regs--;
5373     }
5374 }
5375 
5376 /* Set USE not to be expressed by any candidate in IVS.  */
5377 
5378 static void
5379 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5380 		 struct iv_use *use)
5381 {
5382   unsigned uid = use->id, cid;
5383   struct cost_pair *cp;
5384 
5385   cp = ivs->cand_for_use[uid];
5386   if (!cp)
5387     return;
5388   cid = cp->cand->id;
5389 
5390   ivs->bad_uses++;
5391   ivs->cand_for_use[uid] = NULL;
5392   ivs->n_cand_uses[cid]--;
5393 
5394   if (ivs->n_cand_uses[cid] == 0)
5395     {
5396       bitmap_clear_bit (ivs->cands, cid);
5397       /* Do not count the pseudocandidates.  */
5398       if (cp->cand->iv)
5399 	ivs->n_regs--;
5400       ivs->n_cands--;
5401       ivs->cand_cost -= cp->cand->cost;
5402 
5403       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5404     }
5405 
5406   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5407 
5408   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5409 
5410   if (cp->inv_expr_id != -1)
5411     {
5412       ivs->used_inv_expr[cp->inv_expr_id]--;
5413       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5414         ivs->num_used_inv_expr--;
5415     }
5416   iv_ca_recount_cost (data, ivs);
5417 }
5418 
5419 /* Add invariants in set INVS to set IVS.  */
5420 
5421 static void
5422 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5423 {
5424   bitmap_iterator bi;
5425   unsigned iid;
5426 
5427   if (!invs)
5428     return;
5429 
5430   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5431     {
5432       ivs->n_invariant_uses[iid]++;
5433       if (ivs->n_invariant_uses[iid] == 1)
5434         ivs->n_regs++;
5435     }
5436 }
5437 
5438 /* Set cost pair for USE in set IVS to CP.  */
5439 
5440 static void
5441 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5442 	      struct iv_use *use, struct cost_pair *cp)
5443 {
5444   unsigned uid = use->id, cid;
5445 
5446   if (ivs->cand_for_use[uid] == cp)
5447     return;
5448 
5449   if (ivs->cand_for_use[uid])
5450     iv_ca_set_no_cp (data, ivs, use);
5451 
5452   if (cp)
5453     {
5454       cid = cp->cand->id;
5455 
5456       ivs->bad_uses--;
5457       ivs->cand_for_use[uid] = cp;
5458       ivs->n_cand_uses[cid]++;
5459       if (ivs->n_cand_uses[cid] == 1)
5460 	{
5461 	  bitmap_set_bit (ivs->cands, cid);
5462 	  /* Do not count the pseudocandidates.  */
5463 	  if (cp->cand->iv)
5464 	    ivs->n_regs++;
5465 	  ivs->n_cands++;
5466 	  ivs->cand_cost += cp->cand->cost;
5467 
5468 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5469 	}
5470 
5471       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5472       iv_ca_set_add_invariants (ivs, cp->depends_on);
5473 
5474       if (cp->inv_expr_id != -1)
5475         {
5476           ivs->used_inv_expr[cp->inv_expr_id]++;
5477           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5478             ivs->num_used_inv_expr++;
5479         }
5480       iv_ca_recount_cost (data, ivs);
5481     }
5482 }
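
/* The bookkeeping above amounts to reference counting.  For example,
   assigning the first use to candidate 3 sets bit 3 in ivs->cands, bumps
   n_cands and n_regs, and charges the candidate's own cost and invariants;
   a second use of candidate 3 only increments n_cand_uses[3] and adds the
   per-use cost, which is what makes sharing a candidate cheap.  */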
5483 
5484 /* Extend set IVS by expressing USE by some of the candidates in it
5485    if possible.  Consider all important candidates if candidates in
5486    set IVS don't give any result.  */
5487 
5488 static void
5489 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5490 	       struct iv_use *use)
5491 {
5492   struct cost_pair *best_cp = NULL, *cp;
5493   bitmap_iterator bi;
5494   unsigned i;
5495   struct iv_cand *cand;
5496 
5497   gcc_assert (ivs->upto >= use->id);
5498   ivs->upto++;
5499   ivs->bad_uses++;
5500 
5501   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5502     {
5503       cand = iv_cand (data, i);
5504       cp = get_use_iv_cost (data, use, cand);
5505       if (cheaper_cost_pair (cp, best_cp))
5506 	best_cp = cp;
5507     }
5508 
5509   if (best_cp == NULL)
5510     {
5511       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5512 	{
5513 	  cand = iv_cand (data, i);
5514 	  cp = get_use_iv_cost (data, use, cand);
5515 	  if (cheaper_cost_pair (cp, best_cp))
5516 	    best_cp = cp;
5517 	}
5518     }
5519 
5520   iv_ca_set_cp (data, ivs, use, best_cp);
5521 }
5522 
5523 /* Get cost for assignment IVS.  */
5524 
5525 static comp_cost
5526 iv_ca_cost (struct iv_ca *ivs)
5527 {
5528   /* This was a conditional expression but it triggered a bug in
5529      Sun C 5.5.  */
5530   if (ivs->bad_uses)
5531     return infinite_cost;
5532   else
5533     return ivs->cost;
5534 }
5535 
5536 /* Returns true if all dependences of CP are among invariants in IVS.  */
5537 
5538 static bool
5539 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5540 {
5541   unsigned i;
5542   bitmap_iterator bi;
5543 
5544   if (!cp->depends_on)
5545     return true;
5546 
5547   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5548     {
5549       if (ivs->n_invariant_uses[i] == 0)
5550 	return false;
5551     }
5552 
5553   return true;
5554 }
5555 
5556 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5557    chains it before NEXT_CHANGE.  */
5558 
5559 static struct iv_ca_delta *
5560 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5561 		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5562 {
5563   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5564 
5565   change->use = use;
5566   change->old_cp = old_cp;
5567   change->new_cp = new_cp;
5568   change->next_change = next_change;
5569 
5570   return change;
5571 }
5572 
5573 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5574    are rewritten.  */
5575 
5576 static struct iv_ca_delta *
5577 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5578 {
5579   struct iv_ca_delta *last;
5580 
5581   if (!l2)
5582     return l1;
5583 
5584   if (!l1)
5585     return l2;
5586 
5587   for (last = l1; last->next_change; last = last->next_change)
5588     continue;
5589   last->next_change = l2;
5590 
5591   return l1;
5592 }
5593 
5594 /* Reverses the list of changes DELTA, forming its inverse.  */
5595 
5596 static struct iv_ca_delta *
5597 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5598 {
5599   struct iv_ca_delta *act, *next, *prev = NULL;
5600   struct cost_pair *tmp;
5601 
5602   for (act = delta; act; act = next)
5603     {
5604       next = act->next_change;
5605       act->next_change = prev;
5606       prev = act;
5607 
5608       tmp = act->old_cp;
5609       act->old_cp = act->new_cp;
5610       act->new_cp = tmp;
5611     }
5612 
5613   return prev;
5614 }
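
/* For example, reversing the delta list

     {use0: A -> B} -> {use1: C -> D}

   yields

     {use1: D -> C} -> {use0: B -> A}

   -- both the order of the changes and the direction of each individual
   change are flipped, so committing the result undoes the original.  */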
5615 
5616 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5617    reverted instead.  */
5618 
5619 static void
5620 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5621 		    struct iv_ca_delta *delta, bool forward)
5622 {
5623   struct cost_pair *from, *to;
5624   struct iv_ca_delta *act;
5625 
5626   if (!forward)
5627     delta = iv_ca_delta_reverse (delta);
5628 
5629   for (act = delta; act; act = act->next_change)
5630     {
5631       from = act->old_cp;
5632       to = act->new_cp;
5633       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5634       iv_ca_set_cp (data, ivs, act->use, to);
5635     }
5636 
5637   if (!forward)
5638     iv_ca_delta_reverse (delta);
5639 }
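
/* Committing and reverting support the tentative-evaluation pattern used
   throughout this file; a minimal sketch:

     iv_ca_delta_commit (data, ivs, delta, true);    // apply
     cost = iv_ca_cost (ivs);                        // measure
     iv_ca_delta_commit (data, ivs, delta, false);   // roll back

   as done in iv_ca_extend and iv_ca_narrow below.  */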
5640 
5641 /* Returns true if CAND is used in IVS.  */
5642 
5643 static bool
5644 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5645 {
5646   return ivs->n_cand_uses[cand->id] > 0;
5647 }
5648 
5649 /* Returns number of induction variable candidates in the set IVS.  */
5650 
5651 static unsigned
5652 iv_ca_n_cands (struct iv_ca *ivs)
5653 {
5654   return ivs->n_cands;
5655 }
5656 
5657 /* Free the list of changes DELTA.  */
5658 
5659 static void
5660 iv_ca_delta_free (struct iv_ca_delta **delta)
5661 {
5662   struct iv_ca_delta *act, *next;
5663 
5664   for (act = *delta; act; act = next)
5665     {
5666       next = act->next_change;
5667       free (act);
5668     }
5669 
5670   *delta = NULL;
5671 }
5672 
5673 /* Allocates a new iv candidate assignment.  */
5674 
5675 static struct iv_ca *
5676 iv_ca_new (struct ivopts_data *data)
5677 {
5678   struct iv_ca *nw = XNEW (struct iv_ca);
5679 
5680   nw->upto = 0;
5681   nw->bad_uses = 0;
5682   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5683   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5684   nw->cands = BITMAP_ALLOC (NULL);
5685   nw->n_cands = 0;
5686   nw->n_regs = 0;
5687   nw->cand_use_cost = no_cost;
5688   nw->cand_cost = 0;
5689   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5690   nw->cost = no_cost;
5691   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5692   nw->num_used_inv_expr = 0;
5693 
5694   return nw;
5695 }
5696 
5697 /* Free memory occupied by the set IVS.  */
5698 
5699 static void
5700 iv_ca_free (struct iv_ca **ivs)
5701 {
5702   free ((*ivs)->cand_for_use);
5703   free ((*ivs)->n_cand_uses);
5704   BITMAP_FREE ((*ivs)->cands);
5705   free ((*ivs)->n_invariant_uses);
5706   free ((*ivs)->used_inv_expr);
5707   free (*ivs);
5708   *ivs = NULL;
5709 }
5710 
5711 /* Dumps IVS to FILE.  */
5712 
5713 static void
5714 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5715 {
5716   const char *pref = "  invariants ";
5717   unsigned i;
5718   comp_cost cost = iv_ca_cost (ivs);
5719 
5720   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5721   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5722            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5723   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5724 
5725   for (i = 0; i < ivs->upto; i++)
5726     {
5727       struct iv_use *use = iv_use (data, i);
5728       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5729       if (cp)
5730         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5731                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5732       else
5733         fprintf (file, "   use:%d --> ??\n", use->id);
5734     }
5735 
5736   for (i = 1; i <= data->max_inv_id; i++)
5737     if (ivs->n_invariant_uses[i])
5738       {
5739 	fprintf (file, "%s%d", pref, i);
5740 	pref = ", ";
5741       }
5742   fprintf (file, "\n\n");
5743 }
5744 
5745 /* Try changing the candidate in IVS to CAND for each use.  Return the cost of
5746    the new set, and store the differences in DELTA.  The number of induction
5747    variables in the new set is stored in N_IVS.  MIN_NCAND is a flag; when true,
5748    the function tries to find a solution with a minimal number of candidates.  */
5749 
5750 static comp_cost
5751 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5752 	      struct iv_cand *cand, struct iv_ca_delta **delta,
5753 	      unsigned *n_ivs, bool min_ncand)
5754 {
5755   unsigned i;
5756   comp_cost cost;
5757   struct iv_use *use;
5758   struct cost_pair *old_cp, *new_cp;
5759 
5760   *delta = NULL;
5761   for (i = 0; i < ivs->upto; i++)
5762     {
5763       use = iv_use (data, i);
5764       old_cp = iv_ca_cand_for_use (ivs, use);
5765 
5766       if (old_cp
5767 	  && old_cp->cand == cand)
5768 	continue;
5769 
5770       new_cp = get_use_iv_cost (data, use, cand);
5771       if (!new_cp)
5772 	continue;
5773 
5774       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5775 	continue;
5776 
5777       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5778         continue;
5779 
5780       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5781     }
5782 
5783   iv_ca_delta_commit (data, ivs, *delta, true);
5784   cost = iv_ca_cost (ivs);
5785   if (n_ivs)
5786     *n_ivs = iv_ca_n_cands (ivs);
5787   iv_ca_delta_commit (data, ivs, *delta, false);
5788 
5789   return cost;
5790 }
5791 
5792 /* Try narrowing set IVS by removing CAND.  Return the cost of
5793    the new set and store the differences in DELTA.  START is
5794    the candidate with which we start narrowing.  */
5795 
5796 static comp_cost
5797 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5798 	      struct iv_cand *cand, struct iv_cand *start,
5799 	      struct iv_ca_delta **delta)
5800 {
5801   unsigned i, ci;
5802   struct iv_use *use;
5803   struct cost_pair *old_cp, *new_cp, *cp;
5804   bitmap_iterator bi;
5805   struct iv_cand *cnd;
5806   comp_cost cost, best_cost, acost;
5807 
5808   *delta = NULL;
5809   for (i = 0; i < n_iv_uses (data); i++)
5810     {
5811       use = iv_use (data, i);
5812 
5813       old_cp = iv_ca_cand_for_use (ivs, use);
5814       if (old_cp->cand != cand)
5815 	continue;
5816 
5817       best_cost = iv_ca_cost (ivs);
5818       /* Start narrowing with START.  */
5819       new_cp = get_use_iv_cost (data, use, start);
5820 
5821       if (data->consider_all_candidates)
5822 	{
5823 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5824 	    {
5825 	      if (ci == cand->id || (start && ci == start->id))
5826 		continue;
5827 
5828 	      cnd = iv_cand (data, ci);
5829 
5830 	      cp = get_use_iv_cost (data, use, cnd);
5831 	      if (!cp)
5832 		continue;
5833 
5834 	      iv_ca_set_cp (data, ivs, use, cp);
5835 	      acost = iv_ca_cost (ivs);
5836 
5837 	      if (compare_costs (acost, best_cost) < 0)
5838 		{
5839 		  best_cost = acost;
5840 		  new_cp = cp;
5841 		}
5842 	    }
5843 	}
5844       else
5845 	{
5846 	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5847 	    {
5848 	      if (ci == cand->id || (start && ci == start->id))
5849 		continue;
5850 
5851 	      cnd = iv_cand (data, ci);
5852 
5853 	      cp = get_use_iv_cost (data, use, cnd);
5854 	      if (!cp)
5855 		continue;
5856 
5857 	      iv_ca_set_cp (data, ivs, use, cp);
5858 	      acost = iv_ca_cost (ivs);
5859 
5860 	      if (compare_costs (acost, best_cost) < 0)
5861 		{
5862 		  best_cost = acost;
5863 		  new_cp = cp;
5864 		}
5865 	    }
5866 	}
5867       /* Restore the old cp for this use.  */
5868       iv_ca_set_cp (data, ivs, use, old_cp);
5869 
5870       if (!new_cp)
5871 	{
5872 	  iv_ca_delta_free (delta);
5873 	  return infinite_cost;
5874 	}
5875 
5876       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5877     }
5878 
5879   iv_ca_delta_commit (data, ivs, *delta, true);
5880   cost = iv_ca_cost (ivs);
5881   iv_ca_delta_commit (data, ivs, *delta, false);
5882 
5883   return cost;
5884 }
5885 
5886 /* Try optimizing the set of candidates IVS by removing candidates other
5887    than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5888    differences in DELTA.  */
5889 
5890 static comp_cost
5891 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5892 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
5893 {
5894   bitmap_iterator bi;
5895   struct iv_ca_delta *act_delta, *best_delta;
5896   unsigned i;
5897   comp_cost best_cost, acost;
5898   struct iv_cand *cand;
5899 
5900   best_delta = NULL;
5901   best_cost = iv_ca_cost (ivs);
5902 
5903   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5904     {
5905       cand = iv_cand (data, i);
5906 
5907       if (cand == except_cand)
5908 	continue;
5909 
5910       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5911 
5912       if (compare_costs (acost, best_cost) < 0)
5913 	{
5914 	  best_cost = acost;
5915 	  iv_ca_delta_free (&best_delta);
5916 	  best_delta = act_delta;
5917 	}
5918       else
5919 	iv_ca_delta_free (&act_delta);
5920     }
5921 
5922   if (!best_delta)
5923     {
5924       *delta = NULL;
5925       return best_cost;
5926     }
5927 
5928   /* Recurse to possibly remove other unnecessary ivs.  */
5929   iv_ca_delta_commit (data, ivs, best_delta, true);
5930   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5931   iv_ca_delta_commit (data, ivs, best_delta, false);
5932   *delta = iv_ca_delta_join (best_delta, *delta);
5933   return best_cost;
5934 }
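
/* An illustration of why this function recurses: removing candidate X
   re-routes its uses to the remaining candidates, which may leave some
   candidate Y serving only uses that are now expressed as cheaply
   elsewhere; only re-examining the pruned set discovers that Y can be
   removed too.  The recursion stops when no single removal improves the
   cost.  */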
5935 
5936 /* Check whether CAND_IDX names a candidate other than OLD_CAND that has a
5937    cheaper local cost for USE than BEST_CP.  If so, return a pointer to the
5938    corresponding cost_pair; otherwise just return BEST_CP.  */
5939 
5940 static struct cost_pair*
5941 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5942 			unsigned int cand_idx, struct iv_cand *old_cand,
5943 			struct cost_pair *best_cp)
5944 {
5945   struct iv_cand *cand;
5946   struct cost_pair *cp;
5947 
5948   gcc_assert (old_cand != NULL && best_cp != NULL);
5949   if (cand_idx == old_cand->id)
5950     return best_cp;
5951 
5952   cand = iv_cand (data, cand_idx);
5953   cp = get_use_iv_cost (data, use, cand);
5954   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5955     return cp;
5956 
5957   return best_cp;
5958 }
5959 
5960 /* Try breaking the locally optimal fixed-point for IVS by replacing
5961    candidates that are used by more than one iv use.  For each such candidate,
5962    this function tries to represent the iv uses under it using other
5963    candidates with lower local cost, then tries to prune the new set.  If the
5964    new set has a lower cost, it returns the new cost after recording the
5965    candidate replacement in the list DELTA.  */
5966 
5967 static comp_cost
5968 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5969 	       struct iv_ca_delta **delta)
5970 {
5971   bitmap_iterator bi, bj;
5972   unsigned int i, j, k;
5973   struct iv_use *use;
5974   struct iv_cand *cand;
5975   comp_cost orig_cost, acost;
5976   struct iv_ca_delta *act_delta, *tmp_delta;
5977   struct cost_pair *old_cp, *best_cp = NULL;
5978 
5979   *delta = NULL;
5980   orig_cost = iv_ca_cost (ivs);
5981 
5982   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5983     {
5984       if (ivs->n_cand_uses[i] == 1
5985 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5986 	continue;
5987 
5988       cand = iv_cand (data, i);
5989 
5990       act_delta = NULL;
5991       /*  Represent uses under current candidate using other ones with
5992 	  lower local cost.  */
5993       for (j = 0; j < ivs->upto; j++)
5994 	{
5995 	  use = iv_use (data, j);
5996 	  old_cp = iv_ca_cand_for_use (ivs, use);
5997 
5998 	  if (old_cp->cand != cand)
5999 	    continue;
6000 
6001 	  best_cp = old_cp;
6002 	  if (data->consider_all_candidates)
6003 	    for (k = 0; k < n_iv_cands (data); k++)
6004 	      best_cp = cheaper_cost_with_cand (data, use, k,
6005 						old_cp->cand, best_cp);
6006 	  else
6007 	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6008 	      best_cp = cheaper_cost_with_cand (data, use, k,
6009 						old_cp->cand, best_cp);
6010 
6011 	  if (best_cp == old_cp)
6012 	    continue;
6013 
6014 	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6015 	}
6016       /* No need for further pruning.  */
6017       if (!act_delta)
6018 	continue;
6019 
6020       /* Prune the new candidate set.  */
6021       iv_ca_delta_commit (data, ivs, act_delta, true);
6022       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6023       iv_ca_delta_commit (data, ivs, act_delta, false);
6024       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6025 
6026       if (compare_costs (acost, orig_cost) < 0)
6027 	{
6028 	  *delta = act_delta;
6029 	  return acost;
6030 	}
6031       else
6032 	iv_ca_delta_free (&act_delta);
6033     }
6034 
6035   return orig_cost;
6036 }
6037 
6038 /* Tries to extend the set IVS in the best possible way in order
6039    to express the USE.  If ORIGINALP is true, prefer candidates from
6040    the original set of IVs, otherwise favor important candidates not
6041    based on any memory object.  */
6042 
6043 static bool
6044 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6045 		  struct iv_use *use, bool originalp)
6046 {
6047   comp_cost best_cost, act_cost;
6048   unsigned i;
6049   bitmap_iterator bi;
6050   struct iv_cand *cand;
6051   struct iv_ca_delta *best_delta = NULL, *act_delta;
6052   struct cost_pair *cp;
6053 
6054   iv_ca_add_use (data, ivs, use);
6055   best_cost = iv_ca_cost (ivs);
6056   cp = iv_ca_cand_for_use (ivs, use);
6057   if (cp)
6058     {
6059       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6060       iv_ca_set_no_cp (data, ivs, use);
6061     }
6062 
6063   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6064      first try important candidates not based on any memory object.  Only if
6065      this fails, try the specific ones.  Rationale -- in loops with many
6066      variables the best choice often is to use just one generic biv.  If we
6067      added here many ivs specific to the uses, the optimization algorithm later
6068      would be likely to get stuck in a local minimum, thus causing us to create
6069      too many ivs.  The approach from few ivs to more seems more likely to be
6070      successful -- starting from few ivs, replacing an expensive use by a
6071      specific iv should always be a win.  */
6072   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6073     {
6074       cand = iv_cand (data, i);
6075 
6076       if (originalp && cand->pos != IP_ORIGINAL)
6077 	continue;
6078 
6079       if (!originalp && cand->iv->base_object != NULL_TREE)
6080 	continue;
6081 
6082       if (iv_ca_cand_used_p (ivs, cand))
6083         continue;
6084 
6085       cp = get_use_iv_cost (data, use, cand);
6086       if (!cp)
6087 	continue;
6088 
6089       iv_ca_set_cp (data, ivs, use, cp);
6090       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6091                                true);
6092       iv_ca_set_no_cp (data, ivs, use);
6093       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6094 
6095       if (compare_costs (act_cost, best_cost) < 0)
6096 	{
6097 	  best_cost = act_cost;
6098 
6099 	  iv_ca_delta_free (&best_delta);
6100 	  best_delta = act_delta;
6101 	}
6102       else
6103 	iv_ca_delta_free (&act_delta);
6104     }
6105 
6106   if (infinite_cost_p (best_cost))
6107     {
6108       for (i = 0; i < use->n_map_members; i++)
6109 	{
6110 	  cp = use->cost_map + i;
6111 	  cand = cp->cand;
6112 	  if (!cand)
6113 	    continue;
6114 
6115 	  /* Already tried this.  */
6116 	  if (cand->important)
6117 	    {
6118 	      if (originalp && cand->pos == IP_ORIGINAL)
6119 		continue;
6120 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6121 		continue;
6122 	    }
6123 
6124 	  if (iv_ca_cand_used_p (ivs, cand))
6125 	    continue;
6126 
6127 	  act_delta = NULL;
6128 	  iv_ca_set_cp (data, ivs, use, cp);
6129 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6130 	  iv_ca_set_no_cp (data, ivs, use);
6131 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6132 				       cp, act_delta);
6133 
6134 	  if (compare_costs (act_cost, best_cost) < 0)
6135 	    {
6136 	      best_cost = act_cost;
6137 
6138 	      if (best_delta)
6139 		iv_ca_delta_free (&best_delta);
6140 	      best_delta = act_delta;
6141 	    }
6142 	  else
6143 	    iv_ca_delta_free (&act_delta);
6144 	}
6145     }
6146 
6147   iv_ca_delta_commit (data, ivs, best_delta, true);
6148   iv_ca_delta_free (&best_delta);
6149 
6150   return !infinite_cost_p (best_cost);
6151 }
6152 
6153 /* Finds an initial assignment of candidates to uses.  */
6154 
6155 static struct iv_ca *
6156 get_initial_solution (struct ivopts_data *data, bool originalp)
6157 {
6158   struct iv_ca *ivs = iv_ca_new (data);
6159   unsigned i;
6160 
6161   for (i = 0; i < n_iv_uses (data); i++)
6162     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6163       {
6164 	iv_ca_free (&ivs);
6165 	return NULL;
6166       }
6167 
6168   return ivs;
6169 }
6170 
6171 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6172    points to a bool variable; if it is true, this function tries to break
6173    the locally optimal fixed-point by replacing candidates in IVS.  */
6174 
6175 static bool
6176 try_improve_iv_set (struct ivopts_data *data,
6177 		    struct iv_ca *ivs, bool *try_replace_p)
6178 {
6179   unsigned i, n_ivs;
6180   comp_cost acost, best_cost = iv_ca_cost (ivs);
6181   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6182   struct iv_cand *cand;
6183 
6184   /* Try extending the set of induction variables by one.  */
6185   for (i = 0; i < n_iv_cands (data); i++)
6186     {
6187       cand = iv_cand (data, i);
6188 
6189       if (iv_ca_cand_used_p (ivs, cand))
6190 	continue;
6191 
6192       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6193       if (!act_delta)
6194 	continue;
6195 
6196       /* If we successfully added the candidate and the set is small enough,
6197 	 try optimizing it by removing other candidates.  */
6198       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6199 	{
6200 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6201 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6202 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6203 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6204 	}
6205 
6206       if (compare_costs (acost, best_cost) < 0)
6207 	{
6208 	  best_cost = acost;
6209 	  iv_ca_delta_free (&best_delta);
6210 	  best_delta = act_delta;
6211 	}
6212       else
6213 	iv_ca_delta_free (&act_delta);
6214     }
6215 
6216   if (!best_delta)
6217     {
6218       /* Try removing the candidates from the set instead.  */
6219       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6220 
6221       if (!best_delta && *try_replace_p)
6222 	{
6223 	  *try_replace_p = false;
6224 	  /* So far the candidate selection algorithm tends to choose fewer IVs,
6225 	     so that it can handle cases in which loops have many variables
6226 	     but the best choice is often to use only one general biv.  One
6227 	     weakness is that it cannot handle the opposite case, in which
6228 	     different candidates should be chosen with respect to each use.
6229 	     To solve this problem, we replace candidates in the manner
6230 	     described in the comments of iv_ca_replace, thus giving the general
6231 	     algorithm a chance to break the locally optimal fixed-point.  */
6232 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6233 	}
6234 
6235       if (!best_delta)
6236 	return false;
6237     }
6238 
6239   iv_ca_delta_commit (data, ivs, best_delta, true);
6240   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6241   iv_ca_delta_free (&best_delta);
6242   return true;
6243 }
6244 
6245 /* Attempts to find the optimal set of induction variables.  We use a simple
6246    greedy heuristic: we try to replace at most one candidate in the selected
6247    solution and remove the unused ivs while this improves the cost.  */
6248 
6249 static struct iv_ca *
6250 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6251 {
6252   struct iv_ca *set;
6253   bool try_replace_p = true;
6254 
6255   /* Get the initial solution.  */
6256   set = get_initial_solution (data, originalp);
6257   if (!set)
6258     {
6259       if (dump_file && (dump_flags & TDF_DETAILS))
6260 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6261       return NULL;
6262     }
6263 
6264   if (dump_file && (dump_flags & TDF_DETAILS))
6265     {
6266       fprintf (dump_file, "Initial set of candidates:\n");
6267       iv_ca_dump (data, dump_file, set);
6268     }
6269 
6270   while (try_improve_iv_set (data, set, &try_replace_p))
6271     {
6272       if (dump_file && (dump_flags & TDF_DETAILS))
6273 	{
6274 	  fprintf (dump_file, "Improved to:\n");
6275 	  iv_ca_dump (data, dump_file, set);
6276 	}
6277     }
6278 
6279   return set;
6280 }
6281 
6282 static struct iv_ca *
6283 find_optimal_iv_set (struct ivopts_data *data)
6284 {
6285   unsigned i;
6286   struct iv_ca *set, *origset;
6287   struct iv_use *use;
6288   comp_cost cost, origcost;
6289 
6290   /* Determine the cost based on a strategy that starts with the original IVs,
6291      then try again using a strategy that prefers candidates not based
6292      on any IVs.  */
6293   origset = find_optimal_iv_set_1 (data, true);
6294   set = find_optimal_iv_set_1 (data, false);
6295 
6296   if (!origset && !set)
6297     return NULL;
6298 
6299   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6300   cost = set ? iv_ca_cost (set) : infinite_cost;
6301 
6302   if (dump_file && (dump_flags & TDF_DETAILS))
6303     {
6304       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6305 	       origcost.cost, origcost.complexity);
6306       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6307 	       cost.cost, cost.complexity);
6308     }
6309 
6310   /* Choose the one with the best cost.  */
6311   if (compare_costs (origcost, cost) <= 0)
6312     {
6313       if (set)
6314 	iv_ca_free (&set);
6315       set = origset;
6316     }
6317   else if (origset)
6318     iv_ca_free (&origset);
6319 
6320   for (i = 0; i < n_iv_uses (data); i++)
6321     {
6322       use = iv_use (data, i);
6323       use->selected = iv_ca_cand_for_use (set, use)->cand;
6324     }
6325 
6326   return set;
6327 }
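
/* A hypothetical -fdump-tree-ivopts-details excerpt for the comparison
   above (the numbers are invented):

     Original cost 28 (complexity 2)

     Final cost 24 (complexity 1)

   Here the second strategy wins and ORIGSET is freed; on a tie, the
   original-iv based set is kept, which tends to leave the loop closer to
   the way the user wrote it.  */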
6328 
6329 /* Creates a new induction variable corresponding to CAND.  */
6330 
6331 static void
6332 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6333 {
6334   gimple_stmt_iterator incr_pos;
6335   tree base;
6336   bool after = false;
6337 
6338   if (!cand->iv)
6339     return;
6340 
6341   switch (cand->pos)
6342     {
6343     case IP_NORMAL:
6344       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6345       break;
6346 
6347     case IP_END:
6348       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6349       after = true;
6350       break;
6351 
6352     case IP_AFTER_USE:
6353       after = true;
6354       /* fall through */
6355     case IP_BEFORE_USE:
6356       incr_pos = gsi_for_stmt (cand->incremented_at);
6357       break;
6358 
6359     case IP_ORIGINAL:
6360       /* Mark that the iv is preserved.  */
6361       name_info (data, cand->var_before)->preserve_biv = true;
6362       name_info (data, cand->var_after)->preserve_biv = true;
6363 
6364       /* Rewrite the increment so that it uses var_before directly.  */
6365       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6366       return;
6367     }
6368 
6369   gimple_add_tmp_var (cand->var_before);
6370 
6371   base = unshare_expr (cand->iv->base);
6372 
6373   create_iv (base, unshare_expr (cand->iv->step),
6374 	     cand->var_before, data->current_loop,
6375 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6376 }
6377 
6378 /* Creates new induction variables described in SET.  */
6379 
6380 static void
6381 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6382 {
6383   unsigned i;
6384   struct iv_cand *cand;
6385   bitmap_iterator bi;
6386 
6387   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6388     {
6389       cand = iv_cand (data, i);
6390       create_new_iv (data, cand);
6391     }
6392 
6393   if (dump_file && (dump_flags & TDF_DETAILS))
6394     {
6395       fprintf (dump_file, "Selected IV set for loop %d",
6396 	       data->current_loop->num);
6397       if (data->loop_loc != UNKNOWN_LOCATION)
6398 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6399 		 LOCATION_LINE (data->loop_loc));
6400       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6401       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6402         {
6403           cand = iv_cand (data, i);
6404           dump_cand (dump_file, cand);
6405         }
6406       fprintf (dump_file, "\n");
6407     }
6408 }
6409 
6410 /* Rewrites USE (definition of iv used in a nonlinear expression)
6411    using candidate CAND.  */
6412 
6413 static void
6414 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6415 			    struct iv_use *use, struct iv_cand *cand)
6416 {
6417   tree comp;
6418   tree op, tgt;
6419   gassign *ass;
6420   gimple_stmt_iterator bsi;
6421 
6422   /* An important special case -- if we are asked to express value of
6423      the original iv by itself, just exit; there is no need to
6424      introduce a new computation (that might also need casting the
6425      variable to unsigned and back).  */
6426   if (cand->pos == IP_ORIGINAL
6427       && cand->incremented_at == use->stmt)
6428     {
6429       enum tree_code stmt_code;
6430 
6431       gcc_assert (is_gimple_assign (use->stmt));
6432       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6433 
6434       /* Check whether we may leave the computation unchanged.
6435 	 This is the case only if it does not rely on other
6436 	 computations in the loop -- otherwise, the computation
6437 	 we rely upon may be removed in remove_unused_ivs,
6438 	 thus leading to ICE.  */
6439       stmt_code = gimple_assign_rhs_code (use->stmt);
6440       if (stmt_code == PLUS_EXPR
6441 	  || stmt_code == MINUS_EXPR
6442 	  || stmt_code == POINTER_PLUS_EXPR)
6443 	{
6444 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6445 	    op = gimple_assign_rhs2 (use->stmt);
6446 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6447 	    op = gimple_assign_rhs1 (use->stmt);
6448 	  else
6449 	    op = NULL_TREE;
6450 	}
6451       else
6452 	op = NULL_TREE;
6453 
6454       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6455 	return;
6456     }
6457 
6458   comp = get_computation (data->current_loop, use, cand);
6459   gcc_assert (comp != NULL_TREE);
6460 
6461   switch (gimple_code (use->stmt))
6462     {
6463     case GIMPLE_PHI:
6464       tgt = PHI_RESULT (use->stmt);
6465 
6466       /* If we should keep the biv, do not replace it.  */
6467       if (name_info (data, tgt)->preserve_biv)
6468 	return;
6469 
6470       bsi = gsi_after_labels (gimple_bb (use->stmt));
6471       break;
6472 
6473     case GIMPLE_ASSIGN:
6474       tgt = gimple_assign_lhs (use->stmt);
6475       bsi = gsi_for_stmt (use->stmt);
6476       break;
6477 
6478     default:
6479       gcc_unreachable ();
6480     }
6481 
6482   if (!valid_gimple_rhs_p (comp)
6483       || (gimple_code (use->stmt) != GIMPLE_PHI
6484 	  /* We can't allow re-allocating the stmt as it might be pointed
6485 	     to still.  */
6486 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6487 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6488     {
6489       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6490 				       true, GSI_SAME_STMT);
6491       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6492 	{
6493 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6494 	  /* As this isn't a plain copy we have to reset alignment
6495 	     information.  */
6496 	  if (SSA_NAME_PTR_INFO (comp))
6497 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6498 	}
6499     }
6500 
6501   if (gimple_code (use->stmt) == GIMPLE_PHI)
6502     {
6503       ass = gimple_build_assign (tgt, comp);
6504       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6505 
6506       bsi = gsi_for_stmt (use->stmt);
6507       remove_phi_node (&bsi, false);
6508     }
6509   else
6510     {
6511       gimple_assign_set_rhs_from_tree (&bsi, comp);
6512       use->stmt = gsi_stmt (bsi);
6513     }
6514 }
6515 
6516 /* Performs a peephole optimization to reorder the iv update statement with
6517    a mem ref to enable instruction combining in later phases. The mem ref uses
6518    the iv value before the update, so the reordering transformation requires
6519    adjustment of the offset. CAND is the selected IV_CAND.
6520 
6521    Example:
6522 
6523    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6524    iv2 = iv1 + 1;
6525 
6526    if (t < val)      (1)
6527      goto L;
6528    goto Head;
6529 
6530 
6531    Directly propagating t over to (1) would introduce an overlapping live range
6532    and thus increase register pressure.  This peephole transforms it into:
6533 
6534 
6535    iv2 = iv1 + 1;
6536    t = MEM_REF (base, iv2, 8, 8);
6537    if (t < val)
6538      goto L;
6539    goto Head;
6540 */
6541 
6542 static void
6543 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6544 {
6545   tree var_after;
6546   gimple iv_update, stmt;
6547   basic_block bb;
6548   gimple_stmt_iterator gsi, gsi_iv;
6549 
6550   if (cand->pos != IP_NORMAL)
6551     return;
6552 
6553   var_after = cand->var_after;
6554   iv_update = SSA_NAME_DEF_STMT (var_after);
6555 
6556   bb = gimple_bb (iv_update);
6557   gsi = gsi_last_nondebug_bb (bb);
6558   stmt = gsi_stmt (gsi);
6559 
6560   /* Only handle a conditional statement for now.  */
6561   if (gimple_code (stmt) != GIMPLE_COND)
6562     return;
6563 
6564   gsi_prev_nondebug (&gsi);
6565   stmt = gsi_stmt (gsi);
6566   if (stmt != iv_update)
6567     return;
6568 
6569   gsi_prev_nondebug (&gsi);
6570   if (gsi_end_p (gsi))
6571     return;
6572 
6573   stmt = gsi_stmt (gsi);
6574   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6575     return;
6576 
6577   if (stmt != use->stmt)
6578     return;
6579 
6580   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6581     return;
6582 
6583   if (dump_file && (dump_flags & TDF_DETAILS))
6584     {
6585       fprintf (dump_file, "Reordering \n");
6586       print_gimple_stmt (dump_file, iv_update, 0, 0);
6587       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6588       fprintf (dump_file, "\n");
6589     }
6590 
6591   gsi = gsi_for_stmt (use->stmt);
6592   gsi_iv = gsi_for_stmt (iv_update);
6593   gsi_move_before (&gsi_iv, &gsi);
6594 
6595   cand->pos = IP_BEFORE_USE;
6596   cand->incremented_at = use->stmt;
6597 }
6598 
6599 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6600 
6601 static void
6602 rewrite_use_address (struct ivopts_data *data,
6603 		     struct iv_use *use, struct iv_cand *cand)
6604 {
6605   aff_tree aff;
6606   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6607   tree base_hint = NULL_TREE;
6608   tree ref, iv;
6609   bool ok;
6610 
6611   adjust_iv_update_pos (cand, use);
6612   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6613   gcc_assert (ok);
6614   unshare_aff_combination (&aff);
6615 
6616   /* To avoid undefined overflow problems, all IV candidates use unsigned
6617      integer types.  The drawback is that this makes it impossible for
6618      create_mem_ref to distinguish an IV that is based on a memory object
6619      from one that represents simply an offset.
6620 
6621      To work around this problem, we pass a hint to create_mem_ref that
6622      indicates which variable (if any) in aff is an IV based on a memory
6623      object.  Note that we only consider the candidate.  If this is not
6624      based on an object, the base of the reference is in some subexpression
6625      of the use -- but these will use pointer types, so they are recognized
6626      by the create_mem_ref heuristics anyway.  */
6627   if (cand->iv->base_object)
6628     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6629 
6630   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6631   tree type = TREE_TYPE (*use->op_p);
6632   unsigned int align = get_object_alignment (*use->op_p);
6633   if (align != TYPE_ALIGN (type))
6634     type = build_aligned_type (type, align);
6635   ref = create_mem_ref (&bsi, type, &aff,
6636 			reference_alias_ptr_type (*use->op_p),
6637 			iv, base_hint, data->speed);
6638   copy_ref_info (ref, *use->op_p);
6639   *use->op_p = ref;
6640 }
6641 
6642 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6643    candidate CAND.  */
6644 
6645 static void
6646 rewrite_use_compare (struct ivopts_data *data,
6647 		     struct iv_use *use, struct iv_cand *cand)
6648 {
6649   tree comp, *var_p, op, bound;
6650   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6651   enum tree_code compare;
6652   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6653   bool ok;
6654 
6655   bound = cp->value;
6656   if (bound)
6657     {
6658       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6659       tree var_type = TREE_TYPE (var);
6660       gimple_seq stmts;
6661 
6662       if (dump_file && (dump_flags & TDF_DETAILS))
6663         {
6664           fprintf (dump_file, "Replacing exit test: ");
6665           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6666         }
6667       compare = cp->comp;
6668       bound = unshare_expr (fold_convert (var_type, bound));
6669       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6670       if (stmts)
6671 	gsi_insert_seq_on_edge_immediate (
6672 		loop_preheader_edge (data->current_loop),
6673 		stmts);
6674 
6675       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6676       gimple_cond_set_lhs (cond_stmt, var);
6677       gimple_cond_set_code (cond_stmt, compare);
6678       gimple_cond_set_rhs (cond_stmt, op);
6679       return;
6680     }
6681 
6682   /* The induction variable elimination failed; just express the original
6683      giv.  */
6684   comp = get_computation (data->current_loop, use, cand);
6685   gcc_assert (comp != NULL_TREE);
6686 
6687   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6688   gcc_assert (ok);
6689 
6690   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6691 				     true, GSI_SAME_STMT);
6692 }
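
/* For example (a sketch; the SSA names below are hypothetical), when iv
   elimination succeeded the exit test

     if (i_1 < n_2)

   is rewritten in place into a test of the chosen candidate against a
   bound precomputed on the preheader edge, e.g.

     if (ivtmp_3 != bound_4)

   Only when no bound is available does the code above fall back to
   recomputing the original iv value and substituting it into the
   condition.  */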
6693 
6694 /* Rewrites USE using candidate CAND.  */
6695 
6696 static void
6697 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6698 {
6699   switch (use->type)
6700     {
6701       case USE_NONLINEAR_EXPR:
6702 	rewrite_use_nonlinear_expr (data, use, cand);
6703 	break;
6704 
6705       case USE_ADDRESS:
6706 	rewrite_use_address (data, use, cand);
6707 	break;
6708 
6709       case USE_COMPARE:
6710 	rewrite_use_compare (data, use, cand);
6711 	break;
6712 
6713       default:
6714 	gcc_unreachable ();
6715     }
6716 
6717   update_stmt (use->stmt);
6718 }
6719 
6720 /* Rewrite the uses using the selected induction variables.  */
6721 
6722 static void
6723 rewrite_uses (struct ivopts_data *data)
6724 {
6725   unsigned i;
6726   struct iv_cand *cand;
6727   struct iv_use *use;
6728 
6729   for (i = 0; i < n_iv_uses (data); i++)
6730     {
6731       use = iv_use (data, i);
6732       cand = use->selected;
6733       gcc_assert (cand);
6734 
6735       rewrite_use (data, use, cand);
6736     }
6737 }
6738 
6739 /* Removes the ivs that are not used after rewriting.  */
6740 
6741 static void
6742 remove_unused_ivs (struct ivopts_data *data)
6743 {
6744   unsigned j;
6745   bitmap_iterator bi;
6746   bitmap toremove = BITMAP_ALLOC (NULL);
6747 
6748   /* Figure out an order in which to release SSA DEFs so that we don't
6749      release something that we'd have to propagate into a debug stmt
6750      afterwards.  */
6751   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6752     {
6753       struct version_info *info;
6754 
6755       info = ver_info (data, j);
6756       if (info->iv
6757 	  && !integer_zerop (info->iv->step)
6758 	  && !info->inv_id
6759 	  && !info->iv->have_use_for
6760 	  && !info->preserve_biv)
6761 	{
6762 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6763 
6764 	  tree def = info->iv->ssa_name;
6765 
6766 	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6767 	    {
6768 	      imm_use_iterator imm_iter;
6769 	      use_operand_p use_p;
6770 	      gimple stmt;
6771 	      int count = 0;
6772 
6773 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6774 		{
6775 		  if (!gimple_debug_bind_p (stmt))
6776 		    continue;
6777 
6778 		  /* We just want to determine whether to do nothing
6779 		     (count == 0), to substitute the computed
6780 		     expression into a single use of the SSA DEF by
6781 		     itself (count == 1), or to use a debug temp
6782 		     because the SSA DEF is used multiple times or as
6783 		     part of a larger expression (count > 1). */
6784 		  count++;
6785 		  if (gimple_debug_bind_get_value (stmt) != def)
6786 		    count++;
6787 
6788 		  if (count > 1)
6789 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
6790 		}
6791 
6792 	      if (!count)
6793 		continue;
6794 
6795 	      struct iv_use dummy_use;
6796 	      struct iv_cand *best_cand = NULL, *cand;
6797 	      unsigned i, best_pref = 0, cand_pref;
6798 
6799 	      memset (&dummy_use, 0, sizeof (dummy_use));
6800 	      dummy_use.iv = info->iv;
6801 	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6802 		{
6803 		  cand = iv_use (data, i)->selected;
6804 		  if (cand == best_cand)
6805 		    continue;
6806 		  cand_pref = operand_equal_p (cand->iv->step,
6807 					       info->iv->step, 0)
6808 		    ? 4 : 0;
6809 		  cand_pref
6810 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
6811 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
6812 		    ? 2 : 0;
6813 		  cand_pref
6814 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
6815 		    ? 1 : 0;
6816 		  if (best_cand == NULL || best_pref < cand_pref)
6817 		    {
6818 		      best_cand = cand;
6819 		      best_pref = cand_pref;
6820 		    }
6821 		}
6822 
6823 	      if (!best_cand)
6824 		continue;
6825 
6826 	      tree comp = get_computation_at (data->current_loop,
6827 					      &dummy_use, best_cand,
6828 					      SSA_NAME_DEF_STMT (def));
6829 	      if (!comp)
6830 		continue;
6831 
6832 	      if (count > 1)
6833 		{
6834 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
6835 		  DECL_ARTIFICIAL (vexpr) = 1;
6836 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
6837 		  if (SSA_NAME_VAR (def))
6838 		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6839 		  else
6840 		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6841 		  gdebug *def_temp
6842 		    = gimple_build_debug_bind (vexpr, comp, NULL);
6843 		  gimple_stmt_iterator gsi;
6844 
6845 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6846 		    gsi = gsi_after_labels (gimple_bb
6847 					    (SSA_NAME_DEF_STMT (def)));
6848 		  else
6849 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6850 
6851 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6852 		  comp = vexpr;
6853 		}
6854 
6855 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6856 		{
6857 		  if (!gimple_debug_bind_p (stmt))
6858 		    continue;
6859 
6860 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6861 		    SET_USE (use_p, comp);
6862 
6863 		  update_stmt (stmt);
6864 		}
6865 	    }
6866 	}
6867     }
6868 
6869   release_defs_bitset (toremove);
6870 
6871   BITMAP_FREE (toremove);
6872 }
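
/* The cand_pref heuristic above scores candidates for debug-value
   purposes: 4 for a step equal to that of the dead iv, 2 for an equal
   base mode, and 1 for a constant base.  For example, a candidate with
   the same step and mode but a variable base scores 4 + 2 = 6 and is
   preferred over any candidate with a different step (at most 2 + 1 = 3).  */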
6873 
6874 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6875    for hash_map::traverse.  */
6876 
6877 bool
6878 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6879 {
6880   free (value);
6881   return true;
6882 }
6883 
6884 /* Frees data allocated by the optimization of a single loop.  */
6885 
6886 static void
6887 free_loop_data (struct ivopts_data *data)
6888 {
6889   unsigned i, j;
6890   bitmap_iterator bi;
6891   tree obj;
6892 
6893   if (data->niters)
6894     {
6895       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6896       delete data->niters;
6897       data->niters = NULL;
6898     }
6899 
6900   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6901     {
6902       struct version_info *info;
6903 
6904       info = ver_info (data, i);
6905       free (info->iv);
6906       info->iv = NULL;
6907       info->has_nonlin_use = false;
6908       info->preserve_biv = false;
6909       info->inv_id = 0;
6910     }
6911   bitmap_clear (data->relevant);
6912   bitmap_clear (data->important_candidates);
6913 
6914   for (i = 0; i < n_iv_uses (data); i++)
6915     {
6916       struct iv_use *use = iv_use (data, i);
6917 
6918       free (use->iv);
6919       BITMAP_FREE (use->related_cands);
6920       for (j = 0; j < use->n_map_members; j++)
6921 	if (use->cost_map[j].depends_on)
6922 	  BITMAP_FREE (use->cost_map[j].depends_on);
6923       free (use->cost_map);
6924       free (use);
6925     }
6926   data->iv_uses.truncate (0);
6927 
6928   for (i = 0; i < n_iv_cands (data); i++)
6929     {
6930       struct iv_cand *cand = iv_cand (data, i);
6931 
6932       free (cand->iv);
6933       if (cand->depends_on)
6934 	BITMAP_FREE (cand->depends_on);
6935       free (cand);
6936     }
6937   data->iv_candidates.truncate (0);
6938 
6939   if (data->version_info_size < num_ssa_names)
6940     {
6941       data->version_info_size = 2 * num_ssa_names;
6942       free (data->version_info);
6943       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6944     }
6945 
6946   data->max_inv_id = 0;
6947 
6948   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6949     SET_DECL_RTL (obj, NULL_RTX);
6950 
6951   decl_rtl_to_reset.truncate (0);
6952 
6953   data->inv_expr_tab->empty ();
6954   data->inv_expr_id = 0;
6955 }
6956 
6957 /* Finalizes the data structures used by the iv optimization pass,
6958    releasing all memory allocated for it.  */
6959 
6960 static void
6961 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6962 {
6963   free_loop_data (data);
6964   free (data->version_info);
6965   BITMAP_FREE (data->relevant);
6966   BITMAP_FREE (data->important_candidates);
6967 
6968   decl_rtl_to_reset.release ();
6969   data->iv_uses.release ();
6970   data->iv_candidates.release ();
6971   delete data->inv_expr_tab;
6972   data->inv_expr_tab = NULL;
6973   free_affine_expand_cache (&data->name_expansion_cache);
6974 }
6975 
6976 /* Returns true if the loop body BODY includes any function calls.  */
6977 
6978 static bool
6979 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6980 {
6981   gimple_stmt_iterator gsi;
6982   unsigned i;
6983 
6984   for (i = 0; i < num_nodes; i++)
6985     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6986       {
6987 	gimple stmt = gsi_stmt (gsi);
6988 	if (is_gimple_call (stmt)
6989 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6990 	  return true;
6991       }
6992   return false;
6993 }
6994 
6995 /* Optimizes the LOOP.  Returns true if anything changed.  */
6996 
6997 static bool
6998 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6999 {
7000   bool changed = false;
7001   struct iv_ca *iv_ca;
7002   edge exit = single_dom_exit (loop);
7003   basic_block *body;
7004 
7005   gcc_assert (!data->niters);
7006   data->current_loop = loop;
7007   data->loop_loc = find_loop_location (loop);
7008   data->speed = optimize_loop_for_speed_p (loop);
7009 
7010   if (dump_file && (dump_flags & TDF_DETAILS))
7011     {
7012       fprintf (dump_file, "Processing loop %d", loop->num);
7013       if (data->loop_loc != UNKNOWN_LOCATION)
7014 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7015 		 LOCATION_LINE (data->loop_loc));
7016       fprintf (dump_file, "\n");
7017 
7018       if (exit)
7019 	{
7020 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7021 		   exit->src->index, exit->dest->index);
7022 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7023 	  fprintf (dump_file, "\n");
7024 	}
7025 
7026       fprintf (dump_file, "\n");
7027     }
7028 
7029   body = get_loop_body (loop);
7030   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7031   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7032   free (body);
7033 
7034   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7035 
7036   /* For each ssa name determines whether it behaves as an induction variable
7037      in some loop.  */
7038   if (!find_induction_variables (data))
7039     goto finish;
7040 
7041   /* Finds interesting uses (item 1).  */
7042   find_interesting_uses (data);
7043   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7044     goto finish;
7045 
7046   /* Finds candidates for the induction variables (item 2).  */
7047   find_iv_candidates (data);
7048 
7049   /* Calculates the costs (item 3, part 1).  */
7050   determine_iv_costs (data);
7051   determine_use_iv_costs (data);
7052   determine_set_costs (data);
7053 
7054   /* Find the optimal set of induction variables (item 3, part 2).  */
7055   iv_ca = find_optimal_iv_set (data);
7056   if (!iv_ca)
7057     goto finish;
7058   changed = true;
7059 
7060   /* Create the new induction variables (item 4, part 1).  */
7061   create_new_ivs (data, iv_ca);
7062   iv_ca_free (&iv_ca);
7063 
7064   /* Rewrite the uses (item 4, part 2).  */
7065   rewrite_uses (data);
7066 
7067   /* Remove the ivs that are unused after rewriting.  */
7068   remove_unused_ivs (data);
7069 
7070   /* We have changed the structure of induction variables; it might happen
7071      that definitions in the scev database refer to some of them that were
7072      eliminated.  */
7073   scev_reset ();
7074 
7075 finish:
7076   free_loop_data (data);
7077 
7078   return changed;
7079 }
7080 
7081 /* Main entry point.  Optimizes induction variables in loops.  */
7082 
7083 void
7084 tree_ssa_iv_optimize (void)
7085 {
7086   struct loop *loop;
7087   struct ivopts_data data;
7088 
7089   tree_ssa_iv_optimize_init (&data);
7090 
7091   /* Optimize the loops starting with the innermost ones.  */
7092   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7093     {
7094       if (dump_file && (dump_flags & TDF_DETAILS))
7095 	flow_loop_dump (loop, dump_file, NULL, 1);
7096 
7097       tree_ssa_iv_optimize_loop (&data, loop);
7098     }
7099 
7100   tree_ssa_iv_optimize_finalize (&data);
7101 }
7102