xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/gimple-ssa-strength-reduction.c (revision 63aea4bd5b445e491ff0389fe27ec78b3099dba3)
1 /* Straight-line strength reduction.
2    Copyright (C) 2012-2013 Free Software Foundation, Inc.
3    Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* There are many algorithms for performing strength reduction on
22    loops.  This is not one of them.  IVOPTS handles strength reduction
23    of induction variables just fine.  This pass is intended to pick
24    up the crumbs it leaves behind, by considering opportunities for
25    strength reduction along dominator paths.
26 
27    Strength reduction will be implemented in four stages, gradually
28    adding more complex candidates:
29 
30    1) Explicit multiplies, known constant multipliers, no
31       conditional increments. (complete)
32    2) Explicit multiplies, unknown constant multipliers,
33       no conditional increments. (complete)
34    3) Implicit multiplies in addressing expressions. (complete)
35    4) Explicit multiplies, conditional increments. (pending)
36 
37    It would also be possible to apply strength reduction to divisions
38    and modulos, but such opportunities are relatively uncommon.
39 
40    Strength reduction is also currently restricted to integer operations.
41    If desired, it could be extended to floating-point operations under
42    control of something like -funsafe-math-optimizations.  */
43 
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
57 #include "params.h"
58 
59 /* Information about a strength reduction candidate.  Each statement
60    in the candidate table represents an expression of one of the
61    following forms (the special case of CAND_REF will be described
62    later):
63 
64    (CAND_MULT)  S1:  X = (B + i) * S
65    (CAND_ADD)   S1:  X = B + (i * S)
66 
67    Here X and B are SSA names, i is an integer constant, and S is
68    either an SSA name or a constant.  We call B the "base," i the
69    "index", and S the "stride."
70 
71    Any statement S0 that dominates S1 and is of the form:
72 
73    (CAND_MULT)  S0:  Y = (B + i') * S
74    (CAND_ADD)   S0:  Y = B + (i' * S)
75 
76    is called a "basis" for S1.  In both cases, S1 may be replaced by
77 
78                 S1':  X = Y + (i - i') * S,
79 
80    where (i - i') * S is folded to the extent possible.
81 
82    All gimple statements are visited in dominator order, and each
83    statement that may contribute to one of the forms of S1 above is
84    given at least one entry in the candidate table.  Such statements
85    include addition, pointer addition, subtraction, multiplication,
86    negation, copies, and nontrivial type casts.  If a statement may
87    represent more than one expression of the forms of S1 above,
88    multiple "interpretations" are stored in the table and chained
89    together.  Examples:
90 
91    * An add of two SSA names may treat either operand as the base.
92    * A multiply of two SSA names, likewise.
93    * A copy or cast may be thought of as either a CAND_MULT with
94      i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
95 
96    Candidate records are allocated from an obstack.  They are addressed
97    both from a hash table keyed on S1, and from a vector of candidate
98    pointers arranged in predominator order.
99 
100    Opportunity note
101    ----------------
102    Currently we don't recognize:
103 
104      S0: Y = (S * i') - B
105      S1: X = (S * i) - B
106 
107    as a strength reduction opportunity, even though this S1 would
108    also be replaceable by the S1' above.  This can be added if it
109    comes up in practice.
110 
111    Strength reduction in addressing
112    --------------------------------
113    There is another kind of candidate known as CAND_REF.  A CAND_REF
114    describes a statement containing a memory reference having
115    complex addressing that might benefit from strength reduction.
116    Specifically, we are interested in references for which
117    get_inner_reference returns a base address, offset, and bitpos as
118    follows:
119 
120      base:    MEM_REF (T1, C1)
121      offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
122      bitpos:  C4 * BITS_PER_UNIT
123 
124    Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
125    arbitrary integer constants.  Note that C2 may be zero, in which
126    case the offset will be MULT_EXPR (T2, C3).
127 
128    When this pattern is recognized, the original memory reference
129    can be replaced with:
130 
131      MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
132               C1 + (C2 * C3) + C4)
133 
134    which distributes the multiply to allow constant folding.  When
135    two or more addressing expressions can be represented by MEM_REFs
136    of this form, differing only in the constants C1, C2, and C4,
137    making this substitution produces more efficient addressing during
138    the RTL phases.  When there are not at least two expressions with
139    the same values of T1, T2, and C3, there is nothing to be gained
140    by the replacement.
141 
142    Strength reduction of CAND_REFs uses the same infrastructure as
143    that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
144    field, MULT_EXPR (T2, C3) in the stride (S) field, and
145    C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
146    is thus another CAND_REF with the same B and S values.  When at
147    least two CAND_REFs are chained together using the basis relation,
148    each of them is replaced as above, resulting in improved code
149    generation for addressing.  */
150 
/* Index into the candidate vector, offset by 1.  VECs are zero-based,
   while cand_idx's are one-based, with zero indicating null.  */
typedef unsigned cand_idx;

/* The kind of candidate.  */
enum cand_kind
{
  CAND_MULT,	/* X = (B + i) * S  */
  CAND_ADD,	/* X = B + (i * S)  */
  CAND_REF	/* Memory reference with strength-reducible addressing.  */
};
163 
struct slsr_cand_d
{
  /* The candidate statement S1.  */
  gimple cand_stmt;

  /* The base expression B:  often an SSA name, but not always.  */
  tree base_expr;

  /* The stride S:  an SSA name or an integer constant.  */
  tree stride;

  /* The index constant i.  */
  double_int index;

  /* The type of the candidate.  This is normally the type of base_expr,
     but casts may have occurred when combining feeding instructions.
     A candidate can only be a basis for candidates of the same final type.
     (For CAND_REFs, this is the type to be used for operand 1 of the
     replacement MEM_REF.)  */
  tree cand_type;

  /* The kind of candidate (CAND_MULT, etc.).  */
  enum cand_kind kind;

  /* Index of this candidate in the candidate vector (one-based).  */
  cand_idx cand_num;

  /* Index of the next candidate record for the same statement.
     A statement may be useful in more than one way (e.g., due to
     commutativity).  So we can have multiple "interpretations"
     of a statement.  */
  cand_idx next_interp;

  /* Index of the basis statement S0, if any, in the candidate vector.  */
  cand_idx basis;

  /* First candidate for which this candidate is a basis, if one exists.  */
  cand_idx dependent;

  /* Next candidate having the same basis as this one.  */
  cand_idx sibling;

  /* If this is a conditional candidate, the defining PHI statement
     for the base SSA name B.  For future use; always NULL for now.  */
  gimple def_phi;

  /* Savings that can be expected from eliminating dead code if this
     candidate is replaced.  */
  int dead_savings;
};

/* Convenience typedefs for candidate records and (const) pointers.  */
typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
typedef const struct slsr_cand_d *const_slsr_cand_t;
217 
/* Pointers to candidates are chained together as part of a mapping
   from base expressions to the candidates that use them.  */

struct cand_chain_d
{
  /* Base expression for the chain of candidates:  often, but not
     always, an SSA name.  */
  tree base_expr;

  /* Pointer to a candidate.  */
  slsr_cand_t cand;

  /* Chain pointer (NULL terminates the chain).  */
  struct cand_chain_d *next;

};

/* Convenience typedefs for chain nodes and (const) pointers.  */
typedef struct cand_chain_d cand_chain, *cand_chain_t;
typedef const struct cand_chain_d *const_cand_chain_t;
237 
/* Information about a unique "increment" associated with candidates
   having an SSA name for a stride.  An increment is the difference
   between the index of the candidate and the index of its basis,
   i.e., (i - i') as discussed in the module commentary.

   When we are not going to generate address arithmetic we treat
   increments that differ only in sign as the same, allowing sharing
   of the cost of initializers.  The absolute value of the increment
   is stored in the incr_info.  */

struct incr_info_d
{
  /* The increment that relates a candidate to its basis.  */
  double_int incr;

  /* How many times the increment occurs in the candidate tree.  */
  unsigned count;

  /* Cost of replacing candidates using this increment.  Negative and
     zero costs indicate replacement should be performed.  */
  int cost;

  /* If this increment is profitable but is not -1, 0, or 1, it requires
     an initializer T_0 = stride * incr to be found or introduced in the
     nearest common dominator of all candidates.  This field holds T_0
     for subsequent use.  */
  tree initializer;

  /* If the initializer was found to already exist, this is the block
     where it was found.  */
  basic_block init_bb;
};

typedef struct incr_info_d incr_info, *incr_info_t;
272 
/* Candidates are maintained in a vector.  If candidate X dominates
   candidate Y, then X appears before Y in the vector; but the
   converse does not necessarily hold.  */
static vec<slsr_cand_t> cand_vec;

/* Symbolic costs used when deciding whether to replace candidates.  */
enum cost_consts
{
  COST_NEUTRAL = 0,	/* Break-even.  */
  COST_INFINITE = 1000	/* An arbitrarily large cost.  */
};

/* Pointer map embodying a mapping from statements to candidates.  */
static struct pointer_map_t *stmt_cand_map;

/* Obstack for candidates.  */
static struct obstack cand_obstack;

/* Hash table embodying a mapping from base exprs to chains of candidates.  */
static htab_t base_cand_map;

/* Obstack for candidate chains.  */
static struct obstack chain_obstack;

/* An array INCR_VEC of incr_infos is used during analysis of related
   candidates having an SSA name for a stride.  INCR_VEC_LEN describes
   its current length.  */
static incr_info_t incr_vec;
static unsigned incr_vec_len;

/* For a chain of candidates with unknown stride, indicates whether or not
   we must generate pointer arithmetic when replacing statements.  */
static bool address_arithmetic_p;
305 
306 /* Produce a pointer to the IDX'th candidate in the candidate vector.  */
307 
308 static slsr_cand_t
309 lookup_cand (cand_idx idx)
310 {
311   return cand_vec[idx - 1];
312 }
313 
314 /* Callback to produce a hash value for a candidate chain header.  */
315 
316 static hashval_t
317 base_cand_hash (const void *p)
318 {
319   tree base_expr = ((const_cand_chain_t) p)->base_expr;
320   return iterative_hash_expr (base_expr, 0);
321 }
322 
/* Callback when an element is removed from the hash table.
   We never remove entries until the entire table is released.  */

static void
base_cand_free (void *p ATTRIBUTE_UNUSED)
{
  /* Deliberately empty:  chain nodes live on chain_obstack and are
     released en masse when the pass finishes.  */
}
330 
331 /* Callback to return true if two candidate chain headers are equal.  */
332 
333 static int
334 base_cand_eq (const void *p1, const void *p2)
335 {
336   const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
337   const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
338   return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
339 }
340 
341 /* Use the base expr from candidate C to look for possible candidates
342    that can serve as a basis for C.  Each potential basis must also
343    appear in a block that dominates the candidate statement and have
344    the same stride and type.  If more than one possible basis exists,
345    the one with highest index in the vector is chosen; this will be
346    the most immediately dominating basis.  */
347 
348 static int
349 find_basis_for_candidate (slsr_cand_t c)
350 {
351   cand_chain mapping_key;
352   cand_chain_t chain;
353   slsr_cand_t basis = NULL;
354 
355   // Limit potential of N^2 behavior for long candidate chains.
356   int iters = 0;
357   int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
358 
359   mapping_key.base_expr = c->base_expr;
360   chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
361 
362   for (; chain && iters < max_iters; chain = chain->next, ++iters)
363     {
364       slsr_cand_t one_basis = chain->cand;
365 
366       if (one_basis->kind != c->kind
367 	  || one_basis->cand_stmt == c->cand_stmt
368 	  || !operand_equal_p (one_basis->stride, c->stride, 0)
369 	  || !types_compatible_p (one_basis->cand_type, c->cand_type)
370 	  || !dominated_by_p (CDI_DOMINATORS,
371 			      gimple_bb (c->cand_stmt),
372 			      gimple_bb (one_basis->cand_stmt)))
373 	continue;
374 
375       if (!basis || basis->cand_num < one_basis->cand_num)
376 	basis = one_basis;
377     }
378 
379   if (basis)
380     {
381       c->sibling = basis->dependent;
382       basis->dependent = c->cand_num;
383       return basis->cand_num;
384     }
385 
386   return 0;
387 }
388 
389 /* Record a mapping from the base expression of C to C itself, indicating that
390    C may potentially serve as a basis using that base expression.  */
391 
392 static void
393 record_potential_basis (slsr_cand_t c)
394 {
395   cand_chain_t node;
396   void **slot;
397 
398   node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
399   node->base_expr = c->base_expr;
400   node->cand = c;
401   node->next = NULL;
402   slot = htab_find_slot (base_cand_map, node, INSERT);
403 
404   if (*slot)
405     {
406       cand_chain_t head = (cand_chain_t) (*slot);
407       node->next = head->next;
408       head->next = node;
409     }
410   else
411     *slot = node;
412 }
413 
414 /* Allocate storage for a new candidate and initialize its fields.
415    Attempt to find a basis for the candidate.  */
416 
417 static slsr_cand_t
418 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
419 			   double_int index, tree stride, tree ctype,
420 			   unsigned savings)
421 {
422   slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
423 					       sizeof (slsr_cand));
424   c->cand_stmt = gs;
425   c->base_expr = base;
426   c->stride = stride;
427   c->index = index;
428   c->cand_type = ctype;
429   c->kind = kind;
430   c->cand_num = cand_vec.length () + 1;
431   c->next_interp = 0;
432   c->dependent = 0;
433   c->sibling = 0;
434   c->def_phi = NULL;
435   c->dead_savings = savings;
436 
437   cand_vec.safe_push (c);
438   c->basis = find_basis_for_candidate (c);
439   record_potential_basis (c);
440 
441   return c;
442 }
443 
444 /* Determine the target cost of statement GS when compiling according
445    to SPEED.  */
446 
447 static int
448 stmt_cost (gimple gs, bool speed)
449 {
450   tree lhs, rhs1, rhs2;
451   enum machine_mode lhs_mode;
452 
453   gcc_assert (is_gimple_assign (gs));
454   lhs = gimple_assign_lhs (gs);
455   rhs1 = gimple_assign_rhs1 (gs);
456   lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
457 
458   switch (gimple_assign_rhs_code (gs))
459     {
460     case MULT_EXPR:
461       rhs2 = gimple_assign_rhs2 (gs);
462 
463       if (host_integerp (rhs2, 0))
464 	return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
465 
466       gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
467       return mul_cost (speed, lhs_mode);
468 
469     case PLUS_EXPR:
470     case POINTER_PLUS_EXPR:
471     case MINUS_EXPR:
472       return add_cost (speed, lhs_mode);
473 
474     case NEGATE_EXPR:
475       return neg_cost (speed, lhs_mode);
476 
477     case NOP_EXPR:
478       return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
479 
480     /* Note that we don't assign costs to copies that in most cases
481        will go away.  */
482     default:
483       ;
484     }
485 
486   gcc_unreachable ();
487   return 0;
488 }
489 
490 /* Look up the defining statement for BASE_IN and return a pointer
491    to its candidate in the candidate table, if any; otherwise NULL.
492    Only CAND_ADD and CAND_MULT candidates are returned.  */
493 
494 static slsr_cand_t
495 base_cand_from_table (tree base_in)
496 {
497   slsr_cand_t *result;
498 
499   gimple def = SSA_NAME_DEF_STMT (base_in);
500   if (!def)
501     return (slsr_cand_t) NULL;
502 
503   result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
504 
505   if (result && (*result)->kind != CAND_REF)
506     return *result;
507 
508   return (slsr_cand_t) NULL;
509 }
510 
511 /* Add an entry to the statement-to-candidate mapping.  */
512 
513 static void
514 add_cand_for_stmt (gimple gs, slsr_cand_t c)
515 {
516   void **slot = pointer_map_insert (stmt_cand_map, gs);
517   gcc_assert (!*slot);
518   *slot = c;
519 }
520 
521 /* Look for the following pattern:
522 
523     *PBASE:    MEM_REF (T1, C1)
524 
525     *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
526                      or
527                MULT_EXPR (PLUS_EXPR (T2, C2), C3)
528                      or
529                MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
530 
531     *PINDEX:   C4 * BITS_PER_UNIT
532 
533    If not present, leave the input values unchanged and return FALSE.
534    Otherwise, modify the input values as follows and return TRUE:
535 
536     *PBASE:    T1
537     *POFFSET:  MULT_EXPR (T2, C3)
538     *PINDEX:   C1 + (C2 * C3) + C4  */
539 
540 static bool
541 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
542 		       tree *ptype)
543 {
544   tree base = *pbase, offset = *poffset;
545   double_int index = *pindex;
546   double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
547   tree mult_op0, mult_op1, t1, t2, type;
548   double_int c1, c2, c3, c4;
549 
550   if (!base
551       || !offset
552       || TREE_CODE (base) != MEM_REF
553       || TREE_CODE (offset) != MULT_EXPR
554       || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
555       || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
556     return false;
557 
558   t1 = TREE_OPERAND (base, 0);
559   c1 = mem_ref_offset (base);
560   type = TREE_TYPE (TREE_OPERAND (base, 1));
561 
562   mult_op0 = TREE_OPERAND (offset, 0);
563   mult_op1 = TREE_OPERAND (offset, 1);
564 
565   c3 = tree_to_double_int (mult_op1);
566 
567   if (TREE_CODE (mult_op0) == PLUS_EXPR)
568 
569     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
570       {
571 	t2 = TREE_OPERAND (mult_op0, 0);
572 	c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
573       }
574     else
575       return false;
576 
577   else if (TREE_CODE (mult_op0) == MINUS_EXPR)
578 
579     if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
580       {
581 	t2 = TREE_OPERAND (mult_op0, 0);
582 	c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
583       }
584     else
585       return false;
586 
587   else
588     {
589       t2 = mult_op0;
590       c2 = double_int_zero;
591     }
592 
593   c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
594 
595   *pbase = t1;
596   *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
597 			  double_int_to_tree (sizetype, c3));
598   *pindex = c1 + c2 * c3 + c4;
599   *ptype = type;
600 
601   return true;
602 }
603 
604 /* Given GS which contains a data reference, create a CAND_REF entry in
605    the candidate table and attempt to find a basis.  */
606 
607 static void
608 slsr_process_ref (gimple gs)
609 {
610   tree ref_expr, base, offset, type;
611   HOST_WIDE_INT bitsize, bitpos;
612   enum machine_mode mode;
613   int unsignedp, volatilep;
614   double_int index;
615   slsr_cand_t c;
616 
617   if (gimple_vdef (gs))
618     ref_expr = gimple_assign_lhs (gs);
619   else
620     ref_expr = gimple_assign_rhs1 (gs);
621 
622   if (!handled_component_p (ref_expr)
623       || TREE_CODE (ref_expr) == BIT_FIELD_REF
624       || (TREE_CODE (ref_expr) == COMPONENT_REF
625 	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
626     return;
627 
628   base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
629 			      &unsignedp, &volatilep, false);
630   index = double_int::from_uhwi (bitpos);
631 
632   if (!restructure_reference (&base, &offset, &index, &type))
633     return;
634 
635   c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
636 				 type, 0);
637 
638   /* Add the candidate to the statement-candidate mapping.  */
639   add_cand_for_stmt (gs, c);
640 }
641 
/* Create a candidate entry for a statement GS, where GS multiplies
   two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
   about the two SSA names into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  BASE
     remaining NULL_TREE means no usable interpretation so far.  */
  while (base_cand && !base)
    {

      if (base_cand->kind == CAND_MULT
	  && operand_equal_p (base_cand->stride, integer_one_node, 0))
	{
	  /* Y = (B + i') * 1
	     X = Y * Z
	     ================
	     X = (B + i') * Z  */
	  base = base_cand->base_expr;
	  index = base_cand->index;
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  /* If Y's definition feeds only this statement, replacing X
	     also allows Y's definition to die.  */
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (base_cand->kind == CAND_ADD
	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = B + (i' * S), S constant
	     X = Y * Z
	     ============================
	     X = B + ((i' * S) * Z)  */
	  base = base_cand->base_expr;
	  /* Fold the constant stride into the index.  */
	  index = base_cand->index * tree_to_double_int (base_cand->stride);
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      /* Advance to the next interpretation of the base definition.  */
      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = (Y + 0) * Z.  */
      base = base_in;
      index = double_int_zero;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
				 ctype, savings);
  return c;
}
712 
/* Create a candidate entry for a statement GS, where GS multiplies
   SSA name BASE_IN by constant STRIDE_IN.  Propagate any known
   information about BASE_IN into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index, temp;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  BASE
     remaining NULL_TREE means no usable interpretation so far.  */
  while (base_cand && !base)
    {
      if (base_cand->kind == CAND_MULT
	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = (B + i') * S, S constant
	     X = Y * c
	     ============================
	     X = (B + i') * (S * c)  */
	  temp = tree_to_double_int (base_cand->stride)
		 * tree_to_double_int (stride_in);
	  /* Only combine if the folded stride S * c is representable
	     in the stride's type.  */
	  if (double_int_fits_to_tree_p (TREE_TYPE (stride_in), temp))
	    {
	      base = base_cand->base_expr;
	      index = base_cand->index;
	      stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
	      ctype = base_cand->cand_type;
	      /* If Y's definition feeds only this statement, replacing
		 X also allows Y's definition to die.  */
	      if (has_single_use (base_in))
		savings = (base_cand->dead_savings
			   + stmt_cost (base_cand->cand_stmt, speed));
	    }
	}
      else if (base_cand->kind == CAND_ADD
	       && operand_equal_p (base_cand->stride, integer_one_node, 0))
	{
	  /* Y = B + (i' * 1)
	     X = Y * c
	     ===========================
	     X = (B + i') * c  */
	  base = base_cand->base_expr;
	  index = base_cand->index;
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (base_cand->kind == CAND_ADD
	       && base_cand->index.is_one ()
	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = B + (1 * S), S constant
	     X = Y * c
	     ===========================
	     X = (B + S) * c  */
	  base = base_cand->base_expr;
	  /* The constant stride becomes the index of the new form.  */
	  index = tree_to_double_int (base_cand->stride);
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      /* Advance to the next interpretation of the base definition.  */
      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = (Y + 0) * c.  */
      base = base_in;
      index = double_int_zero;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
				 ctype, savings);
  return c;
}
803 
804 /* Given GS which is a multiply of scalar integers, make an appropriate
805    entry in the candidate table.  If this is a multiply of two SSA names,
806    create two CAND_MULT interpretations and attempt to find a basis for
807    each of them.  Otherwise, create a single CAND_MULT and attempt to
808    find a basis.  */
809 
810 static void
811 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
812 {
813   slsr_cand_t c, c2;
814 
815   /* If this is a multiply of an SSA name with itself, it is highly
816      unlikely that we will get a strength reduction opportunity, so
817      don't record it as a candidate.  This simplifies the logic for
818      finding a basis, so if this is removed that must be considered.  */
819   if (rhs1 == rhs2)
820     return;
821 
822   if (TREE_CODE (rhs2) == SSA_NAME)
823     {
824       /* Record an interpretation of this statement in the candidate table
825 	 assuming RHS1 is the base expression and RHS2 is the stride.  */
826       c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
827 
828       /* Add the first interpretation to the statement-candidate mapping.  */
829       add_cand_for_stmt (gs, c);
830 
831       /* Record another interpretation of this statement assuming RHS1
832 	 is the stride and RHS2 is the base expression.  */
833       c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
834       c->next_interp = c2->cand_num;
835     }
836   else
837     {
838       /* Record an interpretation for the multiply-immediate.  */
839       c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
840 
841       /* Add the interpretation to the statement-candidate mapping.  */
842       add_cand_for_stmt (gs, c);
843     }
844 }
845 
/* Create a candidate entry for a statement GS, where GS adds two
   SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
   subtracts ADDEND_IN from BASE_IN otherwise.  Propagate any known
   information about the two SSA names into the new candidate.
   Return the new candidate.  */

static slsr_cand_t
create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
		     bool subtract_p, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
  double_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);
  slsr_cand_t addend_cand = base_cand_from_table (addend_in);

  /* The most useful transformation is a multiply-immediate feeding
     an add or subtract.  Look for that first.  */
  while (addend_cand && !base)
    {
      if (addend_cand->kind == CAND_MULT
	  && addend_cand->index.is_zero ()
	  && TREE_CODE (addend_cand->stride) == INTEGER_CST)
	{
	  /* Z = (B + 0) * S, S constant
	     X = Y +/- Z
	     ===========================
	     X = Y + ((+/-1 * S) * B)  */
	  base = base_in;
	  /* The multiplier's constant stride becomes the index,
	     negated for a subtraction.  */
	  index = tree_to_double_int (addend_cand->stride);
	  if (subtract_p)
	    index = -index;
	  stride = addend_cand->base_expr;
	  ctype = TREE_TYPE (base_in);
	  /* If Z feeds only this statement, its definition can die
	     after replacement.  */
	  if (has_single_use (addend_in))
	    savings = (addend_cand->dead_savings
		       + stmt_cost (addend_cand->cand_stmt, speed));
	}

      /* Advance to the next interpretation of the addend.  */
      if (addend_cand->next_interp)
	addend_cand = lookup_cand (addend_cand->next_interp);
      else
	addend_cand = NULL;
    }

  /* Failing that, look at interpretations of the base operand.  */
  while (base_cand && !base)
    {
      if (base_cand->kind == CAND_ADD
	  && (base_cand->index.is_zero ()
	      || operand_equal_p (base_cand->stride,
				  integer_zero_node, 0)))
	{
	  /* Y = B + (i' * S), i' * S = 0
	     X = Y +/- Z
	     ============================
	     X = B + (+/-1 * Z)  */
	  base = base_cand->base_expr;
	  index = subtract_p ? double_int_minus_one : double_int_one;
	  stride = addend_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (subtract_p)
	{
	  /* NOTE(review): this inner walk re-examines the same
	     interpretations of ADDEND_IN under the same conditions as
	     the first loop above (which already handled the
	     SUBTRACT_P case by negating the index), so it appears
	     redundant — confirm before relying on it.  */
	  slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);

	  while (subtrahend_cand && !base)
	    {
	      if (subtrahend_cand->kind == CAND_MULT
		  && subtrahend_cand->index.is_zero ()
		  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
		{
		  /* Z = (B + 0) * S, S constant
		     X = Y - Z
		     ===========================
		     Value:  X = Y + ((-1 * S) * B)  */
		  base = base_in;
		  index = tree_to_double_int (subtrahend_cand->stride);
		  index = -index;
		  stride = subtrahend_cand->base_expr;
		  ctype = TREE_TYPE (base_in);
		  if (has_single_use (addend_in))
		    savings = (subtrahend_cand->dead_savings
			       + stmt_cost (subtrahend_cand->cand_stmt, speed));
		}

	      if (subtrahend_cand->next_interp)
		subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
	      else
		subtrahend_cand = NULL;
	    }
	}

      /* Advance to the next interpretation of the base.  */
      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = Y + (1 * Z).  */
      base = base_in;
      index = subtract_p ? double_int_minus_one : double_int_one;
      stride = addend_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
				 ctype, savings);
  return c;
}
962 
/* Create a candidate entry for a statement GS, where GS adds SSA
   name BASE_IN to constant INDEX_IN.  Propagate any known information
   about BASE_IN into the new candidate.  Return the new candidate.  */

static slsr_cand_t
create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
{
  enum cand_kind kind = CAND_ADD;
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index, multiple;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Walk the chain of interpretations for BASE_IN until one is
     found that lets the constant be folded into the index, or the
     chain is exhausted.  */
  while (base_cand && !base)
    {
      bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));

      /* The constant addend can only be absorbed into the candidate's
	 index when it is an exact multiple of the constant stride.  */
      if (TREE_CODE (base_cand->stride) == INTEGER_CST
	  && index_in.multiple_of (tree_to_double_int (base_cand->stride),
				   unsigned_p, &multiple))
	{
	  /* Y = (B + i') * S, S constant, c = kS for some integer k
	     X = Y + c
	     ============================
	     X = (B + (i'+ k)) * S
	  OR
	     Y = B + (i' * S), S constant, c = kS for some integer k
	     X = Y + c
	     ============================
	     X = (B + (i'+ k)) * S  */
	  kind = base_cand->kind;
	  base = base_cand->base_expr;
	  index = base_cand->index + multiple;
	  stride = base_cand->stride;
	  ctype = base_cand->cand_type;
	  /* If BASE_IN has no other uses, its defining statement can
	     go dead after replacement; credit that cost as savings.  */
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = Y + (c * 1).  */
      kind = CAND_ADD;
      base = base_in;
      index = index_in;
      stride = integer_one_node;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
				 ctype, savings);
  return c;
}
1025 
/* Given GS which is an add or subtract of scalar integers or pointers,
   make at least one appropriate entry in the candidate table.  When the
   addend is an SSA name, up to two interpretations (one per choice of
   base operand) are recorded and chained together; when the addend is
   a constant, a single add-immediate interpretation is recorded.  */

static void
slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
{
  bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
  slsr_cand_t c = NULL, c2;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      /* First record an interpretation assuming RHS1 is the base expression
	 and RHS2 is the stride.  But it doesn't make sense for the
	 stride to be a pointer, so don't record a candidate in that case.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);

	  /* Add the first interpretation to the statement-candidate
	     mapping.  */
	  add_cand_for_stmt (gs, c);
	}

      /* If the two RHS operands are identical, or this is a subtract,
	 we're done.  */
      if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
	return;

      /* Otherwise, record another interpretation assuming RHS2 is the
	 base expression and RHS1 is the stride, again provided that the
	 stride is not a pointer.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
	{
	  c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
	  /* Chain the second interpretation onto the first, or make it
	     the statement's primary candidate if there was no first.  */
	  if (c)
	    c->next_interp = c2->cand_num;
	  else
	    add_cand_for_stmt (gs, c2);
	}
    }
  else
    {
      double_int index;

      /* Record an interpretation for the add-immediate; subtraction
	 of a constant is treated as addition of its negation.  */
      index = tree_to_double_int (rhs2);
      if (subtract_p)
	index = -index;

      c = create_add_imm_cand (gs, rhs1, index, speed);

      /* Add the interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);
    }
}
1081 
1082 /* Given GS which is a negate of a scalar integer, make an appropriate
1083    entry in the candidate table.  A negate is equivalent to a multiply
1084    by -1.  */
1085 
1086 static void
1087 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1088 {
1089   /* Record a CAND_MULT interpretation for the multiply by -1.  */
1090   slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1091 
1092   /* Add the interpretation to the statement-candidate mapping.  */
1093   add_cand_for_stmt (gs, c);
1094 }
1095 
1096 /* Help function for legal_cast_p, operating on two trees.  Checks
1097    whether it's allowable to cast from RHS to LHS.  See legal_cast_p
1098    for more details.  */
1099 
1100 static bool
1101 legal_cast_p_1 (tree lhs, tree rhs)
1102 {
1103   tree lhs_type, rhs_type;
1104   unsigned lhs_size, rhs_size;
1105   bool lhs_wraps, rhs_wraps;
1106 
1107   lhs_type = TREE_TYPE (lhs);
1108   rhs_type = TREE_TYPE (rhs);
1109   lhs_size = TYPE_PRECISION (lhs_type);
1110   rhs_size = TYPE_PRECISION (rhs_type);
1111   lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1112   rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1113 
1114   if (lhs_size < rhs_size
1115       || (rhs_wraps && !lhs_wraps)
1116       || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1117     return false;
1118 
1119   return true;
1120 }
1121 
1122 /* Return TRUE if GS is a statement that defines an SSA name from
1123    a conversion and is legal for us to combine with an add and multiply
1124    in the candidate table.  For example, suppose we have:
1125 
1126      A = B + i;
1127      C = (type) A;
1128      D = C * S;
1129 
1130    Without the type-cast, we would create a CAND_MULT for D with base B,
1131    index i, and stride S.  We want to record this candidate only if it
1132    is equivalent to apply the type cast following the multiply:
1133 
1134      A = B + i;
1135      E = A * S;
1136      D = (type) E;
1137 
1138    We will record the type with the candidate for D.  This allows us
1139    to use a similar previous candidate as a basis.  If we have earlier seen
1140 
1141      A' = B + i';
1142      C' = (type) A';
1143      D' = C' * S;
1144 
1145    we can replace D with
1146 
1147      D = D' + (i - i') * S;
1148 
1149    But if moving the type-cast would change semantics, we mustn't do this.
1150 
1151    This is legitimate for casts from a non-wrapping integral type to
1152    any integral type of the same or larger size.  It is not legitimate
1153    to convert a wrapping type to a non-wrapping type, or to a wrapping
1154    type of a different size.  I.e., with a wrapping type, we must
1155    assume that the addition B + i could wrap, in which case performing
1156    the multiply before or after one of the "illegal" type casts will
1157    have different semantics.  */
1158 
1159 static bool
1160 legal_cast_p (gimple gs, tree rhs)
1161 {
1162   if (!is_gimple_assign (gs)
1163       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1164     return false;
1165 
1166   return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1167 }
1168 
/* Given GS which is a cast to a scalar integer type, determine whether
   the cast is legal for strength reduction.  If so, make at least one
   appropriate entry in the candidate table.  */

static void
slsr_process_cast (gimple gs, tree rhs1, bool speed)
{
  tree lhs, ctype;
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  /* Bail out unless moving this cast past a multiply preserves
     semantics (see legal_cast_p).  */
  if (!legal_cast_p (gs, rhs1))
    return;

  lhs = gimple_assign_lhs (gs);
  base_cand = base_cand_from_table (rhs1);
  /* The candidate's type is the result type of the cast.  */
  ctype = TREE_TYPE (lhs);

  if (base_cand)
    {
      /* Create one candidate per interpretation of RHS1.  */
      while (base_cand)
	{
	  /* Propagate all data from the base candidate except the type,
	     which comes from the cast, and the base candidate's cast,
	     which is no longer applicable.  */
	  if (has_single_use (rhs1))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));

	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
					 base_cand->base_expr,
					 base_cand->index, base_cand->stride,
					 ctype, savings);
	  if (base_cand->next_interp)
	    base_cand = lookup_cand (base_cand->next_interp);
	  else
	    base_cand = NULL;
	}
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
	 CAND_MULT interpretations:

	 X = Y + (0 * 1)
	 X = (Y + 0) * 1

	 The first of these is somewhat arbitrary, but the choice of
	 1 for the stride simplifies the logic for propagating casts
	 into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
				     integer_one_node, ctype, 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
				      integer_one_node, ctype, 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
1230 
/* Given GS which is a copy of a scalar integer type, make at least one
   appropriate entry in the candidate table.  The logic parallels
   slsr_process_cast, except the candidate type is taken from RHS1
   rather than from a cast result.

   This interface is included for completeness, but is unnecessary
   if this pass immediately follows a pass that performs copy
   propagation, such as DOM.  */

static void
slsr_process_copy (gimple gs, tree rhs1, bool speed)
{
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  base_cand = base_cand_from_table (rhs1);

  if (base_cand)
    {
      /* Create one candidate per interpretation of RHS1.  */
      while (base_cand)
	{
	  /* Propagate all data from the base candidate.  */
	  if (has_single_use (rhs1))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));

	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
					 base_cand->base_expr,
					 base_cand->index, base_cand->stride,
					 base_cand->cand_type, savings);
	  if (base_cand->next_interp)
	    base_cand = lookup_cand (base_cand->next_interp);
	  else
	    base_cand = NULL;
	}
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
	 CAND_MULT interpretations:

	 X = Y + (0 * 1)
	 X = (Y + 0) * 1

	 The first of these is somewhat arbitrary, but the choice of
	 1 for the stride simplifies the logic for propagating casts
	 into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
				     integer_one_node, TREE_TYPE (rhs1), 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
				      integer_one_node, TREE_TYPE (rhs1), 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
1287 
/* Find strength-reduction candidates in block BB.  Memory references
   and scalar-integer assignments of interesting shapes are entered
   into the candidate table.  Called as a dominator-walk callback, so
   blocks are visited in dominator order.  */

static void
find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
			  basic_block bb)
{
  bool speed = optimize_bb_for_speed_p (bb);
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple gs = gsi_stmt (gsi);

      /* Loads and stores are handled as implicit-multiply (CAND_REF)
	 candidates.  */
      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
	slsr_process_ref (gs);

      else if (is_gimple_assign (gs)
	       && SCALAR_INT_MODE_P
	            (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
	{
	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;

	  /* First switch: fetch and validate the operands; CONTINUE
	     abandons the statement when rhs1 is not an SSA name.  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	    case PLUS_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Should never happen, but currently some buggy situations
		 in earlier phases put constants in rhs1.  */
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    /* Possible future opportunity: rhs1 of a ptr+ can be
	       an ADDR_EXPR.  */
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Fall-through.  */

	    case NOP_EXPR:
	    case MODIFY_EXPR:
	    case NEGATE_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    default:
	      ;
	    }

	  /* Second switch: dispatch to the recorder for each kind of
	     statement (multiply, add/subtract, negate, cast, copy).  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	      slsr_process_mul (gs, rhs1, rhs2, speed);
	      break;

	    case PLUS_EXPR:
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      slsr_process_add (gs, rhs1, rhs2, speed);
	      break;

	    case NEGATE_EXPR:
	      slsr_process_neg (gs, rhs1, speed);
	      break;

	    case NOP_EXPR:
	      slsr_process_cast (gs, rhs1, speed);
	      break;

	    case MODIFY_EXPR:
	      slsr_process_copy (gs, rhs1, speed);
	      break;

	    default:
	      ;
	    }
	}
    }
}
1371 
/* Dump candidate C for debug, one format per candidate kind:
   CAND_MULT as (B + i) * S, CAND_ADD as B + (i * S), and CAND_REF
   as B + (S) + i, followed by the candidate type and the basis,
   dependency, and interpretation links.  */

static void
dump_candidate (slsr_cand_t c)
{
  fprintf (dump_file, "%3d  [%d] ", c->cand_num,
	   gimple_bb (c->cand_stmt)->index);
  print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
  switch (c->kind)
    {
    case CAND_MULT:
      fputs ("     MULT : (", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (") * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (" : ", dump_file);
      break;
    case CAND_ADD:
      fputs ("     ADD  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") : ", dump_file);
      break;
    case CAND_REF:
      fputs ("     REF  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" : ", dump_file);
      break;
    default:
      gcc_unreachable ();
    }
  print_generic_expr (dump_file, c->cand_type, 0);
  fprintf (dump_file, "\n     basis: %d  dependent: %d  sibling: %d\n",
	   c->basis, c->dependent, c->sibling);
  fprintf (dump_file, "     next-interp: %d  dead-savings: %d\n",
	   c->next_interp, c->dead_savings);
  /* A conditional candidate also shows its controlling phi.  */
  if (c->def_phi)
    {
      fputs ("     phi:  ", dump_file);
      print_gimple_stmt (dump_file, c->def_phi, 0, 0);
    }
  fputs ("\n", dump_file);
}
1424 
1425 /* Dump the candidate vector for debug.  */
1426 
1427 static void
1428 dump_cand_vec (void)
1429 {
1430   unsigned i;
1431   slsr_cand_t c;
1432 
1433   fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1434 
1435   FOR_EACH_VEC_ELT (cand_vec, i, c)
1436     dump_candidate (c);
1437 }
1438 
1439 /* Callback used to dump the candidate chains hash table.  */
1440 
1441 static int
1442 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1443 {
1444   const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1445   cand_chain_t p;
1446 
1447   print_generic_expr (dump_file, chain->base_expr, 0);
1448   fprintf (dump_file, " -> %d", chain->cand->cand_num);
1449 
1450   for (p = chain->next; p; p = p->next)
1451     fprintf (dump_file, " -> %d", p->cand->cand_num);
1452 
1453   fputs ("\n", dump_file);
1454   return 1;
1455 }
1456 
/* Dump the candidate chains, one line per base expression, by
   traversing the base-candidate hash table.  */

static void
dump_cand_chains (void)
{
  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
  htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
  fputs ("\n", dump_file);
}
1466 
1467 /* Dump the increment vector for debug.  */
1468 
1469 static void
1470 dump_incr_vec (void)
1471 {
1472   if (dump_file && (dump_flags & TDF_DETAILS))
1473     {
1474       unsigned i;
1475 
1476       fprintf (dump_file, "\nIncrement vector:\n\n");
1477 
1478       for (i = 0; i < incr_vec_len; i++)
1479 	{
1480 	  fprintf (dump_file, "%3d  increment:   ", i);
1481 	  dump_double_int (dump_file, incr_vec[i].incr, false);
1482 	  fprintf (dump_file, "\n     count:       %d", incr_vec[i].count);
1483 	  fprintf (dump_file, "\n     cost:        %d", incr_vec[i].cost);
1484 	  fputs ("\n     initializer: ", dump_file);
1485 	  print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1486 	  fputs ("\n\n", dump_file);
1487 	}
1488     }
1489 }
1490 
1491 /* Recursive helper for unconditional_cands_with_known_stride_p.
1492    Returns TRUE iff C, its siblings, and its dependents are all
1493    unconditional candidates.  */
1494 
1495 static bool
1496 unconditional_cands (slsr_cand_t c)
1497 {
1498   if (c->def_phi)
1499     return false;
1500 
1501   if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1502     return false;
1503 
1504   if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1505     return false;
1506 
1507   return true;
1508 }
1509 
1510 /* Determine whether or not the tree of candidates rooted at
1511    ROOT consists entirely of unconditional increments with
1512    an INTEGER_CST stride.  */
1513 
1514 static bool
1515 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1516 {
1517   /* The stride is identical for all related candidates, so
1518      check it once.  */
1519   if (TREE_CODE (root->stride) != INTEGER_CST)
1520     return false;
1521 
1522   return unconditional_cands (lookup_cand (root->dependent));
1523 }
1524 
/* Replace *EXPR in candidate C with an equivalent strength-reduced
   data reference, a MEM_REF of the form *(base + stride) + index.  */

static void
replace_ref (tree *expr, slsr_cand_t c)
{
  tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
  unsigned HOST_WIDE_INT misalign;
  unsigned align;

  /* Ensure the memory reference carries the minimum alignment
     requirement for the data type.  See PR58041.  */
  get_object_alignment_1 (*expr, &align, &misalign);
  if (misalign != 0)
    align = (misalign & -misalign);
  if (align < TYPE_ALIGN (acc_type))
    acc_type = build_aligned_type (acc_type, align);

  /* Build the address base + stride and fold the candidate's index
     into the MEM_REF's constant offset.  */
  add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
			  c->base_expr, c->stride);
  mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
			 double_int_to_tree (c->cand_type, c->index));

  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
  TREE_OPERAND (mem_ref, 0)
    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
				/*simple_p=*/true, NULL,
				/*before=*/true, GSI_SAME_STMT);
  /* Preserve points-to and alias information from the old reference.  */
  copy_ref_info (mem_ref, *expr);
  *expr = mem_ref;
  update_stmt (c->cand_stmt);
}
1558 
1559 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1560    dependent of candidate C with an equivalent strength-reduced data
1561    reference.  */
1562 
1563 static void
1564 replace_refs (slsr_cand_t c)
1565 {
1566   if (gimple_vdef (c->cand_stmt))
1567     {
1568       tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1569       replace_ref (lhs, c);
1570     }
1571   else
1572     {
1573       tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1574       replace_ref (rhs, c);
1575     }
1576 
1577   if (c->sibling)
1578     replace_refs (lookup_cand (c->sibling));
1579 
1580   if (c->dependent)
1581     replace_refs (lookup_cand (c->dependent));
1582 }
1583 
1584 /* Calculate the increment required for candidate C relative to
1585    its basis.  */
1586 
1587 static double_int
1588 cand_increment (slsr_cand_t c)
1589 {
1590   slsr_cand_t basis;
1591 
1592   /* If the candidate doesn't have a basis, just return its own
1593      index.  This is useful in record_increments to help us find
1594      an existing initializer.  */
1595   if (!c->basis)
1596     return c->index;
1597 
1598   basis = lookup_cand (c->basis);
1599   gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1600   return c->index - basis->index;
1601 }
1602 
1603 /* Calculate the increment required for candidate C relative to
1604    its basis.  If we aren't going to generate pointer arithmetic
1605    for this candidate, return the absolute value of that increment
1606    instead.  */
1607 
1608 static inline double_int
1609 cand_abs_increment (slsr_cand_t c)
1610 {
1611   double_int increment = cand_increment (c);
1612 
1613   if (!address_arithmetic_p && increment.is_negative ())
1614     increment = -increment;
1615 
1616   return increment;
1617 }
1618 
1619 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1620    new temporary reg of type TYPE and store it in *VAR.  */
1621 
1622 static inline void
1623 lazy_create_slsr_reg (tree *var, tree type)
1624 {
1625   if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1626     *var = create_tmp_reg (type, "slsr");
1627 }
1628 
1629 /* Return TRUE iff candidate C has already been replaced under
1630    another interpretation.  */
1631 
1632 static inline bool
1633 cand_already_replaced (slsr_cand_t c)
1634 {
1635   return (gimple_bb (c->cand_stmt) == 0);
1636 }
1637 
/* Helper routine for replace_dependents, doing the work for a
   single candidate C.  CAND_CODE is the rhs code of C's statement.
   Rewrites C's statement as basis +/- bump, where the bump is the
   candidate's increment times its constant stride.  */

static void
replace_dependent (slsr_cand_t c, enum tree_code cand_code)
{
  double_int stride = tree_to_double_int (c->stride);
  double_int bump = cand_increment (c) * stride;
  gimple stmt_to_print = NULL;
  slsr_cand_t basis;
  tree basis_name, incr_type, bump_tree;
  enum tree_code code;

  /* It is highly unlikely, but possible, that the resulting
     bump doesn't fit in a HWI.  Abandon the replacement
     in this case.  Restriction to signed HWI is conservative
     for unsigned types but allows for safe negation without
     twisted logic.  */
  if (!bump.fits_shwi ())
    return;

  basis = lookup_cand (c->basis);
  basis_name = gimple_assign_lhs (basis->cand_stmt);
  /* Pointer arithmetic requires a sizetype offset; everything else
     becomes a plain PLUS_EXPR in the operand type.  */
  if (cand_code == POINTER_PLUS_EXPR)
    {
      incr_type = sizetype;
      code = cand_code;
    }
  else
    {
      incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
      code = PLUS_EXPR;
    }

  /* For non-pointer adds, express a negative bump as a subtract of
     the positive value.  */
  if (bump.is_negative ()
      && cand_code != POINTER_PLUS_EXPR)
    {
      code = MINUS_EXPR;
      bump = -bump;
    }

  bump_tree = double_int_to_tree (incr_type, bump);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
    }

  /* A zero bump means C computes the same value as its basis; emit
     a plain copy in place of the original statement.  */
  if (bump.is_zero ())
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      gimple copy_stmt = gimple_build_assign (lhs, basis_name);
      gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
      gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
      gsi_replace (&gsi, copy_stmt, false);
      if (dump_file && (dump_flags & TDF_DETAILS))
	stmt_to_print = copy_stmt;
    }
  else
    {
      tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
      tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
      /* If the statement already has the desired form, leave it be.  */
      if (cand_code != NEGATE_EXPR
	  && ((operand_equal_p (rhs1, basis_name, 0)
	       && operand_equal_p (rhs2, bump_tree, 0))
	      || (operand_equal_p (rhs1, bump_tree, 0)
		  && operand_equal_p (rhs2, basis_name, 0))))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("(duplicate, not actually replacing)", dump_file);
	      stmt_to_print = c->cand_stmt;
	    }
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
	  update_stmt (gsi_stmt (gsi));
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
1730 
1731 /* Replace candidate C, each sibling of candidate C, and each
1732    dependent of candidate C with an add or subtract.  Note that we
1733    only operate on CAND_MULTs with known strides, so we will never
1734    generate a POINTER_PLUS_EXPR.  Each candidate X = (B + i) * S is
1735    replaced by X = Y + ((i - i') * S), as described in the module
1736    commentary.  The folded value ((i - i') * S) is referred to here
1737    as the "bump."  */
1738 
1739 static void
1740 replace_dependents (slsr_cand_t c)
1741 {
1742   enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1743 
1744   /* It is not useful to replace casts, copies, or adds of an SSA name
1745      and a constant.  Also skip candidates that have already been
1746      replaced under another interpretation.  */
1747   if (cand_code != MODIFY_EXPR
1748       && cand_code != NOP_EXPR
1749       && c->kind == CAND_MULT
1750       && !cand_already_replaced (c))
1751     replace_dependent (c, cand_code);
1752 
1753   if (c->sibling)
1754     replace_dependents (lookup_cand (c->sibling));
1755 
1756   if (c->dependent)
1757     replace_dependents (lookup_cand (c->dependent));
1758 }
1759 
1760 /* Return the index in the increment vector of the given INCREMENT.  */
1761 
1762 static inline unsigned
1763 incr_vec_index (double_int increment)
1764 {
1765   unsigned i;
1766 
1767   for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1768     ;
1769 
1770   gcc_assert (i < incr_vec_len);
1771   return i;
1772 }
1773 
1774 /* Count the number of candidates in the tree rooted at C that have
1775    not already been replaced under other interpretations.  */
1776 
1777 static unsigned
1778 count_candidates (slsr_cand_t c)
1779 {
1780   unsigned count = cand_already_replaced (c) ? 0 : 1;
1781 
1782   if (c->sibling)
1783     count += count_candidates (lookup_cand (c->sibling));
1784 
1785   if (c->dependent)
1786     count += count_candidates (lookup_cand (c->dependent));
1787 
1788   return count;
1789 }
1790 
/* Increase the count of INCREMENT by one in the increment vector.
   INCREMENT is associated with candidate C.  If an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent use.  */

static void
record_increment (slsr_cand_t c, double_int increment)
{
  bool found = false;
  unsigned i;

  /* Treat increments that differ only in sign as identical so as to
     share initializers, unless we are generating pointer arithmetic.  */
  if (!address_arithmetic_p && increment.is_negative ())
    increment = -increment;

  /* Look for an existing entry for this increment.  */
  for (i = 0; i < incr_vec_len; i++)
    {
      if (incr_vec[i].incr == increment)
	{
	  incr_vec[i].count++;
	  found = true;

	  /* If we previously recorded an initializer that doesn't
	     dominate this candidate, it's not going to be useful to
	     us after all.  */
	  if (incr_vec[i].initializer
	      && !dominated_by_p (CDI_DOMINATORS,
				  gimple_bb (c->cand_stmt),
				  incr_vec[i].init_bb))
	    {
	      incr_vec[i].initializer = NULL_TREE;
	      incr_vec[i].init_bb = NULL;
	    }

	  break;
	}
    }

  if (!found)
    {
      /* The first time we see an increment, create the entry for it.
	 If this is the root candidate which doesn't have a basis, set
	 the count to zero.  We're only processing it so it can possibly
	 provide an initializer for other candidates.  */
      incr_vec[incr_vec_len].incr = increment;
      incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
      incr_vec[incr_vec_len].cost = COST_INFINITE;

      /* Optimistically record the first occurrence of this increment
	 as providing an initializer (if it does); we will revise this
	 opinion later if it doesn't dominate all other occurrences.
         Exception:  increments of -1, 0, 1 never need initializers.  */
      if (c->kind == CAND_ADD
	  && c->index == increment
	  && (increment.sgt (double_int_one)
	      || increment.slt (double_int_minus_one))
	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
	{
	  /* Whichever operand is not the base expression is the
	     potential initializer T_0.  */
	  tree t0 = NULL_TREE;
	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
	  if (operand_equal_p (rhs1, c->base_expr, 0))
	    t0 = rhs2;
	  else if (operand_equal_p (rhs2, c->base_expr, 0))
	    t0 = rhs1;
	  /* Only a T_0 with a real defining statement in a live block
	     can serve as an initializer.  */
	  if (t0
	      && SSA_NAME_DEF_STMT (t0)
	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
	    {
	      incr_vec[incr_vec_len].initializer = t0;
	      incr_vec[incr_vec_len++].init_bb
		= gimple_bb (SSA_NAME_DEF_STMT (t0));
	    }
	  else
	    {
	      incr_vec[incr_vec_len].initializer = NULL_TREE;
	      incr_vec[incr_vec_len++].init_bb = NULL;
	    }
	}
      else
	{
	  incr_vec[incr_vec_len].initializer = NULL_TREE;
	  incr_vec[incr_vec_len++].init_bb = NULL;
	}
    }
}
1879 
/* Determine how many times each unique increment occurs in the set
   of candidates rooted at C's parent, recording the data in the
   increment vector via a preorder walk of the sibling/dependent
   tree.  For each unique increment I, if an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent
   use.  */

static void
record_increments (slsr_cand_t c)
{
  /* Candidates already replaced under another interpretation do not
     contribute an increment.  */
  if (!cand_already_replaced (c))
    record_increment (c, cand_increment (c));

  if (c->sibling)
    record_increments (lookup_cand (c->sibling));

  if (c->dependent)
    record_increments (lookup_cand (c->dependent));
}
1899 
1900 /* Return the first candidate in the tree rooted at C that has not
1901    already been replaced, favoring siblings over dependents.  */
1902 
1903 static slsr_cand_t
1904 unreplaced_cand_in_tree (slsr_cand_t c)
1905 {
1906   if (!cand_already_replaced (c))
1907     return c;
1908 
1909   if (c->sibling)
1910     {
1911       slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1912       if (sib)
1913 	return sib;
1914     }
1915 
1916   if (c->dependent)
1917     {
1918       slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1919       if (dep)
1920 	return dep;
1921     }
1922 
1923   return NULL;
1924 }
1925 
1926 /* Return TRUE if the candidates in the tree rooted at C should be
1927    optimized for speed, else FALSE.  We estimate this based on the block
1928    containing the most dominant candidate in the tree that has not yet
1929    been replaced.  */
1930 
1931 static bool
1932 optimize_cands_for_speed_p (slsr_cand_t c)
1933 {
1934   slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1935   gcc_assert (c2);
1936   return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1937 }
1938 
1939 /* Add COST_IN to the lowest cost of any dependent path starting at
1940    candidate C or any of its siblings, counting only candidates along
1941    such paths with increment INCR.  Assume that replacing a candidate
1942    reduces cost by REPL_SAVINGS.  Also account for savings from any
1943    statements that would go dead.  */
1944 
1945 static int
1946 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1947 {
1948   int local_cost, sib_cost;
1949   double_int cand_incr = cand_abs_increment (c);
1950 
1951   if (cand_already_replaced (c))
1952     local_cost = cost_in;
1953   else if (incr == cand_incr)
1954     local_cost = cost_in - repl_savings - c->dead_savings;
1955   else
1956     local_cost = cost_in - c->dead_savings;
1957 
1958   if (c->dependent)
1959     local_cost = lowest_cost_path (local_cost, repl_savings,
1960 				   lookup_cand (c->dependent), incr);
1961 
1962   if (c->sibling)
1963     {
1964       sib_cost = lowest_cost_path (cost_in, repl_savings,
1965 				   lookup_cand (c->sibling), incr);
1966       local_cost = MIN (local_cost, sib_cost);
1967     }
1968 
1969   return local_cost;
1970 }
1971 
1972 /* Compute the total savings that would accrue from all replacements
1973    in the candidate tree rooted at C, counting only candidates with
1974    increment INCR.  Assume that replacing a candidate reduces cost
1975    by REPL_SAVINGS.  Also account for savings from statements that
1976    would go dead.  */
1977 
1978 static int
1979 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1980 {
1981   int savings = 0;
1982   double_int cand_incr = cand_abs_increment (c);
1983 
1984   if (incr == cand_incr && !cand_already_replaced (c))
1985     savings += repl_savings + c->dead_savings;
1986 
1987   if (c->dependent)
1988     savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1989 
1990   if (c->sibling)
1991     savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1992 
1993   return savings;
1994 }
1995 
1996 /* Use target-specific costs to determine and record which increments
1997    in the current candidate tree are profitable to replace, assuming
1998    MODE and SPEED.  FIRST_DEP is the first dependent of the root of
1999    the candidate tree.
2000 
2001    One slight limitation here is that we don't account for the possible
2002    introduction of casts in some cases.  See replace_one_candidate for
2003    the cases where these are introduced.  This should probably be cleaned
2004    up sometime.  */
2005 
static void
analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
{
  unsigned i;

  /* Assign a cost to each recorded increment; an increment is later
     considered profitable when its cost is <= COST_NEUTRAL.  The
     else-if chain below is ordered: earlier, cheaper classifications
     take precedence over the general cost computations at the end.  */
  for (i = 0; i < incr_vec_len; i++)
    {
      HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();

      /* If somehow this increment is bigger than a HWI, we won't
	 be optimizing candidates that use it.  And if the increment
	 has a count of zero, nothing will be done with it.  */
      if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
	incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
	 because they always replace a multiply or add with an add or
	 copy, and may cause one or more existing instructions to go
	 dead.  Exception:  -1 can't be assumed to be profitable for
	 pointer addition.  */
      else if (incr == 0
	       || incr == 1
	       || (incr == -1
		   && (gimple_assign_rhs_code (first_dep->cand_stmt)
		       != POINTER_PLUS_EXPR)))
	incr_vec[i].cost = COST_NEUTRAL;

      /* FORNOW: If we need to add an initializer, give up if a cast from
	 the candidate's type to its stride's type can lose precision.
	 This could eventually be handled better by expressly retaining the
	 result of a cast to a wider type in the stride.  Example:

           short int _1;
	   _2 = (int) _1;
	   _3 = _2 * 10;
	   _4 = x + _3;    ADD: x + (10 * _1) : int
	   _5 = _2 * 15;
	   _6 = x + _5;    ADD: x + (15 * _1) : int

         Right now replacing _6 would cause insertion of an initializer
	 of the form "short int T = _1 * 5;" followed by a cast to
	 int, which could overflow incorrectly.  Had we recorded _2 or
	 (int)_1 as the stride, this wouldn't happen.  However, doing
         this breaks other opportunities, so this will require some
	 care.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && !legal_cast_p_1 (first_dep->stride,
				   gimple_assign_lhs (first_dep->cand_stmt)))

	incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
	 a multiply by a pointer type, which can happen in certain cast
	 scenarios.  FIXME: When cleaning up these cast issues, we can
         afford to introduce the multiply provided we cast out to an
         unsigned int of appropriate size.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))

	incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
	 must introduce a temporary T and initialize it with
	 T_0 = stride * increment.  When optimizing for speed, walk the
	 candidate tree to calculate the best cost reduction along any
	 path; if it offsets the fixed cost of inserting the initializer,
	 replacing the increment is profitable.  When optimizing for
         size, instead calculate the total cost reduction from replacing
	 all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
	{
	  /* Replacing a multiply with an add saves the difference in
	     their target-specific costs.  */
	  int cost = mult_by_coeff_cost (incr, mode, speed);
	  int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
	  if (speed)
	    cost = lowest_cost_path (cost, repl_savings, first_dep,
				     incr_vec[i].incr);
	  else
	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}

      /* If this is an add candidate, the initializer may already
	 exist, so only calculate the cost of the initializer if it
	 doesn't.  We are replacing one add with another here, so the
	 known replacement savings is zero.  We will account for removal
	 of dead instructions in lowest_cost_path or total_savings.  */
      else
	{
	  int cost = 0;
	  if (!incr_vec[i].initializer)
	    cost = mult_by_coeff_cost (incr, mode, speed);

	  if (speed)
	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
	  else
	    cost -= total_savings (0, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}
    }
}
2110 
2111 /* Return the nearest common dominator of BB1 and BB2.  If the blocks
2112    are identical, return the earlier of C1 and C2 in *WHERE.  Otherwise,
2113    if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2114    return C2 in *WHERE; and if the NCD matches neither, return NULL in
2115    *WHERE.  Note: It is possible for one of C1 and C2 to be NULL.  */
2116 
2117 static basic_block
2118 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2119 		   slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2120 {
2121   basic_block ncd;
2122 
2123   if (!bb1)
2124     {
2125       *where = c2;
2126       return bb2;
2127     }
2128 
2129   if (!bb2)
2130     {
2131       *where = c1;
2132       return bb1;
2133     }
2134 
2135   ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2136 
2137   /* If both candidates are in the same block, the earlier
2138      candidate wins.  */
2139   if (bb1 == ncd && bb2 == ncd)
2140     {
2141       if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2142 	*where = c2;
2143       else
2144 	*where = c1;
2145     }
2146 
2147   /* Otherwise, if one of them produced a candidate in the
2148      dominator, that one wins.  */
2149   else if (bb1 == ncd)
2150     *where = c1;
2151 
2152   else if (bb2 == ncd)
2153     *where = c2;
2154 
2155   /* If neither matches the dominator, neither wins.  */
2156   else
2157     *where = NULL;
2158 
2159   return ncd;
2160 }
2161 
2162 /* Consider all candidates in the tree rooted at C for which INCR
2163    represents the required increment of C relative to its basis.
2164    Find and return the basic block that most nearly dominates all
2165    such candidates.  If the returned block contains one or more of
2166    the candidates, return the earliest candidate in the block in
2167    *WHERE.  */
2168 
static basic_block
nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
				    slsr_cand_t *where)
{
  basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
  slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
  double_int cand_incr;

  /* First find the NCD of all siblings and dependents.  */
  if (c->sibling)
    sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
						  incr, &sib_where);
  if (c->dependent)
    dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
						  incr, &dep_where);
  /* Merge the two recursive results.  Only when both subtrees yielded
     an NCD do we need the general two-way merge.  */
  if (!sib_ncd && !dep_ncd)
    {
      new_where = NULL;
      ncd = NULL;
    }
  else if (sib_ncd && !dep_ncd)
    {
      new_where = sib_where;
      ncd = sib_ncd;
    }
  else if (dep_ncd && !sib_ncd)
    {
      new_where = dep_where;
      ncd = dep_ncd;
    }
  else
    ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
			     dep_where, &new_where);

  /* If the candidate's increment doesn't match the one we're interested
     in, then the result depends only on siblings and dependents.  */
  cand_incr = cand_abs_increment (c);

  if (cand_incr != incr || cand_already_replaced (c))
    {
      *where = new_where;
      return ncd;
    }

  /* Otherwise, compare this candidate with the result from all siblings
     and dependents.  */
  this_where = c;
  this_ncd = gimple_bb (c->cand_stmt);
  ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);

  return ncd;
}
2221 
2222 /* Return TRUE if the increment indexed by INDEX is profitable to replace.  */
2223 
2224 static inline bool
2225 profitable_increment_p (unsigned index)
2226 {
2227   return (incr_vec[index].cost <= COST_NEUTRAL);
2228 }
2229 
2230 /* For each profitable increment in the increment vector not equal to
2231    0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2232    dominator of all statements in the candidate chain rooted at C
2233    that require that increment, and insert an initializer
2234    T_0 = stride * increment at that location.  Record T_0 with the
2235    increment record.  */
2236 
static void
insert_initializers (slsr_cand_t c)
{
  unsigned i;
  tree new_var = NULL_TREE;

  for (i = 0; i < incr_vec_len; i++)
    {
      basic_block bb;
      slsr_cand_t where = NULL;
      gimple init_stmt;
      tree stride_type, new_name, incr_tree;
      double_int incr = incr_vec[i].incr;

      /* Skip unprofitable increments, and the special values -1, 0,
	 and 1, which never need an initializer (-1 only when we are
	 not doing pointer arithmetic).  */
      if (!profitable_increment_p (i)
	  || incr.is_one ()
	  || (incr.is_minus_one ()
	      && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
	  || incr.is_zero ())
	continue;

      /* We may have already identified an existing initializer that
	 will suffice.  */
      if (incr_vec[i].initializer)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("Using existing initializer: ", dump_file);
	      print_gimple_stmt (dump_file,
				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
				 0, 0);
	    }
	  continue;
	}

      /* Find the block that most closely dominates all candidates
	 with this increment.  If there is at least one candidate in
	 that block, the earliest one will be returned in WHERE.  */
      bb = nearest_common_dominator_for_cands (c, incr, &where);

      /* Create a new SSA name to hold the initializer's value.  */
      stride_type = TREE_TYPE (c->stride);
      lazy_create_slsr_reg (&new_var, stride_type);
      new_name = make_ssa_name (new_var, NULL);
      incr_vec[i].initializer = new_name;

      /* Create the initializer and insert it in the latest possible
	 dominating position.  */
      incr_tree = double_int_to_tree (stride_type, incr);
      init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
						c->stride, incr_tree);
      if (where)
	{
	  /* A dominating candidate exists in BB; insert just before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
	}
      else
	{
	  /* No candidate in BB itself; append at the end of the block,
	     but before any control statement that must remain last.  */
	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
	  gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;

	  if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
	    gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  else
	    gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);

	  /* Borrow the basis statement's source location for the
	     synthesized initializer.  */
	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Inserting initializer: ", dump_file);
	  print_gimple_stmt (dump_file, init_stmt, 0, 0);
	}
    }
}
2314 
2315 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2316    type TO_TYPE, and insert it in front of the statement represented
2317    by candidate C.  Use *NEW_VAR to create the new SSA name.  Return
2318    the new SSA name.  */
2319 
2320 static tree
2321 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2322 			    tree from_expr, tree *new_var)
2323 {
2324   tree cast_lhs;
2325   gimple cast_stmt;
2326   gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2327 
2328   lazy_create_slsr_reg (new_var, to_type);
2329   cast_lhs = make_ssa_name (*new_var, NULL);
2330   cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2331 					    from_expr, NULL_TREE);
2332   gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2333   gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2334 
2335   if (dump_file && (dump_flags & TDF_DETAILS))
2336     {
2337       fputs ("  Inserting: ", dump_file);
2338       print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2339     }
2340 
2341   return cast_lhs;
2342 }
2343 
2344 /* Replace the RHS of the statement represented by candidate C with
2345    NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2346    leave C unchanged or just interchange its operands.  The original
2347    operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2348    If the replacement was made and we are doing a details dump,
2349    return the revised statement, else NULL.  */
2350 
static gimple
replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
			enum tree_code old_code, tree old_rhs1, tree old_rhs2,
			slsr_cand_t c)
{
  /* Replace only if the new RHS differs from the old one both as
     written and with its operands interchanged; otherwise the
     replacement would be a no-op (the candidate's operations here
     are commutative).  */
  if (new_code != old_code
      || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
	   || !operand_equal_p (new_rhs2, old_rhs2, 0))
	  && (!operand_equal_p (new_rhs1, old_rhs2, 0)
	      || !operand_equal_p (new_rhs2, old_rhs1, 0))))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
      gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
      update_stmt (gsi_stmt (gsi));

      /* Return the revised statement only for detailed dumping.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	return gsi_stmt (gsi);
    }

  else if (dump_file && (dump_flags & TDF_DETAILS))
    fputs ("  (duplicate, not actually replacing)\n", dump_file);

  return NULL;
}
2375 
2376 /* Strength-reduce the statement represented by candidate C by replacing
2377    it with an equivalent addition or subtraction.  I is the index into
2378    the increment vector identifying C's increment.  NEW_VAR is used to
2379    create a new SSA name if a cast needs to be introduced.  BASIS_NAME
2380    is the rhs1 to use in creating the add/subtract.  */
2381 
static void
replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
		       tree basis_name)
{
  gimple stmt_to_print = NULL;
  tree orig_rhs1, orig_rhs2;
  tree rhs2;
  enum tree_code orig_code, repl_code;
  double_int cand_incr;

  orig_code = gimple_assign_rhs_code (c->cand_stmt);
  orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
  orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
  cand_incr = cand_increment (c);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
      stmt_to_print = c->cand_stmt;
    }

  /* Pointer candidates require POINTER_PLUS_EXPR; integer candidates
     use PLUS_EXPR.  */
  if (address_arithmetic_p)
    repl_code = POINTER_PLUS_EXPR;
  else
    repl_code = PLUS_EXPR;

  /* If the increment has an initializer T_0, replace the candidate
     statement with an add of the basis name and the initializer.  */
  if (incr_vec[i].initializer)
    {
      tree init_type = TREE_TYPE (incr_vec[i].initializer);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, init_type))
	rhs2 = incr_vec[i].initializer;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type,
					   incr_vec[i].initializer,
					   new_var);

      /* The initializer was recorded for the absolute value of the
	 increment; a mismatch here means the true increment is
	 negative, so subtract instead of add.  */
      if (incr_vec[i].incr != cand_incr)
	{
	  gcc_assert (repl_code == PLUS_EXPR);
	  repl_code = MINUS_EXPR;
	}

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  /* Otherwise, the increment is one of -1, 0, and 1.  Replace
     with a subtract of the stride from the basis name, a copy
     from the basis name, or an add of the stride to the basis
     name, respectively.  It may be necessary to introduce a
     cast (or reuse an existing cast).  */
  else if (cand_incr.is_one ())
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  else if (cand_incr.is_minus_one ())
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);
      /* An increment of -1 is never chosen for pointer arithmetic
	 (see analyze_increments), so subtraction is always legal here.  */
      gcc_assert (repl_code != POINTER_PLUS_EXPR);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      /* MINUS_EXPR is not commutative, so only the exact operand
	 order counts as a duplicate; replace_rhs_if_not_dup's
	 interchange check doesn't apply.  */
      if (orig_code != MINUS_EXPR
	  || !operand_equal_p (basis_name, orig_rhs1, 0)
	  || !operand_equal_p (rhs2, orig_rhs2, 0))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
	  update_stmt (gsi_stmt (gsi));

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fputs ("  (duplicate, not actually replacing)\n", dump_file);
    }

  else if (cand_incr.is_zero ())
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      tree lhs_type = TREE_TYPE (lhs);
      tree basis_type = TREE_TYPE (basis_name);

      /* A zero increment means the candidate equals its basis:
	 replace with a plain copy, or a cast if the types differ.  */
      if (types_compatible_p (lhs_type, basis_type))
	{
	  gimple copy_stmt = gimple_build_assign (lhs, basis_name);
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, copy_stmt, false);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = copy_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
							   basis_name,
							   NULL_TREE);
	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, cast_stmt, false);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = cast_stmt;
	}
    }
  else
    gcc_unreachable ();

  if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
2519 
2520 /* For each candidate in the tree rooted at C, replace it with
2521    an increment if such has been shown to be profitable.  */
2522 
2523 static void
2524 replace_profitable_candidates (slsr_cand_t c)
2525 {
2526   if (!cand_already_replaced (c))
2527     {
2528       double_int increment = cand_abs_increment (c);
2529       tree new_var = NULL;
2530       enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2531       unsigned i;
2532 
2533       i = incr_vec_index (increment);
2534 
2535       /* Only process profitable increments.  Nothing useful can be done
2536 	 to a cast or copy.  */
2537       if (profitable_increment_p (i)
2538 	  && orig_code != MODIFY_EXPR
2539 	  && orig_code != NOP_EXPR)
2540 	{
2541 	  slsr_cand_t basis = lookup_cand (c->basis);
2542 	  tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2543 	  replace_one_candidate (c, i, &new_var, basis_name);
2544 	}
2545     }
2546 
2547   if (c->sibling)
2548     replace_profitable_candidates (lookup_cand (c->sibling));
2549 
2550   if (c->dependent)
2551     replace_profitable_candidates (lookup_cand (c->dependent));
2552 }
2553 
2554 /* Analyze costs of related candidates in the candidate vector,
2555    and make beneficial replacements.  */
2556 
static void
analyze_candidates_and_replace (void)
{
  unsigned i;
  slsr_cand_t c;

  /* Each candidate that has a null basis and a non-null
     dependent is the root of a tree of related statements.
     Analyze each tree to determine a subset of those
     statements that can be replaced with maximum benefit.  */
  FOR_EACH_VEC_ELT (cand_vec, i, c)
    {
      slsr_cand_t first_dep;

      /* Only tree roots are of interest.  */
      if (c->basis != 0 || c->dependent == 0)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
		 c->cand_num);

      first_dep = lookup_cand (c->dependent);

      /* If this is a chain of CAND_REFs, unconditionally replace
	 each of them with a strength-reduced data reference.  */
      if (c->kind == CAND_REF)
	replace_refs (c);

      /* If the common stride of all related candidates is a
	 known constant, and none of these has a phi-dependence,
	 then all replacements are considered profitable.
	 Each replaces a multiply by a single add, with the
	 possibility that a feeding add also goes dead as a
	 result.  */
      else if (unconditional_cands_with_known_stride_p (c))
	replace_dependents (first_dep);

      /* When the stride is an SSA name, it may still be profitable
	 to replace some or all of the dependent candidates, depending
	 on whether the introduced increments can be reused, or are
	 less expensive to calculate than the replaced statements.  */
      else
	{
	  unsigned length;
	  enum machine_mode mode;
	  bool speed;

	  /* Determine whether we'll be generating pointer arithmetic
	     when replacing candidates.  */
	  address_arithmetic_p = (c->kind == CAND_ADD
				  && POINTER_TYPE_P (c->cand_type));

	  /* If all candidates have already been replaced under other
	     interpretations, nothing remains to be done.  */
	  length = count_candidates (c);
	  if (!length)
	    continue;

	  /* Construct an array of increments for this candidate chain.
	     The global incr_vec is (re)allocated per tree and freed
	     below once replacements are complete.  */
	  incr_vec = XNEWVEC (incr_info, length);
	  incr_vec_len = 0;
	  record_increments (c);

	  /* Determine which increments are profitable to replace.  */
	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
	  speed = optimize_cands_for_speed_p (c);
	  analyze_increments (first_dep, mode, speed);

	  /* Insert initializers of the form T_0 = stride * increment
	     for use in profitable replacements.  */
	  insert_initializers (first_dep);
	  dump_incr_vec ();

	  /* Perform the replacements.  */
	  replace_profitable_candidates (first_dep);
	  free (incr_vec);
	}

      /* TODO:  When conditional increments occur so that a
	 candidate is dependent upon a phi-basis, the cost of
	 introducing a temporary must be accounted for.  */
    }
}
2640 
/* Main entry point for the pass: gather strength reduction candidates
   via a dominator walk, then analyze and replace them.  Returns 0
   (no additional TODO flags).  */

static unsigned
execute_strength_reduction (void)
{
  struct dom_walk_data walk_data;

  /* Create the obstack where candidates will reside.  */
  gcc_obstack_init (&cand_obstack);

  /* Allocate the candidate vector.  */
  cand_vec.create (128);

  /* Allocate the mapping from statements to candidate indices.  */
  stmt_cand_map = pointer_map_create ();

  /* Create the obstack where candidate chains will reside.  */
  gcc_obstack_init (&chain_obstack);

  /* Allocate the mapping from base expressions to candidate chains.  */
  base_cand_map = htab_create (500, base_cand_hash,
			       base_cand_eq, base_cand_free);

  /* Initialize the loop optimizer.  We need to detect flow across
     back edges, and this gives us dominator information as well.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Set up callbacks for the generic dominator tree walker.  */
  walk_data.dom_direction = CDI_DOMINATORS;
  walk_data.initialize_block_local_data = NULL;
  walk_data.before_dom_children = find_candidates_in_block;
  walk_data.after_dom_children = NULL;
  walk_data.global_data = NULL;
  walk_data.block_local_data_size = 0;
  init_walk_dominator_tree (&walk_data);

  /* Walk the CFG in predominator order looking for strength reduction
     candidates.  */
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_cand_vec ();
      dump_cand_chains ();
    }

  /* Analyze costs and make appropriate replacements.  */
  analyze_candidates_and_replace ();

  /* Free resources.  Teardown mirrors the setup above in reverse.  */
  fini_walk_dominator_tree (&walk_data);
  loop_optimizer_finalize ();
  htab_delete (base_cand_map);
  obstack_free (&chain_obstack, NULL);
  pointer_map_destroy (stmt_cand_map);
  cand_vec.release ();
  obstack_free (&cand_obstack, NULL);

  return 0;
}
2699 
2700 static bool
2701 gate_strength_reduction (void)
2702 {
2703   return flag_tree_slsr;
2704 }
2705 
/* Pass descriptor for straight-line strength reduction; requires a
   CFG and SSA form, and verifies SSA on completion.  */

struct gimple_opt_pass pass_strength_reduction =
{
 {
  GIMPLE_PASS,
  "slsr",				/* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_strength_reduction,		/* gate */
  execute_strength_reduction,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_GIMPLE_SLSR,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_ssa			/* todo_flags_finish */
 }
};
2725