xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-reassoc.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* Reassociation for trees.
2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
42 
43 /*  This is a simple global reassociation pass.  It is, in part, based
44     on the LLVM pass of the same name (They do some things more/less
45     than we do, in different orders, etc).
46 
47     It consists of five steps:
48 
49     1. Breaking up subtract operations into addition + negate, where
50     it would promote the reassociation of adds.
51 
52     2. Left linearization of the expression trees, so that (A+B)+(C+D)
53     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54     During linearization, we place the operands of the binary
55     expressions into a vector of operand_entry_t
56 
57     3. Optimization of the operand lists, eliminating things like a +
58     -a, a & a, etc.
59 
60     3a. Combine repeated factors with the same occurrence counts
61     into a __builtin_powi call that will later be optimized into
62     an optimal number of multiplies.
63 
64     4. Rewrite the expression trees we linearized and optimized so
65     they are in proper rank order.
66 
67     5. Repropagate negates, as nothing else will clean it up ATM.
68 
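    As a small, hypothetical illustration of steps 1 and 2 (not a dump
    from the pass): the statement x = a - b + c is first broken up into
    x = a + (-b) + c (step 1), and the resulting PLUS_EXPR tree is then
    linearized into the operand vector {a, -b, c} (step 2), which the
    later steps optimize and rewrite.
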
69     A bit of theory on #4, since nobody seems to write anything down
70     about why it makes sense to do it the way they do it:
71 
72     We could do this much nicer theoretically, but don't (for reasons
73     explained after how to do it theoretically nice :P).
74 
75     In order to promote the most redundancy elimination, you want
76     binary expressions whose operands are the same rank (or
77     preferably, the same value) exposed to the redundancy eliminator,
78     for possible elimination.
79 
80     So the way to do this if we really cared, is to build the new op
81     tree from the leaves to the roots, merging as you go, and putting the
82     new op on the end of the worklist, until you are left with one
83     thing on the worklist.
84 
85     IE if you have to rewrite the following set of operands (listed with
86     rank in parentheses), with opcode PLUS_EXPR:
87 
88     a (1),  b (1),  c (1),  d (2), e (2)
89 
90 
91     We start with our merge worklist empty, and the ops list with all of
92     those on it.
93 
94     You want to first merge all leaves of the same rank, as much as
95     possible.
96 
97     So first build a binary op of
98 
99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
100 
101     Because there is no three operand form of PLUS_EXPR, c is not going to
102     be exposed to redundancy elimination as a rank 1 operand.
103 
104     So you might as well throw it on the merge worklist (you could also
105     consider it to now be a rank two operand, and merge it with d and e,
106     but in this case, you then have evicted e from a binary op. So at
107     least in this situation, you can't win.)
108 
109     Then build a binary op of d + e
110     mergetmp2 = d + e
111 
112     and put mergetmp2 on the merge worklist.
113 
114     so merge worklist = {mergetmp, c, mergetmp2}
115 
116     Continue building binary ops of these operations until you have only
117     one operation left on the worklist.
118 
119     So we have
120 
121     build binary op
122     mergetmp3 = mergetmp + c
123 
124     worklist = {mergetmp2, mergetmp3}
125 
126     mergetmp4 = mergetmp2 + mergetmp3
127 
128     worklist = {mergetmp4}
129 
130     because we have one operation left, we can now just set the original
131     statement equal to the result of that operation.
132 
133     This will at least expose a + b  and d + e to redundancy elimination
134     as binary operations.
135 
136     For extra points, you can reuse the old statements to build the
137     mergetmps, since you shouldn't run out.
138 
139     So why don't we do this?
140 
141     Because it's expensive, and rarely will help.  Most trees we are
142     reassociating have 3 or fewer ops.  If they have 2 ops, they already
143     will be written into a nice single binary op.  If you have 3 ops, a
144     single simple check suffices to tell you whether the first two are of the
145     same rank.  If so, you know to order it
146 
147     mergetmp = op1 + op2
148     newstmt = mergetmp + op3
149 
150     instead of
151     mergetmp = op2 + op3
152     newstmt = mergetmp + op1
153 
154     If all three are of the same rank, you can't expose them all in a
155     single binary operator anyway, so the above is *still* the best you
156     can do.
157 
158     Thus, this is what we do.  When we have three ops left, we check to see
159     what order to put them in, and call it a day.  As a nod to vector sum
160     reduction, we check if any of the ops are really a phi node that is a
161     destructive update for the associating op, and keep the destructive
162     update together for vector sum reduction recognition.  */
163 
164 
165 /* Statistics */
166 static struct
167 {
168   int linearized;
169   int constants_eliminated;
170   int ops_eliminated;
171   int rewritten;
172   int pows_encountered;
173   int pows_created;
174 } reassociate_stats;
175 
176 /* Operand, rank pair.  */
177 typedef struct operand_entry
178 {
179   unsigned int rank;
180   int id;
181   tree op;
182   unsigned int count;
183 } *operand_entry_t;
184 
185 static alloc_pool operand_entry_pool;
186 
187 /* This is used to assign a unique ID to each struct operand_entry
188    so that qsort results are identical on different hosts.  */
189 static int next_operand_entry_id;
190 
191 /* Starting rank number for a given basic block, so that we can rank
192    operations using unmovable instructions in that BB based on the bb
193    depth.  */
194 static long *bb_rank;
195 
196 /* Operand->rank hashtable.  */
197 static struct pointer_map_t *operand_rank;
198 
199 /* Forward decls.  */
200 static long get_rank (tree);
201 
202 
203 /* Bias amount for loop-carried phis.  We want this to be larger than
204    the depth of any reassociation tree we can see, but not larger than
205    the rank difference between two blocks.  */
206 #define PHI_LOOP_BIAS (1 << 15)
207 
208 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
209    an innermost loop, and the phi has only a single use which is inside
210    the loop, then the rank is the block rank of the loop latch plus an
211    extra bias for the loop-carried dependence.  This causes expressions
212    calculated into an accumulator variable to be independent for each
213    iteration of the loop.  If STMT is some other phi, the rank is the
214    block rank of its containing block.  */
215 static long
216 phi_rank (gimple stmt)
217 {
218   basic_block bb = gimple_bb (stmt);
219   struct loop *father = bb->loop_father;
220   tree res;
221   unsigned i;
222   use_operand_p use;
223   gimple use_stmt;
224 
225   /* We only care about real loops (those with a latch).  */
226   if (!father->latch)
227     return bb_rank[bb->index];
228 
229   /* Interesting phis must be in headers of innermost loops.  */
230   if (bb != father->header
231       || father->inner)
232     return bb_rank[bb->index];
233 
234   /* Ignore virtual SSA_NAMEs.  */
235   res = gimple_phi_result (stmt);
236   if (virtual_operand_p (res))
237     return bb_rank[bb->index];
238 
239   /* The phi definition must have a single use, and that use must be
240      within the loop.  Otherwise this isn't an accumulator pattern.  */
241   if (!single_imm_use (res, &use, &use_stmt)
242       || gimple_bb (use_stmt)->loop_father != father)
243     return bb_rank[bb->index];
244 
245   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
246   for (i = 0; i < gimple_phi_num_args (stmt); i++)
247     {
248       tree arg = gimple_phi_arg_def (stmt, i);
249       if (TREE_CODE (arg) == SSA_NAME
250 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
251 	{
252 	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 	  if (gimple_bb (def_stmt)->loop_father == father)
254 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
255 	}
256     }
257 
258   /* Must be an uninteresting phi.  */
259   return bb_rank[bb->index];
260 }
261 
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263    loop-carried dependence of an innermost loop, return TRUE; else
264    return FALSE.  */
265 static bool
266 loop_carried_phi (tree exp)
267 {
268   gimple phi_stmt;
269   long block_rank;
270 
271   if (TREE_CODE (exp) != SSA_NAME
272       || SSA_NAME_IS_DEFAULT_DEF (exp))
273     return false;
274 
275   phi_stmt = SSA_NAME_DEF_STMT (exp);
276 
277   if (gimple_code (phi_stmt) != GIMPLE_PHI)
278     return false;
279 
280   /* Non-loop-carried phis have block rank.  Loop-carried phis have
281      an additional bias added in.  If this phi doesn't have block rank,
282      it's biased and should not be propagated.  */
283   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
284 
285   if (phi_rank (phi_stmt) != block_rank)
286     return true;
287 
288   return false;
289 }
290 
291 /* Return the maximum of RANK and the rank that should be propagated
292    from expression OP.  For most operands, this is just the rank of OP.
293    For loop-carried phis, the value is zero to avoid undoing the bias
294    in favor of the phi.  */
295 static long
296 propagate_rank (long rank, tree op)
297 {
298   long op_rank;
299 
300   if (loop_carried_phi (op))
301     return rank;
302 
303   op_rank = get_rank (op);
304 
305   return MAX (rank, op_rank);
306 }
307 
308 /* Look up the operand rank structure for expression E.  */
309 
310 static inline long
311 find_operand_rank (tree e)
312 {
313   void **slot = pointer_map_contains (operand_rank, e);
314   return slot ? (long) (intptr_t) *slot : -1;
315 }
316 
317 /* Insert {E,RANK} into the operand rank hashtable.  */
318 
319 static inline void
320 insert_operand_rank (tree e, long rank)
321 {
322   void **slot;
323   gcc_assert (rank > 0);
324   slot = pointer_map_insert (operand_rank, e);
325   gcc_assert (!*slot);
326   *slot = (void *) (intptr_t) rank;
327 }
328 
329 /* Given an expression E, return the rank of the expression.  */
330 
331 static long
332 get_rank (tree e)
333 {
334   /* Constants have rank 0.  */
335   if (is_gimple_min_invariant (e))
336     return 0;
337 
338   /* SSA_NAMEs have the rank of the expression they are the result
339      of.
340      For globals and uninitialized values, the rank is 0.
341      For function arguments, use the pre-setup rank.
342      For PHI nodes, stores, asm statements, etc, we use the rank of
343      the BB.
344      For simple operations, the rank is the maximum rank of any of
345      its operands, or the bb_rank, whichever is less.
346      I make no claims that this is optimal; however, it gives good
347      results.  */
348 
349   /* We make an exception to the normal ranking system to break
350      dependences of accumulator variables in loops.  Suppose we
351      have a simple one-block loop containing:
352 
353        x_1 = phi(x_0, x_2)
354        b = a + x_1
355        c = b + d
356        x_2 = c + e
357 
358      As shown, each iteration of the calculation into x is fully
359      dependent upon the iteration before it.  We would prefer to
360      see this in the form:
361 
362        x_1 = phi(x_0, x_2)
363        b = a + d
364        c = b + e
365        x_2 = c + x_1
366 
367      If the loop is unrolled, the calculations of b and c from
368      different iterations can be interleaved.
369 
370      To obtain this result during reassociation, we bias the rank
371      of the phi definition x_1 upward, when it is recognized as an
372      accumulator pattern.  The artificial rank causes it to be
373      added last, providing the desired independence.  */
374 
375   if (TREE_CODE (e) == SSA_NAME)
376     {
377       gimple stmt;
378       long rank;
379       int i, n;
380       tree op;
381 
382       if (SSA_NAME_IS_DEFAULT_DEF (e))
383 	return find_operand_rank (e);
384 
385       stmt = SSA_NAME_DEF_STMT (e);
386       if (gimple_code (stmt) == GIMPLE_PHI)
387 	return phi_rank (stmt);
388 
389       if (!is_gimple_assign (stmt)
390 	  || gimple_vdef (stmt))
391 	return bb_rank[gimple_bb (stmt)->index];
392 
393       /* If we already have a rank for this expression, use that.  */
394       rank = find_operand_rank (e);
395       if (rank != -1)
396 	return rank;
397 
398       /* Otherwise, find the maximum rank for the operands.  As an
399 	 exception, remove the bias from loop-carried phis when propagating
400 	 the rank so that dependent operations are not also biased.  */
401       rank = 0;
402       if (gimple_assign_single_p (stmt))
403 	{
404 	  tree rhs = gimple_assign_rhs1 (stmt);
405 	  n = TREE_OPERAND_LENGTH (rhs);
406 	  if (n == 0)
407 	    rank = propagate_rank (rank, rhs);
408 	  else
409 	    {
410 	      for (i = 0; i < n; i++)
411 		{
412 		  op = TREE_OPERAND (rhs, i);
413 
414 		  if (op != NULL_TREE)
415 		    rank = propagate_rank (rank, op);
416 		}
417 	    }
418 	}
419       else
420 	{
421 	  n = gimple_num_ops (stmt);
422 	  for (i = 1; i < n; i++)
423 	    {
424 	      op = gimple_op (stmt, i);
425 	      gcc_assert (op);
426 	      rank = propagate_rank (rank, op);
427 	    }
428 	}
429 
430       if (dump_file && (dump_flags & TDF_DETAILS))
431 	{
432 	  fprintf (dump_file, "Rank for ");
433 	  print_generic_expr (dump_file, e, 0);
434 	  fprintf (dump_file, " is %ld\n", (rank + 1));
435 	}
436 
437       /* Note the rank in the hashtable so we don't recompute it.  */
438       insert_operand_rank (e, (rank + 1));
439       return (rank + 1);
440     }
441 
442   /* Globals, etc., are rank 0.  */
443   return 0;
444 }
445 
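/* A hypothetical example of the ranking above (not taken from a real
   dump): if parameters a and b have, say, pre-assigned rank 1, then for

     t1 = a + b;     rank (t1) = MAX (1, 1) + 1 = 2
     t2 = t1 + a;    rank (t2) = MAX (2, 1) + 1 = 3

   results always rank strictly higher than their operands, so sorting
   by decreasing rank places later results before their inputs.  */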
446 
447 /* We want integer constants to end up last no matter what, since they
448    are the ones we can do the most with.  */
449 #define INTEGER_CONST_TYPE (1 << 3)
450 #define FLOAT_CONST_TYPE (1 << 2)
451 #define OTHER_CONST_TYPE (1 << 1)
452 
453 /* Classify an invariant tree into integer, float, or other, so that
454    we can sort them to be near other constants of the same type.  */
455 static inline int
456 constant_type (tree t)
457 {
458   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459     return INTEGER_CONST_TYPE;
460   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461     return FLOAT_CONST_TYPE;
462   else
463     return OTHER_CONST_TYPE;
464 }
465 
466 /* qsort comparison function to sort operand entries PA and PB by rank
467    so that the sorted array is ordered by rank in decreasing order.  */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
470 {
471   const operand_entry_t oea = *(const operand_entry_t *)pa;
472   const operand_entry_t oeb = *(const operand_entry_t *)pb;
473 
474   /* It's nicer for optimize_ops_list if constants that are likely
475      to fold when added/multiplied/whatever are put next to each
476      other.  Since all constants have rank 0, order them by type.  */
477   if (oeb->rank == 0 && oea->rank == 0)
478     {
479       if (constant_type (oeb->op) != constant_type (oea->op))
480 	return constant_type (oeb->op) - constant_type (oea->op);
481       else
482 	/* To make sorting result stable, we use unique IDs to determine
483 	   order.  */
484         return oeb->id - oea->id;
485     }
486 
487   /* Lastly, make sure the versions that are the same go next to each
488      other.  We use SSA_NAME_VERSION because it's stable.  */
489   if ((oeb->rank - oea->rank == 0)
490       && TREE_CODE (oea->op) == SSA_NAME
491       && TREE_CODE (oeb->op) == SSA_NAME)
492     {
493       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495       else
496 	return oeb->id - oea->id;
497     }
498 
499   if (oeb->rank != oea->rank)
500     return oeb->rank - oea->rank;
501   else
502     return oeb->id - oea->id;
503 }
504 
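/* For example (hypothetical), operand entries with ranks {3, 0, 2, 0}
   sort to the decreasing-rank order {3, 2, 0, 0}, so the rank-0
   constants end up last, where optimize_ops_list can fold them
   together.  */
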
505 /* Add an operand entry to *OPS for the tree operand OP.  */
506 
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
509 {
510   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
511 
512   oe->op = op;
513   oe->rank = get_rank (op);
514   oe->id = next_operand_entry_id++;
515   oe->count = 1;
516   ops->safe_push (oe);
517 }
518 
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520    count REPEAT.  */
521 
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 		       HOST_WIDE_INT repeat)
525 {
526   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
527 
528   oe->op = op;
529   oe->rank = get_rank (op);
530   oe->id = next_operand_entry_id++;
531   oe->count = repeat;
532   ops->safe_push (oe);
533 
534   reassociate_stats.pows_encountered++;
535 }
536 
537 /* Return true if STMT is a reassociable operation containing a binary
538    operation with tree code CODE, and is inside LOOP.  */
539 
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
542 {
543   basic_block bb = gimple_bb (stmt);
544 
545   if (gimple_bb (stmt) == NULL)
546     return false;
547 
548   if (!flow_bb_inside_loop_p (loop, bb))
549     return false;
550 
551   if (is_gimple_assign (stmt)
552       && gimple_assign_rhs_code (stmt) == code
553       && has_single_use (gimple_assign_lhs (stmt)))
554     return true;
555 
556   return false;
557 }
558 
559 
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561    operand of that operation.  Otherwise, return NULL_TREE.  */
562 
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
565 {
566   gimple stmt = SSA_NAME_DEF_STMT (name);
567 
568   if (!is_gimple_assign (stmt))
569     return NULL_TREE;
570 
571   if (gimple_assign_rhs_code (stmt) == opcode)
572     return gimple_assign_rhs1 (stmt);
573   return NULL_TREE;
574 }
575 
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577    eliminate through equivalences, do so, remove them from OPS, and
578    return true.  Otherwise, return false.  */
579 
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 			  vec<operand_entry_t> *ops,
583 			  bool *all_done,
584 			  unsigned int i,
585 			  operand_entry_t curr,
586 			  operand_entry_t last)
587 {
588 
589   /* If we have two of the same op, and the opcode is &, |, min, or max,
590      we can eliminate one of them.
591      If we have two of the same op, and the opcode is ^, we can
592      eliminate both of them.  */
593 
594   if (last && last->op == curr->op)
595     {
596       switch (opcode)
597 	{
598 	case MAX_EXPR:
599 	case MIN_EXPR:
600 	case BIT_IOR_EXPR:
601 	case BIT_AND_EXPR:
602 	  if (dump_file && (dump_flags & TDF_DETAILS))
603 	    {
604 	      fprintf (dump_file, "Equivalence: ");
605 	      print_generic_expr (dump_file, curr->op, 0);
606 	      fprintf (dump_file, " [&|minmax] ");
607 	      print_generic_expr (dump_file, last->op, 0);
608 	      fprintf (dump_file, " -> ");
609 	      print_generic_stmt (dump_file, last->op, 0);
610 	    }
611 
612 	  ops->ordered_remove (i);
613 	  reassociate_stats.ops_eliminated ++;
614 
615 	  return true;
616 
617 	case BIT_XOR_EXPR:
618 	  if (dump_file && (dump_flags & TDF_DETAILS))
619 	    {
620 	      fprintf (dump_file, "Equivalence: ");
621 	      print_generic_expr (dump_file, curr->op, 0);
622 	      fprintf (dump_file, " ^ ");
623 	      print_generic_expr (dump_file, last->op, 0);
624 	      fprintf (dump_file, " -> nothing\n");
625 	    }
626 
627 	  reassociate_stats.ops_eliminated += 2;
628 
629 	  if (ops->length () == 2)
630 	    {
631 	      ops->create (0);
632 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 	      *all_done = true;
634 	    }
635 	  else
636 	    {
637 	      ops->ordered_remove (i-1);
638 	      ops->ordered_remove (i-1);
639 	    }
640 
641 	  return true;
642 
643 	default:
644 	  break;
645 	}
646     }
647   return false;
648 }
649 
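/* A hypothetical illustration of eliminate_duplicate_pair (not a real
   dump): for OPS = {a, a, b} with OPCODE BIT_AND_EXPR, one of the two
   identical entries is dropped, leaving {a, b}.  With BIT_XOR_EXPR the
   copies cancel completely: {a, a} collapses to a single zero constant
   of the operands' type, while for {a, a, b} both "a" entries are
   removed, leaving {b}.  */
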
650 static vec<tree> plus_negates;
651 
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653    expression, look in OPS for a corresponding positive operation to cancel
654    it out.  If we find one, remove the other from OPS, replace
655    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
656    return false. */
657 
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 			   vec<operand_entry_t> *ops,
661 			   unsigned int currindex,
662 			   operand_entry_t curr)
663 {
664   tree negateop;
665   tree notop;
666   unsigned int i;
667   operand_entry_t oe;
668 
669   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670     return false;
671 
672   negateop = get_unary_op (curr->op, NEGATE_EXPR);
673   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674   if (negateop == NULL_TREE && notop == NULL_TREE)
675     return false;
676 
677   /* Any non-negated version will have a rank that is one less than
678      the current rank.  So once we hit those ranks, if we don't find
679      one, we can stop.  */
680 
681   for (i = currindex + 1;
682        ops->iterate (i, &oe)
683        && oe->rank >= curr->rank - 1 ;
684        i++)
685     {
686       if (oe->op == negateop)
687 	{
688 
689 	  if (dump_file && (dump_flags & TDF_DETAILS))
690 	    {
691 	      fprintf (dump_file, "Equivalence: ");
692 	      print_generic_expr (dump_file, negateop, 0);
693 	      fprintf (dump_file, " + -");
694 	      print_generic_expr (dump_file, oe->op, 0);
695 	      fprintf (dump_file, " -> 0\n");
696 	    }
697 
698 	  ops->ordered_remove (i);
699 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 	  ops->ordered_remove (currindex);
701 	  reassociate_stats.ops_eliminated ++;
702 
703 	  return true;
704 	}
705       else if (oe->op == notop)
706 	{
707 	  tree op_type = TREE_TYPE (oe->op);
708 
709 	  if (dump_file && (dump_flags & TDF_DETAILS))
710 	    {
711 	      fprintf (dump_file, "Equivalence: ");
712 	      print_generic_expr (dump_file, notop, 0);
713 	      fprintf (dump_file, " + ~");
714 	      print_generic_expr (dump_file, oe->op, 0);
715 	      fprintf (dump_file, " -> -1\n");
716 	    }
717 
718 	  ops->ordered_remove (i);
719 	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 	  ops->ordered_remove (currindex);
721 	  reassociate_stats.ops_eliminated ++;
722 
723 	  return true;
724 	}
725     }
726 
727   /* CURR->OP is a negate expr in a plus expr: save it for later
728      inspection in repropagate_negates().  */
729   if (negateop != NULL_TREE)
730     plus_negates.safe_push (curr->op);
731 
732   return false;
733 }
734 
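/* Hypothetical examples for the function above: in a PLUS_EXPR chain
   containing t and b where t = -b, the pair is removed and a zero
   constant is added in its place; where t = ~b, the identity
   ~b + b == -1 lets the pair be replaced by a -1 constant of the
   operands' type.  */
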
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736    bitwise not expression, look in OPS for a corresponding operand to
737    cancel it out.  If we find one, collapse the whole operand list to
738    -1 (for BIT_IOR_EXPR) or 0 (for BIT_AND_EXPR), and return true.
739    Otherwise, return false.  */
740 
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 		     vec<operand_entry_t> *ops,
744 		     unsigned int currindex,
745 		     operand_entry_t curr)
746 {
747   tree notop;
748   unsigned int i;
749   operand_entry_t oe;
750 
751   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752       || TREE_CODE (curr->op) != SSA_NAME)
753     return false;
754 
755   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756   if (notop == NULL_TREE)
757     return false;
758 
759   /* Any non-not version will have a rank that is one less than
760      the current rank.  So once we hit those ranks, if we don't find
761      one, we can stop.  */
762 
763   for (i = currindex + 1;
764        ops->iterate (i, &oe)
765        && oe->rank >= curr->rank - 1;
766        i++)
767     {
768       if (oe->op == notop)
769 	{
770 	  if (dump_file && (dump_flags & TDF_DETAILS))
771 	    {
772 	      fprintf (dump_file, "Equivalence: ");
773 	      print_generic_expr (dump_file, notop, 0);
774 	      if (opcode == BIT_AND_EXPR)
775 		fprintf (dump_file, " & ~");
776 	      else if (opcode == BIT_IOR_EXPR)
777 		fprintf (dump_file, " | ~");
778 	      print_generic_expr (dump_file, oe->op, 0);
779 	      if (opcode == BIT_AND_EXPR)
780 		fprintf (dump_file, " -> 0\n");
781 	      else if (opcode == BIT_IOR_EXPR)
782 		fprintf (dump_file, " -> -1\n");
783 	    }
784 
785 	  if (opcode == BIT_AND_EXPR)
786 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 	  else if (opcode == BIT_IOR_EXPR)
788 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
789 
790 	  reassociate_stats.ops_eliminated += ops->length () - 1;
791 	  ops->truncate (0);
792 	  ops->quick_push (oe);
793 	  return true;
794 	}
795     }
796 
797   return false;
798 }
799 
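/* For example (hypothetical), given t = ~a: an AND chain {t, a, b}
   contains a & ~a and therefore collapses to a single zero constant,
   while an IOR chain {t, a, b} contains a | ~a and collapses to a
   single all-ones constant.  */
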
800 /* Use a constant value that may be present in OPS to try to eliminate
801    operands.  Note that this function is only really used when we've
802    eliminated ops for other reasons, or merged constants.  Across
803    single statements, fold already does all of this, plus more.  There
804    is little point in duplicating logic, so I've only included the
805    identities that I could ever construct testcases to trigger.  */
806 
807 static void
808 eliminate_using_constants (enum tree_code opcode,
809 			   vec<operand_entry_t> *ops)
810 {
811   operand_entry_t oelast = ops->last ();
812   tree type = TREE_TYPE (oelast->op);
813 
814   if (oelast->rank == 0
815       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
816     {
817       switch (opcode)
818 	{
819 	case BIT_AND_EXPR:
820 	  if (integer_zerop (oelast->op))
821 	    {
822 	      if (ops->length () != 1)
823 		{
824 		  if (dump_file && (dump_flags & TDF_DETAILS))
825 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
826 
827 		  reassociate_stats.ops_eliminated += ops->length () - 1;
828 
829 		  ops->truncate (0);
830 		  ops->quick_push (oelast);
831 		  return;
832 		}
833 	    }
834 	  else if (integer_all_onesp (oelast->op))
835 	    {
836 	      if (ops->length () != 1)
837 		{
838 		  if (dump_file && (dump_flags & TDF_DETAILS))
839 		    fprintf (dump_file, "Found & -1, removing\n");
840 		  ops->pop ();
841 		  reassociate_stats.ops_eliminated++;
842 		}
843 	    }
844 	  break;
845 	case BIT_IOR_EXPR:
846 	  if (integer_all_onesp (oelast->op))
847 	    {
848 	      if (ops->length () != 1)
849 		{
850 		  if (dump_file && (dump_flags & TDF_DETAILS))
851 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
852 
853 		  reassociate_stats.ops_eliminated += ops->length () - 1;
854 
855 		  ops->truncate (0);
856 		  ops->quick_push (oelast);
857 		  return;
858 		}
859 	    }
860 	  else if (integer_zerop (oelast->op))
861 	    {
862 	      if (ops->length () != 1)
863 		{
864 		  if (dump_file && (dump_flags & TDF_DETAILS))
865 		    fprintf (dump_file, "Found | 0, removing\n");
866 		  ops->pop ();
867 		  reassociate_stats.ops_eliminated++;
868 		}
869 	    }
870 	  break;
871 	case MULT_EXPR:
872 	  if (integer_zerop (oelast->op)
873 	      || (FLOAT_TYPE_P (type)
874 		  && !HONOR_NANS (TYPE_MODE (type))
875 		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
876 		  && real_zerop (oelast->op)))
877 	    {
878 	      if (ops->length () != 1)
879 		{
880 		  if (dump_file && (dump_flags & TDF_DETAILS))
881 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
882 
883 		  reassociate_stats.ops_eliminated += ops->length () - 1;
884 		  ops->truncate (1);
885 		  ops->quick_push (oelast);
886 		  return;
887 		}
888 	    }
889 	  else if (integer_onep (oelast->op)
890 		   || (FLOAT_TYPE_P (type)
891 		       && !HONOR_SNANS (TYPE_MODE (type))
892 		       && real_onep (oelast->op)))
893 	    {
894 	      if (ops->length () != 1)
895 		{
896 		  if (dump_file && (dump_flags & TDF_DETAILS))
897 		    fprintf (dump_file, "Found * 1, removing\n");
898 		  ops->pop ();
899 		  reassociate_stats.ops_eliminated++;
900 		  return;
901 		}
902 	    }
903 	  break;
904 	case BIT_XOR_EXPR:
905 	case PLUS_EXPR:
906 	case MINUS_EXPR:
907 	  if (integer_zerop (oelast->op)
908 	      || (FLOAT_TYPE_P (type)
909 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
910 		  && fold_real_zero_addition_p (type, oelast->op,
911 						opcode == MINUS_EXPR)))
912 	    {
913 	      if (ops->length () != 1)
914 		{
915 		  if (dump_file && (dump_flags & TDF_DETAILS))
916 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
917 		  ops->pop ();
918 		  reassociate_stats.ops_eliminated++;
919 		  return;
920 		}
921 	    }
922 	  break;
923 	default:
924 	  break;
925 	}
926     }
927 }
928 
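/* Hypothetical examples of the identities above: an AND chain
   {a, b, 0} collapses to just {0}, an IOR chain {a, b, -1} to {-1},
   a MULT chain {a, b, 1} merely drops the 1, and a PLUS chain
   {a, b, 0} drops the 0 (for floating point only when
   fold_real_zero_addition_p allows it).  */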
929 
930 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
931 				 bool, bool);
932 
933 /* Structure for tracking and counting operands.  */
934 typedef struct oecount_s {
935   int cnt;
936   int id;
937   enum tree_code oecode;
938   tree op;
939 } oecount;
940 
941 
942 /* The heap for the oecount hashtable and the sorted list of operands.  */
943 static vec<oecount> cvec;
944 
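/* The hashtable built in undistribute_ops_list does not store oecount
   pointers directly; each entry is an index into CVEC encoded as a
   fake pointer by adding 42 (the code inserts cvec.length () + 41,
   i.e. the new element's index plus 42).  The bias keeps the encoded
   values clear of the NULL and (void *) 1 markers that libiberty's
   htab uses for empty and deleted slots; oecount_hash and oecount_eq
   below undo the encoding.  */
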
945 /* Hash function for oecount.  */
946 
947 static hashval_t
948 oecount_hash (const void *p)
949 {
950   const oecount *c = &cvec[(size_t)p - 42];
951   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
952 }
953 
954 /* Comparison function for oecount.  */
955 
956 static int
957 oecount_eq (const void *p1, const void *p2)
958 {
959   const oecount *c1 = &cvec[(size_t)p1 - 42];
960   const oecount *c2 = &cvec[(size_t)p2 - 42];
961   return (c1->oecode == c2->oecode
962 	  && c1->op == c2->op);
963 }
964 
965 /* Comparison function for qsort sorting oecount elements by count.  */
966 
967 static int
968 oecount_cmp (const void *p1, const void *p2)
969 {
970   const oecount *c1 = (const oecount *)p1;
971   const oecount *c2 = (const oecount *)p2;
972   if (c1->cnt != c2->cnt)
973     return c1->cnt - c2->cnt;
974   else
975     /* If counts are identical, use unique IDs to stabilize qsort.  */
976     return c1->id - c2->id;
977 }
978 
979 /* Return TRUE iff STMT represents a builtin call that raises OP
980    to some exponent.  */
981 
982 static bool
983 stmt_is_power_of_op (gimple stmt, tree op)
984 {
985   tree fndecl;
986 
987   if (!is_gimple_call (stmt))
988     return false;
989 
990   fndecl = gimple_call_fndecl (stmt);
991 
992   if (!fndecl
993       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
994     return false;
995 
996   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
997     {
998     CASE_FLT_FN (BUILT_IN_POW):
999     CASE_FLT_FN (BUILT_IN_POWI):
1000       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1001 
1002     default:
1003       return false;
1004     }
1005 }
1006 
1007 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1008    in place and return the new exponent.  Assumes that stmt_is_power_of_op
1009    was previously called for STMT and returned TRUE.  */
1010 
1011 static HOST_WIDE_INT
1012 decrement_power (gimple stmt)
1013 {
1014   REAL_VALUE_TYPE c, cint;
1015   HOST_WIDE_INT power;
1016   tree arg1;
1017 
1018   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1019     {
1020     CASE_FLT_FN (BUILT_IN_POW):
1021       arg1 = gimple_call_arg (stmt, 1);
1022       c = TREE_REAL_CST (arg1);
1023       power = real_to_integer (&c) - 1;
1024       real_from_integer (&cint, VOIDmode, power, 0, 0);
1025       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1026       return power;
1027 
1028     CASE_FLT_FN (BUILT_IN_POWI):
1029       arg1 = gimple_call_arg (stmt, 1);
1030       power = TREE_INT_CST_LOW (arg1) - 1;
1031       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1032       return power;
1033 
1034     default:
1035       gcc_unreachable ();
1036     }
1037 }
1038 
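/* For instance (hypothetical), for t = __builtin_powi (x, 4) the
   function above rewrites the call in place to __builtin_powi (x, 3)
   and returns 3; zero_one_operation uses the returned value to detect
   when the exponent has dropped to 1, at which point the call can be
   replaced by the base operand itself.  */
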
1039 /* Find the single immediate use of STMT's LHS, and replace it
1040    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1041    replace *DEF with OP as well.  */
1042 
1043 static void
1044 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1045 {
1046   tree lhs;
1047   gimple use_stmt;
1048   use_operand_p use;
1049   gimple_stmt_iterator gsi;
1050 
1051   if (is_gimple_call (stmt))
1052     lhs = gimple_call_lhs (stmt);
1053   else
1054     lhs = gimple_assign_lhs (stmt);
1055 
1056   gcc_assert (has_single_use (lhs));
1057   single_imm_use (lhs, &use, &use_stmt);
1058   if (lhs == *def)
1059     *def = op;
1060   SET_USE (use, op);
1061   if (TREE_CODE (op) != SSA_NAME)
1062     update_stmt (use_stmt);
1063   gsi = gsi_for_stmt (stmt);
1064   unlink_stmt_vdef (stmt);
1065   gsi_remove (&gsi, true);
1066   release_defs (stmt);
1067 }
1068 
1069 /* Walks the linear chain with result *DEF searching for an operation
1070    with operand OP and code OPCODE, removing that operation from the chain.  *DEF
1071    is updated if there is only one operand but no operation left.  */
1072 
1073 static void
1074 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1075 {
1076   gimple stmt = SSA_NAME_DEF_STMT (*def);
1077 
1078   do
1079     {
1080       tree name;
1081 
1082       if (opcode == MULT_EXPR
1083 	  && stmt_is_power_of_op (stmt, op))
1084 	{
1085 	  if (decrement_power (stmt) == 1)
1086 	    propagate_op_to_single_use (op, stmt, def);
1087 	  return;
1088 	}
1089 
1090       name = gimple_assign_rhs1 (stmt);
1091 
1092       /* If this is the operation we look for and one of the operands
1093          is ours, simply propagate the other operand into the stmt's
1094 	 single use.  */
1095       if (gimple_assign_rhs_code (stmt) == opcode
1096 	  && (name == op
1097 	      || gimple_assign_rhs2 (stmt) == op))
1098 	{
1099 	  if (name == op)
1100 	    name = gimple_assign_rhs2 (stmt);
1101 	  propagate_op_to_single_use (name, stmt, def);
1102 	  return;
1103 	}
1104 
1105       /* We might have a multiply of two __builtin_pow* calls, and
1106 	 the operand might be hiding in the rightmost one.  */
1107       if (opcode == MULT_EXPR
1108 	  && gimple_assign_rhs_code (stmt) == opcode
1109 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1110 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1111 	{
1112 	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1113 	  if (stmt_is_power_of_op (stmt2, op))
1114 	    {
1115 	      if (decrement_power (stmt2) == 1)
1116 		propagate_op_to_single_use (op, stmt2, def);
1117 	      return;
1118 	    }
1119 	}
1120 
1121       /* Continue walking the chain.  */
1122       gcc_assert (name != op
1123 		  && TREE_CODE (name) == SSA_NAME);
1124       stmt = SSA_NAME_DEF_STMT (name);
1125     }
1126   while (1);
1127 }
1128 
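/* A hypothetical walk of zero_one_operation: with *DEF defined by
   t3 = t2 * c, t2 = t1 * b and t1 = a * x, removing OP == x with
   OPCODE MULT_EXPR follows the rhs1 chain t3 -> t2 -> t1, finds x as
   an operand of t1, and propagates the remaining operand "a" into
   t1's single use, deleting t1.  */
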
1129 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1130    SSA name of type TYPE for the result.  Places the statement after
1131    the definition of either OP1 or OP2.  Returns the new statement.  */
1132 
1133 static gimple
1134 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1135 {
1136   gimple op1def = NULL, op2def = NULL;
1137   gimple_stmt_iterator gsi;
1138   tree op;
1139   gimple sum;
1140 
1141   /* Create the addition statement.  */
1142   op = make_ssa_name (type, NULL);
1143   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1144 
1145   /* Find an insertion place and insert.  */
1146   if (TREE_CODE (op1) == SSA_NAME)
1147     op1def = SSA_NAME_DEF_STMT (op1);
1148   if (TREE_CODE (op2) == SSA_NAME)
1149     op2def = SSA_NAME_DEF_STMT (op2);
1150   if ((!op1def || gimple_nop_p (op1def))
1151       && (!op2def || gimple_nop_p (op2def)))
1152     {
1153       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1154       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1155     }
1156   else if ((!op1def || gimple_nop_p (op1def))
1157 	   || (op2def && !gimple_nop_p (op2def)
1158 	       && stmt_dominates_stmt_p (op1def, op2def)))
1159     {
1160       if (gimple_code (op2def) == GIMPLE_PHI)
1161 	{
1162 	  gsi = gsi_after_labels (gimple_bb (op2def));
1163 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1164 	}
1165       else
1166 	{
1167 	  if (!stmt_ends_bb_p (op2def))
1168 	    {
1169 	      gsi = gsi_for_stmt (op2def);
1170 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1171 	    }
1172 	  else
1173 	    {
1174 	      edge e;
1175 	      edge_iterator ei;
1176 
1177 	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1178 		if (e->flags & EDGE_FALLTHRU)
1179 		  gsi_insert_on_edge_immediate (e, sum);
1180 	    }
1181 	}
1182     }
1183   else
1184     {
1185       if (gimple_code (op1def) == GIMPLE_PHI)
1186 	{
1187 	  gsi = gsi_after_labels (gimple_bb (op1def));
1188 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1189 	}
1190       else
1191 	{
1192 	  if (!stmt_ends_bb_p (op1def))
1193 	    {
1194 	      gsi = gsi_for_stmt (op1def);
1195 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1196 	    }
1197 	  else
1198 	    {
1199 	      edge e;
1200 	      edge_iterator ei;
1201 
1202 	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1203 		if (e->flags & EDGE_FALLTHRU)
1204 		  gsi_insert_on_edge_immediate (e, sum);
1205 	    }
1206 	}
1207     }
1208   update_stmt (sum);
1209 
1210   return sum;
1211 }
1212 
1213 /* Perform un-distribution of divisions and multiplications.
1214    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1215    to (A + B) / X for real X.
1216 
1217    The algorithm is organized as follows.
1218 
1219     - First we walk the addition chain *OPS looking for summands that
1220       are defined by a multiplication or a real division.  This results
1221       in the candidates bitmap with relevant indices into *OPS.
1222 
1223     - Second we build the chains of multiplications or divisions for
1224       these candidates, counting the number of occurrences of (operand, code)
1225       pairs in all of the candidates chains.
1226 
1227     - Third we sort the (operand, code) pairs by number of occurrence and
1228       process them starting with the pair with the most uses.
1229 
1230       * For each such pair we walk the candidates again to build a
1231         second candidate bitmap noting all multiplication/division chains
1232 	that have at least one occurrence of (operand, code).
1233 
1234       * We build an alternate addition chain only covering these
1235         candidates with one (operand, code) operation removed from their
1236 	multiplication/division chain.
1237 
1238       * The first candidate gets replaced by the alternate addition chain
1239         multiplied/divided by the operand.
1240 
1241       * All candidate chains get disabled for further processing and
1242         processing of (operand, code) pairs continues.
1243 
1244   The alternate addition chains built are re-processed by the main
1245   reassociation algorithm which allows optimizing a * x * y + b * y * x
1246   to (a + b) * x * y in one invocation of the reassociation pass.  */
1247 
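/* As a hypothetical illustration of the algorithm above: for the
   addition chain t1 + t2 with t1 = a * x and t2 = b * x, the pair
   (x, *) is counted twice, the common factor is removed from both
   candidates via zero_one_operation, an alternate chain a + b is
   built, and the first candidate is replaced by (a + b) * x while
   the second becomes 0.  */
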
1248 static bool
1249 undistribute_ops_list (enum tree_code opcode,
1250 		       vec<operand_entry_t> *ops, struct loop *loop)
1251 {
1252   unsigned int length = ops->length ();
1253   operand_entry_t oe1;
1254   unsigned i, j;
1255   sbitmap candidates, candidates2;
1256   unsigned nr_candidates, nr_candidates2;
1257   sbitmap_iterator sbi0;
1258   vec<operand_entry_t> *subops;
1259   htab_t ctable;
1260   bool changed = false;
1261   int next_oecount_id = 0;
1262 
1263   if (length <= 1
1264       || opcode != PLUS_EXPR)
1265     return false;
1266 
1267   /* Build a list of candidates to process.  */
1268   candidates = sbitmap_alloc (length);
1269   bitmap_clear (candidates);
1270   nr_candidates = 0;
1271   FOR_EACH_VEC_ELT (*ops, i, oe1)
1272     {
1273       enum tree_code dcode;
1274       gimple oe1def;
1275 
1276       if (TREE_CODE (oe1->op) != SSA_NAME)
1277 	continue;
1278       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1279       if (!is_gimple_assign (oe1def))
1280 	continue;
1281       dcode = gimple_assign_rhs_code (oe1def);
1282       if ((dcode != MULT_EXPR
1283 	   && dcode != RDIV_EXPR)
1284 	  || !is_reassociable_op (oe1def, dcode, loop))
1285 	continue;
1286 
1287       bitmap_set_bit (candidates, i);
1288       nr_candidates++;
1289     }
1290 
1291   if (nr_candidates < 2)
1292     {
1293       sbitmap_free (candidates);
1294       return false;
1295     }
1296 
1297   if (dump_file && (dump_flags & TDF_DETAILS))
1298     {
1299       fprintf (dump_file, "searching for un-distribute opportunities ");
1300       print_generic_expr (dump_file,
1301 	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1302       fprintf (dump_file, " %d\n", nr_candidates);
1303     }
1304 
1305   /* Build linearized sub-operand lists and the counting table.  */
1306   cvec.create (0);
1307   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1308   /* ??? Macro arguments cannot have multi-argument template types in
1309      them.  This typedef is needed to work around that limitation.  */
1310   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1311   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1312   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1313     {
1314       gimple oedef;
1315       enum tree_code oecode;
1316       unsigned j;
1317 
1318       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1319       oecode = gimple_assign_rhs_code (oedef);
1320       linearize_expr_tree (&subops[i], oedef,
1321 			   associative_tree_code (oecode), false);
1322 
1323       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1324 	{
1325 	  oecount c;
1326 	  void **slot;
1327 	  size_t idx;
1328 	  c.oecode = oecode;
1329 	  c.cnt = 1;
1330 	  c.id = next_oecount_id++;
1331 	  c.op = oe1->op;
1332 	  cvec.safe_push (c);
1333 	  idx = cvec.length () + 41;
1334 	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
1335 	  if (!*slot)
1336 	    {
1337 	      *slot = (void *)idx;
1338 	    }
1339 	  else
1340 	    {
1341 	      cvec.pop ();
1342 	      cvec[(size_t)*slot - 42].cnt++;
1343 	    }
1344 	}
1345     }
1346   htab_delete (ctable);
1347 
1348   /* Sort the counting table.  */
1349   cvec.qsort (oecount_cmp);
1350 
1351   if (dump_file && (dump_flags & TDF_DETAILS))
1352     {
1353       oecount *c;
1354       fprintf (dump_file, "Candidates:\n");
1355       FOR_EACH_VEC_ELT (cvec, j, c)
1356 	{
1357 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1358 		   c->oecode == MULT_EXPR
1359 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1360 	  print_generic_expr (dump_file, c->op, 0);
1361 	  fprintf (dump_file, "\n");
1362 	}
1363     }
1364 
1365   /* Process the (operand, code) pairs in order of most occurrence.  */
1366   candidates2 = sbitmap_alloc (length);
1367   while (!cvec.is_empty ())
1368     {
1369       oecount *c = &cvec.last ();
1370       if (c->cnt < 2)
1371 	break;
1372 
1373       /* Now collect the operands in the outer chain that contain
1374          the common operand in their inner chain.  */
1375       bitmap_clear (candidates2);
1376       nr_candidates2 = 0;
1377       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1378 	{
1379 	  gimple oedef;
1380 	  enum tree_code oecode;
1381 	  unsigned j;
1382 	  tree op = (*ops)[i]->op;
1383 
1384 	  /* If we undistributed in this chain already this may be
1385 	     a constant.  */
1386 	  if (TREE_CODE (op) != SSA_NAME)
1387 	    continue;
1388 
1389 	  oedef = SSA_NAME_DEF_STMT (op);
1390 	  oecode = gimple_assign_rhs_code (oedef);
1391 	  if (oecode != c->oecode)
1392 	    continue;
1393 
1394 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1395 	    {
1396 	      if (oe1->op == c->op)
1397 		{
1398 		  bitmap_set_bit (candidates2, i);
1399 		  ++nr_candidates2;
1400 		  break;
1401 		}
1402 	    }
1403 	}
1404 
1405       if (nr_candidates2 >= 2)
1406 	{
1407 	  operand_entry_t oe1, oe2;
1408 	  gimple prod;
1409 	  int first = bitmap_first_set_bit (candidates2);
1410 
1411 	  /* Build the new addition chain.  */
1412 	  oe1 = (*ops)[first];
1413 	  if (dump_file && (dump_flags & TDF_DETAILS))
1414 	    {
1415 	      fprintf (dump_file, "Building (");
1416 	      print_generic_expr (dump_file, oe1->op, 0);
1417 	    }
1418 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1419 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1420 	    {
1421 	      gimple sum;
1422 	      oe2 = (*ops)[i];
1423 	      if (dump_file && (dump_flags & TDF_DETAILS))
1424 		{
1425 		  fprintf (dump_file, " + ");
1426 		  print_generic_expr (dump_file, oe2->op, 0);
1427 		}
1428 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1429 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1430 				       oe1->op, oe2->op, opcode);
1431 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1432 	      oe2->rank = 0;
1433 	      oe1->op = gimple_get_lhs (sum);
1434 	    }
1435 
1436 	  /* Apply the multiplication/division.  */
1437 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1438 				    oe1->op, c->op, c->oecode);
1439 	  if (dump_file && (dump_flags & TDF_DETAILS))
1440 	    {
1441 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1442 	      print_generic_expr (dump_file, c->op, 0);
1443 	      fprintf (dump_file, "\n");
1444 	    }
1445 
1446 	  /* Record it in the addition chain and disable further
1447 	     undistribution with this op.  */
1448 	  oe1->op = gimple_assign_lhs (prod);
1449 	  oe1->rank = get_rank (oe1->op);
1450 	  subops[first].release ();
1451 
1452 	  changed = true;
1453 	}
1454 
1455       cvec.pop ();
1456     }
1457 
1458   for (i = 0; i < ops->length (); ++i)
1459     subops[i].release ();
1460   free (subops);
1461   cvec.release ();
1462   sbitmap_free (candidates);
1463   sbitmap_free (candidates2);
1464 
1465   return changed;
1466 }
1467 
1468 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1469    expression, examine the other OPS to see if any of them are comparisons
1470    of the same values, which we may be able to combine or eliminate.
1471    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1472 
1473 static bool
1474 eliminate_redundant_comparison (enum tree_code opcode,
1475 				vec<operand_entry_t> *ops,
1476 				unsigned int currindex,
1477 				operand_entry_t curr)
1478 {
1479   tree op1, op2;
1480   enum tree_code lcode, rcode;
1481   gimple def1, def2;
1482   int i;
1483   operand_entry_t oe;
1484 
1485   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1486     return false;
1487 
1488   /* Check that CURR is a comparison.  */
1489   if (TREE_CODE (curr->op) != SSA_NAME)
1490     return false;
1491   def1 = SSA_NAME_DEF_STMT (curr->op);
1492   if (!is_gimple_assign (def1))
1493     return false;
1494   lcode = gimple_assign_rhs_code (def1);
1495   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1496     return false;
1497   op1 = gimple_assign_rhs1 (def1);
1498   op2 = gimple_assign_rhs2 (def1);
1499 
1500   /* Now look for a similar comparison in the remaining OPS.  */
1501   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1502     {
1503       tree t;
1504 
1505       if (TREE_CODE (oe->op) != SSA_NAME)
1506 	continue;
1507       def2 = SSA_NAME_DEF_STMT (oe->op);
1508       if (!is_gimple_assign (def2))
1509 	continue;
1510       rcode = gimple_assign_rhs_code (def2);
1511       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1512 	continue;
1513 
1514       /* If we got here, we have a match.  See if we can combine the
1515 	 two comparisons.  */
1516       if (opcode == BIT_IOR_EXPR)
1517 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1518 				       rcode, gimple_assign_rhs1 (def2),
1519 				       gimple_assign_rhs2 (def2));
1520       else
1521 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1522 					rcode, gimple_assign_rhs1 (def2),
1523 					gimple_assign_rhs2 (def2));
1524       if (!t)
1525 	continue;
1526 
1527       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1528 	 always give us a boolean_type_node value back.  If the original
1529 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1530 	 we need to convert.  */
1531       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1532 	t = fold_convert (TREE_TYPE (curr->op), t);
1533 
1534       if (TREE_CODE (t) != INTEGER_CST
1535 	  && !operand_equal_p (t, curr->op, 0))
1536 	{
1537 	  enum tree_code subcode;
1538 	  tree newop1, newop2;
1539 	  if (!COMPARISON_CLASS_P (t))
1540 	    continue;
1541 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1542 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1543 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1544 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1545 	    continue;
1546 	}
1547 
1548       if (dump_file && (dump_flags & TDF_DETAILS))
1549 	{
1550 	  fprintf (dump_file, "Equivalence: ");
1551 	  print_generic_expr (dump_file, curr->op, 0);
1552 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1553 	  print_generic_expr (dump_file, oe->op, 0);
1554 	  fprintf (dump_file, " -> ");
1555 	  print_generic_expr (dump_file, t, 0);
1556 	  fprintf (dump_file, "\n");
1557 	}
1558 
1559       /* Now we can delete oe, as it has been subsumed by the new combined
1560          expression t.  */
1561       ops->ordered_remove (i);
1562       reassociate_stats.ops_eliminated ++;
1563 
1564       /* If t is the same as curr->op, we're done.  Otherwise we must
1565 	 replace curr->op with t.  Special case is if we got a constant
1566 	 back, in which case we add it to the end instead of in place of
1567 	 the current entry.  */
1568       if (TREE_CODE (t) == INTEGER_CST)
1569 	{
1570 	  ops->ordered_remove (currindex);
1571 	  add_to_ops_vec (ops, t);
1572 	}
1573       else if (!operand_equal_p (t, curr->op, 0))
1574 	{
1575 	  gimple sum;
1576 	  enum tree_code subcode;
1577 	  tree newop1;
1578 	  tree newop2;
1579 	  gcc_assert (COMPARISON_CLASS_P (t));
1580 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1581 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1582 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1583 	  gcc_checking_assert (is_gimple_val (newop1)
1584 			       && is_gimple_val (newop2));
1585 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1586 	  curr->op = gimple_get_lhs (sum);
1587 	}
1588       return true;
1589     }
1590 
1591   return false;
1592 }
1593 
1594 /* Perform various identities and other optimizations on the list of
1595    operand entries, stored in OPS.  The tree code for the binary
1596    operation between all the operands is OPCODE.  */
1597 
1598 static void
1599 optimize_ops_list (enum tree_code opcode,
1600 		   vec<operand_entry_t> *ops)
1601 {
1602   unsigned int length = ops->length ();
1603   unsigned int i;
1604   operand_entry_t oe;
1605   operand_entry_t oelast = NULL;
1606   bool iterate = false;
1607 
1608   if (length == 1)
1609     return;
1610 
1611   oelast = ops->last ();
1612 
1613   /* If the last two are constants, pop the constants off, merge them
1614      and try the next two.  */
1615   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1616     {
1617       operand_entry_t oelm1 = (*ops)[length - 2];
1618 
1619       if (oelm1->rank == 0
1620 	  && is_gimple_min_invariant (oelm1->op)
1621 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1622 				       TREE_TYPE (oelast->op)))
1623 	{
1624 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1625 				     oelm1->op, oelast->op);
1626 
1627 	  if (folded && is_gimple_min_invariant (folded))
1628 	    {
1629 	      if (dump_file && (dump_flags & TDF_DETAILS))
1630 		fprintf (dump_file, "Merging constants\n");
1631 
1632 	      ops->pop ();
1633 	      ops->pop ();
1634 
1635 	      add_to_ops_vec (ops, folded);
1636 	      reassociate_stats.constants_eliminated++;
1637 
1638 	      optimize_ops_list (opcode, ops);
1639 	      return;
1640 	    }
1641 	}
1642     }
1643 
1644   eliminate_using_constants (opcode, ops);
1645   oelast = NULL;
1646 
1647   for (i = 0; ops->iterate (i, &oe);)
1648     {
1649       bool done = false;
1650 
1651       if (eliminate_not_pairs (opcode, ops, i, oe))
1652 	return;
1653       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1654 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1655 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1656 	{
1657 	  if (done)
1658 	    return;
1659 	  iterate = true;
1660 	  oelast = NULL;
1661 	  continue;
1662 	}
1663       oelast = oe;
1664       i++;
1665     }
1666 
1667   length = ops->length ();
1668   oelast = ops->last ();
1669 
1670   if (iterate)
1671     optimize_ops_list (opcode, ops);
1672 }
1673 
1674 /* The following functions are subroutines to optimize_range_tests and allow
1675    it to try to change a logical combination of comparisons into a range
1676    test.
1677 
1678    For example, both
1679 	X == 2 || X == 5 || X == 3 || X == 4
1680    and
1681 	X >= 2 && X <= 5
1682    are converted to
1683 	(unsigned) (X - 2) <= 3
1684 
1685    For more information see comments above fold_range_test in fold-const.c;
1686    this implementation is for GIMPLE.  */
1687 
1688 struct range_entry
1689 {
1690   tree exp;
1691   tree low;
1692   tree high;
1693   bool in_p;
1694   bool strict_overflow_p;
1695   unsigned int idx, next;
1696 };
1697 
1698 /* This is similar to make_range in fold-const.c, but on top of
1699    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1700    an SSA_NAME and the STMT argument is ignored; otherwise STMT
1701    should be a GIMPLE_COND.  */
1702 
1703 static void
1704 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1705 {
1706   int in_p;
1707   tree low, high;
1708   bool is_bool, strict_overflow_p;
1709 
1710   r->exp = NULL_TREE;
1711   r->in_p = false;
1712   r->strict_overflow_p = false;
1713   r->low = NULL_TREE;
1714   r->high = NULL_TREE;
1715   if (exp != NULL_TREE
1716       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1717     return;
1718 
1719   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1720      and see if we can refine the range.  Some of the cases below may not
1721      happen, but it doesn't seem worth worrying about this.  We "continue"
1722      the outer loop when we've changed something; otherwise we "break"
1723      the switch, which will "break" the while.  */
1724   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1725   high = low;
1726   in_p = 0;
1727   strict_overflow_p = false;
1728   is_bool = false;
1729   if (exp == NULL_TREE)
1730     is_bool = true;
1731   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1732     {
1733       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1734 	is_bool = true;
1735       else
1736 	return;
1737     }
1738   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1739     is_bool = true;
1740 
1741   while (1)
1742     {
1743       enum tree_code code;
1744       tree arg0, arg1, exp_type;
1745       tree nexp;
1746       location_t loc;
1747 
1748       if (exp != NULL_TREE)
1749 	{
1750 	  if (TREE_CODE (exp) != SSA_NAME)
1751 	    break;
1752 
1753 	  stmt = SSA_NAME_DEF_STMT (exp);
1754 	  if (!is_gimple_assign (stmt))
1755 	    break;
1756 
1757 	  code = gimple_assign_rhs_code (stmt);
1758 	  arg0 = gimple_assign_rhs1 (stmt);
1759 	  arg1 = gimple_assign_rhs2 (stmt);
1760 	  exp_type = TREE_TYPE (exp);
1761 	}
1762       else
1763 	{
1764 	  code = gimple_cond_code (stmt);
1765 	  arg0 = gimple_cond_lhs (stmt);
1766 	  arg1 = gimple_cond_rhs (stmt);
1767 	  exp_type = boolean_type_node;
1768 	}
1769 
1770       if (TREE_CODE (arg0) != SSA_NAME)
1771 	break;
1772       loc = gimple_location (stmt);
1773       switch (code)
1774 	{
1775 	case BIT_NOT_EXPR:
1776 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1777 	      /* Ensure the range is either +[-,0], +[0,0],
1778 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1779 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
1780 		 or similar expression of unconditional true or
1781 		 false, it should not be negated.  */
1782 	      && ((high && integer_zerop (high))
1783 		  || (low && integer_onep (low))))
1784 	    {
1785 	      in_p = !in_p;
1786 	      exp = arg0;
1787 	      continue;
1788 	    }
1789 	  break;
1790 	case SSA_NAME:
1791 	  exp = arg0;
1792 	  continue;
1793 	CASE_CONVERT:
1794 	  if (is_bool)
1795 	    goto do_default;
1796 	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1797 	    {
1798 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1799 		is_bool = true;
1800 	      else
1801 		return;
1802 	    }
1803 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1804 	    is_bool = true;
1805 	  goto do_default;
1806 	case EQ_EXPR:
1807 	case NE_EXPR:
1808 	case LT_EXPR:
1809 	case LE_EXPR:
1810 	case GE_EXPR:
1811 	case GT_EXPR:
1812 	  is_bool = true;
1813 	  /* FALLTHRU */
1814 	default:
1815 	  if (!is_bool)
1816 	    return;
1817 	do_default:
1818 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1819 				  &low, &high, &in_p,
1820 				  &strict_overflow_p);
1821 	  if (nexp != NULL_TREE)
1822 	    {
1823 	      exp = nexp;
1824 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
1825 	      continue;
1826 	    }
1827 	  break;
1828 	}
1829       break;
1830     }
1831   if (is_bool)
1832     {
1833       r->exp = exp;
1834       r->in_p = in_p;
1835       r->low = low;
1836       r->high = high;
1837       r->strict_overflow_p = strict_overflow_p;
1838     }
1839 }
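
/* Illustrative example (SSA names invented): given

     _5 = x_3(D) <= 9;
     _6 = (int) _5;

   calling init_range_entry on _6 walks the def chain through the
   CASE_CONVERT and LE_EXPR cases above, so the resulting entry describes
   a range of x_3(D) rather than of _6 itself (roughly exp = x_3(D),
   in_p = true, low = NULL meaning minimum, high = 9).  */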
1840 
1841 /* Comparison function for qsort.  Sort entries
1842    without SSA_NAME exp first, then with SSA_NAMEs sorted
1843    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1844    by increasing ->low and if ->low is the same, by increasing
1845    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1846    maximum.  */
1847 
1848 static int
1849 range_entry_cmp (const void *a, const void *b)
1850 {
1851   const struct range_entry *p = (const struct range_entry *) a;
1852   const struct range_entry *q = (const struct range_entry *) b;
1853 
1854   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1855     {
1856       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1857 	{
1858 	  /* Group range_entries for the same SSA_NAME together.  */
1859 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1860 	    return -1;
1861 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1862 	    return 1;
1863 	  /* If ->low is different, NULL low goes first, then by
1864 	     ascending low.  */
1865 	  if (p->low != NULL_TREE)
1866 	    {
1867 	      if (q->low != NULL_TREE)
1868 		{
1869 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1870 					  p->low, q->low);
1871 		  if (tem && integer_onep (tem))
1872 		    return -1;
1873 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1874 				     p->low, q->low);
1875 		  if (tem && integer_onep (tem))
1876 		    return 1;
1877 		}
1878 	      else
1879 		return 1;
1880 	    }
1881 	  else if (q->low != NULL_TREE)
1882 	    return -1;
1883 	  /* If ->high is different, NULL high goes last, before that by
1884 	     ascending high.  */
1885 	  if (p->high != NULL_TREE)
1886 	    {
1887 	      if (q->high != NULL_TREE)
1888 		{
1889 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1890 					  p->high, q->high);
1891 		  if (tem && integer_onep (tem))
1892 		    return -1;
1893 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1894 				     p->high, q->high);
1895 		  if (tem && integer_onep (tem))
1896 		    return 1;
1897 		}
1898 	      else
1899 		return -1;
1900 	    }
1901 	  else if (q->high != NULL_TREE)
1902 	    return 1;
1903 	  /* If both ranges are the same, sort below by ascending idx.  */
1904 	}
1905       else
1906 	return 1;
1907     }
1908   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1909     return -1;
1910 
1911   if (p->idx < q->idx)
1912     return -1;
1913   else
1914     {
1915       gcc_checking_assert (p->idx > q->idx);
1916       return 1;
1917     }
1918 }
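
/* For instance (illustrative), four entries for the same SSA_NAME x_1
   with ranges [2,2], [5,5], [3,3] and [4,4] sort as
     [2,2] [3,3] [4,4] [5,5]
   so the merging loop in optimize_range_tests below only needs a single
   linear scan over neighbouring entries.  */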
1919 
1920 /* Helper routine of optimize_range_tests.
1921    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1922    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1923    OPCODE and OPS are arguments of optimize_range_tests.  Return
1924    true if the range merge has been successful.
1925    If OPCODE is ERROR_MARK, this is called from within
1926    maybe_optimize_range_tests and is performing inter-bb range optimization.
1927    Changes should then be performed right away, and whether an op is
1928    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1929 
1930 static bool
1931 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1932 		   unsigned int count, enum tree_code opcode,
1933 		   vec<operand_entry_t> *ops, tree exp, bool in_p,
1934 		   tree low, tree high, bool strict_overflow_p)
1935 {
1936   operand_entry_t oe = (*ops)[range->idx];
1937   tree op = oe->op;
1938   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1939   location_t loc = gimple_location (stmt);
1940   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1941   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1942   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1943   gimple_stmt_iterator gsi;
1944 
1945   if (tem == NULL_TREE)
1946     return false;
1947 
1948   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1949     warning_at (loc, OPT_Wstrict_overflow,
1950 		"assuming signed overflow does not occur "
1951 		"when simplifying range test");
1952 
1953   if (dump_file && (dump_flags & TDF_DETAILS))
1954     {
1955       struct range_entry *r;
1956       fprintf (dump_file, "Optimizing range tests ");
1957       print_generic_expr (dump_file, range->exp, 0);
1958       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1959       print_generic_expr (dump_file, range->low, 0);
1960       fprintf (dump_file, ", ");
1961       print_generic_expr (dump_file, range->high, 0);
1962       fprintf (dump_file, "]");
1963       for (r = otherrange; r < otherrange + count; r++)
1964 	{
1965 	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1966 	  print_generic_expr (dump_file, r->low, 0);
1967 	  fprintf (dump_file, ", ");
1968 	  print_generic_expr (dump_file, r->high, 0);
1969 	  fprintf (dump_file, "]");
1970 	}
1971       fprintf (dump_file, "\n into ");
1972       print_generic_expr (dump_file, tem, 0);
1973       fprintf (dump_file, "\n");
1974     }
1975 
1976   if (opcode == BIT_IOR_EXPR
1977       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1978     tem = invert_truthvalue_loc (loc, tem);
1979 
1980   tem = fold_convert_loc (loc, optype, tem);
1981   gsi = gsi_for_stmt (stmt);
1982   /* In rare cases range->exp can be equal to lhs of stmt.
1983      In that case we have to insert after the stmt rather than before
1984      it.  */
1985   if (op == range->exp)
1986     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
1987 				    GSI_SAME_STMT);
1988   else
1989     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1990 				    GSI_SAME_STMT);
1991 
1992   /* If doing inter-bb range test optimization, update the
1993      stmts immediately.  Start with changing the first range test
1994      immediate use to the new value (TEM), or, if the first range
1995      test is a GIMPLE_COND stmt, change that condition.  */
1996   if (opcode == ERROR_MARK)
1997     {
1998       if (op)
1999 	{
2000 	  imm_use_iterator iter;
2001 	  use_operand_p use_p;
2002 	  gimple use_stmt;
2003 
2004 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2005 	    {
2006 	      if (is_gimple_debug (use_stmt))
2007 		continue;
2008 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2009 		SET_USE (use_p, tem);
2010 	      update_stmt (use_stmt);
2011 	    }
2012 	}
2013       else
2014 	{
2015 	  gimple_cond_set_code (stmt, NE_EXPR);
2016 	  gimple_cond_set_lhs (stmt, tem);
2017 	  gimple_cond_set_rhs (stmt, boolean_false_node);
2018 	  update_stmt (stmt);
2019 	}
2020     }
2021   oe->op = tem;
2022   range->exp = exp;
2023   range->low = low;
2024   range->high = high;
2025   range->in_p = in_p;
2026   range->strict_overflow_p = false;
2027 
2028   for (range = otherrange; range < otherrange + count; range++)
2029     {
2030       oe = (*ops)[range->idx];
2031       /* Now change all the other range test immediate uses, so that
2032 	 those tests will be optimized away.  */
2033       if (opcode == ERROR_MARK)
2034 	{
2035 	  if (oe->op)
2036 	    {
2037 	      imm_use_iterator iter;
2038 	      use_operand_p use_p;
2039 	      gimple use_stmt;
2040 
2041 	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2042 		{
2043 		  if (is_gimple_debug (use_stmt))
2044 		    continue;
2045 		  /* If imm use of _8 is a statement like _7 = _8 | _9;,
2046 		     adjust it into _7 = _9;.  */
2047 		  if (is_gimple_assign (use_stmt)
2048 		      && gimple_assign_rhs_code (use_stmt) == oe->rank)
2049 		    {
2050 		      tree expr = NULL_TREE;
2051 		      if (oe->op == gimple_assign_rhs1 (use_stmt))
2052 			expr = gimple_assign_rhs2 (use_stmt);
2053 		      else if (oe->op == gimple_assign_rhs2 (use_stmt))
2054 			expr = gimple_assign_rhs1 (use_stmt);
2055 		      if (expr
2056 			  && expr != oe->op
2057 			  && TREE_CODE (expr) == SSA_NAME)
2058 			{
2059 			  gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2060 			  gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2061 							  expr, NULL_TREE);
2062 			  update_stmt (use_stmt);
2063 			  continue;
2064 			}
2065 		    }
2066 		  /* If imm use of _8 is a statement like _7 = (int) _8;,
2067 		     adjust it into _7 = 0; or _7 = 1;.  */
2068 		  if (gimple_assign_cast_p (use_stmt)
2069 		      && oe->op == gimple_assign_rhs1 (use_stmt))
2070 		    {
2071 		      tree lhs = gimple_assign_lhs (use_stmt);
2072 		      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2073 			{
2074 			  gimple_stmt_iterator gsi2
2075 			    = gsi_for_stmt (use_stmt);
2076 			  tree expr = build_int_cst (TREE_TYPE (lhs),
2077 						     oe->rank == BIT_IOR_EXPR
2078 						     ? 0 : 1);
2079 			  gimple_assign_set_rhs_with_ops (&gsi2,
2080 							  INTEGER_CST,
2081 							  expr, NULL_TREE);
2082 			  update_stmt (use_stmt);
2083 			  continue;
2084 			}
2085 		    }
2086 		  /* Otherwise replace the use with 0 or 1.  */
2087 		  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2088 		    SET_USE (use_p,
2089 			     build_int_cst (TREE_TYPE (oe->op),
2090 					    oe->rank == BIT_IOR_EXPR
2091 					    ? 0 : 1));
2092 		  update_stmt (use_stmt);
2093 		}
2094 	    }
2095 	  else
2096 	    {
2097 	      /* If range test was a GIMPLE_COND, simply change it
2098 		 into an always false or always true condition.  */
2099 	      stmt = last_stmt (BASIC_BLOCK (oe->id));
2100 	      if (oe->rank == BIT_IOR_EXPR)
2101 		gimple_cond_make_false (stmt);
2102 	      else
2103 		gimple_cond_make_true (stmt);
2104 	      update_stmt (stmt);
2105 	    }
2106 	}
2107       oe->op = error_mark_node;
2108       range->exp = NULL_TREE;
2109     }
2110   return true;
2111 }
2112 
2113 /* Optimize range tests, similarly to how fold_range_test optimizes
2114    them on trees.  The tree code for the binary
2115    operation between all the operands is OPCODE.
2116    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2117    maybe_optimize_range_tests for inter-bb range optimization.
2118    In that case, if oe->op is NULL, oe->id is the index of the bb whose
2119    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2120    the actual opcode.  */
2121 
2122 static void
2123 optimize_range_tests (enum tree_code opcode,
2124 		      vec<operand_entry_t> *ops)
2125 {
2126   unsigned int length = ops->length (), i, j, first;
2127   operand_entry_t oe;
2128   struct range_entry *ranges;
2129   bool any_changes = false;
2130 
2131   if (length == 1)
2132     return;
2133 
2134   ranges = XNEWVEC (struct range_entry, length);
2135   for (i = 0; i < length; i++)
2136     {
2137       oe = (*ops)[i];
2138       ranges[i].idx = i;
2139       init_range_entry (ranges + i, oe->op,
2140 			oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2141       /* For |, invert it now; we will invert it again before emitting
2142 	 the optimized expression.  */
2143       if (opcode == BIT_IOR_EXPR
2144 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2145 	ranges[i].in_p = !ranges[i].in_p;
2146     }
2147 
2148   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2149   for (i = 0; i < length; i++)
2150     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2151       break;
2152 
2153   /* Try to merge ranges.  */
2154   for (first = i; i < length; i++)
2155     {
2156       tree low = ranges[i].low;
2157       tree high = ranges[i].high;
2158       int in_p = ranges[i].in_p;
2159       bool strict_overflow_p = ranges[i].strict_overflow_p;
2160       int update_fail_count = 0;
2161 
2162       for (j = i + 1; j < length; j++)
2163 	{
2164 	  if (ranges[i].exp != ranges[j].exp)
2165 	    break;
2166 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2167 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
2168 	    break;
2169 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2170 	}
2171 
2172       if (j == i + 1)
2173 	continue;
2174 
2175       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2176 			     ops, ranges[i].exp, in_p, low, high,
2177 			     strict_overflow_p))
2178 	{
2179 	  i = j - 1;
2180 	  any_changes = true;
2181 	}
2182       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2183 	 while update_range_test would fail.  */
2184       else if (update_fail_count == 64)
2185 	i = j - 1;
2186       else
2187 	++update_fail_count;
2188     }
2189 
2190   /* Optimize X == CST1 || X == CST2
2191      if popcount (CST1 ^ CST2) == 1 into
2192      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2193      Similarly for ranges.  E.g.
2194      X != 2 && X != 3 && X != 10 && X != 11
2195      will be transformed by the above loop into
2196      (X - 2U) <= 1U && (X - 10U) <= 1U
2197      and this loop can transform that into
2198      ((X & ~8) - 2U) <= 1U.  */
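  /* Worked check of the example above: the two low bounds are 2 and 10,
     2 ^ 10 == 8 has a single bit set, and X & ~8 maps 10 and 11 onto
     2 and 3, so one unsigned comparison covers all four original
     values.  */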
2199   for (i = first; i < length; i++)
2200     {
2201       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2202 
2203       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2204 	continue;
2205       type = TREE_TYPE (ranges[i].exp);
2206       if (!INTEGRAL_TYPE_P (type))
2207 	continue;
2208       lowi = ranges[i].low;
2209       if (lowi == NULL_TREE)
2210 	lowi = TYPE_MIN_VALUE (type);
2211       highi = ranges[i].high;
2212       if (highi == NULL_TREE)
2213 	continue;
2214       for (j = i + 1; j < length && j < i + 64; j++)
2215 	{
2216 	  if (ranges[j].exp == NULL_TREE)
2217 	    continue;
2218 	  if (ranges[i].exp != ranges[j].exp)
2219 	    break;
2220 	  if (ranges[j].in_p)
2221 	    continue;
2222 	  lowj = ranges[j].low;
2223 	  if (lowj == NULL_TREE)
2224 	    continue;
2225 	  highj = ranges[j].high;
2226 	  if (highj == NULL_TREE)
2227 	    highj = TYPE_MAX_VALUE (type);
2228 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2229 			     lowj, highi);
2230 	  if (tem == NULL_TREE || !integer_onep (tem))
2231 	    continue;
2232 	  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2233 	  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2234 	    continue;
2235 	  gcc_checking_assert (!integer_zerop (lowxor));
2236 	  tem = fold_binary (MINUS_EXPR, type, lowxor,
2237 			     build_int_cst (type, 1));
2238 	  if (tem == NULL_TREE)
2239 	    continue;
2240 	  tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2241 	  if (tem == NULL_TREE || !integer_zerop (tem))
2242 	    continue;
2243 	  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2244 	  if (!tree_int_cst_equal (lowxor, highxor))
2245 	    continue;
2246 	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2247 	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2248 	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2249 	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2250 	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2251 				 ranges[i].in_p, lowj, highj,
2252 				 ranges[i].strict_overflow_p
2253 				 || ranges[j].strict_overflow_p))
2254 	    {
2255 	      any_changes = true;
2256 	      break;
2257 	    }
2258 	}
2259     }
2260 
2261   if (any_changes && opcode != ERROR_MARK)
2262     {
2263       j = 0;
2264       FOR_EACH_VEC_ELT (*ops, i, oe)
2265 	{
2266 	  if (oe->op == error_mark_node)
2267 	    continue;
2268 	  else if (i != j)
2269 	    (*ops)[j] = oe;
2270 	  j++;
2271 	}
2272       ops->truncate (j);
2273     }
2274 
2275   XDELETEVEC (ranges);
2276 }
2277 
2278 /* Return true if STMT is a cast like:
2279    <bb N>:
2280    ...
2281    _123 = (int) _234;
2282 
2283    <bb M>:
2284    # _345 = PHI <_123(N), 1(...), 1(...)>
2285    where _234 has bool type, _123 has single use and
2286    bb N has a single successor M.  This is commonly used in
2287    the last block of a range test.  */
2288 
2289 static bool
2290 final_range_test_p (gimple stmt)
2291 {
2292   basic_block bb, rhs_bb;
2293   edge e;
2294   tree lhs, rhs;
2295   use_operand_p use_p;
2296   gimple use_stmt;
2297 
2298   if (!gimple_assign_cast_p (stmt))
2299     return false;
2300   bb = gimple_bb (stmt);
2301   if (!single_succ_p (bb))
2302     return false;
2303   e = single_succ_edge (bb);
2304   if (e->flags & EDGE_COMPLEX)
2305     return false;
2306 
2307   lhs = gimple_assign_lhs (stmt);
2308   rhs = gimple_assign_rhs1 (stmt);
2309   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2310       || TREE_CODE (rhs) != SSA_NAME
2311       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2312     return false;
2313 
2314   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2315   if (!single_imm_use (lhs, &use_p, &use_stmt))
2316     return false;
2317 
2318   if (gimple_code (use_stmt) != GIMPLE_PHI
2319       || gimple_bb (use_stmt) != e->dest)
2320     return false;
2321 
2322   /* And that the rhs is defined in the same loop.  */
2323   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2324   if (rhs_bb == NULL
2325       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2326     return false;
2327 
2328   return true;
2329 }
2330 
2331 /* Return true if BB is a suitable basic block for inter-bb range test
2332    optimization.  If BACKWARD is true, BB should be the only predecessor
2333    of TEST_BB, and *OTHER_BB is either NULL (and then filled in by the
2334    routine), or is compared against to find a common basic block to which
2335    all conditions branch if true resp. false.  If BACKWARD is false,
2336    TEST_BB should be the only predecessor of BB.  */
2337 
2338 static bool
2339 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2340 		  bool backward)
2341 {
2342   edge_iterator ei, ei2;
2343   edge e, e2;
2344   gimple stmt;
2345   gimple_stmt_iterator gsi;
2346   bool other_edge_seen = false;
2347   bool is_cond;
2348 
2349   if (test_bb == bb)
2350     return false;
2351   /* Check last stmt first.  */
2352   stmt = last_stmt (bb);
2353   if (stmt == NULL
2354       || (gimple_code (stmt) != GIMPLE_COND
2355 	  && (backward || !final_range_test_p (stmt)))
2356       || gimple_visited_p (stmt)
2357       || stmt_could_throw_p (stmt)
2358       || *other_bb == bb)
2359     return false;
2360   is_cond = gimple_code (stmt) == GIMPLE_COND;
2361   if (is_cond)
2362     {
2363       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2364 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2365 	 to *OTHER_BB (if not set yet, try to find it out).  */
2366       if (EDGE_COUNT (bb->succs) != 2)
2367 	return false;
2368       FOR_EACH_EDGE (e, ei, bb->succs)
2369 	{
2370 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2371 	    return false;
2372 	  if (e->dest == test_bb)
2373 	    {
2374 	      if (backward)
2375 		continue;
2376 	      else
2377 		return false;
2378 	    }
2379 	  if (e->dest == bb)
2380 	    return false;
2381 	  if (*other_bb == NULL)
2382 	    {
2383 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2384 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2385 		  return false;
2386 		else if (e->dest == e2->dest)
2387 		  *other_bb = e->dest;
2388 	      if (*other_bb == NULL)
2389 		return false;
2390 	    }
2391 	  if (e->dest == *other_bb)
2392 	    other_edge_seen = true;
2393 	  else if (backward)
2394 	    return false;
2395 	}
2396       if (*other_bb == NULL || !other_edge_seen)
2397 	return false;
2398     }
2399   else if (single_succ (bb) != *other_bb)
2400     return false;
2401 
2402   /* Now check all PHIs of *OTHER_BB.  */
2403   e = find_edge (bb, *other_bb);
2404   e2 = find_edge (test_bb, *other_bb);
2405   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2406     {
2407       gimple phi = gsi_stmt (gsi);
2408       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2409 	 corresponding to BB and TEST_BB predecessor must be the same.  */
2410       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2411 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
2412 	{
2413 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2414 	     one of the PHIs should have the lhs of the last stmt in
2415 	     that block as PHI arg and that PHI should have 0 or 1
2416 	     corresponding to it in all other range test basic blocks
2417 	     considered.  */
2418 	  if (!is_cond)
2419 	    {
2420 	      if (gimple_phi_arg_def (phi, e->dest_idx)
2421 		  == gimple_assign_lhs (stmt)
2422 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2423 		      || integer_onep (gimple_phi_arg_def (phi,
2424 							   e2->dest_idx))))
2425 		continue;
2426 	    }
2427 	  else
2428 	    {
2429 	      gimple test_last = last_stmt (test_bb);
2430 	      if (gimple_code (test_last) != GIMPLE_COND
2431 		  && gimple_phi_arg_def (phi, e2->dest_idx)
2432 		     == gimple_assign_lhs (test_last)
2433 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2434 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2435 		continue;
2436 	    }
2437 
2438 	  return false;
2439 	}
2440     }
2441   return true;
2442 }
2443 
2444 /* Return true if BB doesn't have side-effects that would disallow
2445    range test optimization, all SSA_NAMEs set in the bb are consumed
2446    in the bb and there are no PHIs.  */
2447 
2448 static bool
2449 no_side_effect_bb (basic_block bb)
2450 {
2451   gimple_stmt_iterator gsi;
2452   gimple last;
2453 
2454   if (!gimple_seq_empty_p (phi_nodes (bb)))
2455     return false;
2456   last = last_stmt (bb);
2457   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2458     {
2459       gimple stmt = gsi_stmt (gsi);
2460       tree lhs;
2461       imm_use_iterator imm_iter;
2462       use_operand_p use_p;
2463 
2464       if (is_gimple_debug (stmt))
2465 	continue;
2466       if (gimple_has_side_effects (stmt))
2467 	return false;
2468       if (stmt == last)
2469 	return true;
2470       if (!is_gimple_assign (stmt))
2471 	return false;
2472       lhs = gimple_assign_lhs (stmt);
2473       if (TREE_CODE (lhs) != SSA_NAME)
2474 	return false;
2475       if (gimple_assign_rhs_could_trap_p (stmt))
2476 	return false;
2477       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2478 	{
2479 	  gimple use_stmt = USE_STMT (use_p);
2480 	  if (is_gimple_debug (use_stmt))
2481 	    continue;
2482 	  if (gimple_bb (use_stmt) != bb)
2483 	    return false;
2484 	}
2485     }
2486   return false;
2487 }
2488 
2489 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2490    return true and fill in *OPS recursively.  */
2491 
2492 static bool
2493 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2494 	 struct loop *loop)
2495 {
2496   gimple stmt = SSA_NAME_DEF_STMT (var);
2497   tree rhs[2];
2498   int i;
2499 
2500   if (!is_reassociable_op (stmt, code, loop))
2501     return false;
2502 
2503   rhs[0] = gimple_assign_rhs1 (stmt);
2504   rhs[1] = gimple_assign_rhs2 (stmt);
2505   gimple_set_visited (stmt, true);
2506   for (i = 0; i < 2; i++)
2507     if (TREE_CODE (rhs[i]) == SSA_NAME
2508 	&& !get_ops (rhs[i], code, ops, loop)
2509 	&& has_single_use (rhs[i]))
2510       {
2511 	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2512 
2513 	oe->op = rhs[i];
2514 	oe->rank = code;
2515 	oe->id = 0;
2516 	oe->count = 1;
2517 	ops->safe_push (oe);
2518       }
2519   return true;
2520 }
2521 
2522 /* Inter-bb range test optimization.  */
2523 
2524 static void
2525 maybe_optimize_range_tests (gimple stmt)
2526 {
2527   basic_block first_bb = gimple_bb (stmt);
2528   basic_block last_bb = first_bb;
2529   basic_block other_bb = NULL;
2530   basic_block bb;
2531   edge_iterator ei;
2532   edge e;
2533   vec<operand_entry_t> ops = vNULL;
2534 
2535   /* Consider only basic blocks that end with GIMPLE_COND or
2536      a cast statement satisfying final_range_test_p.  All
2537      but the last bb in the first_bb .. last_bb range
2538      should end with GIMPLE_COND.  */
2539   if (gimple_code (stmt) == GIMPLE_COND)
2540     {
2541       if (EDGE_COUNT (first_bb->succs) != 2)
2542 	return;
2543     }
2544   else if (final_range_test_p (stmt))
2545     other_bb = single_succ (first_bb);
2546   else
2547     return;
2548 
2549   if (stmt_could_throw_p (stmt))
2550     return;
2551 
2552   /* As relative ordering of post-dominator sons isn't fixed,
2553      maybe_optimize_range_tests can be called first on any
2554      bb in the range we want to optimize.  So, start searching
2555      backwards to see whether first_bb can be set to a predecessor.  */
2556   while (single_pred_p (first_bb))
2557     {
2558       basic_block pred_bb = single_pred (first_bb);
2559       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2560 	break;
2561       if (!no_side_effect_bb (first_bb))
2562 	break;
2563       first_bb = pred_bb;
2564     }
2565   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2566      Before starting forward search in last_bb successors, find
2567      out the other_bb.  */
2568   if (first_bb == last_bb)
2569     {
2570       other_bb = NULL;
2571       /* A non-GIMPLE_COND last stmt always terminates the range; since
2572 	 the search so far didn't discover anything, just give up.  */
2573       if (gimple_code (stmt) != GIMPLE_COND)
2574 	return;
2575       /* Look at both successors.  Either the successor ends with a
2576 	 GIMPLE_COND and satisfies suitable_cond_bb, or it ends with a cast
2577 	 and other_bb is that cast's successor.  */
2578       FOR_EACH_EDGE (e, ei, first_bb->succs)
2579 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2580 	    || e->dest == first_bb)
2581 	  return;
2582 	else if (single_pred_p (e->dest))
2583 	  {
2584 	    stmt = last_stmt (e->dest);
2585 	    if (stmt
2586 		&& gimple_code (stmt) == GIMPLE_COND
2587 		&& EDGE_COUNT (e->dest->succs) == 2)
2588 	      {
2589 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2590 		  break;
2591 		else
2592 		  other_bb = NULL;
2593 	      }
2594 	    else if (stmt
2595 		     && final_range_test_p (stmt)
2596 		     && find_edge (first_bb, single_succ (e->dest)))
2597 	      {
2598 		other_bb = single_succ (e->dest);
2599 		if (other_bb == first_bb)
2600 		  other_bb = NULL;
2601 	      }
2602 	  }
2603       if (other_bb == NULL)
2604 	return;
2605     }
2606   /* Now do the forward search, moving last_bb to successor bbs
2607      that aren't other_bb.  */
2608   while (EDGE_COUNT (last_bb->succs) == 2)
2609     {
2610       FOR_EACH_EDGE (e, ei, last_bb->succs)
2611 	if (e->dest != other_bb)
2612 	  break;
2613       if (e == NULL)
2614 	break;
2615       if (!single_pred_p (e->dest))
2616 	break;
2617       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2618 	break;
2619       if (!no_side_effect_bb (e->dest))
2620 	break;
2621       last_bb = e->dest;
2622     }
2623   if (first_bb == last_bb)
2624     return;
2625   /* Here basic blocks first_bb through last_bb's predecessor
2626      end with GIMPLE_COND, all of them have one of the edges to
2627      other_bb and another to another block in the range,
2628      all blocks except first_bb don't have side-effects and
2629      last_bb ends with either GIMPLE_COND, or cast satisfying
2630      final_range_test_p.  */
2631   for (bb = last_bb; ; bb = single_pred (bb))
2632     {
2633       enum tree_code code;
2634       tree lhs, rhs;
2635 
2636       e = find_edge (bb, other_bb);
2637       stmt = last_stmt (bb);
2638       gimple_set_visited (stmt, true);
2639       if (gimple_code (stmt) != GIMPLE_COND)
2640 	{
2641 	  use_operand_p use_p;
2642 	  gimple phi;
2643 	  edge e2;
2644 	  unsigned int d;
2645 
2646 	  lhs = gimple_assign_lhs (stmt);
2647 	  rhs = gimple_assign_rhs1 (stmt);
2648 	  gcc_assert (bb == last_bb);
2649 
2650 	  /* stmt is
2651 	     _123 = (int) _234;
2652 
2653 	     followed by:
2654 	     <bb M>:
2655 	     # _345 = PHI <_123(N), 1(...), 1(...)>
2656 
2657 	     or 0 instead of 1.  If it is 0, the _234
2658 	     range test is anded together with all the
2659 	     other range tests, if it is 1, it is ored with
2660 	     them.  */
2661 	  single_imm_use (lhs, &use_p, &phi);
2662 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2663 	  e2 = find_edge (first_bb, other_bb);
2664 	  d = e2->dest_idx;
2665 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2666 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
2667 	    code = BIT_AND_EXPR;
2668 	  else
2669 	    {
2670 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2671 	      code = BIT_IOR_EXPR;
2672 	    }
2673 
2674 	  /* If _234 SSA_NAME_DEF_STMT is
2675 	     _234 = _567 | _789;
2676 	     (or &, corresponding to 1/0 in the phi arguments),
2677 	     push into ops the individual range test arguments
2678 	     of the bitwise or resp. and, recursively.  */
2679 	  if (!get_ops (rhs, code, &ops,
2680 			loop_containing_stmt (stmt))
2681 	      && has_single_use (rhs))
2682 	    {
2683 	      /* Otherwise, push the _234 range test itself.  */
2684 	      operand_entry_t oe
2685 		= (operand_entry_t) pool_alloc (operand_entry_pool);
2686 
2687 	      oe->op = rhs;
2688 	      oe->rank = code;
2689 	      oe->id = 0;
2690 	      oe->count = 1;
2691 	      ops.safe_push (oe);
2692 	    }
2693 	  continue;
2694 	}
2695       /* Otherwise stmt is GIMPLE_COND.  */
2696       code = gimple_cond_code (stmt);
2697       lhs = gimple_cond_lhs (stmt);
2698       rhs = gimple_cond_rhs (stmt);
2699       if (TREE_CODE (lhs) == SSA_NAME
2700 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2701 	  && ((code != EQ_EXPR && code != NE_EXPR)
2702 	      || rhs != boolean_false_node
2703 		 /* Either push into ops the individual bitwise
2704 		    or resp. and operands, depending on which
2705 		    edge is other_bb.  */
2706 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2707 				 ^ (code == EQ_EXPR))
2708 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2709 			   loop_containing_stmt (stmt))))
2710 	{
2711 	  /* Or push the GIMPLE_COND stmt itself.  */
2712 	  operand_entry_t oe
2713 	    = (operand_entry_t) pool_alloc (operand_entry_pool);
2714 
2715 	  oe->op = NULL;
2716 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
2717 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
2718 	  /* oe->op = NULL signals that there is no SSA_NAME
2719 	     for the range test, and oe->id instead is the number of
2720 	     the basic block at whose end the GIMPLE_COND
2721 	     is.  */
2722 	  oe->id = bb->index;
2723 	  oe->count = 1;
2724 	  ops.safe_push (oe);
2725 	}
2726       if (bb == first_bb)
2727 	break;
2728     }
2729   if (ops.length () > 1)
2730     optimize_range_tests (ERROR_MARK, &ops);
2731   ops.release ();
2732 }
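
/* Illustrative shape the above handles (simplified, names invented):
   for source like
     if (a == 3) goto found;
     if (a == 4) goto found;
     if (a == 5) goto found;
   where each comparison ends its own basic block with a GIMPLE_COND and
   all of them branch to the same destination, the conditions are
   collected into OPS and optimize_range_tests can replace them with a
   single test such as (unsigned) (a - 3) <= 2, turning the remaining
   GIMPLE_CONDs into constant branches for later cleanup.  */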
2733 
2734 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2735    of STMT in its operands.  This is also known as a "destructive
2736    update" operation.  */
2737 
2738 static bool
2739 is_phi_for_stmt (gimple stmt, tree operand)
2740 {
2741   gimple def_stmt;
2742   tree lhs;
2743   use_operand_p arg_p;
2744   ssa_op_iter i;
2745 
2746   if (TREE_CODE (operand) != SSA_NAME)
2747     return false;
2748 
2749   lhs = gimple_assign_lhs (stmt);
2750 
2751   def_stmt = SSA_NAME_DEF_STMT (operand);
2752   if (gimple_code (def_stmt) != GIMPLE_PHI)
2753     return false;
2754 
2755   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2756     if (lhs == USE_FROM_PTR (arg_p))
2757       return true;
2758   return false;
2759 }
2760 
2761 /* Remove def stmt of VAR if VAR has zero uses and recurse
2762    on rhs1 operand if so.  */
2763 
2764 static void
2765 remove_visited_stmt_chain (tree var)
2766 {
2767   gimple stmt;
2768   gimple_stmt_iterator gsi;
2769 
2770   while (1)
2771     {
2772       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2773 	return;
2774       stmt = SSA_NAME_DEF_STMT (var);
2775       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2776 	{
2777 	  var = gimple_assign_rhs1 (stmt);
2778 	  gsi = gsi_for_stmt (stmt);
2779 	  gsi_remove (&gsi, true);
2780 	  release_defs (stmt);
2781 	}
2782       else
2783 	return;
2784     }
2785 }
2786 
2787 /* This function checks three consecutive operands in
2788    the passed operands vector OPS starting from OPINDEX and
2789    swaps two operands if it is profitable for the binary operation
2790    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2791 
2792    We pair ops with the same rank if possible.
2793 
2794    The alternative we try is to see if STMT is a destructive
2795    update style statement, which is like:
2796    b = phi (a, ...)
2797    a = c + b;
2798    In that case, we want to use the destructive update form to
2799    expose the possible vectorizer sum reduction opportunity.
2800    In that case, the third operand will be the phi node. This
2801    check is not performed if STMT is null.
2802 
2803    We could, of course, try to be better as noted above, and do a
2804    lot of work to try to find these opportunities in >3 operand
2805    cases, but it is unlikely to be worth it.  */
2806 
2807 static void
2808 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2809 			  unsigned int opindex, gimple stmt)
2810 {
2811   operand_entry_t oe1, oe2, oe3;
2812 
2813   oe1 = ops[opindex];
2814   oe2 = ops[opindex + 1];
2815   oe3 = ops[opindex + 2];
2816 
2817   if ((oe1->rank == oe2->rank
2818        && oe2->rank != oe3->rank)
2819       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2820 	  && !is_phi_for_stmt (stmt, oe1->op)
2821 	  && !is_phi_for_stmt (stmt, oe2->op)))
2822     {
2823       struct operand_entry temp = *oe3;
2824       oe3->op = oe1->op;
2825       oe3->rank = oe1->rank;
2826       oe1->op = temp.op;
2827       oe1->rank = temp.rank;
2828     }
2829   else if ((oe1->rank == oe3->rank
2830 	    && oe2->rank != oe3->rank)
2831 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
2832 	       && !is_phi_for_stmt (stmt, oe1->op)
2833 	       && !is_phi_for_stmt (stmt, oe3->op)))
2834     {
2835       struct operand_entry temp = *oe2;
2836       oe2->op = oe1->op;
2837       oe2->rank = oe1->rank;
2838       oe1->op = temp.op;
2839       oe1->rank = temp.rank;
2840     }
2841 }
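
/* Typical destructive-update shape the check above looks for
   (illustrative, SSA names invented): in a reduction loop with
     sum_2 = PHI <0.0, sum_5>
     sum_5 = a_3 + b_4 + sum_2;
   the swap ensures the PHI result sum_2 ends up as an operand of the
   outermost statement of the rewritten chain, roughly
     sum_5 = (a_3 + b_4) + sum_2;
   preserving the accumulation pattern the vectorizer recognizes as a
   sum reduction.  */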
2842 
2843 /* Recursively rewrite our linearized statements so that the operators
2844    match those in OPS[OPINDEX], putting the computation in rank
2845    order.  */
2846 
2847 static void
2848 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2849 		   vec<operand_entry_t> ops, bool moved)
2850 {
2851   tree rhs1 = gimple_assign_rhs1 (stmt);
2852   tree rhs2 = gimple_assign_rhs2 (stmt);
2853   operand_entry_t oe;
2854 
2855   /* If we have three operands left, then we want to make sure the ones
2856      that get the double binary op are chosen wisely.  */
2857   if (opindex + 3 == ops.length ())
2858     swap_ops_for_binary_stmt (ops, opindex, stmt);
2859 
2860   /* The final recursion case for this function is that you have
2861      exactly two operations left.
2862      If we had exactly one op in the entire list to start with, we
2863      would have never called this function, and the tail recursion
2864      rewrites them one at a time.  */
2865   if (opindex + 2 == ops.length ())
2866     {
2867       operand_entry_t oe1, oe2;
2868 
2869       oe1 = ops[opindex];
2870       oe2 = ops[opindex + 1];
2871 
2872       if (rhs1 != oe1->op || rhs2 != oe2->op)
2873 	{
2874 	  if (dump_file && (dump_flags & TDF_DETAILS))
2875 	    {
2876 	      fprintf (dump_file, "Transforming ");
2877 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2878 	    }
2879 
2880 	  gimple_assign_set_rhs1 (stmt, oe1->op);
2881 	  gimple_assign_set_rhs2 (stmt, oe2->op);
2882 	  update_stmt (stmt);
2883 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
2884 	    remove_visited_stmt_chain (rhs1);
2885 
2886 	  if (dump_file && (dump_flags & TDF_DETAILS))
2887 	    {
2888 	      fprintf (dump_file, " into ");
2889 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2890 	    }
2891 	}
2892       return;
2893     }
2894 
2895   /* If we hit here, we should have 3 or more ops left.  */
2896   gcc_assert (opindex + 2 < ops.length ());
2897 
2898   /* Rewrite the next operator.  */
2899   oe = ops[opindex];
2900 
2901   if (oe->op != rhs2)
2902     {
2903       if (!moved)
2904 	{
2905 	  gimple_stmt_iterator gsinow, gsirhs1;
2906 	  gimple stmt1 = stmt, stmt2;
2907 	  unsigned int count;
2908 
2909 	  gsinow = gsi_for_stmt (stmt);
2910 	  count = ops.length () - opindex - 2;
2911 	  while (count-- != 0)
2912 	    {
2913 	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2914 	      gsirhs1 = gsi_for_stmt (stmt2);
2915 	      gsi_move_before (&gsirhs1, &gsinow);
2916 	      gsi_prev (&gsinow);
2917 	      stmt1 = stmt2;
2918 	    }
2919 	  moved = true;
2920 	}
2921 
2922       if (dump_file && (dump_flags & TDF_DETAILS))
2923 	{
2924 	  fprintf (dump_file, "Transforming ");
2925 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2926 	}
2927 
2928       gimple_assign_set_rhs2 (stmt, oe->op);
2929       update_stmt (stmt);
2930 
2931       if (dump_file && (dump_flags & TDF_DETAILS))
2932 	{
2933 	  fprintf (dump_file, " into ");
2934 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2935 	}
2936     }
2937   /* Recurse on the LHS of the binary operator, which is guaranteed to
2938      be the non-leaf side.  */
2939   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2940 }
2941 
2942 /* Find out how many cycles we need to compute a statement chain.
2943    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
2944    maximum number of independent statements we may execute per cycle.  */
2945 
2946 static int
2947 get_required_cycles (int ops_num, int cpu_width)
2948 {
2949   int res;
2950   int elog;
2951   unsigned int rest;
2952 
2953   /* While we have more than 2 * cpu_width operands
2954      we may reduce the number of operands by cpu_width
2955      per cycle.  */
2956   res = ops_num / (2 * cpu_width);
2957 
2958   /* The remaining operand count may be halved each cycle
2959      until we have only one operand.  */
2960   rest = (unsigned)(ops_num - res * cpu_width);
2961   elog = exact_log2 (rest);
2962   if (elog >= 0)
2963     res += elog;
2964   else
2965     res += floor_log2 (rest) + 1;
2966 
2967   return res;
2968 }
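
/* Worked example of the computation above: for ops_num == 10 and
   cpu_width == 2,
     res  = 10 / (2 * 2) = 2,
     rest = 10 - 2 * 2 = 6, which is not a power of two,
     res += floor_log2 (6) + 1 = 3,
   so get_required_cycles (10, 2) returns 5.  */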
2969 
2970 /* Returns an optimal number of registers to use for computation of
2971    given statements.  */
2972 
2973 static int
2974 get_reassociation_width (int ops_num, enum tree_code opc,
2975 			 enum machine_mode mode)
2976 {
2977   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2978   int width;
2979   int width_min;
2980   int cycles_best;
2981 
2982   if (param_width > 0)
2983     width = param_width;
2984   else
2985     width = targetm.sched.reassociation_width (opc, mode);
2986 
2987   if (width == 1)
2988     return width;
2989 
2990   /* Get the minimal time required for sequence computation.  */
2991   cycles_best = get_required_cycles (ops_num, width);
2992 
2993   /* Check if we may use less width and still compute sequence for
2994      the same time.  It will allow us to reduce registers usage.
2995      get_required_cycles is monotonically increasing with lower width
2996      so we can perform a binary search for the minimal width that still
2997      results in the optimal cycle count.  */
2998   width_min = 1;
2999   while (width > width_min)
3000     {
3001       int width_mid = (width + width_min) / 2;
3002 
3003       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3004 	width = width_mid;
3005       else if (width_min < width_mid)
3006 	width_min = width_mid;
3007       else
3008 	break;
3009     }
3010 
3011   return width;
3012 }
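
/* Illustrative run of the binary search above: with 6 operands and a
   target-reported width of 4, get_required_cycles (6, 4) and
   get_required_cycles (6, 2) are both 3 cycles, while width 1 needs 5,
   so get_reassociation_width returns 2, the narrowest width that still
   reaches the optimal cycle count.  */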
3013 
3014 /* Recursively rewrite our linearized statements so that the operators
3015    match those in OPS[OPINDEX], putting the computation in rank
3016    order and trying to allow operations to be executed in
3017    parallel.  */
3018 
3019 static void
3020 rewrite_expr_tree_parallel (gimple stmt, int width,
3021 			    vec<operand_entry_t>  ops)
3022 {
3023   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3024   int op_num = ops.length ();
3025   int stmt_num = op_num - 1;
3026   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3027   int op_index = op_num - 1;
3028   int stmt_index = 0;
3029   int ready_stmts_end = 0;
3030   int i = 0;
3031   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3032 
3033   /* We start expression rewriting from the top statements.
3034      So, in this loop we create a full list of statements
3035      we will work with.  */
3036   stmts[stmt_num - 1] = stmt;
3037   for (i = stmt_num - 2; i >= 0; i--)
3038     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3039 
3040   for (i = 0; i < stmt_num; i++)
3041     {
3042       tree op1, op2;
3043 
3044       /* Determine whether we should use results of
3045 	 already handled statements or not.  */
3046       if (ready_stmts_end == 0
3047 	  && (i - stmt_index >= width || op_index < 1))
3048 	ready_stmts_end = i;
3049 
3050       /* Now we choose operands for the next statement.  A non-zero
3051 	 value in ready_stmts_end means that we should use the results
3052 	 of already generated statements as new operands.  */
3053       if (ready_stmts_end > 0)
3054 	{
3055 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
3056 	  if (ready_stmts_end > stmt_index)
3057 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
3058 	  else if (op_index >= 0)
3059 	    op2 = ops[op_index--]->op;
3060 	  else
3061 	    {
3062 	      gcc_assert (stmt_index < i);
3063 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
3064 	    }
3065 
3066 	  if (stmt_index >= ready_stmts_end)
3067 	    ready_stmts_end = 0;
3068 	}
3069       else
3070 	{
3071 	  if (op_index > 1)
3072 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3073 	  op2 = ops[op_index--]->op;
3074 	  op1 = ops[op_index--]->op;
3075 	}
3076 
3077       /* If we emit the last statement then we should put
3078 	 operands into the last statement.  It will also
3079 	 break the loop.  */
3080       if (op_index < 0 && stmt_index == i)
3081 	i = stmt_num - 1;
3082 
3083       if (dump_file && (dump_flags & TDF_DETAILS))
3084 	{
3085 	  fprintf (dump_file, "Transforming ");
3086 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3087 	}
3088 
3089       /* We keep original statement only for the last one.  All
3090 	 others are recreated.  */
3091       if (i == stmt_num - 1)
3092 	{
3093 	  gimple_assign_set_rhs1 (stmts[i], op1);
3094 	  gimple_assign_set_rhs2 (stmts[i], op2);
3095 	  update_stmt (stmts[i]);
3096 	}
3097       else
3098 	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3099 
3100       if (dump_file && (dump_flags & TDF_DETAILS))
3101 	{
3102 	  fprintf (dump_file, " into ");
3103 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3104 	}
3105     }
3106 
3107   remove_visited_stmt_chain (last_rhs1);
3108 }
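
/* Illustrative effect (SSA names invented): a linear chain computing
     _1 = a + b;  _2 = _1 + c;  s = _2 + d;
   rewritten with width 2 can instead become
     _1 = a + b;  _2 = c + d;  s = _1 + _2;
   so the two partial sums no longer depend on each other and may issue
   in the same cycle.  */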
3109 
3110 /* Transform STMT, which is really (A + B) + (C + D), into the left
3111    linear form, ((A+B)+C)+D.
3112    Recurse on D if necessary.  */
3113 
3114 static void
3115 linearize_expr (gimple stmt)
3116 {
3117   gimple_stmt_iterator gsinow, gsirhs;
3118   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3119   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3120   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3121   gimple newbinrhs = NULL;
3122   struct loop *loop = loop_containing_stmt (stmt);
3123 
3124   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3125 	      && is_reassociable_op (binrhs, rhscode, loop));
3126 
3127   gsinow = gsi_for_stmt (stmt);
3128   gsirhs = gsi_for_stmt (binrhs);
3129   gsi_move_before (&gsirhs, &gsinow);
3130 
3131   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3132   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3133   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3134 
3135   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3136     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3137 
3138   if (dump_file && (dump_flags & TDF_DETAILS))
3139     {
3140       fprintf (dump_file, "Linearized: ");
3141       print_gimple_stmt (dump_file, stmt, 0, 0);
3142     }
3143 
3144   reassociate_stats.linearized++;
3145   update_stmt (binrhs);
3146   update_stmt (binlhs);
3147   update_stmt (stmt);
3148 
3149   gimple_set_visited (stmt, true);
3150   gimple_set_visited (binlhs, true);
3151   gimple_set_visited (binrhs, true);
3152 
3153   /* Tail recurse on the new rhs if it still needs reassociation.  */
3154   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3155     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3156 	   want to change the algorithm while converting to tuples.  */
3157     linearize_expr (stmt);
3158 }
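
/* Illustrative GIMPLE shape (SSA names invented): starting from
     x_5 = a_1 + b_2;   y_6 = c_3 + d_4;   z_7 = x_5 + y_6;
   the rewiring above produces
     x_5 = a_1 + b_2;   y_6 = x_5 + d_4;   z_7 = y_6 + c_3;
   i.e. a left-linear chain in which only the right operand of each
   statement is a leaf.  */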
3159 
3160 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3161    it.  Otherwise, return NULL.  */
3162 
3163 static gimple
3164 get_single_immediate_use (tree lhs)
3165 {
3166   use_operand_p immuse;
3167   gimple immusestmt;
3168 
3169   if (TREE_CODE (lhs) == SSA_NAME
3170       && single_imm_use (lhs, &immuse, &immusestmt)
3171       && is_gimple_assign (immusestmt))
3172     return immusestmt;
3173 
3174   return NULL;
3175 }
3176 
3177 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3178    representing the negated value.  Insertions of any necessary
3179    instructions go before GSI.
3180    This function is recursive in that, if you hand it "a_5" as the
3181    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3182    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3183 
3184 static tree
3185 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3186 {
3187   gimple negatedefstmt = NULL;
3188   tree resultofnegate;
3189 
3190   /* If we are trying to negate a name, defined by an add, negate the
3191      add operands instead.  */
3192   if (TREE_CODE (tonegate) == SSA_NAME)
3193     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3194   if (TREE_CODE (tonegate) == SSA_NAME
3195       && is_gimple_assign (negatedefstmt)
3196       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3197       && has_single_use (gimple_assign_lhs (negatedefstmt))
3198       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3199     {
3200       gimple_stmt_iterator gsi;
3201       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3202       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3203 
3204       gsi = gsi_for_stmt (negatedefstmt);
3205       rhs1 = negate_value (rhs1, &gsi);
3206       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3207 
3208       gsi = gsi_for_stmt (negatedefstmt);
3209       rhs2 = negate_value (rhs2, &gsi);
3210       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3211 
3212       update_stmt (negatedefstmt);
3213       return gimple_assign_lhs (negatedefstmt);
3214     }
3215 
3216   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3217   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3218 					     NULL_TREE, true, GSI_SAME_STMT);
3219   return resultofnegate;
3220 }
3221 
3222 /* Return true if we should break up the subtract in STMT into an add
3223    with negate.  This is true when the subtract operands are really
3224    adds, or the subtract itself is used in an add expression.  In
3225    either case, breaking up the subtract into an add with negate
3226    exposes the adds to reassociation.  */
3227 
3228 static bool
3229 should_break_up_subtract (gimple stmt)
3230 {
3231   tree lhs = gimple_assign_lhs (stmt);
3232   tree binlhs = gimple_assign_rhs1 (stmt);
3233   tree binrhs = gimple_assign_rhs2 (stmt);
3234   gimple immusestmt;
3235   struct loop *loop = loop_containing_stmt (stmt);
3236 
3237   if (TREE_CODE (binlhs) == SSA_NAME
3238       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3239     return true;
3240 
3241   if (TREE_CODE (binrhs) == SSA_NAME
3242       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3243     return true;
3244 
3245   if (TREE_CODE (lhs) == SSA_NAME
3246       && (immusestmt = get_single_immediate_use (lhs))
3247       && is_gimple_assign (immusestmt)
3248       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3249 	  ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3250     return true;
3251   return false;
3252 }
3253 
3254 /* Transform STMT from A - B into A + -B.  */
3255 
3256 static void
3257 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3258 {
3259   tree rhs1 = gimple_assign_rhs1 (stmt);
3260   tree rhs2 = gimple_assign_rhs2 (stmt);
3261 
3262   if (dump_file && (dump_flags & TDF_DETAILS))
3263     {
3264       fprintf (dump_file, "Breaking up subtract ");
3265       print_gimple_stmt (dump_file, stmt, 0, 0);
3266     }
3267 
3268   rhs2 = negate_value (rhs2, gsip);
3269   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3270   update_stmt (stmt);
3271 }
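
/* Illustrative example (SSA names invented): for
     t_3 = a_1 + b_2;
     x_4 = t_3 - c_5;
   should_break_up_subtract holds because the first operand of the
   subtract is itself an add, and break_up_subtract rewrites the second
   statement into roughly
     _6 = -c_5;
     x_4 = t_3 + _6;
   so a_1, b_2 and _6 can all be collected into one PLUS_EXPR chain.  */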
3272 
3273 /* Determine whether STMT is a builtin call that raises an SSA name
3274    to an integer power and has only one use.  If so, and this is early
3275    reassociation and unsafe math optimizations are permitted, place
3276    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3277    If any of these conditions does not hold, return FALSE.  */
3278 
3279 static bool
3280 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3281 {
3282   tree fndecl, arg1;
3283   REAL_VALUE_TYPE c, cint;
3284 
3285   if (!first_pass_instance
3286       || !flag_unsafe_math_optimizations
3287       || !is_gimple_call (stmt)
3288       || !has_single_use (gimple_call_lhs (stmt)))
3289     return false;
3290 
3291   fndecl = gimple_call_fndecl (stmt);
3292 
3293   if (!fndecl
3294       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3295     return false;
3296 
3297   switch (DECL_FUNCTION_CODE (fndecl))
3298     {
3299     CASE_FLT_FN (BUILT_IN_POW):
3300       if (flag_errno_math)
3301 	return false;
3302 
3303       *base = gimple_call_arg (stmt, 0);
3304       arg1 = gimple_call_arg (stmt, 1);
3305 
3306       if (TREE_CODE (arg1) != REAL_CST)
3307 	return false;
3308 
3309       c = TREE_REAL_CST (arg1);
3310 
3311       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3312 	return false;
3313 
3314       *exponent = real_to_integer (&c);
3315       real_from_integer (&cint, VOIDmode, *exponent,
3316 			 *exponent < 0 ? -1 : 0, 0);
3317       if (!real_identical (&c, &cint))
3318 	return false;
3319 
3320       break;
3321 
3322     CASE_FLT_FN (BUILT_IN_POWI):
3323       *base = gimple_call_arg (stmt, 0);
3324       arg1 = gimple_call_arg (stmt, 1);
3325 
3326       if (!host_integerp (arg1, 0))
3327 	return false;
3328 
3329       *exponent = TREE_INT_CST_LOW (arg1);
3330       break;
3331 
3332     default:
3333       return false;
3334     }
3335 
3336   /* Expanding negative exponents is generally unproductive, so we don't
3337      complicate matters with those.  Exponents of zero and one should
3338      have been handled by expression folding.  */
3339   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3340     return false;
3341 
3342   return true;
3343 }
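
/* Examples of what the predicate above accepts (assuming early
   reassociation, -funsafe-math-optimizations and, for pow,
   -fno-math-errno): pow (x, 4.0) yields *BASE == x and *EXPONENT == 4,
   and __builtin_powi (x, 3) yields exponent 3, while pow (x, 0.5) and
   pow (x, 1.0) are rejected because the exponent must be an integer
   greater than one.  */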
3344 
3345 /* Recursively linearize a binary expression that is the RHS of STMT.
3346    Place the operands of the expression tree in the vector named OPS.  */
3347 
3348 static void
3349 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3350 		     bool is_associative, bool set_visited)
3351 {
3352   tree binlhs = gimple_assign_rhs1 (stmt);
3353   tree binrhs = gimple_assign_rhs2 (stmt);
3354   gimple binlhsdef = NULL, binrhsdef = NULL;
3355   bool binlhsisreassoc = false;
3356   bool binrhsisreassoc = false;
3357   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3358   struct loop *loop = loop_containing_stmt (stmt);
3359   tree base = NULL_TREE;
3360   HOST_WIDE_INT exponent = 0;
3361 
3362   if (set_visited)
3363     gimple_set_visited (stmt, true);
3364 
3365   if (TREE_CODE (binlhs) == SSA_NAME)
3366     {
3367       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3368       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3369 			 && !stmt_could_throw_p (binlhsdef));
3370     }
3371 
3372   if (TREE_CODE (binrhs) == SSA_NAME)
3373     {
3374       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3375       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3376 			 && !stmt_could_throw_p (binrhsdef));
3377     }
3378 
3379   /* If the LHS is not reassociable, but the RHS is, we need to swap
3380      them.  If neither is reassociable, there is nothing we can do, so
3381      just put them in the ops vector.  If the LHS is reassociable,
3382      linearize it.  If both are reassociable, then linearize the RHS
3383      and the LHS.  */
3384 
3385   if (!binlhsisreassoc)
3386     {
3387       tree temp;
3388 
3389       /* If this is not an associative operation (e.g. a division), give up.  */
3390       if (!is_associative)
3391 	{
3392 	  add_to_ops_vec (ops, binrhs);
3393 	  return;
3394 	}
3395 
3396       if (!binrhsisreassoc)
3397 	{
3398 	  if (rhscode == MULT_EXPR
3399 	      && TREE_CODE (binrhs) == SSA_NAME
3400 	      && acceptable_pow_call (binrhsdef, &base, &exponent))
3401 	    {
3402 	      add_repeat_to_ops_vec (ops, base, exponent);
3403 	      gimple_set_visited (binrhsdef, true);
3404 	    }
3405 	  else
3406 	    add_to_ops_vec (ops, binrhs);
3407 
3408 	  if (rhscode == MULT_EXPR
3409 	      && TREE_CODE (binlhs) == SSA_NAME
3410 	      && acceptable_pow_call (binlhsdef, &base, &exponent))
3411 	    {
3412 	      add_repeat_to_ops_vec (ops, base, exponent);
3413 	      gimple_set_visited (binlhsdef, true);
3414 	    }
3415 	  else
3416 	    add_to_ops_vec (ops, binlhs);
3417 
3418 	  return;
3419 	}
3420 
3421       if (dump_file && (dump_flags & TDF_DETAILS))
3422 	{
3423 	  fprintf (dump_file, "swapping operands of ");
3424 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3425 	}
3426 
3427       swap_tree_operands (stmt,
3428 			  gimple_assign_rhs1_ptr (stmt),
3429 			  gimple_assign_rhs2_ptr (stmt));
3430       update_stmt (stmt);
3431 
3432       if (dump_file && (dump_flags & TDF_DETAILS))
3433 	{
3434 	  fprintf (dump_file, " is now ");
3435 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3436 	}
3437 
3438       /* We want to make it so the lhs is always the reassociative op,
3439 	 so swap.  */
3440       temp = binlhs;
3441       binlhs = binrhs;
3442       binrhs = temp;
3443     }
3444   else if (binrhsisreassoc)
3445     {
3446       linearize_expr (stmt);
3447       binlhs = gimple_assign_rhs1 (stmt);
3448       binrhs = gimple_assign_rhs2 (stmt);
3449     }
3450 
3451   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3452 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3453 				      rhscode, loop));
3454   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3455 		       is_associative, set_visited);
3456 
3457   if (rhscode == MULT_EXPR
3458       && TREE_CODE (binrhs) == SSA_NAME
3459       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3460     {
3461       add_repeat_to_ops_vec (ops, base, exponent);
3462       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3463     }
3464   else
3465     add_to_ops_vec (ops, binrhs);
3466 }
3467 
3468 /* Repropagate the negates back into subtracts, since no other pass
3469    currently does it.  */
3470 
3471 static void
3472 repropagate_negates (void)
3473 {
3474   unsigned int i = 0;
3475   tree negate;
3476 
3477   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3478     {
3479       gimple user = get_single_immediate_use (negate);
3480 
3481       if (!user || !is_gimple_assign (user))
3482 	continue;
3483 
3484       /* The negate operand can be either operand of a PLUS_EXPR
3485 	 (it can be the LHS if the RHS is a constant for example).
3486 
3487 	 Force the negate operand to the RHS of the PLUS_EXPR, then
3488 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
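      /* For example (illustrative): "x = -a; y = x + b" becomes
	 "x = -a; y = b - a"; the negate may then become dead.  */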
3489       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3490 	{
3491 	  /* If the negated operand appears on the LHS of the
3492 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
3493 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
3494 	  if (gimple_assign_rhs1 (user) == negate)
3495 	    {
3496 	      swap_tree_operands (user,
3497 				  gimple_assign_rhs1_ptr (user),
3498 				  gimple_assign_rhs2_ptr (user));
3499 	    }
3500 
3501 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3502 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3503 	  if (gimple_assign_rhs2 (user) == negate)
3504 	    {
3505 	      tree rhs1 = gimple_assign_rhs1 (user);
3506 	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3507 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
3508 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3509 	      update_stmt (user);
3510 	    }
3511 	}
3512       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3513 	{
3514 	  if (gimple_assign_rhs1 (user) == negate)
3515 	    {
3516 	      /* We have
3517 	           x = -a
3518 		   y = x - b
3519 		 which we transform into
3520 		   x = a + b
3521 		   y = -x .
3522 		 This pushes the negate down, where we may be able to merge
3523 		 it into some other operation; hence insert it into the
3524 		 plus_negates vector.  */
3525 	      gimple feed = SSA_NAME_DEF_STMT (negate);
3526 	      tree a = gimple_assign_rhs1 (feed);
3527 	      tree rhs2 = gimple_assign_rhs2 (user);
3528 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3529 	      gimple_replace_lhs (feed, negate);
3530 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3531 	      update_stmt (gsi_stmt (gsi));
3532 	      gsi2 = gsi_for_stmt (user);
3533 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3534 	      update_stmt (gsi_stmt (gsi2));
3535 	      gsi_move_before (&gsi, &gsi2);
3536 	      plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3537 	    }
3538 	  else
3539 	    {
3540 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
3541 	         rid of one operation.  */
3542 	      gimple feed = SSA_NAME_DEF_STMT (negate);
3543 	      tree a = gimple_assign_rhs1 (feed);
3544 	      tree rhs1 = gimple_assign_rhs1 (user);
3545 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
3546 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3547 	      update_stmt (gsi_stmt (gsi));
3548 	    }
3549 	}
3550     }
3551 }
3552 
3553 /* Returns true if OP is of a type for which we can do reassociation:
3554    integral types with wrapping overflow, non-saturating fixed-point
3555    types, and floating-point types when -fassociative-math is enabled.  */
3556 
3557 static bool
3558 can_reassociate_p (tree op)
3559 {
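  /* E.g. unsigned arithmetic and signed arithmetic under -fwrapv have
     TYPE_OVERFLOW_WRAPS set and qualify; plain signed arithmetic with
     undefined overflow does not.  */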
3560   tree type = TREE_TYPE (op);
3561   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3562       || NON_SAT_FIXED_POINT_TYPE_P (type)
3563       || (flag_associative_math && FLOAT_TYPE_P (type)))
3564     return true;
3565   return false;
3566 }
3567 
3568 /* Break up subtract operations in block BB.
3569 
3570    We do this top down because we don't know whether the subtract is
3571    part of a possible chain of reassociation except at the top.
3572 
3573    IE given
3574    d = f + g
3575    c = a + e
3576    b = c - d
3577    q = b - r
3578    k = t - q
3579 
3580    we want to break up k = t - q, but we won't until we've transformed q
3581    = b - r, which won't be broken up until we transform b = c - d.
3582 
3583    En passant, clear the GIMPLE visited flag on every statement.  */
3584 
3585 static void
3586 break_up_subtract_bb (basic_block bb)
3587 {
3588   gimple_stmt_iterator gsi;
3589   basic_block son;
3590 
3591   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3592     {
3593       gimple stmt = gsi_stmt (gsi);
3594       gimple_set_visited (stmt, false);
3595 
3596       if (!is_gimple_assign (stmt)
3597 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
3598 	continue;
3599 
3600       /* Look for simple gimple subtract operations.  */
3601       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3602 	{
3603 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3604 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3605 	    continue;
3606 
3607 	  /* Check for a subtract used only in an addition.  If this
3608 	     is the case, transform it into add of a negate for better
3609 	     reassociation.  IE transform C = A-B into C = A + -B if C
3610 	     is only used in an addition.  */
3611 	  if (should_break_up_subtract (stmt))
3612 	    break_up_subtract (stmt, &gsi);
3613 	}
3614       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3615 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3616 	plus_negates.safe_push (gimple_assign_lhs (stmt));
3617     }
3618   for (son = first_dom_son (CDI_DOMINATORS, bb);
3619        son;
3620        son = next_dom_son (CDI_DOMINATORS, son))
3621     break_up_subtract_bb (son);
3622 }
3623 
3624 /* Used for repeated factor analysis.  */
3625 struct repeat_factor_d
3626 {
3627   /* An SSA name that occurs in a multiply chain.  */
3628   tree factor;
3629 
3630   /* Cached rank of the factor.  */
3631   unsigned rank;
3632 
3633   /* Number of occurrences of the factor in the chain.  */
3634   HOST_WIDE_INT count;
3635 
3636   /* An SSA name representing the product of this factor and
3637      all factors appearing later in the repeated factor vector.  */
3638   tree repr;
3639 };
3640 
3641 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3642 typedef const struct repeat_factor_d *const_repeat_factor_t;
3643 
3644 
3645 static vec<repeat_factor> repeat_factor_vec;
3646 
3647 /* Used for sorting the repeat factor vector.  Sort primarily by
3648    ascending occurrence count, secondarily by descending rank.  */
3649 
3650 static int
3651 compare_repeat_factors (const void *x1, const void *x2)
3652 {
3653   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3654   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3655 
3656   if (rf1->count != rf2->count)
3657     return rf1->count < rf2->count ? -1 : 1;
3658 
3659   return rf2->rank - rf1->rank;
3660 }
3661 
3662 /* Look for repeated operands in OPS in the multiply tree rooted at
3663    STMT.  Replace them with an optimal sequence of multiplies and powi
3664    builtin calls, and remove the used operands from OPS.  Return an
3665    SSA name representing the value of the replacement sequence.  */
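
/* As an illustrative sketch: given OPS for x * x * x * y, this builds
   t = __builtin_powi (x, 3), removes the three copies of x from OPS,
   and returns t; the remaining y is left for the normal rewrite.  */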
3666 
3667 static tree
3668 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3669 {
3670   unsigned i, j, vec_len;
3671   int ii;
3672   operand_entry_t oe;
3673   repeat_factor_t rf1, rf2;
3674   repeat_factor rfnew;
3675   tree result = NULL_TREE;
3676   tree target_ssa, iter_result;
3677   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3678   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3679   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3680   gimple mul_stmt, pow_stmt;
3681 
3682   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3683      target.  */
3684   if (!powi_fndecl)
3685     return NULL_TREE;
3686 
3687   /* Allocate the repeated factor vector.  */
3688   repeat_factor_vec.create (10);
3689 
3690   /* Scan the OPS vector for all SSA names in the product and build
3691      up a vector of occurrence counts for each factor.  */
3692   FOR_EACH_VEC_ELT (*ops, i, oe)
3693     {
3694       if (TREE_CODE (oe->op) == SSA_NAME)
3695 	{
3696 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3697 	    {
3698 	      if (rf1->factor == oe->op)
3699 		{
3700 		  rf1->count += oe->count;
3701 		  break;
3702 		}
3703 	    }
3704 
3705 	  if (j >= repeat_factor_vec.length ())
3706 	    {
3707 	      rfnew.factor = oe->op;
3708 	      rfnew.rank = oe->rank;
3709 	      rfnew.count = oe->count;
3710 	      rfnew.repr = NULL_TREE;
3711 	      repeat_factor_vec.safe_push (rfnew);
3712 	    }
3713 	}
3714     }
3715 
3716   /* Sort the repeated factor vector by (a) increasing occurrence count,
3717      and (b) decreasing rank.  */
3718   repeat_factor_vec.qsort (compare_repeat_factors);
3719 
3720   /* It is generally best to combine as many base factors as possible
3721      into a product before applying __builtin_powi to the result.
3722      However, the sort order chosen for the repeated factor vector
3723      allows us to cache partial results for the product of the base
3724      factors for subsequent use.  When we already have a cached partial
3725      result from a previous iteration, it is best to make use of it
3726      before looking for another __builtin_powi opportunity.
3727 
3728      As an example, consider x * x * y * y * y * z * z * z * z.
3729      We want to first compose the product x * y * z, raise it to the
3730      second power, then multiply this by y * z, and finally multiply
3731      by z.  This can be done in 5 multiplies provided we cache y * z
3732      for use in both expressions:
3733 
3734         t1 = y * z
3735 	t2 = t1 * x
3736 	t3 = t2 * t2
3737 	t4 = t1 * t3
3738 	result = t4 * z
3739 
3740      If we instead ignored the cached y * z and first multiplied by
3741      the __builtin_powi opportunity z * z, we would get the inferior:
3742 
3743         t1 = y * z
3744 	t2 = t1 * x
3745 	t3 = t2 * t2
3746 	t4 = z * z
3747 	t5 = t3 * t4
3748         result = t5 * y  */
3749 
3750   vec_len = repeat_factor_vec.length ();
3751 
3752   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3753   while (true)
3754     {
3755       HOST_WIDE_INT power;
3756 
3757       /* First look for the largest cached product of factors from
3758 	 preceding iterations.  If found, create a builtin_powi for
3759 	 it if the minimum occurrence count for its factors is at
3760 	 least 2, or just use this cached product as our next
3761 	 multiplicand if the minimum occurrence count is 1.  */
3762       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3763 	{
3764 	  if (rf1->repr && rf1->count > 0)
3765 	    break;
3766 	}
3767 
3768       if (j < vec_len)
3769 	{
3770 	  power = rf1->count;
3771 
3772 	  if (power == 1)
3773 	    {
3774 	      iter_result = rf1->repr;
3775 
3776 	      if (dump_file && (dump_flags & TDF_DETAILS))
3777 		{
3778 		  unsigned elt;
3779 		  repeat_factor_t rf;
3780 		  fputs ("Multiplying by cached product ", dump_file);
3781 		  for (elt = j; elt < vec_len; elt++)
3782 		    {
3783 		      rf = &repeat_factor_vec[elt];
3784 		      print_generic_expr (dump_file, rf->factor, 0);
3785 		      if (elt < vec_len - 1)
3786 			fputs (" * ", dump_file);
3787 		    }
3788 		  fputs ("\n", dump_file);
3789 		}
3790 	    }
3791 	  else
3792 	    {
3793 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3794 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3795 					    build_int_cst (integer_type_node,
3796 							   power));
3797 	      gimple_call_set_lhs (pow_stmt, iter_result);
3798 	      gimple_set_location (pow_stmt, gimple_location (stmt));
3799 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3800 
3801 	      if (dump_file && (dump_flags & TDF_DETAILS))
3802 		{
3803 		  unsigned elt;
3804 		  repeat_factor_t rf;
3805 		  fputs ("Building __builtin_pow call for cached product (",
3806 			 dump_file);
3807 		  for (elt = j; elt < vec_len; elt++)
3808 		    {
3809 		      rf = &repeat_factor_vec[elt];
3810 		      print_generic_expr (dump_file, rf->factor, 0);
3811 		      if (elt < vec_len - 1)
3812 			fputs (" * ", dump_file);
3813 		    }
3814 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
3815 			   power);
3816 		}
3817 	    }
3818 	}
3819       else
3820 	{
3821 	  /* Otherwise, find the first factor in the repeated factor
3822 	     vector whose occurrence count is at least 2.  If no such
3823 	     factor exists, there are no builtin_powi opportunities
3824 	     remaining.  */
3825 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3826 	    {
3827 	      if (rf1->count >= 2)
3828 		break;
3829 	    }
3830 
3831 	  if (j >= vec_len)
3832 	    break;
3833 
3834 	  power = rf1->count;
3835 
3836 	  if (dump_file && (dump_flags & TDF_DETAILS))
3837 	    {
3838 	      unsigned elt;
3839 	      repeat_factor_t rf;
3840 	      fputs ("Building __builtin_pow call for (", dump_file);
3841 	      for (elt = j; elt < vec_len; elt++)
3842 		{
3843 		  rf = &repeat_factor_vec[elt];
3844 		  print_generic_expr (dump_file, rf->factor, 0);
3845 		  if (elt < vec_len - 1)
3846 		    fputs (" * ", dump_file);
3847 		}
3848 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
3849 	    }
3850 
3851 	  reassociate_stats.pows_created++;
3852 
3853 	  /* Visit each element of the vector in reverse order (so that
3854 	     high-occurrence elements are visited first, and within the
3855 	     same occurrence count, lower-ranked elements are visited
3856 	     first).  Form a linear product of all elements in this order
3857 	     whose occurrence count is at least that of element J.
3858 	     Record the SSA name representing the product of each element
3859 	     with all subsequent elements in the vector.  */
3860 	  if (j == vec_len - 1)
3861 	    rf1->repr = rf1->factor;
3862 	  else
3863 	    {
3864 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
3865 		{
3866 		  tree op1, op2;
3867 
3868 		  rf1 = &repeat_factor_vec[ii];
3869 		  rf2 = &repeat_factor_vec[ii + 1];
3870 
3871 		  /* Init the last factor's representative to be itself.  */
3872 		  if (!rf2->repr)
3873 		    rf2->repr = rf2->factor;
3874 
3875 		  op1 = rf1->factor;
3876 		  op2 = rf2->repr;
3877 
3878 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3879 		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3880 							   target_ssa,
3881 							   op1, op2);
3882 		  gimple_set_location (mul_stmt, gimple_location (stmt));
3883 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3884 		  rf1->repr = target_ssa;
3885 
3886 		  /* Don't reprocess the multiply we just introduced.  */
3887 		  gimple_set_visited (mul_stmt, true);
3888 		}
3889 	    }
3890 
3891 	  /* Form a call to __builtin_powi for the maximum product
3892 	     just formed, raised to the power obtained earlier.  */
3893 	  rf1 = &repeat_factor_vec[j];
3894 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3895 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3896 					build_int_cst (integer_type_node,
3897 						       power));
3898 	  gimple_call_set_lhs (pow_stmt, iter_result);
3899 	  gimple_set_location (pow_stmt, gimple_location (stmt));
3900 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3901 	}
3902 
3903       /* If we previously formed at least one other builtin_powi call,
3904 	 form the product of this one and those others.  */
3905       if (result)
3906 	{
3907 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3908 	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3909 						   result, iter_result);
3910 	  gimple_set_location (mul_stmt, gimple_location (stmt));
3911 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3912 	  gimple_set_visited (mul_stmt, true);
3913 	  result = new_result;
3914 	}
3915       else
3916 	result = iter_result;
3917 
3918       /* Decrement the occurrence count of each element in the product
3919 	 by the count found above, and remove this many copies of each
3920 	 factor from OPS.  */
3921       for (i = j; i < vec_len; i++)
3922 	{
3923 	  unsigned k = power;
3924 	  unsigned n;
3925 
3926 	  rf1 = &repeat_factor_vec[i];
3927 	  rf1->count -= power;
3928 
3929 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3930 	    {
3931 	      if (oe->op == rf1->factor)
3932 		{
3933 		  if (oe->count <= k)
3934 		    {
3935 		      ops->ordered_remove (n);
3936 		      k -= oe->count;
3937 
3938 		      if (k == 0)
3939 			break;
3940 		    }
3941 		  else
3942 		    {
3943 		      oe->count -= k;
3944 		      break;
3945 		    }
3946 		}
3947 	    }
3948 	}
3949     }
3950 
3951   /* At this point all elements in the repeated factor vector have a
3952      remaining occurrence count of 0 or 1, and those with a count of 1
3953      don't have cached representatives.  Re-sort the ops vector and
3954      clean up.  */
3955   ops->qsort (sort_by_operand_rank);
3956   repeat_factor_vec.release ();
3957 
3958   /* Return the final product computed herein.  Note that there may
3959      still be some elements with single occurrence count left in OPS;
3960      those will be handled by the normal reassociation logic.  */
3961   return result;
3962 }
3963 
3964 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3965 
3966 static void
3967 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3968 {
3969   tree rhs1;
3970 
3971   if (dump_file && (dump_flags & TDF_DETAILS))
3972     {
3973       fprintf (dump_file, "Transforming ");
3974       print_gimple_stmt (dump_file, stmt, 0, 0);
3975     }
3976 
3977   rhs1 = gimple_assign_rhs1 (stmt);
3978   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3979   update_stmt (stmt);
3980   remove_visited_stmt_chain (rhs1);
3981 
3982   if (dump_file && (dump_flags & TDF_DETAILS))
3983     {
3984       fprintf (dump_file, " into ");
3985       print_gimple_stmt (dump_file, stmt, 0, 0);
3986     }
3987 }
3988 
3989 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3990 
3991 static void
3992 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3993 			    tree rhs1, tree rhs2)
3994 {
3995   if (dump_file && (dump_flags & TDF_DETAILS))
3996     {
3997       fprintf (dump_file, "Transforming ");
3998       print_gimple_stmt (dump_file, stmt, 0, 0);
3999     }
4000 
4001   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4002   update_stmt (gsi_stmt (*gsi));
4003   remove_visited_stmt_chain (rhs1);
4004 
4005   if (dump_file && (dump_flags & TDF_DETAILS))
4006     {
4007       fprintf (dump_file, " into ");
4008       print_gimple_stmt (dump_file, stmt, 0, 0);
4009     }
4010 }
4011 
4012 /* Reassociate expressions in basic block BB and, recursively, in its
4013    children in the post-dominator tree.  */
4014 
4015 static void
4016 reassociate_bb (basic_block bb)
4017 {
4018   gimple_stmt_iterator gsi;
4019   basic_block son;
4020   gimple stmt = last_stmt (bb);
4021 
4022   if (stmt && !gimple_visited_p (stmt))
4023     maybe_optimize_range_tests (stmt);
4024 
4025   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4026     {
4027       stmt = gsi_stmt (gsi);
4028 
4029       if (is_gimple_assign (stmt)
4030 	  && !stmt_could_throw_p (stmt))
4031 	{
4032 	  tree lhs, rhs1, rhs2;
4033 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4034 
4035 	  /* If this is not a gimple binary expression, there is
4036 	     nothing for us to do with it.  */
4037 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4038 	    continue;
4039 
4040 	  /* If this was part of an already processed statement,
4041 	     we don't need to touch it again. */
4042 	  if (gimple_visited_p (stmt))
4043 	    {
4044 	      /* This statement might have become dead because of previous
4045 		 reassociations.  */
4046 	      if (has_zero_uses (gimple_get_lhs (stmt)))
4047 		{
4048 		  gsi_remove (&gsi, true);
4049 		  release_defs (stmt);
4050 		  /* We might have removed the last statement above,
4051 		     which leaves the iterator at the end of the
4052 		     sequence.  Reset it to the last statement of the
4053 		     block; if that is gone as well (the block is now
4054 		     empty because we removed its only statement),
4055 		     bail out.  */
4056 		  if (gsi_end_p (gsi))
4057 		    {
4058 		      gsi = gsi_last_bb (bb);
4059 		      if (gsi_end_p (gsi))
4060 			break;
4061 		    }
4062 		}
4063 	      continue;
4064 	    }
4065 
4066 	  lhs = gimple_assign_lhs (stmt);
4067 	  rhs1 = gimple_assign_rhs1 (stmt);
4068 	  rhs2 = gimple_assign_rhs2 (stmt);
4069 
4070 	  /* Bitwise and min/max operations can be reassociated for any
4071 	     type; for all other operations verify the types here.  */
4072 	  if (rhs_code != BIT_IOR_EXPR
4073 	      && rhs_code != BIT_AND_EXPR
4074 	      && rhs_code != BIT_XOR_EXPR
4075 	      && rhs_code != MIN_EXPR
4076 	      && rhs_code != MAX_EXPR
4077 	      && (!can_reassociate_p (lhs)
4078 		  || !can_reassociate_p (rhs1)
4079 		  || !can_reassociate_p (rhs2)))
4080 	    continue;
4081 
4082 	  if (associative_tree_code (rhs_code))
4083 	    {
4084 	      vec<operand_entry_t> ops = vNULL;
4085 	      tree powi_result = NULL_TREE;
4086 
4087 	      /* There may be no immediate uses left by the time we
4088 		 get here because we may have eliminated them all.  */
4089 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4090 		continue;
4091 
4092 	      gimple_set_visited (stmt, true);
4093 	      linearize_expr_tree (&ops, stmt, true, true);
4094 	      ops.qsort (sort_by_operand_rank);
4095 	      optimize_ops_list (rhs_code, &ops);
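	      /* undistribute_ops_list factors out common operands, e.g.
		 rewriting a*c + b*c as (a + b)*c (illustrative); when it
		 changes the list, sort and optimize it again.  */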
4096 	      if (undistribute_ops_list (rhs_code, &ops,
4097 					 loop_containing_stmt (stmt)))
4098 		{
4099 		  ops.qsort (sort_by_operand_rank);
4100 		  optimize_ops_list (rhs_code, &ops);
4101 		}
4102 
4103 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4104 		optimize_range_tests (rhs_code, &ops);
4105 
4106 	      if (first_pass_instance
4107 		  && rhs_code == MULT_EXPR
4108 		  && flag_unsafe_math_optimizations)
4109 		powi_result = attempt_builtin_powi (stmt, &ops);
4110 
4111 	      /* If the operand vector is now empty, all operands were
4112 		 consumed by the __builtin_powi optimization.  */
4113 	      if (ops.length () == 0)
4114 		transform_stmt_to_copy (&gsi, stmt, powi_result);
4115 	      else if (ops.length () == 1)
4116 		{
4117 		  tree last_op = ops.last ()->op;
4118 
4119 		  if (powi_result)
4120 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
4121 						powi_result);
4122 		  else
4123 		    transform_stmt_to_copy (&gsi, stmt, last_op);
4124 		}
4125 	      else
4126 		{
4127 		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4128 		  int ops_num = ops.length ();
4129 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
4130 
4131 		  if (dump_file && (dump_flags & TDF_DETAILS))
4132 		    fprintf (dump_file,
4133 			     "Width = %d was chosen for reassociation\n", width);
4134 
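		  /* E.g. with width 2 and operands a, b, c, d the parallel
		     rewrite produces roughly (a + b) + (c + d) instead of
		     the serial ((a + b) + c) + d (illustrative).  */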
4135 		  if (width > 1
4136 		      && ops.length () > 3)
4137 		    rewrite_expr_tree_parallel (stmt, width, ops);
4138 		  else
4139 		    rewrite_expr_tree (stmt, 0, ops, false);
4140 
4141 		  /* If we combined some repeated factors into a
4142 		     __builtin_powi call, multiply that result by the
4143 		     reassociated operands.  */
4144 		  if (powi_result)
4145 		    {
4146 		      gimple mul_stmt;
4147 		      tree type = TREE_TYPE (gimple_get_lhs (stmt));
4148 		      tree target_ssa = make_temp_ssa_name (type, NULL,
4149 							    "reassocpow");
4150 		      gimple_set_lhs (stmt, target_ssa);
4151 		      update_stmt (stmt);
4152 		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4153 							       powi_result,
4154 							       target_ssa);
4155 		      gimple_set_location (mul_stmt, gimple_location (stmt));
4156 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4157 		    }
4158 		}
4159 
4160 	      ops.release ();
4161 	    }
4162 	}
4163     }
4164   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4165        son;
4166        son = next_dom_son (CDI_POST_DOMINATORS, son))
4167     reassociate_bb (son);
4168 }
4169 
4170 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4171 void debug_ops_vector (vec<operand_entry_t> ops);
4172 
4173 /* Dump the operand entry vector OPS to FILE.  */
4174 
4175 void
4176 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4177 {
4178   operand_entry_t oe;
4179   unsigned int i;
4180 
4181   FOR_EACH_VEC_ELT (ops, i, oe)
4182     {
4183       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4184       print_generic_expr (file, oe->op, 0);
4185     }
4186 }
4187 
4188 /* Dump the operand entry vector OPS to STDERR.  */
4189 
4190 DEBUG_FUNCTION void
4191 debug_ops_vector (vec<operand_entry_t> ops)
4192 {
4193   dump_ops_vector (stderr, ops);
4194 }
4195 
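/* Run the pass body: break up subtracts walking the dominator tree down
   from the entry block, then reassociate walking the post-dominator tree
   from the exit block.  */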
4196 static void
4197 do_reassoc (void)
4198 {
4199   break_up_subtract_bb (ENTRY_BLOCK_PTR);
4200   reassociate_bb (EXIT_BLOCK_PTR);
4201 }
4202 
4203 /* Initialize the reassociation pass.  */
4204 
4205 static void
4206 init_reassoc (void)
4207 {
4208   int i;
4209   long rank = 2;
4210   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4211 
4212   /* Find the loops, so that we can prevent moving calculations into
4213      them.  */
4214   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4215 
4216   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4217 
4218   operand_entry_pool = create_alloc_pool ("operand entry pool",
4219 					  sizeof (struct operand_entry), 30);
4220   next_operand_entry_id = 0;
4221 
4222   /* Reverse RPO (Reverse Post Order) will give us something where
4223      deeper loops come later.  */
4224   pre_and_rev_post_order_compute (NULL, bbs, false);
4225   bb_rank = XCNEWVEC (long, last_basic_block);
4226   operand_rank = pointer_map_create ();
4227 
4228   /* Give each default definition a distinct rank.  This includes
4229      parameters and the static chain.  Walk backwards over all
4230      SSA names so that we get proper rank ordering according
4231      to tree_swap_operands_p.  */
4232   for (i = num_ssa_names - 1; i > 0; --i)
4233     {
4234       tree name = ssa_name (i);
4235       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4236 	insert_operand_rank (name, ++rank);
4237     }
4238 
4239   /* Set up the rank for each BB.  */
4240   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4241     bb_rank[bbs[i]] = ++rank << 16;
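  /* The << 16 presumably leaves the low bits of each block's base rank
     free for ranks of values computed within the block (see get_rank).  */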
4242 
4243   free (bbs);
4244   calculate_dominance_info (CDI_POST_DOMINATORS);
4245   plus_negates = vNULL;
4246 }
4247 
4248 /* Cleanup after the reassociation pass, and print stats if
4249    requested.  */
4250 
4251 static void
4252 fini_reassoc (void)
4253 {
4254   statistics_counter_event (cfun, "Linearized",
4255 			    reassociate_stats.linearized);
4256   statistics_counter_event (cfun, "Constants eliminated",
4257 			    reassociate_stats.constants_eliminated);
4258   statistics_counter_event (cfun, "Ops eliminated",
4259 			    reassociate_stats.ops_eliminated);
4260   statistics_counter_event (cfun, "Statements rewritten",
4261 			    reassociate_stats.rewritten);
4262   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4263 			    reassociate_stats.pows_encountered);
4264   statistics_counter_event (cfun, "Built-in powi calls created",
4265 			    reassociate_stats.pows_created);
4266 
4267   pointer_map_destroy (operand_rank);
4268   free_alloc_pool (operand_entry_pool);
4269   free (bb_rank);
4270   plus_negates.release ();
4271   free_dominance_info (CDI_POST_DOMINATORS);
4272   loop_optimizer_finalize ();
4273 }
4274 
4275 /* Gate and execute functions for Reassociation.  */
4276 
4277 static unsigned int
4278 execute_reassoc (void)
4279 {
4280   init_reassoc ();
4281 
4282   do_reassoc ();
4283   repropagate_negates ();
4284 
4285   fini_reassoc ();
4286   return 0;
4287 }
4288 
4289 static bool
4290 gate_tree_ssa_reassoc (void)
4291 {
4292   return flag_tree_reassoc != 0;
4293 }
4294 
4295 struct gimple_opt_pass pass_reassoc =
4296 {
4297  {
4298   GIMPLE_PASS,
4299   "reassoc",				/* name */
4300   OPTGROUP_NONE,                        /* optinfo_flags */
4301   gate_tree_ssa_reassoc,		/* gate */
4302   execute_reassoc,			/* execute */
4303   NULL,					/* sub */
4304   NULL,					/* next */
4305   0,					/* static_pass_number */
4306   TV_TREE_REASSOC,			/* tv_id */
4307   PROP_cfg | PROP_ssa,			/* properties_required */
4308   0,					/* properties_provided */
4309   0,					/* properties_destroyed */
4310   0,					/* todo_flags_start */
4311   TODO_verify_ssa
4312     | TODO_update_ssa_only_virtuals
4313     | TODO_verify_flow
4314     | TODO_ggc_collect			/* todo_flags_finish */
4315  }
4316 };
4317