1 /* Reassociation for trees.
2    Copyright (C) 2005-2020 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 
55 /*  This is a simple global reassociation pass.  It is, in part, based
56     on the LLVM pass of the same name (they do some things more or less
57     than we do, in different orders, etc.).
58 
59     It consists of five steps:
60 
61     1. Breaking up subtract operations into addition + negate, where
62     it would promote the reassociation of adds.
63 
64     2. Left linearization of the expression trees, so that (A+B)+(C+D)
65     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66     During linearization, we place the operands of the binary
67     expressions into a vector of operand_entry_*
68 
69     3. Optimization of the operand lists, eliminating things like a +
70     -a, a & a, etc.
71 
72     3a. Combine repeated factors with the same occurrence counts
73     into a __builtin_powi call that will later be optimized into
74     an optimal number of multiplies.
75 
76     4. Rewrite the expression trees we linearized and optimized so
77     they are in proper rank order.
78 
79     5. Repropagate negates, as nothing else will clean it up ATM.
80 
81     A bit of theory on #4, since nobody seems to write anything down
82     about why it makes sense to do it the way they do it:
83 
84     We could do this in a theoretically nicer way, but don't (for
85     reasons explained right after the description of the nice way :P).
86 
87     In order to promote the most redundancy elimination, you want
88     binary expressions whose operands are the same rank (or
89     preferably, the same value) exposed to the redundancy eliminator,
90     for possible elimination.
91 
92     So the way to do this if we really cared, is to build the new op
93     tree from the leaves to the roots, merging as you go, and putting the
94     new op on the end of the worklist, until you are left with one
95     thing on the worklist.
96 
97     I.e., if you have to rewrite the following set of operands (listed with
98     rank in parentheses), with opcode PLUS_EXPR:
99 
100     a (1),  b (1),  c (1),  d (2), e (2)
101 
102 
103     We start with our merge worklist empty, and the ops list with all of
104     those on it.
105 
106     You want to first merge all leaves of the same rank, as much as
107     possible.
108 
109     So first build a binary op of
110 
111     mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 
113     Because there is no three operand form of PLUS_EXPR, c is not going to
114     be exposed to redundancy elimination as a rank 1 operand.
115 
116     So you might as well throw it on the merge worklist (you could also
117     consider it to now be a rank two operand, and merge it with d and e,
118     but in this case, you then have evicted e from a binary op. So at
119     least in this situation, you can't win.)
120 
121     Then build a binary op of d + e
122     mergetmp2 = d + e
123 
124     and put mergetmp2 on the merge worklist.
125 
126     so merge worklist = {mergetmp, c, mergetmp2}
127 
128     Continue building binary ops of these operations until you have only
129     one operation left on the worklist.
130 
131     So we have
132 
133     build binary op
134     mergetmp3 = mergetmp + c
135 
136     worklist = {mergetmp2, mergetmp3}
137 
138     mergetmp4 = mergetmp2 + mergetmp3
139 
140     worklist = {mergetmp4}
141 
142     Because we have one operation left, we can now just set the original
143     statement equal to the result of that operation.
144 
145     This will at least expose a + b  and d + e to redundancy elimination
146     as binary operations.
147 
148     For extra points, you can reuse the old statements to build the
149     mergetmps, since you shouldn't run out.
150 
151     So why don't we do this?
152 
153     Because it's expensive, and rarely will help.  Most trees we are
154     reassociating have 3 or fewer ops.  If they have 2 ops, they already
155     will be written into a nice single binary op.  If you have 3 ops, a
156     single simple check suffices to tell you whether the first two are of the
157     same rank.  If so, you know to order it
158 
159     mergetmp = op1 + op2
160     newstmt = mergetmp + op3
161 
162     instead of
163     mergetmp = op2 + op3
164     newstmt = mergetmp + op1
165 
166     If all three are of the same rank, you can't expose them all in a
167     single binary operator anyway, so the above is *still* the best you
168     can do.
169 
170     Thus, this is what we do.  When we have three ops left, we check to see
171     what order to put them in, and call it a day.  As a nod to vector sum
172     reduction, we check if any of the ops are really a phi node that is a
173     destructive update for the associating op, and keep the destructive
174     update together for vector sum reduction recognition.  */
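
/* As an illustrative sketch of steps 1-5 (the input is hypothetical,
   not taken from any testcase), consider:

     e = a - b;
     f = e + (c + a);

   Step 1 turns the subtract into an addition of a negate (t = -b;
   e = a + t), step 2 linearizes the adds into the flat operand list
   {a, -b, c, a}, step 3 finds nothing to cancel here (it would cancel
   a pair like x + -x), step 4 re-emits the list as a left-linear
   chain in rank order, and step 5 pushes any surviving negates back
   into subtracts.  */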
175 
176 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
177    point 3a in the pass header comment.  */
178 static bool reassoc_insert_powi_p;
179 
180 /* Statistics */
181 static struct
182 {
183   int linearized;
184   int constants_eliminated;
185   int ops_eliminated;
186   int rewritten;
187   int pows_encountered;
188   int pows_created;
189 } reassociate_stats;
190 
191 /* Operator, rank pair.  */
192 struct operand_entry
193 {
194   unsigned int rank;
195   unsigned int id;
196   tree op;
197   unsigned int count;
198   gimple *stmt_to_insert;
199 };
200 
201 static object_allocator<operand_entry> operand_entry_pool
202   ("operand entry pool");
203 
204 /* This is used to assign a unique ID to each struct operand_entry
205    so that qsort results are identical on different hosts.  */
206 static unsigned int next_operand_entry_id;
207 
208 /* Starting rank number for a given basic block, so that we can rank
209    operations using unmovable instructions in that BB based on the bb
210    depth.  */
211 static int64_t *bb_rank;
212 
213 /* Operand->rank hashtable.  */
214 static hash_map<tree, int64_t> *operand_rank;
215 
216 /* Vector of SSA_NAMEs on which, after reassociate_bb is done with
217    all basic blocks, the CFG should be adjusted - the basic block
218    split right after that SSA_NAME's definition statement and before
219    its only use, which must be a bit ior.  */
220 static vec<tree> reassoc_branch_fixups;
221 
222 /* Forward decls.  */
223 static int64_t get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227    possibly added by gsi_remove.  */
228 
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 {
232   gimple *stmt = gsi_stmt (*gsi);
233 
234   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235     return gsi_remove (gsi, true);
236 
237   gimple_stmt_iterator prev = *gsi;
238   gsi_prev (&prev);
239   unsigned uid = gimple_uid (stmt);
240   basic_block bb = gimple_bb (stmt);
241   bool ret = gsi_remove (gsi, true);
242   if (!gsi_end_p (prev))
243     gsi_next (&prev);
244   else
245     prev = gsi_start_bb (bb);
246   gimple *end_stmt = gsi_stmt (*gsi);
247   while ((stmt = gsi_stmt (prev)) != end_stmt)
248     {
249       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250       gimple_set_uid (stmt, uid);
251       gsi_next (&prev);
252     }
253   return ret;
254 }
255 
256 /* Bias amount for loop-carried phis.  We want this to be larger than
257    the depth of any reassociation tree we can see, but not larger than
258    the rank difference between two blocks.  */
259 #define PHI_LOOP_BIAS (1 << 15)
260 
261 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
262    an innermost loop, and the phi has only a single use which is inside
263    the loop, then the rank is the block rank of the loop latch plus an
264    extra bias for the loop-carried dependence.  This causes expressions
265    calculated into an accumulator variable to be independent for each
266    iteration of the loop.  If STMT is some other phi, the rank is the
267    block rank of its containing block.  */
268 static int64_t
269 phi_rank (gimple *stmt)
270 {
271   basic_block bb = gimple_bb (stmt);
272   class loop *father = bb->loop_father;
273   tree res;
274   unsigned i;
275   use_operand_p use;
276   gimple *use_stmt;
277 
278   /* We only care about real loops (those with a latch).  */
279   if (!father->latch)
280     return bb_rank[bb->index];
281 
282   /* Interesting phis must be in headers of innermost loops.  */
283   if (bb != father->header
284       || father->inner)
285     return bb_rank[bb->index];
286 
287   /* Ignore virtual SSA_NAMEs.  */
288   res = gimple_phi_result (stmt);
289   if (virtual_operand_p (res))
290     return bb_rank[bb->index];
291 
292   /* The phi definition must have a single use, and that use must be
293      within the loop.  Otherwise this isn't an accumulator pattern.  */
294   if (!single_imm_use (res, &use, &use_stmt)
295       || gimple_bb (use_stmt)->loop_father != father)
296     return bb_rank[bb->index];
297 
298   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
299   for (i = 0; i < gimple_phi_num_args (stmt); i++)
300     {
301       tree arg = gimple_phi_arg_def (stmt, i);
302       if (TREE_CODE (arg) == SSA_NAME
303 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 	{
305 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 	  if (gimple_bb (def_stmt)->loop_father == father)
307 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
308 	}
309     }
310 
311   /* Must be an uninteresting phi.  */
312   return bb_rank[bb->index];
313 }
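
/* For illustration (the numbers are made up): if the latch block of a
   qualifying loop has bb_rank R, the accumulator phi result gets rank
   R + PHI_LOOP_BIAS.  The bias dominates the rank of anything computed
   inside the loop body, so the phi result ends up added last when the
   chain is rewritten, as in the accumulator example before get_rank
   below.  */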
314 
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316    loop-carried dependence of an innermost loop, return TRUE; else
317    return FALSE.  */
318 static bool
319 loop_carried_phi (tree exp)
320 {
321   gimple *phi_stmt;
322   int64_t block_rank;
323 
324   if (TREE_CODE (exp) != SSA_NAME
325       || SSA_NAME_IS_DEFAULT_DEF (exp))
326     return false;
327 
328   phi_stmt = SSA_NAME_DEF_STMT (exp);
329 
330   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331     return false;
332 
333   /* Non-loop-carried phis have block rank.  Loop-carried phis have
334      an additional bias added in.  If this phi doesn't have block rank,
335      it's biased and should not be propagated.  */
336   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 
338   if (phi_rank (phi_stmt) != block_rank)
339     return true;
340 
341   return false;
342 }
343 
344 /* Return the maximum of RANK and the rank that should be propagated
345    from expression OP.  For most operands, this is just the rank of OP.
346    For loop-carried phis, the value is zero to avoid undoing the bias
347    in favor of the phi.  */
348 static int64_t
349 propagate_rank (int64_t rank, tree op)
350 {
351   int64_t op_rank;
352 
353   if (loop_carried_phi (op))
354     return rank;
355 
356   op_rank = get_rank (op);
357 
358   return MAX (rank, op_rank);
359 }
360 
361 /* Look up the operand rank structure for expression E.  */
362 
363 static inline int64_t
364 find_operand_rank (tree e)
365 {
366   int64_t *slot = operand_rank->get (e);
367   return slot ? *slot : -1;
368 }
369 
370 /* Insert {E,RANK} into the operand rank hashtable.  */
371 
372 static inline void
373 insert_operand_rank (tree e, int64_t rank)
374 {
375   gcc_assert (rank > 0);
376   gcc_assert (!operand_rank->put (e, rank));
377 }
378 
379 /* Given an expression E, return the rank of the expression.  */
380 
381 static int64_t
382 get_rank (tree e)
383 {
384   /* SSA_NAME's have the rank of the expression they are the result
385      of.
386      For globals and uninitialized values, the rank is 0.
387      For function arguments, use the pre-setup rank.
388      For PHI nodes, stores, asm statements, etc, we use the rank of
389      the BB.
390      For simple operations, the rank is the maximum rank of any of
391      its operands, or the bb_rank, whichever is less.
392      I make no claims that this is optimal, however, it gives good
393      results.  */
394 
395   /* We make an exception to the normal ranking system to break
396      dependences of accumulator variables in loops.  Suppose we
397      have a simple one-block loop containing:
398 
399        x_1 = phi(x_0, x_2)
400        b = a + x_1
401        c = b + d
402        x_2 = c + e
403 
404      As shown, each iteration of the calculation into x is fully
405      dependent upon the iteration before it.  We would prefer to
406      see this in the form:
407 
408        x_1 = phi(x_0, x_2)
409        b = a + d
410        c = b + e
411        x_2 = c + x_1
412 
413      If the loop is unrolled, the calculations of b and c from
414      different iterations can be interleaved.
415 
416      To obtain this result during reassociation, we bias the rank
417      of the phi definition x_1 upward, when it is recognized as an
418      accumulator pattern.  The artificial rank causes it to be
419      added last, providing the desired independence.  */
420 
421   if (TREE_CODE (e) == SSA_NAME)
422     {
423       ssa_op_iter iter;
424       gimple *stmt;
425       int64_t rank;
426       tree op;
427 
428       if (SSA_NAME_IS_DEFAULT_DEF (e))
429 	return find_operand_rank (e);
430 
431       stmt = SSA_NAME_DEF_STMT (e);
432       if (gimple_code (stmt) == GIMPLE_PHI)
433 	return phi_rank (stmt);
434 
435       if (!is_gimple_assign (stmt))
436 	return bb_rank[gimple_bb (stmt)->index];
437 
438       /* If we already have a rank for this expression, use that.  */
439       rank = find_operand_rank (e);
440       if (rank != -1)
441 	return rank;
442 
443       /* Otherwise, find the maximum rank for the operands.  As an
444 	 exception, remove the bias from loop-carried phis when propagating
445 	 the rank so that dependent operations are not also biased.  */
446       /* Simply walk over all SSA uses - this takes advantage of the
447          fact that non-SSA operands are is_gimple_min_invariant and
448 	 thus have rank 0.  */
449       rank = 0;
450       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 	rank = propagate_rank (rank, op);
452 
453       if (dump_file && (dump_flags & TDF_DETAILS))
454 	{
455 	  fprintf (dump_file, "Rank for ");
456 	  print_generic_expr (dump_file, e);
457 	  fprintf (dump_file, " is %" PRId64 "\n", (rank + 1));
458 	}
459 
460       /* Note the rank in the hashtable so we don't recompute it.  */
461       insert_operand_rank (e, (rank + 1));
462       return (rank + 1);
463     }
464 
465   /* Constants, globals, etc., are rank 0 */
466   return 0;
467 }
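
/* A small worked example (hypothetical; real ranks depend on the CFG
   and the pre-setup parameter ranks): if parameters x and y both have
   rank 1, then for

     a = x + y;
     b = a + x;

   get_rank (a) == MAX (1, 1) + 1 == 2 and get_rank (b) == MAX (2, 1)
   + 1 == 3, so sort_by_operand_rank orders b before a, and a before
   x and y.  */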
468 
469 
470 /* We want integer ones to end up last no matter what, since they are
471    the ones we can do the most with.  */
472 #define INTEGER_CONST_TYPE (1 << 4)
473 #define FLOAT_ONE_CONST_TYPE (1 << 3)
474 #define FLOAT_CONST_TYPE (1 << 2)
475 #define OTHER_CONST_TYPE (1 << 1)
476 
477 /* Classify an invariant tree into integer, float, or other, so that
478    we can sort them to be near other constants of the same type.  */
479 static inline int
480 constant_type (tree t)
481 {
482   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483     return INTEGER_CONST_TYPE;
484   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485     {
486       /* Sort -1.0 and 1.0 constants last: while in some cases
487 	 const_binop can't optimize some inexact operations, multiplication
488 	 by -1.0 or 1.0 can always be merged with others.  */
489       if (real_onep (t) || real_minus_onep (t))
490 	return FLOAT_ONE_CONST_TYPE;
491       return FLOAT_CONST_TYPE;
492     }
493   else
494     return OTHER_CONST_TYPE;
495 }
496 
497 /* qsort comparison function to sort operand entries PA and PB by rank
498    so that the sorted array is ordered by rank in decreasing order.  */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
501 {
502   const operand_entry *oea = *(const operand_entry *const *)pa;
503   const operand_entry *oeb = *(const operand_entry *const *)pb;
504 
505   if (oeb->rank != oea->rank)
506     return oeb->rank > oea->rank ? 1 : -1;
507 
508   /* It's nicer for optimize_expression if constants that are likely
509      to fold when added/multiplied/whatever are put next to each
510      other.  Since all constants have rank 0, order them by type.  */
511   if (oea->rank == 0)
512     {
513       if (constant_type (oeb->op) != constant_type (oea->op))
514 	return constant_type (oea->op) - constant_type (oeb->op);
515       else
516 	/* To make sorting result stable, we use unique IDs to determine
517 	   order.  */
518 	return oeb->id > oea->id ? 1 : -1;
519     }
520 
521   if (TREE_CODE (oea->op) != SSA_NAME)
522     {
523       if (TREE_CODE (oeb->op) != SSA_NAME)
524 	return oeb->id > oea->id ? 1 : -1;
525       else
526 	return 1;
527     }
528   else if (TREE_CODE (oeb->op) != SSA_NAME)
529     return -1;
530 
531   /* Lastly, make sure the versions that are the same go next to each
532      other.  */
533   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
534     {
535       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
536 	 versions of removed SSA_NAMEs, so if possible, prefer to sort
537 	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 	 See PR60418.  */
539       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541       basic_block bba = gimple_bb (stmta);
542       basic_block bbb = gimple_bb (stmtb);
543       if (bbb != bba)
544 	{
545 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 	     but the other might not.  */
547 	  if (!bba)
548 	    return 1;
549 	  if (!bbb)
550 	    return -1;
551 	  /* If neither is, compare bb_rank.  */
552 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
553 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
554 	}
555 
556       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558       if (da != db)
559 	return da ? 1 : -1;
560 
561       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
562     }
563 
564   return oeb->id > oea->id ? 1 : -1;
565 }
566 
567 /* Add an operand entry to *OPS for the tree operand OP.  */
568 
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
571 {
572   operand_entry *oe = operand_entry_pool.allocate ();
573 
574   oe->op = op;
575   oe->rank = get_rank (op);
576   oe->id = next_operand_entry_id++;
577   oe->count = 1;
578   oe->stmt_to_insert = stmt_to_insert;
579   ops->safe_push (oe);
580 }
581 
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583    count REPEAT.  */
584 
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops,
587 		       HOST_WIDE_INT repeat)
588 {
589   operand_entry *oe = operand_entry_pool.allocate ();
590 
591   oe->op = op;
592   oe->rank = get_rank (op);
593   oe->id = next_operand_entry_id++;
594   oe->count = repeat;
595   oe->stmt_to_insert = NULL;
596   ops->safe_push (oe);
597 
598   reassociate_stats.pows_encountered++;
599 }
600 
601 /* Return true if STMT is a reassociable operation containing a binary
602    operation with tree code CODE, and is inside LOOP.  */
603 
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
606 {
607   basic_block bb = gimple_bb (stmt);
608 
609   if (gimple_bb (stmt) == NULL)
610     return false;
611 
612   if (!flow_bb_inside_loop_p (loop, bb))
613     return false;
614 
615   if (is_gimple_assign (stmt)
616       && gimple_assign_rhs_code (stmt) == code
617       && has_single_use (gimple_assign_lhs (stmt)))
618     {
619       tree rhs1 = gimple_assign_rhs1 (stmt);
620       tree rhs2 = gimple_assign_rhs2 (stmt);
621       if (TREE_CODE (rhs1) == SSA_NAME
622 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 	return false;
624       if (rhs2
625 	  && TREE_CODE (rhs2) == SSA_NAME
626 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 	return false;
628       return true;
629     }
630 
631   return false;
632 }
633 
634 
635 /* Return true if STMT is a nop-conversion.  */
636 
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
639 {
640   if (gassign *ass = dyn_cast <gassign *> (stmt))
641     {
642       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
645 	return true;
646     }
647   return false;
648 }
649 
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651    operand of that operation.  Otherwise, return NULL_TREE.  */
652 
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
655 {
656   gimple *stmt = SSA_NAME_DEF_STMT (name);
657 
658   /* Look through nop conversions (sign changes).  */
659   if (gimple_nop_conversion_p (stmt)
660       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
662 
663   if (!is_gimple_assign (stmt))
664     return NULL_TREE;
665 
666   if (gimple_assign_rhs_code (stmt) == opcode)
667     return gimple_assign_rhs1 (stmt);
668   return NULL_TREE;
669 }
670 
671 /* Return true if OP1 and OP2 have the same value if cast to either type.  */
672 
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
675 {
676   if (op1 == op2)
677     return true;
678 
679   tree orig_op1 = op1;
680   if (TREE_CODE (op1) == SSA_NAME)
681     {
682       gimple *stmt = SSA_NAME_DEF_STMT (op1);
683       if (gimple_nop_conversion_p (stmt))
684 	{
685 	  op1 = gimple_assign_rhs1 (stmt);
686 	  if (op1 == op2)
687 	    return true;
688 	}
689     }
690 
691   if (TREE_CODE (op2) == SSA_NAME)
692     {
693       gimple *stmt = SSA_NAME_DEF_STMT (op2);
694       if (gimple_nop_conversion_p (stmt))
695 	{
696 	  op2 = gimple_assign_rhs1 (stmt);
697 	  if (op1 == op2
698 	      || orig_op1 == op2)
699 	    return true;
700 	}
701     }
702 
703   return false;
704 }
705 
706 
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708    eliminate through equivalences, do so, remove them from OPS, and
709    return true.  Otherwise, return false.  */
710 
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 			  vec<operand_entry *> *ops,
714 			  bool *all_done,
715 			  unsigned int i,
716 			  operand_entry *curr,
717 			  operand_entry *last)
718 {
719 
720   /* If we have two of the same op, and the opcode is & |, min, or max,
721      we can eliminate one of them.
722      If we have two of the same op, and the opcode is ^, we can
723      eliminate both of them.  */
724 
725   if (last && last->op == curr->op)
726     {
727       switch (opcode)
728 	{
729 	case MAX_EXPR:
730 	case MIN_EXPR:
731 	case BIT_IOR_EXPR:
732 	case BIT_AND_EXPR:
733 	  if (dump_file && (dump_flags & TDF_DETAILS))
734 	    {
735 	      fprintf (dump_file, "Equivalence: ");
736 	      print_generic_expr (dump_file, curr->op);
737 	      fprintf (dump_file, " [&|minmax] ");
738 	      print_generic_expr (dump_file, last->op);
739 	      fprintf (dump_file, " -> ");
740 	      print_generic_stmt (dump_file, last->op);
741 	    }
742 
743 	  ops->ordered_remove (i);
744 	  reassociate_stats.ops_eliminated ++;
745 
746 	  return true;
747 
748 	case BIT_XOR_EXPR:
749 	  if (dump_file && (dump_flags & TDF_DETAILS))
750 	    {
751 	      fprintf (dump_file, "Equivalence: ");
752 	      print_generic_expr (dump_file, curr->op);
753 	      fprintf (dump_file, " ^ ");
754 	      print_generic_expr (dump_file, last->op);
755 	      fprintf (dump_file, " -> nothing\n");
756 	    }
757 
758 	  reassociate_stats.ops_eliminated += 2;
759 
760 	  if (ops->length () == 2)
761 	    {
762 	      ops->truncate (0);
763 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 	      *all_done = true;
765 	    }
766 	  else
767 	    {
768 	      ops->ordered_remove (i-1);
769 	      ops->ordered_remove (i-1);
770 	    }
771 
772 	  return true;
773 
774 	default:
775 	  break;
776 	}
777     }
778   return false;
779 }
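
/* For illustration: given the operand list {a, a, b} with opcode
   BIT_AND_EXPR, one copy of a is removed, leaving a & b; with
   BIT_XOR_EXPR both copies are removed, leaving just {b}.  If the two
   equal operands were the whole list, the BIT_XOR_EXPR case replaces
   it with a single zero constant and sets *ALL_DONE.  */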
780 
781 static vec<tree> plus_negates;
782 
783 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
784    not expression, look in OPS for a corresponding positive operation to cancel
785    it out.  If we find one, remove the other from OPS, replace
786    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
787    return false. */
788 
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 			   vec<operand_entry *> *ops,
792 			   unsigned int currindex,
793 			   operand_entry *curr)
794 {
795   tree negateop;
796   tree notop;
797   unsigned int i;
798   operand_entry *oe;
799 
800   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801     return false;
802 
803   negateop = get_unary_op (curr->op, NEGATE_EXPR);
804   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805   if (negateop == NULL_TREE && notop == NULL_TREE)
806     return false;
807 
808   /* Any non-negated version will have a rank that is one less than
809      the current rank.  So once we hit those ranks, if we don't find
810      one, we can stop.  */
811 
812   for (i = currindex + 1;
813        ops->iterate (i, &oe)
814        && oe->rank >= curr->rank - 1 ;
815        i++)
816     {
817       if (negateop
818 	  && ops_equal_values_p (oe->op, negateop))
819 	{
820 	  if (dump_file && (dump_flags & TDF_DETAILS))
821 	    {
822 	      fprintf (dump_file, "Equivalence: ");
823 	      print_generic_expr (dump_file, negateop);
824 	      fprintf (dump_file, " + -");
825 	      print_generic_expr (dump_file, oe->op);
826 	      fprintf (dump_file, " -> 0\n");
827 	    }
828 
829 	  ops->ordered_remove (i);
830 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 	  ops->ordered_remove (currindex);
832 	  reassociate_stats.ops_eliminated ++;
833 
834 	  return true;
835 	}
836       else if (notop
837 	       && ops_equal_values_p (oe->op, notop))
838 	{
839 	  tree op_type = TREE_TYPE (oe->op);
840 
841 	  if (dump_file && (dump_flags & TDF_DETAILS))
842 	    {
843 	      fprintf (dump_file, "Equivalence: ");
844 	      print_generic_expr (dump_file, notop);
845 	      fprintf (dump_file, " + ~");
846 	      print_generic_expr (dump_file, oe->op);
847 	      fprintf (dump_file, " -> -1\n");
848 	    }
849 
850 	  ops->ordered_remove (i);
851 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 	  ops->ordered_remove (currindex);
853 	  reassociate_stats.ops_eliminated ++;
854 
855 	  return true;
856 	}
857     }
858 
859   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860      save it for later inspection in repropagate_negates().  */
861   if (negateop != NULL_TREE
862       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863     plus_negates.safe_push (curr->op);
864 
865   return false;
866 }
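
/* For illustration: in x + b + -x the negate's operand x cancels the
   positive x and the pair is replaced by a 0 constant; in x + b + ~x
   the pair is replaced by a -1 constant, since x + ~x == -1 in two's
   complement arithmetic.  */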
867 
868 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
869    bitwise not expression, look in OPS for a corresponding operand to
870    cancel it out.  If we find one, replace the whole operand list with
871    a single constant (0 for AND, -1 for IOR) and return true.
872    Otherwise, return false.  */
873 
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 		     vec<operand_entry *> *ops,
877 		     unsigned int currindex,
878 		     operand_entry *curr)
879 {
880   tree notop;
881   unsigned int i;
882   operand_entry *oe;
883 
884   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885       || TREE_CODE (curr->op) != SSA_NAME)
886     return false;
887 
888   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889   if (notop == NULL_TREE)
890     return false;
891 
892   /* Any non-not version will have a rank that is one less than
893      the current rank.  So once we hit those ranks, if we don't find
894      one, we can stop.  */
895 
896   for (i = currindex + 1;
897        ops->iterate (i, &oe)
898        && oe->rank >= curr->rank - 1;
899        i++)
900     {
901       if (oe->op == notop)
902 	{
903 	  if (dump_file && (dump_flags & TDF_DETAILS))
904 	    {
905 	      fprintf (dump_file, "Equivalence: ");
906 	      print_generic_expr (dump_file, notop);
907 	      if (opcode == BIT_AND_EXPR)
908 		fprintf (dump_file, " & ~");
909 	      else if (opcode == BIT_IOR_EXPR)
910 		fprintf (dump_file, " | ~");
911 	      print_generic_expr (dump_file, oe->op);
912 	      if (opcode == BIT_AND_EXPR)
913 		fprintf (dump_file, " -> 0\n");
914 	      else if (opcode == BIT_IOR_EXPR)
915 		fprintf (dump_file, " -> -1\n");
916 	    }
917 
918 	  if (opcode == BIT_AND_EXPR)
919 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 	  else if (opcode == BIT_IOR_EXPR)
921 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
922 
923 	  reassociate_stats.ops_eliminated += ops->length () - 1;
924 	  ops->truncate (0);
925 	  ops->quick_push (oe);
926 	  return true;
927 	}
928     }
929 
930   return false;
931 }
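
/* For illustration: x & b & ~x collapses the whole operand list to
   the single constant 0, and x | b | ~x collapses it to -1; once the
   pair is found, the remaining operands no longer matter.  */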
932 
933 /* Use constant value that may be present in OPS to try to eliminate
934    operands.  Note that this function is only really used when we've
935    eliminated ops for other reasons, or merged constants.  Across
936    single statements, fold already does all of this, plus more.  There
937    is little point in duplicating logic, so I've only included the
938    identities that I could ever construct testcases to trigger.  */
939 
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 			   vec<operand_entry *> *ops)
943 {
944   operand_entry *oelast = ops->last ();
945   tree type = TREE_TYPE (oelast->op);
946 
947   if (oelast->rank == 0
948       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
949     {
950       switch (opcode)
951 	{
952 	case BIT_AND_EXPR:
953 	  if (integer_zerop (oelast->op))
954 	    {
955 	      if (ops->length () != 1)
956 		{
957 		  if (dump_file && (dump_flags & TDF_DETAILS))
958 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
959 
960 		  reassociate_stats.ops_eliminated += ops->length () - 1;
961 
962 		  ops->truncate (0);
963 		  ops->quick_push (oelast);
964 		  return;
965 		}
966 	    }
967 	  else if (integer_all_onesp (oelast->op))
968 	    {
969 	      if (ops->length () != 1)
970 		{
971 		  if (dump_file && (dump_flags & TDF_DETAILS))
972 		    fprintf (dump_file, "Found & -1, removing\n");
973 		  ops->pop ();
974 		  reassociate_stats.ops_eliminated++;
975 		}
976 	    }
977 	  break;
978 	case BIT_IOR_EXPR:
979 	  if (integer_all_onesp (oelast->op))
980 	    {
981 	      if (ops->length () != 1)
982 		{
983 		  if (dump_file && (dump_flags & TDF_DETAILS))
984 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
985 
986 		  reassociate_stats.ops_eliminated += ops->length () - 1;
987 
988 		  ops->truncate (0);
989 		  ops->quick_push (oelast);
990 		  return;
991 		}
992 	    }
993 	  else if (integer_zerop (oelast->op))
994 	    {
995 	      if (ops->length () != 1)
996 		{
997 		  if (dump_file && (dump_flags & TDF_DETAILS))
998 		    fprintf (dump_file, "Found | 0, removing\n");
999 		  ops->pop ();
1000 		  reassociate_stats.ops_eliminated++;
1001 		}
1002 	    }
1003 	  break;
1004 	case MULT_EXPR:
1005 	  if (integer_zerop (oelast->op)
1006 	      || (FLOAT_TYPE_P (type)
1007 		  && !HONOR_NANS (type)
1008 		  && !HONOR_SIGNED_ZEROS (type)
1009 		  && real_zerop (oelast->op)))
1010 	    {
1011 	      if (ops->length () != 1)
1012 		{
1013 		  if (dump_file && (dump_flags & TDF_DETAILS))
1014 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1015 
1016 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1017 		  ops->truncate (0);
1018 		  ops->quick_push (oelast);
1019 		  return;
1020 		}
1021 	    }
1022 	  else if (integer_onep (oelast->op)
1023 		   || (FLOAT_TYPE_P (type)
1024 		       && !HONOR_SNANS (type)
1025 		       && real_onep (oelast->op)))
1026 	    {
1027 	      if (ops->length () != 1)
1028 		{
1029 		  if (dump_file && (dump_flags & TDF_DETAILS))
1030 		    fprintf (dump_file, "Found * 1, removing\n");
1031 		  ops->pop ();
1032 		  reassociate_stats.ops_eliminated++;
1033 		  return;
1034 		}
1035 	    }
1036 	  break;
1037 	case BIT_XOR_EXPR:
1038 	case PLUS_EXPR:
1039 	case MINUS_EXPR:
1040 	  if (integer_zerop (oelast->op)
1041 	      || (FLOAT_TYPE_P (type)
1042 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 		  && fold_real_zero_addition_p (type, oelast->op,
1044 						opcode == MINUS_EXPR)))
1045 	    {
1046 	      if (ops->length () != 1)
1047 		{
1048 		  if (dump_file && (dump_flags & TDF_DETAILS))
1049 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 		  ops->pop ();
1051 		  reassociate_stats.ops_eliminated++;
1052 		  return;
1053 		}
1054 	    }
1055 	  break;
1056 	default:
1057 	  break;
1058 	}
1059     }
1060 }
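
/* For illustration: a * b * 0 collapses the list to {0}; a & -1
   simply drops the -1; and a + 0.0 drops the zero only if
   fold_real_zero_addition_p confirms the addition is a no-op under
   the type's floating-point semantics.  */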
1061 
1062 
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 				 bool, bool);
1065 
1066 /* Structure for tracking and counting operands.  */
1067 struct oecount {
1068   unsigned int cnt;
1069   unsigned int id;
1070   enum tree_code oecode;
1071   tree op;
1072 };
1073 
1074 
1075 /* The heap for the oecount hashtable and the sorted list of operands.  */
1076 static vec<oecount> cvec;
1077 
1078 
1079 /* Oecount hashtable helpers.  */
1080 
1081 struct oecount_hasher : int_hash <int, 0, 1>
1082 {
1083   static inline hashval_t hash (int);
1084   static inline bool equal (int, int);
1085 };
1086 
1087 /* Hash function for oecount.  */
1088 
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1091 {
1092   const oecount *c = &cvec[p - 42];
1093   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1094 }
1095 
1096 /* Comparison function for oecount.  */
1097 
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1100 {
1101   const oecount *c1 = &cvec[p1 - 42];
1102   const oecount *c2 = &cvec[p2 - 42];
1103   return c1->oecode == c2->oecode && c1->op == c2->op;
1104 }
1105 
1106 /* Comparison function for qsort sorting oecount elements by count.  */
1107 
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1110 {
1111   const oecount *c1 = (const oecount *)p1;
1112   const oecount *c2 = (const oecount *)p2;
1113   if (c1->cnt != c2->cnt)
1114     return c1->cnt > c2->cnt ? 1 : -1;
1115   else
1116     /* If counts are identical, use unique IDs to stabilize qsort.  */
1117     return c1->id > c2->id ? 1 : -1;
1118 }
1119 
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121    to some exponent.  */
1122 
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1125 {
1126   if (!is_gimple_call (stmt))
1127     return false;
1128 
1129   switch (gimple_call_combined_fn (stmt))
1130     {
1131     CASE_CFN_POW:
1132     CASE_CFN_POWI:
1133       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1134 
1135     default:
1136       return false;
1137     }
1138 }
1139 
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141    in place and return the result.  Assumes that stmt_is_power_of_op
1142    was previously called for STMT and returned TRUE.  */
1143 
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1146 {
1147   REAL_VALUE_TYPE c, cint;
1148   HOST_WIDE_INT power;
1149   tree arg1;
1150 
1151   switch (gimple_call_combined_fn (stmt))
1152     {
1153     CASE_CFN_POW:
1154       arg1 = gimple_call_arg (stmt, 1);
1155       c = TREE_REAL_CST (arg1);
1156       power = real_to_integer (&c) - 1;
1157       real_from_integer (&cint, VOIDmode, power, SIGNED);
1158       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159       return power;
1160 
1161     CASE_CFN_POWI:
1162       arg1 = gimple_call_arg (stmt, 1);
1163       power = TREE_INT_CST_LOW (arg1) - 1;
1164       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165       return power;
1166 
1167     default:
1168       gcc_unreachable ();
1169     }
1170 }
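
/* For example, given a call t = __builtin_powi (x, 5), decrement_power
   rewrites it in place to __builtin_powi (x, 4) and returns 4.  */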
1171 
1172 /* Replace the SSA name defined by STMT with a new SSA name, replace
1173    all uses of the old name with the new one, and return the new SSA name.  */
1174 
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1177 {
1178   gimple *use_stmt;
1179   use_operand_p use;
1180   imm_use_iterator iter;
1181   tree new_lhs, new_debug_lhs = NULL_TREE;
1182   tree lhs = gimple_get_lhs (stmt);
1183 
1184   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185   gimple_set_lhs (stmt, new_lhs);
1186 
1187   /* Also need to update GIMPLE_DEBUGs.  */
1188   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1189     {
1190       tree repl = new_lhs;
1191       if (is_gimple_debug (use_stmt))
1192 	{
1193 	  if (new_debug_lhs == NULL_TREE)
1194 	    {
1195 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 	      gdebug *def_temp
1197 		= gimple_build_debug_bind (new_debug_lhs,
1198 					   build2 (opcode, TREE_TYPE (lhs),
1199 						   new_lhs, op),
1200 					   stmt);
1201 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1204 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1205 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1207 	    }
1208 	  repl = new_debug_lhs;
1209 	}
1210       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 	SET_USE (use, repl);
1212       update_stmt (use_stmt);
1213     }
1214   return new_lhs;
1215 }
1216 
1217 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1218    uses with new SSAs.  Also do this for the stmt that defines DEF
1219    if *DEF is not OP.  */
1220 
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 			   vec<gimple *> &stmts_to_fix)
1224 {
1225   unsigned i;
1226   gimple *stmt;
1227 
1228   if (*def != op
1229       && TREE_CODE (*def) == SSA_NAME
1230       && (stmt = SSA_NAME_DEF_STMT (*def))
1231       && gimple_code (stmt) != GIMPLE_NOP)
1232     *def = make_new_ssa_for_def (stmt, opcode, op);
1233 
1234   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235     make_new_ssa_for_def (stmt, opcode, op);
1236 }
1237 
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1240    replace *DEF with OP as well.  */
1241 
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1244 {
1245   tree lhs;
1246   gimple *use_stmt;
1247   use_operand_p use;
1248   gimple_stmt_iterator gsi;
1249 
1250   if (is_gimple_call (stmt))
1251     lhs = gimple_call_lhs (stmt);
1252   else
1253     lhs = gimple_assign_lhs (stmt);
1254 
1255   gcc_assert (has_single_use (lhs));
1256   single_imm_use (lhs, &use, &use_stmt);
1257   if (lhs == *def)
1258     *def = op;
1259   SET_USE (use, op);
1260   if (TREE_CODE (op) != SSA_NAME)
1261     update_stmt (use_stmt);
1262   gsi = gsi_for_stmt (stmt);
1263   unlink_stmt_vdef (stmt);
1264   reassoc_remove_stmt (&gsi);
1265   release_defs (stmt);
1266 }
1267 
1268 /* Walks the linear chain with result *DEF searching for an operation
1269    with operand OP and code OPCODE removing that from the chain.  *DEF
1270    is updated if there is only one operand but no operation left.  */
1271 
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1274 {
1275   tree orig_def = *def;
1276   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277   /* PR72835 - Record the stmt chain that has to be updated such that
1278      we don't use the same LHS when the values computed are different.  */
1279   auto_vec<gimple *, 64> stmts_to_fix;
1280 
1281   do
1282     {
1283       tree name;
1284 
1285       if (opcode == MULT_EXPR)
1286 	{
1287 	  if (stmt_is_power_of_op (stmt, op))
1288 	    {
1289 	      if (decrement_power (stmt) == 1)
1290 		{
1291 		  if (stmts_to_fix.length () > 0)
1292 		    stmts_to_fix.pop ();
1293 		  propagate_op_to_single_use (op, stmt, def);
1294 		}
1295 	      break;
1296 	    }
1297 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1298 	    {
1299 	      if (gimple_assign_rhs1 (stmt) == op)
1300 		{
1301 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 		  if (stmts_to_fix.length () > 0)
1303 		    stmts_to_fix.pop ();
1304 		  propagate_op_to_single_use (cst, stmt, def);
1305 		  break;
1306 		}
1307 	      else if (integer_minus_onep (op)
1308 		       || real_minus_onep (op))
1309 		{
1310 		  gimple_assign_set_rhs_code
1311 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 		  break;
1313 		}
1314 	    }
1315 	}
1316 
1317       name = gimple_assign_rhs1 (stmt);
1318 
1319       /* If this is the operation we look for and one of the operands
1320          is ours, simply propagate the other operand into the stmt's
1321 	 single use.  */
1322       if (gimple_assign_rhs_code (stmt) == opcode
1323 	  && (name == op
1324 	      || gimple_assign_rhs2 (stmt) == op))
1325 	{
1326 	  if (name == op)
1327 	    name = gimple_assign_rhs2 (stmt);
1328 	  if (stmts_to_fix.length () > 0)
1329 	    stmts_to_fix.pop ();
1330 	  propagate_op_to_single_use (name, stmt, def);
1331 	  break;
1332 	}
1333 
1334       /* We might have a multiply of two __builtin_pow* calls, and
1335 	 the operand might be hiding in the rightmost one.  Likewise
1336 	 this can happen for a negate.  */
1337       if (opcode == MULT_EXPR
1338 	  && gimple_assign_rhs_code (stmt) == opcode
1339 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1341 	{
1342 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 	  if (stmt_is_power_of_op (stmt2, op))
1344 	    {
1345 	      if (decrement_power (stmt2) == 1)
1346 		propagate_op_to_single_use (op, stmt2, def);
1347 	      else
1348 		stmts_to_fix.safe_push (stmt2);
1349 	      break;
1350 	    }
1351 	  else if (is_gimple_assign (stmt2)
1352 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1353 	    {
1354 	      if (gimple_assign_rhs1 (stmt2) == op)
1355 		{
1356 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 		  propagate_op_to_single_use (cst, stmt2, def);
1358 		  break;
1359 		}
1360 	      else if (integer_minus_onep (op)
1361 		       || real_minus_onep (op))
1362 		{
1363 		  stmts_to_fix.safe_push (stmt2);
1364 		  gimple_assign_set_rhs_code
1365 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 		  break;
1367 		}
1368 	    }
1369 	}
1370 
1371       /* Continue walking the chain.  */
1372       gcc_assert (name != op
1373 		  && TREE_CODE (name) == SSA_NAME);
1374       stmt = SSA_NAME_DEF_STMT (name);
1375       stmts_to_fix.safe_push (stmt);
1376     }
1377   while (1);
1378 
1379   if (stmts_to_fix.length () > 0 || *def == orig_def)
1380     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1381 }
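
/* Sketch of a typical call (hypothetical chain): with *DEF == t3 and
   the chain t1 = a * x; t2 = t1 * b; t3 = t2 * c, calling
   zero_one_operation (&t3, MULT_EXPR, b) deletes the t2 statement,
   rewires t3's statement to multiply t1 by c directly, and finally
   renames the remaining LHS via make_new_ssa_for_all_defs so a stale
   value is never reused (see PR72835).  */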
1382 
1383 /* Returns true if statement S1 dominates statement S2.  Like
1384    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1385 
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1388 {
1389   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1390 
1391   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a (D)
1392      SSA_NAME.  Assume it lives at the beginning of function and
1393      thus dominates everything.  */
1394   if (!bb1 || s1 == s2)
1395     return true;
1396 
1397   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1398   if (!bb2)
1399     return false;
1400 
1401   if (bb1 == bb2)
1402     {
1403       /* PHIs in the same basic block are assumed to be
1404 	 executed all in parallel; if only one stmt is a PHI,
1405 	 it dominates the other stmt in the same basic block.  */
1406       if (gimple_code (s1) == GIMPLE_PHI)
1407 	return true;
1408 
1409       if (gimple_code (s2) == GIMPLE_PHI)
1410 	return false;
1411 
1412       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1413 
1414       if (gimple_uid (s1) < gimple_uid (s2))
1415 	return true;
1416 
1417       if (gimple_uid (s1) > gimple_uid (s2))
1418 	return false;
1419 
1420       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421       unsigned int uid = gimple_uid (s1);
1422       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1423 	{
1424 	  gimple *s = gsi_stmt (gsi);
1425 	  if (gimple_uid (s) != uid)
1426 	    break;
1427 	  if (s == s2)
1428 	    return true;
1429 	}
1430 
1431       return false;
1432     }
1433 
1434   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1435 }
1436 
1437 /* Insert STMT after INSERT_POINT.  */
1438 
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1441 {
1442   gimple_stmt_iterator gsi;
1443   basic_block bb;
1444 
1445   if (gimple_code (insert_point) == GIMPLE_PHI)
1446     bb = gimple_bb (insert_point);
1447   else if (!stmt_ends_bb_p (insert_point))
1448     {
1449       gsi = gsi_for_stmt (insert_point);
1450       gimple_set_uid (stmt, gimple_uid (insert_point));
1451       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452       return;
1453     }
1454   else
1455     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456        thus if it must end a basic block, it should be a call that can
1457        throw, or some assignment that can throw.  If it throws, the LHS
1458        of it will not be initialized though, so only valid places using
1459        the SSA_NAME should be dominated by the fallthru edge.  */
1460     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1461   gsi = gsi_after_labels (bb);
1462   if (gsi_end_p (gsi))
1463     {
1464       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465       gimple_set_uid (stmt,
1466 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1467     }
1468   else
1469     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1471 }
1472 
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474    the result.  Places the statement after the definition of either
1475    OP1 or OP2.  Returns the new statement.  */
1476 
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1479 {
1480   gimple *op1def = NULL, *op2def = NULL;
1481   gimple_stmt_iterator gsi;
1482   tree op;
1483   gassign *sum;
1484 
1485   /* Create the addition statement.  */
1486   op = make_ssa_name (type);
1487   sum = gimple_build_assign (op, opcode, op1, op2);
1488 
1489   /* Find an insertion place and insert.  */
1490   if (TREE_CODE (op1) == SSA_NAME)
1491     op1def = SSA_NAME_DEF_STMT (op1);
1492   if (TREE_CODE (op2) == SSA_NAME)
1493     op2def = SSA_NAME_DEF_STMT (op2);
1494   if ((!op1def || gimple_nop_p (op1def))
1495       && (!op2def || gimple_nop_p (op2def)))
1496     {
1497       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498       if (!gsi_end_p (gsi)
1499 	  && is_gimple_call (gsi_stmt (gsi))
1500 	  && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1501 	{
1502 	  /* Don't add statements before a returns_twice call at the start
1503 	     of a function.  */
1504 	  split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1505 	  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1506 	}
1507       if (gsi_end_p (gsi))
1508 	{
1509 	  gimple_stmt_iterator gsi2
1510 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1511 	  gimple_set_uid (sum,
1512 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1513 	}
1514       else
1515 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1516       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1517     }
1518   else
1519     {
1520       gimple *insert_point;
1521       if ((!op1def || gimple_nop_p (op1def))
1522 	   || (op2def && !gimple_nop_p (op2def)
1523 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1524 	insert_point = op2def;
1525       else
1526 	insert_point = op1def;
1527       insert_stmt_after (sum, insert_point);
1528     }
1529   update_stmt (sum);
1530 
1531   return sum;
1532 }
1533 
1534 /* Perform un-distribution of divisions and multiplications.
1535    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1536    to (A + B) / X for real X.
1537 
1538    The algorithm is organized as follows.
1539 
1540     - First we walk the addition chain *OPS looking for summands that
1541       are defined by a multiplication or a real division.  This results
1542       in the candidates bitmap with relevant indices into *OPS.
1543 
1544     - Second we build the chains of multiplications or divisions for
1545       these candidates, counting the number of occurrences of (operand, code)
1546       pairs in all of the candidates chains.
1547 
1548     - Third we sort the (operand, code) pairs by number of occurrence and
1549       process them starting with the pair with the most uses.
1550 
1551       * For each such pair we walk the candidates again to build a
1552         second candidate bitmap noting all multiplication/division chains
1553 	that have at least one occurrence of (operand, code).
1554 
1555       * We build an alternate addition chain only covering these
1556         candidates with one (operand, code) operation removed from their
1557 	multiplication/division chain.
1558 
1559       * The first candidate gets replaced by the alternate addition chain
1560         multiplied/divided by the operand.
1561 
1562       * All candidate chains get disabled for further processing and
1563         processing of (operand, code) pairs continues.
1564 
1565   The alternate addition chains built are re-processed by the main
1566   reassociation algorithm which allows optimizing a * x * y + b * y * x
1567   to (a + b) * x * y in one invocation of the reassociation pass.  */
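
/* For illustration (hypothetical chain): given a*x + b*x + c*x + d,
   the three multiplies become candidates, the counting table records
   (x, MULT_EXPR) with count 3, and processing that pair rewrites the
   chain to ((a + b) + c) * x + d; the absorbed summands are replaced
   by 0 constants that later elimination drops.  */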
1568 
1569 static bool
1570 undistribute_ops_list (enum tree_code opcode,
1571 		       vec<operand_entry *> *ops, class loop *loop)
1572 {
1573   unsigned int length = ops->length ();
1574   operand_entry *oe1;
1575   unsigned i, j;
1576   unsigned nr_candidates, nr_candidates2;
1577   sbitmap_iterator sbi0;
1578   vec<operand_entry *> *subops;
1579   bool changed = false;
1580   unsigned int next_oecount_id = 0;
1581 
1582   if (length <= 1
1583       || opcode != PLUS_EXPR)
1584     return false;
1585 
1586   /* Build a list of candidates to process.  */
1587   auto_sbitmap candidates (length);
1588   bitmap_clear (candidates);
1589   nr_candidates = 0;
1590   FOR_EACH_VEC_ELT (*ops, i, oe1)
1591     {
1592       enum tree_code dcode;
1593       gimple *oe1def;
1594 
1595       if (TREE_CODE (oe1->op) != SSA_NAME)
1596 	continue;
1597       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1598       if (!is_gimple_assign (oe1def))
1599 	continue;
1600       dcode = gimple_assign_rhs_code (oe1def);
1601       if ((dcode != MULT_EXPR
1602 	   && dcode != RDIV_EXPR)
1603 	  || !is_reassociable_op (oe1def, dcode, loop))
1604 	continue;
1605 
1606       bitmap_set_bit (candidates, i);
1607       nr_candidates++;
1608     }
1609 
1610   if (nr_candidates < 2)
1611     return false;
1612 
1613   if (dump_file && (dump_flags & TDF_DETAILS))
1614     {
1615       fprintf (dump_file, "searching for un-distribute opportunities ");
1616       print_generic_expr (dump_file,
1617 	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1618       fprintf (dump_file, " %d\n", nr_candidates);
1619     }
1620 
1621   /* Build linearized sub-operand lists and the counting table.  */
1622   cvec.create (0);
1623 
1624   hash_table<oecount_hasher> ctable (15);
1625 
1626   /* ??? Macro arguments cannot have multi-argument template types in
1627      them.  This typedef is needed to work around that limitation.  */
1628   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1629   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1630   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1631     {
1632       gimple *oedef;
1633       enum tree_code oecode;
1634       unsigned j;
1635 
1636       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1637       oecode = gimple_assign_rhs_code (oedef);
1638       linearize_expr_tree (&subops[i], oedef,
1639 			   associative_tree_code (oecode), false);
1640 
1641       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1642 	{
1643 	  oecount c;
1644 	  int *slot;
1645 	  int idx;
1646 	  c.oecode = oecode;
1647 	  c.cnt = 1;
1648 	  c.id = next_oecount_id++;
1649 	  c.op = oe1->op;
1650 	  cvec.safe_push (c);
1651 	  idx = cvec.length () + 41;
1652 	  slot = ctable.find_slot (idx, INSERT);
1653 	  if (!*slot)
1654 	    {
1655 	      *slot = idx;
1656 	    }
1657 	  else
1658 	    {
1659 	      cvec.pop ();
1660 	      cvec[*slot - 42].cnt++;
1661 	    }
1662 	}
1663     }
1664 
1665   /* Sort the counting table.  */
1666   cvec.qsort (oecount_cmp);
1667 
1668   if (dump_file && (dump_flags & TDF_DETAILS))
1669     {
1670       oecount *c;
1671       fprintf (dump_file, "Candidates:\n");
1672       FOR_EACH_VEC_ELT (cvec, j, c)
1673 	{
1674 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1675 		   c->oecode == MULT_EXPR
1676 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1677 	  print_generic_expr (dump_file, c->op);
1678 	  fprintf (dump_file, "\n");
1679 	}
1680     }
1681 
1682   /* Process the (operand, code) pairs in order of decreasing occurrence count.  */
1683   auto_sbitmap candidates2 (length);
1684   while (!cvec.is_empty ())
1685     {
1686       oecount *c = &cvec.last ();
1687       if (c->cnt < 2)
1688 	break;
1689 
1690       /* Now collect the operands in the outer chain that contain
1691          the common operand in their inner chain.  */
1692       bitmap_clear (candidates2);
1693       nr_candidates2 = 0;
1694       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1695 	{
1696 	  gimple *oedef;
1697 	  enum tree_code oecode;
1698 	  unsigned j;
1699 	  tree op = (*ops)[i]->op;
1700 
1701 	  /* If we already undistributed in this chain, this may be
1702 	     a constant.  */
1703 	  if (TREE_CODE (op) != SSA_NAME)
1704 	    continue;
1705 
1706 	  oedef = SSA_NAME_DEF_STMT (op);
1707 	  oecode = gimple_assign_rhs_code (oedef);
1708 	  if (oecode != c->oecode)
1709 	    continue;
1710 
1711 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1712 	    {
1713 	      if (oe1->op == c->op)
1714 		{
1715 		  bitmap_set_bit (candidates2, i);
1716 		  ++nr_candidates2;
1717 		  break;
1718 		}
1719 	    }
1720 	}
1721 
1722       if (nr_candidates2 >= 2)
1723 	{
1724 	  operand_entry *oe1, *oe2;
1725 	  gimple *prod;
1726 	  int first = bitmap_first_set_bit (candidates2);
1727 
1728 	  /* Build the new addition chain.  */
1729 	  oe1 = (*ops)[first];
1730 	  if (dump_file && (dump_flags & TDF_DETAILS))
1731 	    {
1732 	      fprintf (dump_file, "Building (");
1733 	      print_generic_expr (dump_file, oe1->op);
1734 	    }
1735 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1736 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1737 	    {
1738 	      gimple *sum;
1739 	      oe2 = (*ops)[i];
1740 	      if (dump_file && (dump_flags & TDF_DETAILS))
1741 		{
1742 		  fprintf (dump_file, " + ");
1743 		  print_generic_expr (dump_file, oe2->op);
1744 		}
1745 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1746 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1747 				       oe1->op, oe2->op, opcode);
1748 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1749 	      oe2->rank = 0;
1750 	      oe1->op = gimple_get_lhs (sum);
1751 	    }
1752 
1753 	  /* Apply the multiplication/division.  */
1754 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1755 				    oe1->op, c->op, c->oecode);
1756 	  if (dump_file && (dump_flags & TDF_DETAILS))
1757 	    {
1758 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1759 	      print_generic_expr (dump_file, c->op);
1760 	      fprintf (dump_file, "\n");
1761 	    }
1762 
1763 	  /* Record it in the addition chain and disable further
1764 	     undistribution with this op.  */
1765 	  oe1->op = gimple_assign_lhs (prod);
1766 	  oe1->rank = get_rank (oe1->op);
1767 	  subops[first].release ();
1768 
1769 	  changed = true;
1770 	}
1771 
1772       cvec.pop ();
1773     }
1774 
1775   for (i = 0; i < ops->length (); ++i)
1776     subops[i].release ();
1777   free (subops);
1778   cvec.release ();
1779 
1780   return changed;
1781 }
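
/* For example (a sketch; the RDIV_EXPR case assumes
   -funsafe-math-optimizations): given the addition chains

     x / z + y / z        and        a * x + b * x

   undistribute_ops_list rewrites them to the equivalent of

     (x + y) / z          and        (a + b) * x

   saving one division or multiplication per merged pair.  */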
1782 
1783 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1784    first: element index for each relevant BIT_FIELD_REF.
1785    second: the index into the *OPS vector for each relevant BIT_FIELD_REF.  */
1786 typedef std::pair<unsigned, unsigned> v_info_elem;
1787 struct v_info {
1788   tree vec_type;
1789   auto_vec<v_info_elem, 32> vec;
1790 };
1791 typedef v_info *v_info_ptr;
1792 
1793 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
1794 static int
1795 sort_by_mach_mode (const void *p_i, const void *p_j)
1796 {
1797   const tree tr1 = *((const tree *) p_i);
1798   const tree tr2 = *((const tree *) p_j);
1799   unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1800   unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1801   if (mode1 > mode2)
1802     return 1;
1803   else if (mode1 < mode2)
1804     return -1;
1805   if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1806     return -1;
1807   else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1808     return 1;
1809   return 0;
1810 }
1811 
1812 /* Cleanup hash map for VECTOR information.  */
1813 static void
1814 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1815 {
1816   for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1817        it != info_map.end (); ++it)
1818     {
1819       v_info_ptr info = (*it).second;
1820       delete info;
1821       (*it).second = NULL;
1822     }
1823 }
1824 
1825 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1826      V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1827    is transformed to
1828      Vs = (V1 + V2 + ... + Vn)
1829      Vs[0] + Vs[1] + ... + Vs[k]
1830 
1831    The basic steps are listed below:
1832 
1833     1) Check the addition chain *OPS for summands that come from a VECTOR
1834        BIT_FIELD_REF on a VECTOR type.  Put the information into
1835        v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1836 
1837     2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1838        contiguous and cover the whole VECTOR perfectly, without any holes.
1839        Obtain one VECTOR list which contains the candidates to be transformed.
1840 
1841     3) Sort the VECTOR list by the machine mode of the VECTOR type.  For each
1842        group of candidates with the same mode, build the addition statements
1843        for them and generate BIT_FIELD_REFs accordingly.
1844 
1845    TODO:
1846        The current implementation requires that each whole VECTOR be fully
1847        covered, but it could be extended to support partial coverage
1848        (adjacent elements that do not fill the whole VECTOR); that would
1849        need some cost model to decide when the transformation is profitable.
1850 */
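
/* As a concrete sketch using GCC's generic vector extension (an
   illustration, not code from this pass): with

     typedef double v2df __attribute__ ((vector_size (16)));

     double
     f (v2df a, v2df b)
     {
       return a[0] + a[1] + b[0] + b[1];
     }

   the addition chain is rewritten to the equivalent of

     v2df t = a + b;
     return t[0] + t[1];

   which halves the number of scalar additions, assuming the target
   supports addition on this vector mode.  */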
1851 static bool
1852 undistribute_bitref_for_vector (enum tree_code opcode,
1853 				vec<operand_entry *> *ops, struct loop *loop)
1854 {
1855   if (ops->length () <= 1)
1856     return false;
1857 
1858   if (opcode != PLUS_EXPR
1859       && opcode != MULT_EXPR
1860       && opcode != BIT_XOR_EXPR
1861       && opcode != BIT_IOR_EXPR
1862       && opcode != BIT_AND_EXPR)
1863     return false;
1864 
1865   hash_map<tree, v_info_ptr> v_info_map;
1866   operand_entry *oe1;
1867   unsigned i;
1868 
1869   /* Find the summands in the addition chain that come from VECTOR
1870      BIT_FIELD_REFs and record their information in the map.  */
1871   FOR_EACH_VEC_ELT (*ops, i, oe1)
1872     {
1873       enum tree_code dcode;
1874       gimple *oe1def;
1875 
1876       if (TREE_CODE (oe1->op) != SSA_NAME)
1877 	continue;
1878       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1879       if (!is_gimple_assign (oe1def))
1880 	continue;
1881       dcode = gimple_assign_rhs_code (oe1def);
1882       if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1883 	continue;
1884 
1885       tree rhs = gimple_assign_rhs1 (oe1def);
1886       tree vec = TREE_OPERAND (rhs, 0);
1887       tree vec_type = TREE_TYPE (vec);
1888 
1889       if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1890 	continue;
1891 
1892       /* Ignore it if target machine can't support this VECTOR type.  */
1893       if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1894 	continue;
1895 
1896       /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
1897       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1898 	continue;
1899 
1900       if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1901 	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1902 	continue;
1903 
1904       /* The type of BIT_FIELD_REF might not be equal to the element type of
1905 	 the vector.  We want to use a vector type with element type the
1906 	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
1907       if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1908 	{
1909 	  machine_mode simd_mode;
1910 	  unsigned HOST_WIDE_INT size, nunits;
1911 	  unsigned HOST_WIDE_INT elem_size
1912 	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1913 	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1914 	    continue;
1915 	  if (size <= elem_size || (size % elem_size) != 0)
1916 	    continue;
1917 	  nunits = size / elem_size;
1918 	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1919 				nunits).exists (&simd_mode))
1920 	    continue;
1921 	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1922 
1923 	  /* Ignore it if target machine can't support this VECTOR type.  */
1924 	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1925 	    continue;
1926 
1927 	  /* Check const vector type, constrain BIT_FIELD_REF offset and
1928 	     size.  */
1929 	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1930 	    continue;
1931 
1932 	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1933 			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1934 	    continue;
1935 	}
1936 
1937       tree elem_type = TREE_TYPE (vec_type);
1938       unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1939       if (maybe_ne (bit_field_size (rhs), elem_size))
1940 	continue;
1941 
1942       unsigned idx;
1943       if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1944 	continue;
1945 
1946       /* Ignore it if target machine can't support this type of VECTOR
1947          operation.  */
1948       optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1949       if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1950 	continue;
1951 
1952       bool existed;
1953       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1954       if (!existed)
1955 	{
1956 	  info = new v_info;
1957 	  info->vec_type = vec_type;
1958 	}
1959       else if (!types_compatible_p (vec_type, info->vec_type))
1960 	continue;
1961       info->vec.safe_push (std::make_pair (idx, i));
1962     }
1963 
1964   /* We need at least two VECTORs to combine.  */
1965   if (v_info_map.elements () <= 1)
1966     {
1967       cleanup_vinfo_map (v_info_map);
1968       return false;
1969     }
1970 
1971   /* Verify all VECTOR candidates by checking two conditions:
1972        1) the sorted offsets are adjacent, with no holes;
1973        2) they fill the whole VECTOR perfectly.
1974      Then add the valid candidates to a vector for further handling.  */
1975   auto_vec<tree> valid_vecs (v_info_map.elements ());
1976   for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1977        it != v_info_map.end (); ++it)
1978     {
1979       tree cand_vec = (*it).first;
1980       v_info_ptr cand_info = (*it).second;
1981       unsigned int num_elems
1982 	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1983       if (cand_info->vec.length () != num_elems)
1984 	continue;
1985       sbitmap holes = sbitmap_alloc (num_elems);
1986       bitmap_ones (holes);
1987       bool valid = true;
1988       v_info_elem *curr;
1989       FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1990 	{
1991 	  if (!bitmap_bit_p (holes, curr->first))
1992 	    {
1993 	      valid = false;
1994 	      break;
1995 	    }
1996 	  else
1997 	    bitmap_clear_bit (holes, curr->first);
1998 	}
1999       if (valid && bitmap_empty_p (holes))
2000 	valid_vecs.quick_push (cand_vec);
2001       sbitmap_free (holes);
2002     }
2003 
2004   /* We need at least two VECTORs to combine.  */
2005   if (valid_vecs.length () <= 1)
2006     {
2007       cleanup_vinfo_map (v_info_map);
2008       return false;
2009     }
2010 
2011   valid_vecs.qsort (sort_by_mach_mode);
2012   /* Go through all candidates in machine mode order; after sorting, equal
2013      modes are adjacent, and modes with only a single candidate are skipped.  */
2014   for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2015     {
2016       tree tvec = valid_vecs[i];
2017       enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2018 
2019       /* Skip modes with only a single candidate.  */
2020       if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2021 	continue;
2022 
2023       unsigned int idx, j;
2024       gimple *sum = NULL;
2025       tree sum_vec = tvec;
2026       v_info_ptr info_ptr = *(v_info_map.get (tvec));
2027       v_info_elem *elem;
2028       tree vec_type = info_ptr->vec_type;
2029 
2030       /* Build the sum for all candidates with the same mode.  */
2031       do
2032 	{
2033 	  sum = build_and_add_sum (vec_type, sum_vec,
2034 				   valid_vecs[i + 1], opcode);
2035 	  if (!useless_type_conversion_p (vec_type,
2036 					  TREE_TYPE (valid_vecs[i + 1])))
2037 	    {
2038 	      /* Update the operands only after build_and_add_sum,
2039 		 so that we don't have to repeat the placement algorithm
2040 		 of build_and_add_sum.  */
2041 	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2042 	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2043 				 valid_vecs[i + 1]);
2044 	      tree lhs = make_ssa_name (vec_type);
2045 	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2046 	      gimple_set_uid (g, gimple_uid (sum));
2047 	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2048 	      gimple_assign_set_rhs2 (sum, lhs);
2049 	      if (sum_vec == tvec)
2050 		{
2051 		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2052 		  lhs = make_ssa_name (vec_type);
2053 		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2054 		  gimple_set_uid (g, gimple_uid (sum));
2055 		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2056 		  gimple_assign_set_rhs1 (sum, lhs);
2057 		}
2058 	      update_stmt (sum);
2059 	    }
2060 	  sum_vec = gimple_get_lhs (sum);
2061 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2062 	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2063 	  /* Update the related ops of the current candidate VECTOR.  */
2064 	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2065 	    {
2066 	      idx = elem->second;
2067 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2068 	      /* Mark the def visited so the op definition will get DCEd later.  */
2069 	      gimple_set_visited (def, true);
2070 	      if (opcode == PLUS_EXPR
2071 		  || opcode == BIT_XOR_EXPR
2072 		  || opcode == BIT_IOR_EXPR)
2073 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2074 	      else if (opcode == MULT_EXPR)
2075 		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2076 	      else
2077 		{
2078 		  gcc_assert (opcode == BIT_AND_EXPR);
2079 		  (*ops)[idx]->op
2080 		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2081 		}
2082 	      (*ops)[idx]->rank = 0;
2083 	    }
2084 	  if (dump_file && (dump_flags & TDF_DETAILS))
2085 	    {
2086 	      fprintf (dump_file, "Generating addition -> ");
2087 	      print_gimple_stmt (dump_file, sum, 0);
2088 	    }
2089 	  i++;
2090 	}
2091       while ((i < valid_vecs.length () - 1)
2092 	     && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2093 
2094       /* Referring to the first valid VECTOR with this mode, generate the
2095          BIT_FIELD_REF statements accordingly.  */
2096       info_ptr = *(v_info_map.get (tvec));
2097       gcc_assert (sum);
2098       tree elem_type = TREE_TYPE (vec_type);
2099       FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2100 	{
2101 	  idx = elem->second;
2102 	  tree dst = make_ssa_name (elem_type);
2103 	  tree pos = bitsize_int (elem->first
2104 				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
2105 	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2106 			     TYPE_SIZE (elem_type), pos);
2107 	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2108 	  insert_stmt_after (gs, sum);
2109 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2110 	  /* Mark the def visited so the op definition will get DCEd later.  */
2111 	  gimple_set_visited (def, true);
2112 	  (*ops)[idx]->op = gimple_assign_lhs (gs);
2113 	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2114 	  if (dump_file && (dump_flags & TDF_DETAILS))
2115 	    {
2116 	      fprintf (dump_file, "Generating bit_field_ref -> ");
2117 	      print_gimple_stmt (dump_file, gs, 0);
2118 	    }
2119 	}
2120     }
2121 
2122   if (dump_file && (dump_flags & TDF_DETAILS))
2123     fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2124 
2125   cleanup_vinfo_map (v_info_map);
2126 
2127   return true;
2128 }
2129 
2130 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2131    expression, examine the other OPS to see if any of them are comparisons
2132    of the same values, which we may be able to combine or eliminate.
2133    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
2134 
2135 static bool
2136 eliminate_redundant_comparison (enum tree_code opcode,
2137 				vec<operand_entry *> *ops,
2138 				unsigned int currindex,
2139 				operand_entry *curr)
2140 {
2141   tree op1, op2;
2142   enum tree_code lcode, rcode;
2143   gimple *def1, *def2;
2144   int i;
2145   operand_entry *oe;
2146 
2147   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2148     return false;
2149 
2150   /* Check that CURR is a comparison.  */
2151   if (TREE_CODE (curr->op) != SSA_NAME)
2152     return false;
2153   def1 = SSA_NAME_DEF_STMT (curr->op);
2154   if (!is_gimple_assign (def1))
2155     return false;
2156   lcode = gimple_assign_rhs_code (def1);
2157   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2158     return false;
2159   op1 = gimple_assign_rhs1 (def1);
2160   op2 = gimple_assign_rhs2 (def1);
2161 
2162   /* Now look for a similar comparison in the remaining OPS.  */
2163   for (i = currindex + 1; ops->iterate (i, &oe); i++)
2164     {
2165       tree t;
2166 
2167       if (TREE_CODE (oe->op) != SSA_NAME)
2168 	continue;
2169       def2 = SSA_NAME_DEF_STMT (oe->op);
2170       if (!is_gimple_assign (def2))
2171 	continue;
2172       rcode = gimple_assign_rhs_code (def2);
2173       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2174 	continue;
2175 
2176       /* If we got here, we have a match.  See if we can combine the
2177 	 two comparisons.  */
2178       tree type = TREE_TYPE (gimple_assign_lhs (def1));
2179       if (opcode == BIT_IOR_EXPR)
2180 	t = maybe_fold_or_comparisons (type,
2181 				       lcode, op1, op2,
2182 				       rcode, gimple_assign_rhs1 (def2),
2183 				       gimple_assign_rhs2 (def2));
2184       else
2185 	t = maybe_fold_and_comparisons (type,
2186 					lcode, op1, op2,
2187 					rcode, gimple_assign_rhs1 (def2),
2188 					gimple_assign_rhs2 (def2));
2189       if (!t)
2190 	continue;
2191 
2192       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2193 	 always give us a boolean_type_node value back.  If the original
2194 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2195 	 we need to convert.  */
2196       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2197 	t = fold_convert (TREE_TYPE (curr->op), t);
2198 
2199       if (TREE_CODE (t) != INTEGER_CST
2200 	  && !operand_equal_p (t, curr->op, 0))
2201 	{
2202 	  enum tree_code subcode;
2203 	  tree newop1, newop2;
2204 	  if (!COMPARISON_CLASS_P (t))
2205 	    continue;
2206 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2207 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2208 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2209 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2210 	    continue;
2211 	}
2212 
2213       if (dump_file && (dump_flags & TDF_DETAILS))
2214 	{
2215 	  fprintf (dump_file, "Equivalence: ");
2216 	  print_generic_expr (dump_file, curr->op);
2217 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
2218 	  print_generic_expr (dump_file, oe->op);
2219 	  fprintf (dump_file, " -> ");
2220 	  print_generic_expr (dump_file, t);
2221 	  fprintf (dump_file, "\n");
2222 	}
2223 
2224       /* Now we can delete oe, as it has been subsumed by the new combined
2225          expression t.  */
2226       ops->ordered_remove (i);
2227       reassociate_stats.ops_eliminated ++;
2228 
2229       /* If t is the same as curr->op, we're done.  Otherwise we must
2230 	 replace curr->op with t.  Special case is if we got a constant
2231 	 back, in which case we add it to the end instead of in place of
2232 	 the current entry.  */
2233       if (TREE_CODE (t) == INTEGER_CST)
2234 	{
2235 	  ops->ordered_remove (currindex);
2236 	  add_to_ops_vec (ops, t);
2237 	}
2238       else if (!operand_equal_p (t, curr->op, 0))
2239 	{
2240 	  gimple *sum;
2241 	  enum tree_code subcode;
2242 	  tree newop1;
2243 	  tree newop2;
2244 	  gcc_assert (COMPARISON_CLASS_P (t));
2245 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2246 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2247 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2248 	  gcc_checking_assert (is_gimple_val (newop1)
2249 			       && is_gimple_val (newop2));
2250 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2251 	  curr->op = gimple_get_lhs (sum);
2252 	}
2253       return true;
2254     }
2255 
2256   return false;
2257 }
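
/* For instance (illustrative only), in

     int f (int a, int b)
     {
       return (a < b) | (a == b);
     }

   maybe_fold_or_comparisons merges the two tests of the same operands
   into the single comparison a <= b; the BIT_AND_EXPR case is
   analogous, e.g. (a < b) & (a != b) folds to a < b.  */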
2258 
2259 
2260 /* Transform repeated addition of same values into multiply with
2261    constant.  */
2262 static bool
2263 transform_add_to_multiply (vec<operand_entry *> *ops)
2264 {
2265   operand_entry *oe;
2266   tree op = NULL_TREE;
2267   int j;
2268   int i, start = -1, end = 0, count = 0;
2269   auto_vec<std::pair <int, int> > indxs;
2270   bool changed = false;
2271 
2272   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2273       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2274 	  || !flag_unsafe_math_optimizations))
2275     return false;
2276 
2277   /* Look for repeated operands.  */
2278   FOR_EACH_VEC_ELT (*ops, i, oe)
2279     {
2280       if (start == -1)
2281 	{
2282 	  count = 1;
2283 	  op = oe->op;
2284 	  start = i;
2285 	}
2286       else if (operand_equal_p (oe->op, op, 0))
2287 	{
2288 	  count++;
2289 	  end = i;
2290 	}
2291       else
2292 	{
2293 	  if (count > 1)
2294 	    indxs.safe_push (std::make_pair (start, end));
2295 	  count = 1;
2296 	  op = oe->op;
2297 	  start = i;
2298 	}
2299     }
2300 
2301   if (count > 1)
2302     indxs.safe_push (std::make_pair (start, end));
2303 
2304   for (j = indxs.length () - 1; j >= 0; --j)
2305     {
2306       /* Convert repeated operand addition to multiplication.  */
2307       start = indxs[j].first;
2308       end = indxs[j].second;
2309       op = (*ops)[start]->op;
2310       count = end - start + 1;
2311       for (i = end; i >= start; --i)
2312 	ops->unordered_remove (i);
2313       tree tmp = make_ssa_name (TREE_TYPE (op));
2314       tree cst = build_int_cst (integer_type_node, count);
2315       gassign *mul_stmt
2316 	= gimple_build_assign (tmp, MULT_EXPR,
2317 			       op, fold_convert (TREE_TYPE (op), cst));
2318       gimple_set_visited (mul_stmt, true);
2319       add_to_ops_vec (ops, tmp, mul_stmt);
2320       changed = true;
2321     }
2322 
2323   return changed;
2324 }
2325 
2326 
2327 /* Perform various identities and other optimizations on the list of
2328    operand entries, stored in OPS.  The tree code for the binary
2329    operation between all the operands is OPCODE.  */
2330 
2331 static void
2332 optimize_ops_list (enum tree_code opcode,
2333 		   vec<operand_entry *> *ops)
2334 {
2335   unsigned int length = ops->length ();
2336   unsigned int i;
2337   operand_entry *oe;
2338   operand_entry *oelast = NULL;
2339   bool iterate = false;
2340 
2341   if (length == 1)
2342     return;
2343 
2344   oelast = ops->last ();
2345 
2346   /* If the last two are constants, pop the constants off, merge them
2347      and try the next two.  */
2348   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2349     {
2350       operand_entry *oelm1 = (*ops)[length - 2];
2351 
2352       if (oelm1->rank == 0
2353 	  && is_gimple_min_invariant (oelm1->op)
2354 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2355 				       TREE_TYPE (oelast->op)))
2356 	{
2357 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2358 				     oelm1->op, oelast->op);
2359 
2360 	  if (folded && is_gimple_min_invariant (folded))
2361 	    {
2362 	      if (dump_file && (dump_flags & TDF_DETAILS))
2363 		fprintf (dump_file, "Merging constants\n");
2364 
2365 	      ops->pop ();
2366 	      ops->pop ();
2367 
2368 	      add_to_ops_vec (ops, folded);
2369 	      reassociate_stats.constants_eliminated++;
2370 
2371 	      optimize_ops_list (opcode, ops);
2372 	      return;
2373 	    }
2374 	}
2375     }
2376 
2377   eliminate_using_constants (opcode, ops);
2378   oelast = NULL;
2379 
2380   for (i = 0; ops->iterate (i, &oe);)
2381     {
2382       bool done = false;
2383 
2384       if (eliminate_not_pairs (opcode, ops, i, oe))
2385 	return;
2386       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2387 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2388 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2389 	{
2390 	  if (done)
2391 	    return;
2392 	  iterate = true;
2393 	  oelast = NULL;
2394 	  continue;
2395 	}
2396       oelast = oe;
2397       i++;
2398     }
2399 
2400   if (iterate)
2401     optimize_ops_list (opcode, ops);
2402 }
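
/* For example, for the chain a + 9 + b + 5 the two constants end up
   last in the rank-sorted operand vector, so the code above folds them
   into a single 14, leaving a + b + 14, and then recurses in case more
   entries have become foldable.  */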
2403 
2404 /* The following functions are subroutines to optimize_range_tests and allow
2405    it to try to change a logical combination of comparisons into a range
2406    test.
2407 
2408    For example, both
2409 	X == 2 || X == 5 || X == 3 || X == 4
2410    and
2411 	X >= 2 && X <= 5
2412    are converted to
2413 	(unsigned) (X - 2) <= 3
2414 
2415    For more information see comments above fold_test_range in fold-const.c,
2416    this implementation is for GIMPLE.  */
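
/* Why the rewrite above is valid: subtracting the low bound re-bases
   the range at zero, and a single unsigned comparison then checks both
   bounds at once.  For 2 <= X && X <= 5, (unsigned) (X - 2) maps 2..5
   to 0..3, while any X < 2 wraps around to a huge unsigned value, so
   (unsigned) (X - 2) <= 3 is an equivalent single test.  */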
2417 
2418 struct range_entry
2419 {
2420   tree exp;
2421   tree low;
2422   tree high;
2423   bool in_p;
2424   bool strict_overflow_p;
2425   unsigned int idx, next;
2426 };
2427 
2428 /* This is similar to make_range in fold-const.c, but on top of
2429    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2430    an SSA_NAME and STMT argument is ignored, otherwise STMT
2431    argument should be a GIMPLE_COND.  */
2432 
2433 static void
2434 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2435 {
2436   int in_p;
2437   tree low, high;
2438   bool is_bool, strict_overflow_p;
2439 
2440   r->exp = NULL_TREE;
2441   r->in_p = false;
2442   r->strict_overflow_p = false;
2443   r->low = NULL_TREE;
2444   r->high = NULL_TREE;
2445   if (exp != NULL_TREE
2446       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2447     return;
2448 
2449   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2450      and see if we can refine the range.  Some of the cases below may not
2451      happen, but it doesn't seem worth worrying about this.  We "continue"
2452      the outer loop when we've changed something; otherwise we "break"
2453      the switch, which will "break" the while.  */
2454   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2455   high = low;
2456   in_p = 0;
2457   strict_overflow_p = false;
2458   is_bool = false;
2459   if (exp == NULL_TREE)
2460     is_bool = true;
2461   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2462     {
2463       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2464 	is_bool = true;
2465       else
2466 	return;
2467     }
2468   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2469     is_bool = true;
2470 
2471   while (1)
2472     {
2473       enum tree_code code;
2474       tree arg0, arg1, exp_type;
2475       tree nexp;
2476       location_t loc;
2477 
2478       if (exp != NULL_TREE)
2479 	{
2480 	  if (TREE_CODE (exp) != SSA_NAME
2481 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2482 	    break;
2483 
2484 	  stmt = SSA_NAME_DEF_STMT (exp);
2485 	  if (!is_gimple_assign (stmt))
2486 	    break;
2487 
2488 	  code = gimple_assign_rhs_code (stmt);
2489 	  arg0 = gimple_assign_rhs1 (stmt);
2490 	  arg1 = gimple_assign_rhs2 (stmt);
2491 	  exp_type = TREE_TYPE (exp);
2492 	}
2493       else
2494 	{
2495 	  code = gimple_cond_code (stmt);
2496 	  arg0 = gimple_cond_lhs (stmt);
2497 	  arg1 = gimple_cond_rhs (stmt);
2498 	  exp_type = boolean_type_node;
2499 	}
2500 
2501       if (TREE_CODE (arg0) != SSA_NAME
2502 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2503 	break;
2504       loc = gimple_location (stmt);
2505       switch (code)
2506 	{
2507 	case BIT_NOT_EXPR:
2508 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2509 	      /* Ensure the range is either +[-,0], +[0,0],
2510 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2511 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2512 		 or similar expression of unconditional true or
2513 		 false, it should not be negated.  */
2514 	      && ((high && integer_zerop (high))
2515 		  || (low && integer_onep (low))))
2516 	    {
2517 	      in_p = !in_p;
2518 	      exp = arg0;
2519 	      continue;
2520 	    }
2521 	  break;
2522 	case SSA_NAME:
2523 	  exp = arg0;
2524 	  continue;
2525 	CASE_CONVERT:
2526 	  if (is_bool)
2527 	    {
2528 	      if ((TYPE_PRECISION (exp_type) == 1
2529 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2530 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2531 		return;
2532 	    }
2533 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2534 	    {
2535 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2536 		is_bool = true;
2537 	      else
2538 		return;
2539 	    }
2540 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2541 	    is_bool = true;
2542 	  goto do_default;
2543 	case EQ_EXPR:
2544 	case NE_EXPR:
2545 	case LT_EXPR:
2546 	case LE_EXPR:
2547 	case GE_EXPR:
2548 	case GT_EXPR:
2549 	  is_bool = true;
2550 	  /* FALLTHRU */
2551 	default:
2552 	  if (!is_bool)
2553 	    return;
2554 	do_default:
2555 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2556 				  &low, &high, &in_p,
2557 				  &strict_overflow_p);
2558 	  if (nexp != NULL_TREE)
2559 	    {
2560 	      exp = nexp;
2561 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2562 	      continue;
2563 	    }
2564 	  break;
2565 	}
2566       break;
2567     }
2568   if (is_bool)
2569     {
2570       r->exp = exp;
2571       r->in_p = in_p;
2572       r->low = low;
2573       r->high = high;
2574       r->strict_overflow_p = strict_overflow_p;
2575     }
2576 }
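
/* As a small illustration (hypothetical GIMPLE), for

     _1 = x_2 > 9;
     _3 = ~_1;

   calling init_range_entry on _3 walks the BIT_NOT_EXPR (flipping
   IN_P) and then the comparison, ending up with roughly exp = x_2 and
   a range equivalent to x_2 <= 9.  */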
2577 
2578 /* Comparison function for qsort.  Sort entries
2579    without SSA_NAME exp first, then with SSA_NAMEs sorted
2580    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2581    by increasing ->low and if ->low is the same, by increasing
2582    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2583    maximum.  */
2584 
2585 static int
2586 range_entry_cmp (const void *a, const void *b)
2587 {
2588   const struct range_entry *p = (const struct range_entry *) a;
2589   const struct range_entry *q = (const struct range_entry *) b;
2590 
2591   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2592     {
2593       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2594 	{
2595 	  /* Group range_entries for the same SSA_NAME together.  */
2596 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2597 	    return -1;
2598 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2599 	    return 1;
2600 	  /* If ->low is different, NULL low goes first, then by
2601 	     ascending low.  */
2602 	  if (p->low != NULL_TREE)
2603 	    {
2604 	      if (q->low != NULL_TREE)
2605 		{
2606 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2607 					  p->low, q->low);
2608 		  if (tem && integer_onep (tem))
2609 		    return -1;
2610 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2611 				     p->low, q->low);
2612 		  if (tem && integer_onep (tem))
2613 		    return 1;
2614 		}
2615 	      else
2616 		return 1;
2617 	    }
2618 	  else if (q->low != NULL_TREE)
2619 	    return -1;
2620 	  /* If ->high is different, NULL high goes last, before that by
2621 	     ascending high.  */
2622 	  if (p->high != NULL_TREE)
2623 	    {
2624 	      if (q->high != NULL_TREE)
2625 		{
2626 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2627 					  p->high, q->high);
2628 		  if (tem && integer_onep (tem))
2629 		    return -1;
2630 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2631 				     p->high, q->high);
2632 		  if (tem && integer_onep (tem))
2633 		    return 1;
2634 		}
2635 	      else
2636 		return -1;
2637 	    }
2638 	  else if (q->high != NULL_TREE)
2639 	    return 1;
2640 	  /* If both ranges are the same, sort below by ascending idx.  */
2641 	}
2642       else
2643 	return 1;
2644     }
2645   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2646     return -1;
2647 
2648   if (p->idx < q->idx)
2649     return -1;
2650   else
2651     {
2652       gcc_checking_assert (p->idx > q->idx);
2653       return 1;
2654     }
2655 }
2656 
2657 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2658    insert needed statements BEFORE or after GSI.  */
2659 
2660 static tree
2661 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2662 {
2663   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2664   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2665   if (TREE_CODE (ret) != SSA_NAME)
2666     {
2667       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2668       if (before)
2669 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2670       else
2671 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2672       ret = gimple_assign_lhs (g);
2673     }
2674   return ret;
2675 }
2676 
2677 /* Helper routine of optimize_range_tests.
2678    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2679    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2680    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2681    is NULL, OTHERRANGEP should not be NULL; in that case OTHERRANGEP
2682    points to an array of COUNT pointers to other ranges.  Return
2683    true if the range merge has been successful.
2684    If OPCODE is ERROR_MARK, this is called from within
2685    maybe_optimize_range_tests and is performing inter-bb range optimization.
2686    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2687    oe->rank.  */
2688 
2689 static bool
2690 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2691 		   struct range_entry **otherrangep,
2692 		   unsigned int count, enum tree_code opcode,
2693 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2694 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2695 {
2696   operand_entry *oe = (*ops)[range->idx];
2697   tree op = oe->op;
2698   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2699 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2700   location_t loc = gimple_location (stmt);
2701   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2702   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2703 				in_p, low, high);
2704   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2705   gimple_stmt_iterator gsi;
2706   unsigned int i, uid;
2707 
2708   if (tem == NULL_TREE)
2709     return false;
2710 
2711   /* If op is default def SSA_NAME, there is no place to insert the
2712      new comparison.  Give up, unless we can use OP itself as the
2713      range test.  */
2714   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2715     {
2716       if (op == range->exp
2717 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2718 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2719 	  && (op == tem
2720 	      || (TREE_CODE (tem) == EQ_EXPR
2721 		  && TREE_OPERAND (tem, 0) == op
2722 		  && integer_onep (TREE_OPERAND (tem, 1))))
2723 	  && opcode != BIT_IOR_EXPR
2724 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2725 	{
2726 	  stmt = NULL;
2727 	  tem = op;
2728 	}
2729       else
2730 	return false;
2731     }
2732 
2733   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2734     warning_at (loc, OPT_Wstrict_overflow,
2735 		"assuming signed overflow does not occur "
2736 		"when simplifying range test");
2737 
2738   if (dump_file && (dump_flags & TDF_DETAILS))
2739     {
2740       struct range_entry *r;
2741       fprintf (dump_file, "Optimizing range tests ");
2742       print_generic_expr (dump_file, range->exp);
2743       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2744       print_generic_expr (dump_file, range->low);
2745       fprintf (dump_file, ", ");
2746       print_generic_expr (dump_file, range->high);
2747       fprintf (dump_file, "]");
2748       for (i = 0; i < count; i++)
2749 	{
2750 	  if (otherrange)
2751 	    r = otherrange + i;
2752 	  else
2753 	    r = otherrangep[i];
2754 	  if (r->exp
2755 	      && r->exp != range->exp
2756 	      && TREE_CODE (r->exp) == SSA_NAME)
2757 	    {
2758 	      fprintf (dump_file, " and ");
2759 	      print_generic_expr (dump_file, r->exp);
2760 	    }
2761 	  else
2762 	    fprintf (dump_file, " and");
2763 	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2764 	  print_generic_expr (dump_file, r->low);
2765 	  fprintf (dump_file, ", ");
2766 	  print_generic_expr (dump_file, r->high);
2767 	  fprintf (dump_file, "]");
2768 	}
2769       fprintf (dump_file, "\n into ");
2770       print_generic_expr (dump_file, tem);
2771       fprintf (dump_file, "\n");
2772     }
2773 
2774   if (opcode == BIT_IOR_EXPR
2775       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2776     tem = invert_truthvalue_loc (loc, tem);
2777 
2778   tem = fold_convert_loc (loc, optype, tem);
2779   if (stmt)
2780     {
2781       gsi = gsi_for_stmt (stmt);
2782       uid = gimple_uid (stmt);
2783     }
2784   else
2785     {
2786       gsi = gsi_none ();
2787       uid = 0;
2788     }
2789   if (stmt == NULL)
2790     gcc_checking_assert (tem == op);
2791   /* In rare cases range->exp can be equal to lhs of stmt.
2792      In that case we have to insert after the stmt rather than before
2793      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2794   else if (op != range->exp)
2795     {
2796       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2797       tem = force_into_ssa_name (&gsi, tem, true);
2798       gsi_prev (&gsi);
2799     }
2800   else if (gimple_code (stmt) != GIMPLE_PHI)
2801     {
2802       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2803       tem = force_into_ssa_name (&gsi, tem, false);
2804     }
2805   else
2806     {
2807       gsi = gsi_after_labels (gimple_bb (stmt));
2808       if (!gsi_end_p (gsi))
2809 	uid = gimple_uid (gsi_stmt (gsi));
2810       else
2811 	{
2812 	  gsi = gsi_start_bb (gimple_bb (stmt));
2813 	  uid = 1;
2814 	  while (!gsi_end_p (gsi))
2815 	    {
2816 	      uid = gimple_uid (gsi_stmt (gsi));
2817 	      gsi_next (&gsi);
2818 	    }
2819 	}
2820       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2821       tem = force_into_ssa_name (&gsi, tem, true);
2822       if (gsi_end_p (gsi))
2823 	gsi = gsi_last_bb (gimple_bb (stmt));
2824       else
2825 	gsi_prev (&gsi);
2826     }
2827   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2828     if (gimple_uid (gsi_stmt (gsi)))
2829       break;
2830     else
2831       gimple_set_uid (gsi_stmt (gsi), uid);
2832 
2833   oe->op = tem;
2834   range->exp = exp;
2835   range->low = low;
2836   range->high = high;
2837   range->in_p = in_p;
2838   range->strict_overflow_p = false;
2839 
2840   for (i = 0; i < count; i++)
2841     {
2842       if (otherrange)
2843 	range = otherrange + i;
2844       else
2845 	range = otherrangep[i];
2846       oe = (*ops)[range->idx];
2847       /* Now change all the other range test immediate uses, so that
2848 	 those tests will be optimized away.  */
2849       if (opcode == ERROR_MARK)
2850 	{
2851 	  if (oe->op)
2852 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2853 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2854 	  else
2855 	    oe->op = (oe->rank == BIT_IOR_EXPR
2856 		      ? boolean_false_node : boolean_true_node);
2857 	}
2858       else
2859 	oe->op = error_mark_node;
2860       range->exp = NULL_TREE;
2861       range->low = NULL_TREE;
2862       range->high = NULL_TREE;
2863     }
2864   return true;
2865 }
2866 
2867 /* Optimize X == CST1 || X == CST2
2868    if popcount (CST1 ^ CST2) == 1 into
2869    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2870    Similarly for ranges.  E.g.
2871    X != 2 && X != 3 && X != 10 && X != 11
2872    will be transformed by the previous optimization into
2873    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2874    and this loop can transform that into
2875    !(((X & ~8) - 2U) <= 1U).  */
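
/* As a worked instance of the first form: for X == 2 || X == 3,
   CST1 ^ CST2 == 1 has popcount 1, so the test becomes
   (X & ~1) == 2, a single comparison.  */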
2876 
2877 static bool
2878 optimize_range_tests_xor (enum tree_code opcode, tree type,
2879 			  tree lowi, tree lowj, tree highi, tree highj,
2880 			  vec<operand_entry *> *ops,
2881 			  struct range_entry *rangei,
2882 			  struct range_entry *rangej)
2883 {
2884   tree lowxor, highxor, tem, exp;
2885   /* Check lowi ^ lowj == highi ^ highj and
2886      popcount (lowi ^ lowj) == 1.  */
2887   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2888   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2889     return false;
2890   if (!integer_pow2p (lowxor))
2891     return false;
2892   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2893   if (!tree_int_cst_equal (lowxor, highxor))
2894     return false;
2895 
2896   exp = rangei->exp;
2897   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2898   int prec = GET_MODE_PRECISION (mode);
2899   if (TYPE_PRECISION (type) < prec
2900       || (wi::to_wide (TYPE_MIN_VALUE (type))
2901 	  != wi::min_value (prec, TYPE_SIGN (type)))
2902       || (wi::to_wide (TYPE_MAX_VALUE (type))
2903 	  != wi::max_value (prec, TYPE_SIGN (type))))
2904     {
2905       type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2906       exp = fold_convert (type, exp);
2907       lowxor = fold_convert (type, lowxor);
2908       lowi = fold_convert (type, lowi);
2909       highi = fold_convert (type, highi);
2910     }
2911   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2912   exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2913   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2914   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2915   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2916 			 NULL, rangei->in_p, lowj, highj,
2917 			 rangei->strict_overflow_p
2918 			 || rangej->strict_overflow_p))
2919     return true;
2920   return false;
2921 }
2922 
2923 /* Optimize X == CST1 || X == CST2
2924    if popcount (CST2 - CST1) == 1 into
2925    ((X - CST1) & ~(CST2 - CST1)) == 0.
2926    Similarly for ranges.  E.g.
2927    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2928    || X == 75 || X == 45
2929    will be transformed by the previous optimization into
2930    (X - 43U) <= 3U || (X - 75U) <= 3U
2931    and this loop can transform that into
2932    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
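
/* Checking the example: both ranges span 3 (46 - 43 == 78 - 75), and
   75 - 43 == 32 has popcount 1, so masking bit 5 out of X - 43U
   collapses the two ranges: ((X - 43U) & ~32U) <= 3U holds exactly
   for X in 43..46 or 75..78.  */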
2933 static bool
2934 optimize_range_tests_diff (enum tree_code opcode, tree type,
2935 			   tree lowi, tree lowj, tree highi, tree highj,
2936 			   vec<operand_entry *> *ops,
2937 			   struct range_entry *rangei,
2938 			   struct range_entry *rangej)
2939 {
2940   tree tem1, tem2, mask;
2941   /* Check highi - lowi == highj - lowj.  */
2942   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2943   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2944     return false;
2945   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2946   if (!tree_int_cst_equal (tem1, tem2))
2947     return false;
2948   /* Check popcount (lowj - lowi) == 1.  */
2949   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2950   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2951     return false;
2952   if (!integer_pow2p (tem1))
2953     return false;
2954 
2955   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2956   int prec = GET_MODE_PRECISION (mode);
2957   if (TYPE_PRECISION (type) < prec
2958       || (wi::to_wide (TYPE_MIN_VALUE (type))
2959 	  != wi::min_value (prec, TYPE_SIGN (type)))
2960       || (wi::to_wide (TYPE_MAX_VALUE (type))
2961 	  != wi::max_value (prec, TYPE_SIGN (type))))
2962     type = build_nonstandard_integer_type (prec, 1);
2963   else
2964     type = unsigned_type_for (type);
2965   tem1 = fold_convert (type, tem1);
2966   tem2 = fold_convert (type, tem2);
2967   lowi = fold_convert (type, lowi);
2968   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2969   tem1 = fold_build2 (MINUS_EXPR, type,
2970 		      fold_convert (type, rangei->exp), lowi);
2971   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2972   lowj = build_int_cst (type, 0);
2973   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2974 			 NULL, rangei->in_p, lowj, tem2,
2975 			 rangei->strict_overflow_p
2976 			 || rangej->strict_overflow_p))
2977     return true;
2978   return false;
2979 }
2980 
2981 /* Perform the checks common to optimize_range_tests_xor and
2982    optimize_range_tests_diff.
2983    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
2984    otherwise call optimize_range_tests_diff.  */
2985 
2986 static bool
2987 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2988 			bool optimize_xor, vec<operand_entry *> *ops,
2989 			struct range_entry *ranges)
2990 {
2991   int i, j;
2992   bool any_changes = false;
2993   for (i = first; i < length; i++)
2994     {
2995       tree lowi, highi, lowj, highj, type, tem;
2996 
2997       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2998 	continue;
2999       type = TREE_TYPE (ranges[i].exp);
3000       if (!INTEGRAL_TYPE_P (type))
3001 	continue;
3002       lowi = ranges[i].low;
3003       if (lowi == NULL_TREE)
3004 	lowi = TYPE_MIN_VALUE (type);
3005       highi = ranges[i].high;
3006       if (highi == NULL_TREE)
3007 	continue;
3008       for (j = i + 1; j < length && j < i + 64; j++)
3009 	{
3010 	  bool changes;
3011 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3012 	    continue;
3013 	  lowj = ranges[j].low;
3014 	  if (lowj == NULL_TREE)
3015 	    continue;
3016 	  highj = ranges[j].high;
3017 	  if (highj == NULL_TREE)
3018 	    highj = TYPE_MAX_VALUE (type);
3019 	  /* Check lowj > highi.  */
3020 	  tem = fold_binary (GT_EXPR, boolean_type_node,
3021 			     lowj, highi);
3022 	  if (tem == NULL_TREE || !integer_onep (tem))
3023 	    continue;
3024 	  if (optimize_xor)
3025 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3026 						highi, highj, ops,
3027 						ranges + i, ranges + j);
3028 	  else
3029 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3030 						 highi, highj, ops,
3031 						 ranges + i, ranges + j);
3032 	  if (changes)
3033 	    {
3034 	      any_changes = true;
3035 	      break;
3036 	    }
3037 	}
3038     }
3039   return any_changes;
3040 }
3041 
3042 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
3043    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3044    EXP on success, NULL otherwise.  */
3045 
3046 static tree
3047 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3048 		       wide_int *mask, tree *totallowp)
3049 {
3050   tree tem = int_const_binop (MINUS_EXPR, high, low);
3051   if (tem == NULL_TREE
3052       || TREE_CODE (tem) != INTEGER_CST
3053       || TREE_OVERFLOW (tem)
3054       || tree_int_cst_sgn (tem) == -1
3055       || compare_tree_int (tem, prec) != -1)
3056     return NULL_TREE;
3057 
3058   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3059   *mask = wi::shifted_mask (0, max, false, prec);
3060   if (TREE_CODE (exp) == BIT_AND_EXPR
3061       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3062     {
3063       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3064       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3065       if (wi::popcount (msk) == 1
3066 	  && wi::ltu_p (msk, prec - max))
3067 	{
3068 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3069 	  max += msk.to_uhwi ();
3070 	  exp = TREE_OPERAND (exp, 0);
3071 	  if (integer_zerop (low)
3072 	      && TREE_CODE (exp) == PLUS_EXPR
3073 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3074 	    {
3075 	      tree ret = TREE_OPERAND (exp, 0);
3076 	      STRIP_NOPS (ret);
3077 	      widest_int bias
3078 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3079 				     TYPE_PRECISION (TREE_TYPE (low))));
3080 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3081 	      if (totallowp)
3082 		{
3083 		  *totallowp = tbias;
3084 		  return ret;
3085 		}
3086 	      else if (!tree_int_cst_lt (totallow, tbias))
3087 		return NULL_TREE;
3088 	      bias = wi::to_widest (tbias);
3089 	      bias -= wi::to_widest (totallow);
3090 	      if (bias >= 0 && bias < prec - max)
3091 		{
3092 		  *mask = wi::lshift (*mask, bias);
3093 		  return ret;
3094 		}
3095 	    }
3096 	}
3097     }
3098   if (totallowp)
3099     return exp;
3100   if (!tree_int_cst_lt (totallow, low))
3101     return exp;
3102   tem = int_const_binop (MINUS_EXPR, low, totallow);
3103   if (tem == NULL_TREE
3104       || TREE_CODE (tem) != INTEGER_CST
3105       || TREE_OVERFLOW (tem)
3106       || compare_tree_int (tem, prec - max) == 1)
3107     return NULL_TREE;
3108 
3109   *mask = wi::lshift (*mask, wi::to_widest (tem));
3110   return exp;
3111 }
3112 
3113 /* Attempt to optimize small range tests using a bit test.
3114    E.g.
3115    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3116    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3117    has been turned by the earlier optimizations into:
3118    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3119    As the whole 43 through 82 range spans fewer than 64 values,
3120    for 64-bit word targets optimize that into:
3121    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
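
/* In that example the word-sized mask has bits 0-3 set for 43..46,
   bit 6 for 49, bits 32-35 for 75..78 and bit 39 for 82, all relative
   to the base 43; that is where the constant 0x8F0000004FULL comes
   from (0x4F in the low byte, 0x8F in bits 32..39).  */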
3122 
3123 static bool
3124 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3125 				  vec<operand_entry *> *ops,
3126 				  struct range_entry *ranges)
3127 {
3128   int i, j;
3129   bool any_changes = false;
3130   int prec = GET_MODE_BITSIZE (word_mode);
3131   auto_vec<struct range_entry *, 64> candidates;
3132 
3133   for (i = first; i < length - 2; i++)
3134     {
3135       tree lowi, highi, lowj, highj, type;
3136 
3137       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3138 	continue;
3139       type = TREE_TYPE (ranges[i].exp);
3140       if (!INTEGRAL_TYPE_P (type))
3141 	continue;
3142       lowi = ranges[i].low;
3143       if (lowi == NULL_TREE)
3144 	lowi = TYPE_MIN_VALUE (type);
3145       highi = ranges[i].high;
3146       if (highi == NULL_TREE)
3147 	continue;
3148       wide_int mask;
3149       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3150 					highi, &mask, &lowi);
3151       if (exp == NULL_TREE)
3152 	continue;
3153       bool strict_overflow_p = ranges[i].strict_overflow_p;
3154       candidates.truncate (0);
3155       int end = MIN (i + 64, length);
3156       for (j = i + 1; j < end; j++)
3157 	{
3158 	  tree exp2;
3159 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3160 	    continue;
3161 	  if (ranges[j].exp == exp)
3162 	    ;
3163 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3164 	    {
3165 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
3166 	      if (exp2 == exp)
3167 		;
3168 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
3169 		{
3170 		  exp2 = TREE_OPERAND (exp2, 0);
3171 		  STRIP_NOPS (exp2);
3172 		  if (exp2 != exp)
3173 		    continue;
3174 		}
3175 	      else
3176 		continue;
3177 	    }
3178 	  else
3179 	    continue;
3180 	  lowj = ranges[j].low;
3181 	  if (lowj == NULL_TREE)
3182 	    continue;
3183 	  highj = ranges[j].high;
3184 	  if (highj == NULL_TREE)
3185 	    highj = TYPE_MAX_VALUE (type);
3186 	  wide_int mask2;
3187 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3188 					highj, &mask2, NULL);
3189 	  if (exp2 != exp)
3190 	    continue;
3191 	  mask |= mask2;
3192 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3193 	  candidates.safe_push (&ranges[j]);
3194 	}
3195 
3196       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
3197       if (candidates.length () >= 2)
3198 	{
3199 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
3200 					wi::to_widest (lowi)
3201 					+ prec - 1 - wi::clz (mask));
3202 	  operand_entry *oe = (*ops)[ranges[i].idx];
3203 	  tree op = oe->op;
3204 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3205 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3206 	  location_t loc = gimple_location (stmt);
3207 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3208 
3209 	  /* See if it isn't cheaper to pretend the minimum value of the
3210 	     range is 0, if the maximum value is small enough.
3211 	     We can then avoid the subtraction of the minimum value, but the
3212 	     mask constant could perhaps be more expensive.  */
3213 	  if (compare_tree_int (lowi, 0) > 0
3214 	      && compare_tree_int (high, prec) < 0)
3215 	    {
3216 	      int cost_diff;
3217 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
3218 	      rtx reg = gen_raw_REG (word_mode, 10000);
3219 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3220 	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3221 						      GEN_INT (-m)), speed_p);
3222 	      rtx r = immed_wide_int_const (mask, word_mode);
3223 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3224 					 word_mode, speed_p);
3225 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3226 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3227 					 word_mode, speed_p);
3228 	      if (cost_diff > 0)
3229 		{
3230 		  mask = wi::lshift (mask, m);
3231 		  lowi = build_zero_cst (TREE_TYPE (lowi));
3232 		}
3233 	    }
3234 
3235 	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
3236 					false, lowi, high);
3237 	  if (tem == NULL_TREE || is_gimple_val (tem))
3238 	    continue;
3239 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
3240 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3241 				 fold_convert_loc (loc, etype, exp),
3242 				 fold_convert_loc (loc, etype, lowi));
3243 	  exp = fold_convert_loc (loc, integer_type_node, exp);
3244 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3245 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3246 				 build_int_cst (word_type, 1), exp);
3247 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3248 				 wide_int_to_tree (word_type, mask));
3249 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3250 				 build_zero_cst (word_type));
3251 	  if (is_gimple_val (exp))
3252 	    continue;
3253 
3254 	  /* The shift might have undefined behavior if TEM is true,
3255 	     but reassociate_bb isn't prepared to have basic blocks
3256 	     split when it is running.  So, temporarily emit code
3257 	     with BIT_IOR_EXPR instead of &&, and fix it up in
3258 	     branch_fixup.  */
3259 	  gimple_seq seq;
3260 	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3261 	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
3262 	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3263 	  gimple_seq seq2;
3264 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3265 	  gimple_seq_add_seq_without_update (&seq, seq2);
3266 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
3267 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3268 	  gimple *g = gimple_build_assign (make_ssa_name (optype),
3269 					   BIT_IOR_EXPR, tem, exp);
3270 	  gimple_set_location (g, loc);
3271 	  gimple_seq_add_stmt_without_update (&seq, g);
3272 	  exp = gimple_assign_lhs (g);
3273 	  tree val = build_zero_cst (optype);
3274 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
3275 				 candidates.length (), opcode, ops, exp,
3276 				 seq, false, val, val, strict_overflow_p))
3277 	    {
3278 	      any_changes = true;
3279 	      reassoc_branch_fixups.safe_push (tem);
3280 	    }
3281 	  else
3282 	    gimple_seq_discard (seq);
3283 	}
3284     }
3285   return any_changes;
3286 }
3287 
3288 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
3289    and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1.  */
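
/* An illustrative sketch with made-up SSA names: for 32-bit x_1, y_2
   and 64-bit z_3 in

     x_1 != 0 || y_2 != 0 || z_3 != 0

   x_1 and y_2 land in the same bucket below (same precision, compared
   against 0) and merge into (x_1 | y_2) != 0, while z_3 keeps its own
   test.  */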
3290 
3291 static bool
3292 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3293 				  vec<operand_entry *> *ops,
3294 				  struct range_entry *ranges)
3295 {
3296   int i;
3297   unsigned int b;
3298   bool any_changes = false;
3299   auto_vec<int, 128> buckets;
3300   auto_vec<int, 32> chains;
3301   auto_vec<struct range_entry *, 32> candidates;
3302 
3303   for (i = first; i < length; i++)
3304     {
3305       if (ranges[i].exp == NULL_TREE
3306 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3307 	  || !ranges[i].in_p
3308 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3309 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3310 	  || ranges[i].low == NULL_TREE
3311 	  || ranges[i].low != ranges[i].high)
3312 	continue;
3313 
3314       bool zero_p = integer_zerop (ranges[i].low);
3315       if (!zero_p && !integer_all_onesp (ranges[i].low))
3316 	continue;
3317 
3318       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3319       if (buckets.length () <= b)
3320 	buckets.safe_grow_cleared (b + 1);
3321       if (chains.length () <= (unsigned) i)
3322 	chains.safe_grow (i + 1);
3323       chains[i] = buckets[b];
3324       buckets[b] = i + 1;
3325     }
3326 
3327   FOR_EACH_VEC_ELT (buckets, b, i)
3328     if (i && chains[i - 1])
3329       {
3330 	int j, k = i;
3331 	for (j = chains[i - 1]; j; j = chains[j - 1])
3332 	  {
3333 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3334 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3335 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
3336 	      k = j;
3337 	  }
3338 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
3339 	tree type2 = NULL_TREE;
3340 	bool strict_overflow_p = false;
3341 	candidates.truncate (0);
3342 	for (j = i; j; j = chains[j - 1])
3343 	  {
3344 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3345 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3346 	    if (j == k
3347 		|| useless_type_conversion_p (type1, type))
3348 	      ;
3349 	    else if (type2 == NULL_TREE
3350 		     || useless_type_conversion_p (type2, type))
3351 	      {
3352 		if (type2 == NULL_TREE)
3353 		  type2 = type;
3354 		candidates.safe_push (&ranges[j - 1]);
3355 	      }
3356 	  }
3357 	unsigned l = candidates.length ();
3358 	for (j = i; j; j = chains[j - 1])
3359 	  {
3360 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3361 	    if (j == k)
3362 	      continue;
3363 	    if (useless_type_conversion_p (type1, type))
3364 	      ;
3365 	    else if (type2 == NULL_TREE
3366 		     || useless_type_conversion_p (type2, type))
3367 	      continue;
3368 	    candidates.safe_push (&ranges[j - 1]);
3369 	  }
3370 	gimple_seq seq = NULL;
3371 	tree op = NULL_TREE;
3372 	unsigned int id;
3373 	struct range_entry *r;
3374 	candidates.safe_push (&ranges[k - 1]);
3375 	FOR_EACH_VEC_ELT (candidates, id, r)
3376 	  {
3377 	    gimple *g;
3378 	    if (id == 0)
3379 	      {
3380 		op = r->exp;
3381 		continue;
3382 	      }
3383 	    if (id == l)
3384 	      {
3385 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3386 		gimple_seq_add_stmt_without_update (&seq, g);
3387 		op = gimple_assign_lhs (g);
3388 	      }
3389 	    tree type = TREE_TYPE (r->exp);
3390 	    tree exp = r->exp;
3391 	    if (id >= l && !useless_type_conversion_p (type1, type))
3392 	      {
3393 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3394 		gimple_seq_add_stmt_without_update (&seq, g);
3395 		exp = gimple_assign_lhs (g);
3396 	      }
3397 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3398 				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3399 				     op, exp);
3400 	    gimple_seq_add_stmt_without_update (&seq, g);
3401 	    op = gimple_assign_lhs (g);
3402 	  }
3403 	candidates.pop ();
3404 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3405 			       candidates.length (), opcode, ops, op,
3406 			       seq, true, ranges[k - 1].low,
3407 			       ranges[k - 1].low, strict_overflow_p))
3408 	  any_changes = true;
3409 	else
3410 	  gimple_seq_discard (seq);
3411       }
3412 
3413   return any_changes;
3414 }
3415 
3416 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3417    a >= 0 && a < b into (unsigned) a < (unsigned) b
3418    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
3419 
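
/* A brief illustration of why this is valid: if a < 0, then
   (unsigned) a is at least 2^(prec-1), while (unsigned) b stays below
   2^(prec-1) because b >= 0, so the unsigned compare fails exactly
   when the a >= 0 test does.  */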
3420 static bool
3421 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3422 				vec<operand_entry *> *ops,
3423 				struct range_entry *ranges,
3424 				basic_block first_bb)
3425 {
3426   int i;
3427   bool any_changes = false;
3428   hash_map<tree, int> *map = NULL;
3429 
3430   for (i = first; i < length; i++)
3431     {
3432       if (ranges[i].exp == NULL_TREE
3433 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3434 	  || !ranges[i].in_p)
3435 	continue;
3436 
3437       tree type = TREE_TYPE (ranges[i].exp);
3438       if (!INTEGRAL_TYPE_P (type)
3439 	  || TYPE_UNSIGNED (type)
3440 	  || ranges[i].low == NULL_TREE
3441 	  || !integer_zerop (ranges[i].low)
3442 	  || ranges[i].high != NULL_TREE)
3443 	continue;
3444       /* EXP >= 0 here.  */
3445       if (map == NULL)
3446 	map = new hash_map <tree, int>;
3447       map->put (ranges[i].exp, i);
3448     }
3449 
3450   if (map == NULL)
3451     return false;
3452 
3453   for (i = 0; i < length; i++)
3454     {
3455       bool in_p = ranges[i].in_p;
3456       if (ranges[i].low == NULL_TREE
3457 	  || ranges[i].high == NULL_TREE)
3458 	continue;
3459       if (!integer_zerop (ranges[i].low)
3460 	  || !integer_zerop (ranges[i].high))
3461 	{
3462 	  if (ranges[i].exp
3463 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3464 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3465 	      && integer_onep (ranges[i].low)
3466 	      && integer_onep (ranges[i].high))
3467 	    in_p = !in_p;
3468 	  else
3469 	    continue;
3470 	}
3471 
3472       gimple *stmt;
3473       tree_code ccode;
3474       tree rhs1, rhs2;
3475       if (ranges[i].exp)
3476 	{
3477 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3478 	    continue;
3479 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3480 	  if (!is_gimple_assign (stmt))
3481 	    continue;
3482 	  ccode = gimple_assign_rhs_code (stmt);
3483 	  rhs1 = gimple_assign_rhs1 (stmt);
3484 	  rhs2 = gimple_assign_rhs2 (stmt);
3485 	}
3486       else
3487 	{
3488 	  operand_entry *oe = (*ops)[ranges[i].idx];
3489 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3490 	  if (gimple_code (stmt) != GIMPLE_COND)
3491 	    continue;
3492 	  ccode = gimple_cond_code (stmt);
3493 	  rhs1 = gimple_cond_lhs (stmt);
3494 	  rhs2 = gimple_cond_rhs (stmt);
3495 	}
3496 
3497       if (TREE_CODE (rhs1) != SSA_NAME
3498 	  || rhs2 == NULL_TREE
3499 	  || TREE_CODE (rhs2) != SSA_NAME)
3500 	continue;
3501 
3502       switch (ccode)
3503 	{
3504 	case GT_EXPR:
3505 	case GE_EXPR:
3506 	case LT_EXPR:
3507 	case LE_EXPR:
3508 	  break;
3509 	default:
3510 	  continue;
3511 	}
3512       if (in_p)
3513 	ccode = invert_tree_comparison (ccode, false);
3514       switch (ccode)
3515 	{
3516 	case GT_EXPR:
3517 	case GE_EXPR:
3518 	  std::swap (rhs1, rhs2);
3519 	  ccode = swap_tree_comparison (ccode);
3520 	  break;
3521 	case LT_EXPR:
3522 	case LE_EXPR:
3523 	  break;
3524 	default:
3525 	  gcc_unreachable ();
3526 	}
3527 
3528       int *idx = map->get (rhs1);
3529       if (idx == NULL)
3530 	continue;
3531 
3532       /* maybe_optimize_range_tests allows statements without side-effects
3533 	 in the basic blocks as long as they are consumed in the same bb.
3534 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3535 	 safely use get_nonzero_bits on it.  E.g. in:
3536 	  # RANGE [-83, 1] NONZERO 173
3537 	  # k_32 = PHI <k_47(13), k_12(9)>
3538 	 ...
3539 	  if (k_32 >= 0)
3540 	    goto <bb 5>; [26.46%]
3541 	  else
3542 	    goto <bb 9>; [73.54%]
3543 
3544 	  <bb 5> [local count: 140323371]:
3545 	  # RANGE [0, 1] NONZERO 1
3546 	  _5 = (int) k_32;
3547 	  # RANGE [0, 4] NONZERO 4
3548 	  _21 = _5 << 2;
3549 	  # RANGE [0, 4] NONZERO 4
3550 	  iftmp.0_44 = (char) _21;
3551 	  if (k_32 < iftmp.0_44)
3552 	    goto <bb 6>; [84.48%]
3553 	  else
3554 	    goto <bb 9>; [15.52%]
3555 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3556 	 k_32 >= 0.  If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3557 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3558 	 those stmts even for negative k_32, the value ranges would no
3559 	 longer be guaranteed, and so the optimization would be invalid.  */
3560       while (opcode == ERROR_MARK)
3561 	{
3562 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3563 	  basic_block bb2 = gimple_bb (g);
3564 	  if (bb2
3565 	      && bb2 != first_bb
3566 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3567 	    {
3568 	      /* As an exception, handle a few common cases.  */
3569 	      if (gimple_assign_cast_p (g)
3570 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3571 		{
3572 		  tree op0 = gimple_assign_rhs1 (g);
3573 		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
3574 		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
3575 			  > TYPE_PRECISION (TREE_TYPE (op0))))
3576 		    /* Zero-extension is always ok.  */
3577 		    break;
3578 		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3579 			   == TYPE_PRECISION (TREE_TYPE (op0))
3580 			   && TREE_CODE (op0) == SSA_NAME)
3581 		    {
3582 		      /* Cast from signed to unsigned or vice versa.  Retry
3583 			 with the op0 as new rhs2.  */
3584 		      rhs2 = op0;
3585 		      continue;
3586 		    }
3587 		}
3588 	      else if (is_gimple_assign (g)
3589 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3590 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3591 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3592 		/* Masking with INTEGER_CST with MSB clear is always ok
3593 		   too.  */
3594 		break;
3595 	      rhs2 = NULL_TREE;
3596 	    }
3597 	  break;
3598 	}
3599       if (rhs2 == NULL_TREE)
3600 	continue;
3601 
3602       wide_int nz = get_nonzero_bits (rhs2);
3603       if (wi::neg_p (nz))
3604 	continue;
3605 
3606       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3607 	 and RHS2 is known to be >= 0.  */
3608       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3609 
3610       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3611       if ((ranges[*idx].strict_overflow_p
3612 	   || ranges[i].strict_overflow_p)
3613 	  && issue_strict_overflow_warning (wc))
3614 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3615 		    "assuming signed overflow does not occur "
3616 		    "when simplifying range test");
3617 
3618       if (dump_file && (dump_flags & TDF_DETAILS))
3619 	{
3620 	  struct range_entry *r = &ranges[*idx];
3621 	  fprintf (dump_file, "Optimizing range test ");
3622 	  print_generic_expr (dump_file, r->exp);
3623 	  fprintf (dump_file, " +[");
3624 	  print_generic_expr (dump_file, r->low);
3625 	  fprintf (dump_file, ", ");
3626 	  print_generic_expr (dump_file, r->high);
3627 	  fprintf (dump_file, "] and comparison ");
3628 	  print_generic_expr (dump_file, rhs1);
3629 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3630 	  print_generic_expr (dump_file, rhs2);
3631 	  fprintf (dump_file, "\n into (");
3632 	  print_generic_expr (dump_file, utype);
3633 	  fprintf (dump_file, ") ");
3634 	  print_generic_expr (dump_file, rhs1);
3635 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3636 	  print_generic_expr (dump_file, utype);
3637 	  fprintf (dump_file, ") ");
3638 	  print_generic_expr (dump_file, rhs2);
3639 	  fprintf (dump_file, "\n");
3640 	}
3641 
3642       operand_entry *oe = (*ops)[ranges[i].idx];
3643       ranges[i].in_p = 0;
3644       if (opcode == BIT_IOR_EXPR
3645 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3646 	{
3647 	  ranges[i].in_p = 1;
3648 	  ccode = invert_tree_comparison (ccode, false);
3649 	}
3650 
3651       unsigned int uid = gimple_uid (stmt);
3652       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3653       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3654       gimple_set_uid (g, uid);
3655       rhs1 = gimple_assign_lhs (g);
3656       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3657       if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3658 	{
3659 	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3660 	  gimple_set_uid (g, uid);
3661 	  rhs2 = gimple_assign_lhs (g);
3662 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3663 	}
3664       if (tree_swap_operands_p (rhs1, rhs2))
3665 	{
3666 	  std::swap (rhs1, rhs2);
3667 	  ccode = swap_tree_comparison (ccode);
3668 	}
3669       if (gimple_code (stmt) == GIMPLE_COND)
3670 	{
3671 	  gcond *c = as_a <gcond *> (stmt);
3672 	  gimple_cond_set_code (c, ccode);
3673 	  gimple_cond_set_lhs (c, rhs1);
3674 	  gimple_cond_set_rhs (c, rhs2);
3675 	  update_stmt (stmt);
3676 	}
3677       else
3678 	{
3679 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3680 	  if (!INTEGRAL_TYPE_P (ctype)
3681 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3682 		  && TYPE_PRECISION (ctype) != 1))
3683 	    ctype = boolean_type_node;
3684 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3685 	  gimple_set_uid (g, uid);
3686 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3687 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3688 	    {
3689 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3690 				       NOP_EXPR, gimple_assign_lhs (g));
3691 	      gimple_set_uid (g, uid);
3692 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3693 	    }
3694 	  ranges[i].exp = gimple_assign_lhs (g);
3695 	  oe->op = ranges[i].exp;
3696 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3697 	  ranges[i].high = ranges[i].low;
3698 	}
3699       ranges[i].strict_overflow_p = false;
3700       oe = (*ops)[ranges[*idx].idx];
3701       /* Now change all the other range test immediate uses, so that
3702 	 those tests will be optimized away.  */
3703       if (opcode == ERROR_MARK)
3704 	{
3705 	  if (oe->op)
3706 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3707 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3708 	  else
3709 	    oe->op = (oe->rank == BIT_IOR_EXPR
3710 		      ? boolean_false_node : boolean_true_node);
3711 	}
3712       else
3713 	oe->op = error_mark_node;
3714       ranges[*idx].exp = NULL_TREE;
3715       ranges[*idx].low = NULL_TREE;
3716       ranges[*idx].high = NULL_TREE;
3717       any_changes = true;
3718     }
3719 
3720   delete map;
3721   return any_changes;
3722 }
3723 
3724 /* Optimize range tests, similarly to how fold_range_test optimizes
3725    them on trees.  The tree code for the binary
3726    operation between all the operands is OPCODE.
3727    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3728    maybe_optimize_range_tests for inter-bb range optimization.
3729    In that case if oe->op is NULL, oe->id is bb->index whose
3730    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3731    the actual opcode.
3732    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
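
/* For instance (an illustrative example with made-up names),
   x_1 <= 4 || x_1 >= 7 has its two entries merged by merge_ranges
   into a single range excluding [5, 6], which update_range_test can
   then emit as one comparison such as (unsigned) (x_1 - 5) > 1.  */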
3733 
3734 static bool
3735 optimize_range_tests (enum tree_code opcode,
3736 		      vec<operand_entry *> *ops, basic_block first_bb)
3737 {
3738   unsigned int length = ops->length (), i, j, first;
3739   operand_entry *oe;
3740   struct range_entry *ranges;
3741   bool any_changes = false;
3742 
3743   if (length == 1)
3744     return false;
3745 
3746   ranges = XNEWVEC (struct range_entry, length);
3747   for (i = 0; i < length; i++)
3748     {
3749       oe = (*ops)[i];
3750       ranges[i].idx = i;
3751       init_range_entry (ranges + i, oe->op,
3752 			oe->op
3753 			? NULL
3754 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3755       /* For | invert it now, we will invert it again before emitting
3756       /* For |, invert it now; we will invert it again before emitting
3757       if (opcode == BIT_IOR_EXPR
3758 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3759 	ranges[i].in_p = !ranges[i].in_p;
3760     }
3761 
3762   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3763   for (i = 0; i < length; i++)
3764     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3765       break;
3766 
3767   /* Try to merge ranges.  */
3768   for (first = i; i < length; i++)
3769     {
3770       tree low = ranges[i].low;
3771       tree high = ranges[i].high;
3772       int in_p = ranges[i].in_p;
3773       bool strict_overflow_p = ranges[i].strict_overflow_p;
3774       int update_fail_count = 0;
3775 
3776       for (j = i + 1; j < length; j++)
3777 	{
3778 	  if (ranges[i].exp != ranges[j].exp)
3779 	    break;
3780 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3781 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3782 	    break;
3783 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3784 	}
3785 
3786       if (j == i + 1)
3787 	continue;
3788 
3789       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3790 			     opcode, ops, ranges[i].exp, NULL, in_p,
3791 			     low, high, strict_overflow_p))
3792 	{
3793 	  i = j - 1;
3794 	  any_changes = true;
3795 	}
3796       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3797 	 while update_range_test would fail.  */
3798       else if (update_fail_count == 64)
3799 	i = j - 1;
3800       else
3801 	++update_fail_count;
3802     }
3803 
3804   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3805 					 ops, ranges);
3806 
3807   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3808     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3809 					   ops, ranges);
3810   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3811     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3812 						     ops, ranges);
3813   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3814 						   ops, ranges);
3815   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3816 						 ranges, first_bb);
3817 
3818   if (any_changes && opcode != ERROR_MARK)
3819     {
3820       j = 0;
3821       FOR_EACH_VEC_ELT (*ops, i, oe)
3822 	{
3823 	  if (oe->op == error_mark_node)
3824 	    continue;
3825 	  else if (i != j)
3826 	    (*ops)[j] = oe;
3827 	  j++;
3828 	}
3829       ops->truncate (j);
3830     }
3831 
3832   XDELETEVEC (ranges);
3833   return any_changes;
3834 }
3835 
3836 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3837    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3838    otherwise the comparison code.  TYPE is a return value that is set
3839    to the type of the comparison.  */
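
/* E.g. (an illustrative sketch with made-up names), for

     mask_5 = VEC_COND_EXPR <x_1 < y_2, { -1, ... }, { 0, ... }>;

   this returns LT_EXPR, stores the defining statement in *RETS,
   false in *RETI and the comparison's type in *TYPE.  */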
3840 
3841 static tree_code
3842 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3843 {
3844   if (TREE_CODE (var) != SSA_NAME)
3845     return ERROR_MARK;
3846 
3847   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3848   if (stmt == NULL)
3849     return ERROR_MARK;
3850 
3851   /* ??? If we start creating more COND_EXPR, we could perform
3852      this same optimization with them.	For now, simplify.  */
3853   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3854     return ERROR_MARK;
3855 
3856   tree cond = gimple_assign_rhs1 (stmt);
3857   tree_code cmp = TREE_CODE (cond);
3858   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3859     return ERROR_MARK;
3860 
3861   /* ??? For now, allow only canonical true and false result vectors.
3862      We could expand this to other constants should the need arise,
3863      but at the moment we don't create them.  */
3864   tree t = gimple_assign_rhs2 (stmt);
3865   tree f = gimple_assign_rhs3 (stmt);
3866   bool inv;
3867   if (integer_all_onesp (t))
3868     inv = false;
3869   else if (integer_all_onesp (f))
3870     {
3871       cmp = invert_tree_comparison (cmp, false);
3872       inv = true;
3873     }
3874   else
3875     return ERROR_MARK;
3876   if (!integer_zerop (f))
3877     return ERROR_MARK;
3878 
3879   /* Success!  */
3880   if (rets)
3881     *rets = stmt;
3882   if (reti)
3883     *reti = inv;
3884   if (type)
3885     *type = TREE_TYPE (cond);
3886   return cmp;
3887 }
3888 
3889 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3890    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
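
/* For example (illustrative, made-up names), given

     m1_3 = VEC_COND_EXPR <x_1 < y_2, { -1, ... }, { 0, ... }>;
     m2_4 = VEC_COND_EXPR <x_1 <= y_2, { -1, ... }, { 0, ... }>;

   combined with BIT_AND_EXPR, maybe_fold_and_comparisons can fold
   x_1 < y_2 && x_1 <= y_2 into x_1 < y_2, so m1_3 keeps the folded
   condition and m2_4 is dropped from the operand list.  */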
3891 
3892 static bool
3893 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3894 {
3895   unsigned int length = ops->length (), i, j;
3896   bool any_changes = false;
3897 
3898   if (length == 1)
3899     return false;
3900 
3901   for (i = 0; i < length; ++i)
3902     {
3903       tree elt0 = (*ops)[i]->op;
3904 
3905       gassign *stmt0;
3906       bool invert;
3907       tree type;
3908       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3909       if (cmp0 == ERROR_MARK)
3910 	continue;
3911 
3912       for (j = i + 1; j < length; ++j)
3913 	{
3914 	  tree &elt1 = (*ops)[j]->op;
3915 
3916 	  gassign *stmt1;
3917 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3918 	  if (cmp1 == ERROR_MARK)
3919 	    continue;
3920 
3921 	  tree cond0 = gimple_assign_rhs1 (stmt0);
3922 	  tree x0 = TREE_OPERAND (cond0, 0);
3923 	  tree y0 = TREE_OPERAND (cond0, 1);
3924 
3925 	  tree cond1 = gimple_assign_rhs1 (stmt1);
3926 	  tree x1 = TREE_OPERAND (cond1, 0);
3927 	  tree y1 = TREE_OPERAND (cond1, 1);
3928 
3929 	  tree comb;
3930 	  if (opcode == BIT_AND_EXPR)
3931 	    comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3932 					       y1);
3933 	  else if (opcode == BIT_IOR_EXPR)
3934 	    comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3935 					      y1);
3936 	  else
3937 	    gcc_unreachable ();
3938 	  if (comb == NULL)
3939 	    continue;
3940 
3941 	  /* Success! */
3942 	  if (dump_file && (dump_flags & TDF_DETAILS))
3943 	    {
3944 	      fprintf (dump_file, "Transforming ");
3945 	      print_generic_expr (dump_file, cond0);
3946 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3947 	      print_generic_expr (dump_file, cond1);
3948 	      fprintf (dump_file, " into ");
3949 	      print_generic_expr (dump_file, comb);
3950 	      fputc ('\n', dump_file);
3951 	    }
3952 
3953 	  gimple_assign_set_rhs1 (stmt0, comb);
3954 	  if (invert)
3955 	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
3956 		       *gimple_assign_rhs3_ptr (stmt0));
3957 	  update_stmt (stmt0);
3958 
3959 	  elt1 = error_mark_node;
3960 	  any_changes = true;
3961 	}
3962     }
3963 
3964   if (any_changes)
3965     {
3966       operand_entry *oe;
3967       j = 0;
3968       FOR_EACH_VEC_ELT (*ops, i, oe)
3969 	{
3970 	  if (oe->op == error_mark_node)
3971 	    continue;
3972 	  else if (i != j)
3973 	    (*ops)[j] = oe;
3974 	  j++;
3975 	}
3976       ops->truncate (j);
3977     }
3978 
3979   return any_changes;
3980 }
3981 
3982 /* Return true if STMT is a cast like:
3983    <bb N>:
3984    ...
3985    _123 = (int) _234;
3986 
3987    <bb M>:
3988    # _345 = PHI <_123(N), 1(...), 1(...)>
3989    where _234 has bool type, _123 has a single use and
3990    bb N has a single successor M.  This is commonly used in
3991    the last block of a range test.
3992 
3993    Also return true if STMT is a tcc_comparison like:
3994    <bb N>:
3995    ...
3996    _234 = a_2(D) == 2;
3997 
3998    <bb M>:
3999    # _345 = PHI <_234(N), 1(...), 1(...)>
4000    _346 = (int) _345;
4001    where _234 has bool type, a single use and
4002    bb N has a single successor M.  This is commonly used in
4003    the last block of a range test.  */
4004 
4005 static bool
4006 final_range_test_p (gimple *stmt)
4007 {
4008   basic_block bb, rhs_bb, lhs_bb;
4009   edge e;
4010   tree lhs, rhs;
4011   use_operand_p use_p;
4012   gimple *use_stmt;
4013 
4014   if (!gimple_assign_cast_p (stmt)
4015       && (!is_gimple_assign (stmt)
4016 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4017 	      != tcc_comparison)))
4018     return false;
4019   bb = gimple_bb (stmt);
4020   if (!single_succ_p (bb))
4021     return false;
4022   e = single_succ_edge (bb);
4023   if (e->flags & EDGE_COMPLEX)
4024     return false;
4025 
4026   lhs = gimple_assign_lhs (stmt);
4027   rhs = gimple_assign_rhs1 (stmt);
4028   if (gimple_assign_cast_p (stmt)
4029       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4030 	  || TREE_CODE (rhs) != SSA_NAME
4031 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4032     return false;
4033 
4034   if (!gimple_assign_cast_p (stmt)
4035       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4036       return false;
4037 
4038   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
4039   if (!single_imm_use (lhs, &use_p, &use_stmt))
4040     return false;
4041 
4042   if (gimple_code (use_stmt) != GIMPLE_PHI
4043       || gimple_bb (use_stmt) != e->dest)
4044     return false;
4045 
4046   /* And that the rhs is defined in the same loop.  */
4047   if (gimple_assign_cast_p (stmt))
4048     {
4049       if (TREE_CODE (rhs) != SSA_NAME
4050 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4051 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4052 	return false;
4053     }
4054   else
4055     {
4056       if (TREE_CODE (lhs) != SSA_NAME
4057 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4058 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4059 	return false;
4060     }
4061 
4062   return true;
4063 }
4064 
4065 /* Return true if BB is a suitable basic block for inter-bb range test
4066    optimization.  If BACKWARD is true, BB should be the only predecessor
4067    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4068    or compared against to find a common basic block to which all
4069    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
4070    should be the only predecessor of BB.  */
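
/* E.g. (an illustrative CFG), in

     <bb 3>: if (...) goto <bb 5>; else goto <bb 4>;
     <bb 4>: if (...) goto <bb 5>; else goto <bb 6>;

   suitable_cond_bb (<bb 3>, <bb 4>, &other_bb, true) with *other_bb
   initially NULL fills it in with <bb 5>, the block to which both
   conditions branch.  */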
4071 
4072 static bool
4073 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4074 		  bool backward)
4075 {
4076   edge_iterator ei, ei2;
4077   edge e, e2;
4078   gimple *stmt;
4079   gphi_iterator gsi;
4080   bool other_edge_seen = false;
4081   bool is_cond;
4082 
4083   if (test_bb == bb)
4084     return false;
4085   /* Check last stmt first.  */
4086   stmt = last_stmt (bb);
4087   if (stmt == NULL
4088       || (gimple_code (stmt) != GIMPLE_COND
4089 	  && (backward || !final_range_test_p (stmt)))
4090       || gimple_visited_p (stmt)
4091       || stmt_could_throw_p (cfun, stmt)
4092       || *other_bb == bb)
4093     return false;
4094   is_cond = gimple_code (stmt) == GIMPLE_COND;
4095   if (is_cond)
4096     {
4097       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4098 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4099 	 to *OTHER_BB (if not set yet, try to find it out).  */
4100       if (EDGE_COUNT (bb->succs) != 2)
4101 	return false;
4102       FOR_EACH_EDGE (e, ei, bb->succs)
4103 	{
4104 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4105 	    return false;
4106 	  if (e->dest == test_bb)
4107 	    {
4108 	      if (backward)
4109 		continue;
4110 	      else
4111 		return false;
4112 	    }
4113 	  if (e->dest == bb)
4114 	    return false;
4115 	  if (*other_bb == NULL)
4116 	    {
4117 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4118 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4119 		  return false;
4120 		else if (e->dest == e2->dest)
4121 		  *other_bb = e->dest;
4122 	      if (*other_bb == NULL)
4123 		return false;
4124 	    }
4125 	  if (e->dest == *other_bb)
4126 	    other_edge_seen = true;
4127 	  else if (backward)
4128 	    return false;
4129 	}
4130       if (*other_bb == NULL || !other_edge_seen)
4131 	return false;
4132     }
4133   else if (single_succ (bb) != *other_bb)
4134     return false;
4135 
4136   /* Now check all PHIs of *OTHER_BB.  */
4137   e = find_edge (bb, *other_bb);
4138   e2 = find_edge (test_bb, *other_bb);
4139   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4140     {
4141       gphi *phi = gsi.phi ();
4142       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4143 	 corresponding to BB and TEST_BB predecessor must be the same.  */
4144       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4145 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
4146 	{
4147 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4148 	     one of the PHIs should have the lhs of the last stmt in
4149 	     that block as PHI arg and that PHI should have 0 or 1
4150 	     corresponding to it in all other range test basic blocks
4151 	     considered.  */
4152 	  if (!is_cond)
4153 	    {
4154 	      if (gimple_phi_arg_def (phi, e->dest_idx)
4155 		  == gimple_assign_lhs (stmt)
4156 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4157 		      || integer_onep (gimple_phi_arg_def (phi,
4158 							   e2->dest_idx))))
4159 		continue;
4160 	    }
4161 	  else
4162 	    {
4163 	      gimple *test_last = last_stmt (test_bb);
4164 	      if (gimple_code (test_last) != GIMPLE_COND
4165 		  && gimple_phi_arg_def (phi, e2->dest_idx)
4166 		     == gimple_assign_lhs (test_last)
4167 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4168 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4169 		continue;
4170 	    }
4171 
4172 	  return false;
4173 	}
4174     }
4175   return true;
4176 }
4177 
4178 /* Return true if BB doesn't have side-effects that would disallow
4179    range test optimization, all SSA_NAMEs set in the bb are consumed
4180    in the bb and there are no PHIs.  */
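
/* E.g. (illustrative, made-up names) a block containing just

     _5 = a_1 + 1;
     if (_5 > b_2) goto <bb X>; else goto <bb Y>;

   qualifies: _5 is only consumed by the final GIMPLE_COND in the
   same block and neither statement can trap or have side-effects.  */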
4181 
4182 static bool
4183 no_side_effect_bb (basic_block bb)
4184 {
4185   gimple_stmt_iterator gsi;
4186   gimple *last;
4187 
4188   if (!gimple_seq_empty_p (phi_nodes (bb)))
4189     return false;
4190   last = last_stmt (bb);
4191   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4192     {
4193       gimple *stmt = gsi_stmt (gsi);
4194       tree lhs;
4195       imm_use_iterator imm_iter;
4196       use_operand_p use_p;
4197 
4198       if (is_gimple_debug (stmt))
4199 	continue;
4200       if (gimple_has_side_effects (stmt))
4201 	return false;
4202       if (stmt == last)
4203 	return true;
4204       if (!is_gimple_assign (stmt))
4205 	return false;
4206       lhs = gimple_assign_lhs (stmt);
4207       if (TREE_CODE (lhs) != SSA_NAME)
4208 	return false;
4209       if (gimple_assign_rhs_could_trap_p (stmt))
4210 	return false;
4211       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4212 	{
4213 	  gimple *use_stmt = USE_STMT (use_p);
4214 	  if (is_gimple_debug (use_stmt))
4215 	    continue;
4216 	  if (gimple_bb (use_stmt) != bb)
4217 	    return false;
4218 	}
4219     }
4220   return false;
4221 }
4222 
4223 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4224    return true and fill in *OPS recursively.  */
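
/* E.g. (an illustrative sketch with made-up names), for

     _7 = a_1 | b_2;
     _9 = _7 | c_3;

   get_ops (_9, BIT_IOR_EXPR, &ops, loop) pushes a_1, b_2 and c_3 as
   the leaf operands, assuming _7 has a single use.  */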
4225 
4226 static bool
4227 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4228 	 class loop *loop)
4229 {
4230   gimple *stmt = SSA_NAME_DEF_STMT (var);
4231   tree rhs[2];
4232   int i;
4233 
4234   if (!is_reassociable_op (stmt, code, loop))
4235     return false;
4236 
4237   rhs[0] = gimple_assign_rhs1 (stmt);
4238   rhs[1] = gimple_assign_rhs2 (stmt);
4239   gimple_set_visited (stmt, true);
4240   for (i = 0; i < 2; i++)
4241     if (TREE_CODE (rhs[i]) == SSA_NAME
4242 	&& !get_ops (rhs[i], code, ops, loop)
4243 	&& has_single_use (rhs[i]))
4244       {
4245 	operand_entry *oe = operand_entry_pool.allocate ();
4246 
4247 	oe->op = rhs[i];
4248 	oe->rank = code;
4249 	oe->id = 0;
4250 	oe->count = 1;
4251 	oe->stmt_to_insert = NULL;
4252 	ops->safe_push (oe);
4253       }
4254   return true;
4255 }
4256 
4257 /* Find the ops that were added by get_ops starting from VAR, see if
4258    they were changed during update_range_test and if yes, create new
4259    stmts.  */
4260 
4261 static tree
4262 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4263 	    unsigned int *pidx, class loop *loop)
4264 {
4265   gimple *stmt = SSA_NAME_DEF_STMT (var);
4266   tree rhs[4];
4267   int i;
4268 
4269   if (!is_reassociable_op (stmt, code, loop))
4270     return NULL;
4271 
4272   rhs[0] = gimple_assign_rhs1 (stmt);
4273   rhs[1] = gimple_assign_rhs2 (stmt);
4274   rhs[2] = rhs[0];
4275   rhs[3] = rhs[1];
4276   for (i = 0; i < 2; i++)
4277     if (TREE_CODE (rhs[i]) == SSA_NAME)
4278       {
4279 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4280 	if (rhs[2 + i] == NULL_TREE)
4281 	  {
4282 	    if (has_single_use (rhs[i]))
4283 	      rhs[2 + i] = ops[(*pidx)++]->op;
4284 	    else
4285 	      rhs[2 + i] = rhs[i];
4286 	  }
4287       }
4288   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4289       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4290     {
4291       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4292       var = make_ssa_name (TREE_TYPE (var));
4293       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4294 					rhs[2], rhs[3]);
4295       gimple_set_uid (g, gimple_uid (stmt));
4296       gimple_set_visited (g, true);
4297       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4298     }
4299   return var;
4300 }
4301 
4302 /* Structure to track the initial value passed to get_ops and
4303    the range in the ops vector for each basic block.  */
4304 
4305 struct inter_bb_range_test_entry
4306 {
4307   tree op;
4308   unsigned int first_idx, last_idx;
4309 };
4310 
4311 /* Inter-bb range test optimization.
4312 
4313    Returns TRUE if a gimple conditional is optimized to a true/false,
4314    otherwise return FALSE.
4315 
4316    This indicates to the caller that it should run a CFG cleanup pass
4317    once reassociation is completed.  */
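
/* E.g. (an illustrative sketch), for

     <bb 2>: if (x_1 == 0) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (x_1 == 2) goto <bb 4>; else goto <bb 5>;

   both conditions branch to the common <bb 4>, so the two tests can
   be handed to optimize_range_tests and merged into a single
   comparison, after which one GIMPLE_COND collapses to true/false
   and the subsequent CFG cleanup removes the dead block.  */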
4318 
4319 static bool
4320 maybe_optimize_range_tests (gimple *stmt)
4321 {
4322   basic_block first_bb = gimple_bb (stmt);
4323   basic_block last_bb = first_bb;
4324   basic_block other_bb = NULL;
4325   basic_block bb;
4326   edge_iterator ei;
4327   edge e;
4328   auto_vec<operand_entry *> ops;
4329   auto_vec<inter_bb_range_test_entry> bbinfo;
4330   bool any_changes = false;
4331   bool cfg_cleanup_needed = false;
4332 
4333   /* Consider only basic blocks that end with GIMPLE_COND or
4334      a cast statement satisfying final_range_test_p.  All
4335      but the last bb in the first_bb .. last_bb range
4336      should end with GIMPLE_COND.  */
4337   if (gimple_code (stmt) == GIMPLE_COND)
4338     {
4339       if (EDGE_COUNT (first_bb->succs) != 2)
4340 	return cfg_cleanup_needed;
4341     }
4342   else if (final_range_test_p (stmt))
4343     other_bb = single_succ (first_bb);
4344   else
4345     return cfg_cleanup_needed;
4346 
4347   if (stmt_could_throw_p (cfun, stmt))
4348     return cfg_cleanup_needed;
4349 
4350   /* As relative ordering of post-dominator sons isn't fixed,
4351      maybe_optimize_range_tests can be called first on any
4352      bb in the range we want to optimize.  So, start searching
4353      backwards, as long as first_bb can be set to a predecessor.  */
4354   while (single_pred_p (first_bb))
4355     {
4356       basic_block pred_bb = single_pred (first_bb);
4357       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4358 	break;
4359       if (!no_side_effect_bb (first_bb))
4360 	break;
4361       first_bb = pred_bb;
4362     }
4363   /* If first_bb is last_bb, other_bb hasn't been computed yet.
4364      Before starting forward search in last_bb successors, find
4365      out the other_bb.  */
4366   if (first_bb == last_bb)
4367     {
4368       other_bb = NULL;
4369       /* As a non-GIMPLE_COND last stmt always terminates the range,
4370 	 if the backward search didn't discover anything, just give up.  */
4371       if (gimple_code (stmt) != GIMPLE_COND)
4372 	return cfg_cleanup_needed;
4373       /* Look at both successors.  Either it ends with a GIMPLE_COND
4374 	 and satisfies suitable_cond_bb, or ends with a cast and
4375 	 other_bb is that cast's successor.  */
4376       FOR_EACH_EDGE (e, ei, first_bb->succs)
4377 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4378 	    || e->dest == first_bb)
4379 	  return cfg_cleanup_needed;
4380 	else if (single_pred_p (e->dest))
4381 	  {
4382 	    stmt = last_stmt (e->dest);
4383 	    if (stmt
4384 		&& gimple_code (stmt) == GIMPLE_COND
4385 		&& EDGE_COUNT (e->dest->succs) == 2)
4386 	      {
4387 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4388 		  break;
4389 		else
4390 		  other_bb = NULL;
4391 	      }
4392 	    else if (stmt
4393 		     && final_range_test_p (stmt)
4394 		     && find_edge (first_bb, single_succ (e->dest)))
4395 	      {
4396 		other_bb = single_succ (e->dest);
4397 		if (other_bb == first_bb)
4398 		  other_bb = NULL;
4399 	      }
4400 	  }
4401       if (other_bb == NULL)
4402 	return cfg_cleanup_needed;
4403     }
4404   /* Now do the forward search, moving last_bb to successor bbs
4405      that aren't other_bb.  */
4406   while (EDGE_COUNT (last_bb->succs) == 2)
4407     {
4408       FOR_EACH_EDGE (e, ei, last_bb->succs)
4409 	if (e->dest != other_bb)
4410 	  break;
4411       if (e == NULL)
4412 	break;
4413       if (!single_pred_p (e->dest))
4414 	break;
4415       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4416 	break;
4417       if (!no_side_effect_bb (e->dest))
4418 	break;
4419       last_bb = e->dest;
4420     }
4421   if (first_bb == last_bb)
4422     return cfg_cleanup_needed;
4423   /* Here basic blocks first_bb through last_bb's predecessor
4424      end with GIMPLE_COND, all of them have one of the edges to
4425      other_bb and another to another block in the range,
4426      all blocks except first_bb don't have side-effects and
4427      last_bb ends with either GIMPLE_COND, or cast satisfying
4428      final_range_test_p.  */
4429   for (bb = last_bb; ; bb = single_pred (bb))
4430     {
4431       enum tree_code code;
4432       tree lhs, rhs;
4433       inter_bb_range_test_entry bb_ent;
4434 
4435       bb_ent.op = NULL_TREE;
4436       bb_ent.first_idx = ops.length ();
4437       bb_ent.last_idx = bb_ent.first_idx;
4438       e = find_edge (bb, other_bb);
4439       stmt = last_stmt (bb);
4440       gimple_set_visited (stmt, true);
4441       if (gimple_code (stmt) != GIMPLE_COND)
4442 	{
4443 	  use_operand_p use_p;
4444 	  gimple *phi;
4445 	  edge e2;
4446 	  unsigned int d;
4447 
4448 	  lhs = gimple_assign_lhs (stmt);
4449 	  rhs = gimple_assign_rhs1 (stmt);
4450 	  gcc_assert (bb == last_bb);
4451 
4452 	  /* stmt is
4453 	     _123 = (int) _234;
4454 	     OR
4455 	     _234 = a_2(D) == 2;
4456 
4457 	     followed by:
4458 	     <bb M>:
4459 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4460 
4461 	     or 0 instead of 1.  If it is 0, the _234
4462 	     range test is anded together with all the
4463 	     other range tests; if it is 1, it is ored with
4464 	     them.  */
4465 	  single_imm_use (lhs, &use_p, &phi);
4466 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4467 	  e2 = find_edge (first_bb, other_bb);
4468 	  d = e2->dest_idx;
4469 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4470 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4471 	    code = BIT_AND_EXPR;
4472 	  else
4473 	    {
4474 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4475 	      code = BIT_IOR_EXPR;
4476 	    }
4477 
4478 	  /* If _234 SSA_NAME_DEF_STMT is
4479 	     _234 = _567 | _789;
4480 	     (or &, corresponding to 1/0 in the phi arguments),
4481 	     push into ops the individual range test arguments
4482 	     of the bitwise or resp. and, recursively.  */
4483 	  if (TREE_CODE (rhs) == SSA_NAME
4484 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4485 		  != tcc_comparison)
4486 	      && !get_ops (rhs, code, &ops,
4487 			loop_containing_stmt (stmt))
4488 	      && has_single_use (rhs))
4489 	    {
4490 	      /* Otherwise, push the _234 range test itself.  */
4491 	      operand_entry *oe = operand_entry_pool.allocate ();
4492 
4493 	      oe->op = rhs;
4494 	      oe->rank = code;
4495 	      oe->id = 0;
4496 	      oe->count = 1;
4497 	      oe->stmt_to_insert = NULL;
4498 	      ops.safe_push (oe);
4499 	      bb_ent.last_idx++;
4500 	      bb_ent.op = rhs;
4501 	    }
4502 	  else if (is_gimple_assign (stmt)
4503 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4504 		       == tcc_comparison)
4505 		   && !get_ops (lhs, code, &ops,
4506 				loop_containing_stmt (stmt))
4507 		   && has_single_use (lhs))
4508 	    {
4509 	      operand_entry *oe = operand_entry_pool.allocate ();
4510 	      oe->op = lhs;
4511 	      oe->rank = code;
4512 	      oe->id = 0;
4513 	      oe->count = 1;
4514 	      ops.safe_push (oe);
4515 	      bb_ent.last_idx++;
4516 	      bb_ent.op = lhs;
4517 	    }
4518 	  else
4519 	    {
4520 	      bb_ent.last_idx = ops.length ();
4521 	      bb_ent.op = rhs;
4522 	    }
4523 	  bbinfo.safe_push (bb_ent);
4524 	  continue;
4525 	}
4526       /* Otherwise stmt is GIMPLE_COND.  */
4527       code = gimple_cond_code (stmt);
4528       lhs = gimple_cond_lhs (stmt);
4529       rhs = gimple_cond_rhs (stmt);
4530       if (TREE_CODE (lhs) == SSA_NAME
4531 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4532 	  && ((code != EQ_EXPR && code != NE_EXPR)
4533 	      || rhs != boolean_false_node
4534 		 /* Either push into ops the individual bitwise
4535 		    or resp. and operands, depending on which
4536 		    edge is other_bb.  */
4537 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4538 				 ^ (code == EQ_EXPR))
4539 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4540 			   loop_containing_stmt (stmt))))
4541 	{
4542 	  /* Or push the GIMPLE_COND stmt itself.  */
4543 	  operand_entry *oe = operand_entry_pool.allocate ();
4544 
4545 	  oe->op = NULL;
4546 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4547 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4548 	  /* oe->op = NULL signals that there is no SSA_NAME
4549 	     for the range test; oe->id instead is the number of
4550 	     the basic block at whose end the GIMPLE_COND
4551 	     sits.  */
4552 	  oe->id = bb->index;
4553 	  oe->count = 1;
4554 	  oe->stmt_to_insert = NULL;
4555 	  ops.safe_push (oe);
4556 	  bb_ent.op = NULL;
4557 	  bb_ent.last_idx++;
4558 	}
4559       else if (ops.length () > bb_ent.first_idx)
4560 	{
4561 	  bb_ent.op = lhs;
4562 	  bb_ent.last_idx = ops.length ();
4563 	}
4564       bbinfo.safe_push (bb_ent);
4565       if (bb == first_bb)
4566 	break;
4567     }
4568   if (ops.length () > 1)
4569     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4570   if (any_changes)
4571     {
4572       unsigned int idx, max_idx = 0;
4573       /* update_ops relies on the has_single_use predicates returning the
4574 	 same values as they did during get_ops earlier.  Additionally it
4575 	 never removes statements, only adds new ones, and it should walk
4576 	 from the single imm use and check the predicate already before
4577 	 making those changes.
4578 	 On the other hand, the direct handling of GIMPLE_COND can turn
4579 	 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
4580 	 it needs to be done in a separate loop afterwards.  */
4581       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4582 	{
4583 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4584 	      && bbinfo[idx].op != NULL_TREE)
4585 	    {
4586 	      tree new_op;
4587 
4588 	      max_idx = idx;
4589 	      stmt = last_stmt (bb);
4590 	      new_op = update_ops (bbinfo[idx].op,
4591 				   (enum tree_code)
4592 				   ops[bbinfo[idx].first_idx]->rank,
4593 				   ops, &bbinfo[idx].first_idx,
4594 				   loop_containing_stmt (stmt));
4595 	      if (new_op == NULL_TREE)
4596 		{
4597 		  gcc_assert (bb == last_bb);
4598 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4599 		}
4600 	      if (bbinfo[idx].op != new_op)
4601 		{
4602 		  imm_use_iterator iter;
4603 		  use_operand_p use_p;
4604 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4605 
4606 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4607 		    if (is_gimple_debug (use_stmt))
4608 		      continue;
4609 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4610 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4611 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4612 			SET_USE (use_p, new_op);
4613 		    else if ((is_gimple_assign (use_stmt)
4614 			      && (TREE_CODE_CLASS
4615 				  (gimple_assign_rhs_code (use_stmt))
4616 				  == tcc_comparison)))
4617 		      cast_or_tcc_cmp_stmt = use_stmt;
4618 		    else if (gimple_assign_cast_p (use_stmt))
4619 		      cast_or_tcc_cmp_stmt = use_stmt;
4620 		    else
4621 		      gcc_unreachable ();
4622 
4623 		  if (cast_or_tcc_cmp_stmt)
4624 		    {
4625 		      gcc_assert (bb == last_bb);
4626 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4627 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4628 		      enum tree_code rhs_code
4629 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4630 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4631 			: CONVERT_EXPR;
4632 		      gassign *g;
4633 		      if (is_gimple_min_invariant (new_op))
4634 			{
4635 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4636 			  g = gimple_build_assign (new_lhs, new_op);
4637 			}
4638 		      else
4639 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4640 		      gimple_stmt_iterator gsi
4641 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4642 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4643 		      gimple_set_visited (g, true);
4644 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4645 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4646 			if (is_gimple_debug (use_stmt))
4647 			  continue;
4648 			else if (gimple_code (use_stmt) == GIMPLE_COND
4649 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4650 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4651 			    SET_USE (use_p, new_lhs);
4652 			else
4653 			  gcc_unreachable ();
4654 		    }
4655 		}
4656 	    }
4657 	  if (bb == first_bb)
4658 	    break;
4659 	}
4660       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4661 	{
4662 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4663 	      && bbinfo[idx].op == NULL_TREE
4664 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4665 	    {
4666 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4667 
4668 	      if (idx > max_idx)
4669 		max_idx = idx;
4670 
4671 	      /* If we collapse the conditional to a true/false
4672 		 condition, then bubble that knowledge up to our caller.  */
4673 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4674 		{
4675 		  gimple_cond_make_false (cond_stmt);
4676 		  cfg_cleanup_needed = true;
4677 		}
4678 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4679 		{
4680 		  gimple_cond_make_true (cond_stmt);
4681 		  cfg_cleanup_needed = true;
4682 		}
4683 	      else
4684 		{
4685 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4686 		  gimple_cond_set_lhs (cond_stmt,
4687 				       ops[bbinfo[idx].first_idx]->op);
4688 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4689 		}
4690 	      update_stmt (cond_stmt);
4691 	    }
4692 	  if (bb == first_bb)
4693 	    break;
4694 	}
4695 
4696       /* The above changes could result in basic blocks after the first
4697 	 modified one, up to and including last_bb, being executed even if
4698 	 they would not be in the original program.  If the value ranges of
4699 	 assignment lhs' in those bbs were dependent on the conditions
4700 	 guarding those basic blocks, which can now change, the VRs might
4701 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4702 	 are only used within the same bb, it should not be a big deal if
4703 	 we just reset all the VRs in those bbs.  See PR68671.  */
4704       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4705 	reset_flow_sensitive_info_in_bb (bb);
4706     }
4707   return cfg_cleanup_needed;
4708 }
4709 
4710 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4711    of STMT in its operands.  This is also known as a "destructive
4712    update" operation.  */
4713 
4714 static bool
4715 is_phi_for_stmt (gimple *stmt, tree operand)
4716 {
4717   gimple *def_stmt;
4718   gphi *def_phi;
4719   tree lhs;
4720   use_operand_p arg_p;
4721   ssa_op_iter i;
4722 
4723   if (TREE_CODE (operand) != SSA_NAME)
4724     return false;
4725 
4726   lhs = gimple_assign_lhs (stmt);
4727 
4728   def_stmt = SSA_NAME_DEF_STMT (operand);
4729   def_phi = dyn_cast <gphi *> (def_stmt);
4730   if (!def_phi)
4731     return false;
4732 
4733   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4734     if (lhs == USE_FROM_PTR (arg_p))
4735       return true;
4736   return false;
4737 }
4738 
4739 /* Remove def stmt of VAR if VAR has zero uses and recurse
4740    on rhs1 operand if so.  */
4741 
4742 static void
4743 remove_visited_stmt_chain (tree var)
4744 {
4745   gimple *stmt;
4746   gimple_stmt_iterator gsi;
4747 
4748   while (1)
4749     {
4750       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4751 	return;
4752       stmt = SSA_NAME_DEF_STMT (var);
4753       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4754 	{
4755 	  var = gimple_assign_rhs1 (stmt);
4756 	  gsi = gsi_for_stmt (stmt);
4757 	  reassoc_remove_stmt (&gsi);
4758 	  release_defs (stmt);
4759 	}
4760       else
4761 	return;
4762     }
4763 }
4764 
4765 /* This function checks three consecutive operands in
4766    the passed operands vector OPS starting from OPINDEX and
4767    swaps two operands if it is profitable for the binary operation
4768    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4769 
4770    We pair ops with the same rank if possible.
4771 
4772    The alternative we try is to see if STMT is a destructive
4773    update style statement, which is like:
4774    b = phi (a, ...)
4775    a = c + b;
4776    In that case, we want to use the destructive update form to
4777    expose the possible vectorizer sum reduction opportunity.
4778    In that case, the third operand will be the phi node. This
4779    check is not performed if STMT is null.
4780 
4781    We could, of course, try to be better as noted above, and do a
4782    lot of work to try to find these opportunities in >3 operand
4783    cases, but it is unlikely to be worth it.  */
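
/* E.g. (illustrative), with ranks {oe1: 4, oe2: 4, oe3: 7} the first
   and third entries are swapped so that the two rank-4 operands sit
   in the OPINDEX + 1 and OPINDEX + 2 slots and get paired by the
   binary operation built for them.  */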
4784 
4785 static void
4786 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4787 			  unsigned int opindex, gimple *stmt)
4788 {
4789   operand_entry *oe1, *oe2, *oe3;
4790 
4791   oe1 = ops[opindex];
4792   oe2 = ops[opindex + 1];
4793   oe3 = ops[opindex + 2];
4794 
4795   if ((oe1->rank == oe2->rank
4796        && oe2->rank != oe3->rank)
4797       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4798 	  && !is_phi_for_stmt (stmt, oe1->op)
4799 	  && !is_phi_for_stmt (stmt, oe2->op)))
4800     std::swap (*oe1, *oe3);
4801   else if ((oe1->rank == oe3->rank
4802 	    && oe2->rank != oe3->rank)
4803 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
4804 	       && !is_phi_for_stmt (stmt, oe1->op)
4805 	       && !is_phi_for_stmt (stmt, oe3->op)))
4806     std::swap (*oe1, *oe2);
4807 }
4808 
4809 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4810    two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
4811    whether RHS1 op RHS2 can be inserted before or needs to be inserted
4812    after the returned stmt.  */
4813 
4814 static inline gimple *
4815 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
4816 {
4817   insert_before = true;
4818   if (TREE_CODE (rhs1) == SSA_NAME
4819       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4820     {
4821       stmt = SSA_NAME_DEF_STMT (rhs1);
4822       insert_before = false;
4823     }
4824   if (TREE_CODE (rhs2) == SSA_NAME
4825       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4826     {
4827       stmt = SSA_NAME_DEF_STMT (rhs2);
4828       insert_before = false;
4829     }
4830   return stmt;
4831 }
4832 
4833 /* If the stmt that defines operand has to be inserted, insert it
4834    before the use.  */
4835 static void
4836 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4837 {
4838   gcc_assert (is_gimple_assign (stmt_to_insert));
4839   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4840   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4841   bool insert_before;
4842   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
4843   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4844   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4845 
4846   /* If the insert point is not stmt, then insert_point would be
4847      the point where operand rhs1 or rhs2 is defined. In this case,
4848      stmt_to_insert has to be inserted afterwards. This would
4849      only happen when the stmt insertion point is flexible. */
4850   if (insert_before)
4851     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4852   else
4853     insert_stmt_after (stmt_to_insert, insert_point);
4854 }
4855 
4856 
4857 /* Recursively rewrite our linearized statements so that the operators
4858    match those in OPS[OPINDEX], putting the computation in rank
4859    order.  Return new lhs.
4860    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4861    the current stmt and during recursive invocations.
4862    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4863    recursive invocations.  */
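
/* E.g. (an illustrative walk-through with made-up names), for
   OPS = {d_4, c_3, b_2, a_1} and the linearized chain

     _5 = a_1 + b_2;  _6 = _5 + c_3;  _7 = _6 + d_4;

   the recursion bottoms out at _5, rewriting it from ops[2] and
   ops[3], then on the way back checks that _6 uses c_3 (ops[1]) and
   _7 uses d_4 (ops[0]), creating fresh SSA names whenever an operand
   actually changed.  */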
4864 
4865 static tree
4866 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
4867 		   vec<operand_entry *> ops, bool changed, bool next_changed)
4868 {
4869   tree rhs1 = gimple_assign_rhs1 (stmt);
4870   tree rhs2 = gimple_assign_rhs2 (stmt);
4871   tree lhs = gimple_assign_lhs (stmt);
4872   operand_entry *oe;
4873 
4874   /* The final recursion case for this function is that you have
4875      exactly two operations left.
4876      If we had exactly one op in the entire list to start with, we
4877      would have never called this function, and the tail recursion
4878      rewrites them one at a time.  */
4879   if (opindex + 2 == ops.length ())
4880     {
4881       operand_entry *oe1, *oe2;
4882 
4883       oe1 = ops[opindex];
4884       oe2 = ops[opindex + 1];
4885 
4886       if (rhs1 != oe1->op || rhs2 != oe2->op)
4887 	{
4888 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4889 	  unsigned int uid = gimple_uid (stmt);
4890 
4891 	  if (dump_file && (dump_flags & TDF_DETAILS))
4892 	    {
4893 	      fprintf (dump_file, "Transforming ");
4894 	      print_gimple_stmt (dump_file, stmt, 0);
4895 	    }
4896 
4897 	  /* If the stmt that defines operand has to be inserted, insert it
4898 	     before the use.  */
4899 	  if (oe1->stmt_to_insert)
4900 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4901 	  if (oe2->stmt_to_insert)
4902 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4903 	  /* Even when changed is false, reassociation could have e.g. removed
4904 	     some redundant operations, so unless we are just swapping the
4905 	     arguments or unless there is no change at all (then we just
4906 	     return lhs), force creation of a new SSA_NAME.  */
4907 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4908 	    {
4909 	      bool insert_before;
4910 	      gimple *insert_point
4911 		= find_insert_point (stmt, oe1->op, oe2->op, insert_before);
4912 	      lhs = make_ssa_name (TREE_TYPE (lhs));
4913 	      stmt
4914 		= gimple_build_assign (lhs, rhs_code,
4915 				       oe1->op, oe2->op);
4916 	      gimple_set_uid (stmt, uid);
4917 	      gimple_set_visited (stmt, true);
4918 	      if (insert_before)
4919 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4920 	      else
4921 		insert_stmt_after (stmt, insert_point);
4922 	    }
4923 	  else
4924 	    {
4925 	      bool insert_before;
4926 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
4927 						      insert_before)
4928 				   == stmt);
4929 	      gimple_assign_set_rhs1 (stmt, oe1->op);
4930 	      gimple_assign_set_rhs2 (stmt, oe2->op);
4931 	      update_stmt (stmt);
4932 	    }
4933 
4934 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
4935 	    remove_visited_stmt_chain (rhs1);
4936 
4937 	  if (dump_file && (dump_flags & TDF_DETAILS))
4938 	    {
4939 	      fprintf (dump_file, " into ");
4940 	      print_gimple_stmt (dump_file, stmt, 0);
4941 	    }
4942 	}
4943       return lhs;
4944     }
4945 
4946   /* If we hit here, we should have 3 or more ops left.  */
4947   gcc_assert (opindex + 2 < ops.length ());
4948 
4949   /* Rewrite the next operator.  */
4950   oe = ops[opindex];
4951 
4952   /* If the stmt that defines operand has to be inserted, insert it
4953      before the use.  */
4954   if (oe->stmt_to_insert)
4955     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4956 
4957   /* Recurse on the LHS of the binary operator, which is guaranteed to
4958      be the non-leaf side.  */
4959   tree new_rhs1
4960     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
4961 			 changed || oe->op != rhs2 || next_changed,
4962 			 false);
4963 
4964   if (oe->op != rhs2 || new_rhs1 != rhs1)
4965     {
4966       if (dump_file && (dump_flags & TDF_DETAILS))
4967 	{
4968 	  fprintf (dump_file, "Transforming ");
4969 	  print_gimple_stmt (dump_file, stmt, 0);
4970 	}
4971 
4972       /* If changed is false, this is either opindex == 0
4973 	 or all outer rhs2's were equal to corresponding oe->op,
4974 	 and powi_result is NULL.
4975 	 That means lhs is equivalent before and after reassociation.
4976 	 Otherwise ensure the old lhs SSA_NAME is not reused and
4977 	 create a new stmt as well, so that any debug stmts will be
4978 	 properly adjusted.  */
4979       if (changed)
4980 	{
4981 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4982 	  unsigned int uid = gimple_uid (stmt);
4983 	  bool insert_before;
4984 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
4985 						    insert_before);
4986 
4987 	  lhs = make_ssa_name (TREE_TYPE (lhs));
4988 	  stmt = gimple_build_assign (lhs, rhs_code,
4989 				      new_rhs1, oe->op);
4990 	  gimple_set_uid (stmt, uid);
4991 	  gimple_set_visited (stmt, true);
4992 	  if (insert_before)
4993 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4994 	  else
4995 	    insert_stmt_after (stmt, insert_point);
4996 	}
4997       else
4998 	{
4999 	  bool insert_before;
5000 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5001 						  insert_before)
5002 			       == stmt);
5003 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
5004 	  gimple_assign_set_rhs2 (stmt, oe->op);
5005 	  update_stmt (stmt);
5006 	}
5007 
5008       if (dump_file && (dump_flags & TDF_DETAILS))
5009 	{
5010 	  fprintf (dump_file, " into ");
5011 	  print_gimple_stmt (dump_file, stmt, 0);
5012 	}
5013     }
5014   return lhs;
5015 }
5016 
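/* Illustration of the rewrite above on a hypothetical chain: with
   OPS = {o0, o1, o2} and the linearized statements
     t1 = a + b;
     t2 = t1 + c;
   the recursion bottoms out at t1, which is rewritten to
   t1 = o1 + o2, and t2 then becomes t2 = t1 + o0.  Operands with
   higher OPS indexes thus land deepest in the chain, realizing the
   rank order.  */
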
5017 /* Find out how many cycles we need to compute a statement chain.
5018    OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
5019    maximum number of independent statements we may execute per cycle.  */
5020 
5021 static int
5022 get_required_cycles (int ops_num, int cpu_width)
5023 {
5024   int res;
5025   int elog;
5026   unsigned int rest;
5027 
5028   /* While we have more than 2 * cpu_width operands
5029      we may reduce the number of operands by cpu_width
5030      per cycle.  */
5031   res = ops_num / (2 * cpu_width);
5032 
5033   /* The remaining operand count may be halved each cycle
5034      until we have only one operand.  */
5035   rest = (unsigned)(ops_num - res * cpu_width);
5036   elog = exact_log2 (rest);
5037   if (elog >= 0)
5038     res += elog;
5039   else
5040     res += floor_log2 (rest) + 1;
5041 
5042   return res;
5043 }
5044 
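/* Worked example: get_required_cycles (10, 2) yields
   res = 10 / (2 * 2) = 2 and rest = 10 - 2 * 2 = 6; since 6 is not a
   power of two, res += floor_log2 (6) + 1 = 3, for 5 cycles total.  */
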
5045 /* Returns an optimal number of registers to use for computation of
5046    the given statements.  */
5047 
5048 static int
5049 get_reassociation_width (int ops_num, enum tree_code opc,
5050 			 machine_mode mode)
5051 {
5052   int param_width = param_tree_reassoc_width;
5053   int width;
5054   int width_min;
5055   int cycles_best;
5056 
5057   if (param_width > 0)
5058     width = param_width;
5059   else
5060     width = targetm.sched.reassociation_width (opc, mode);
5061 
5062   if (width == 1)
5063     return width;
5064 
5065   /* Get the minimal time required for sequence computation.  */
5066   cycles_best = get_required_cycles (ops_num, width);
5067 
5068   /* Check if we may use a smaller width and still compute the sequence
5069      in the same time.  That allows us to reduce register usage.
5070      get_required_cycles never decreases as the width shrinks, so we
5071      can perform a binary search for the minimal width that still
5072      results in the optimal cycle count.  */
5073   width_min = 1;
5074   while (width > width_min)
5075     {
5076       int width_mid = (width + width_min) / 2;
5077 
5078       if (get_required_cycles (ops_num, width_mid) == cycles_best)
5079 	width = width_mid;
5080       else if (width_min < width_mid)
5081 	width_min = width_mid;
5082       else
5083 	break;
5084     }
5085 
5086   return width;
5087 }
5088 
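/* Worked example: for 4 operands and a target-reported width of 4,
   get_required_cycles (4, 4) and get_required_cycles (4, 2) are both
   2, so the binary search settles on a width of 2: the same latency
   with fewer partial sums live at once.  */
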
5089 /* Recursively rewrite our linearized statements so that the operators
5090    match those in OPS[OPINDEX], putting the computation in rank
5091    order and trying to allow operations to be executed in
5092    parallel.  */
5093 
5094 static void
5095 rewrite_expr_tree_parallel (gassign *stmt, int width,
5096 			    vec<operand_entry *> ops)
5097 {
5098   enum tree_code opcode = gimple_assign_rhs_code (stmt);
5099   int op_num = ops.length ();
5100   gcc_assert (op_num > 0);
5101   int stmt_num = op_num - 1;
5102   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5103   int op_index = op_num - 1;
5104   int stmt_index = 0;
5105   int ready_stmts_end = 0;
5106   int i = 0;
5107   gimple *stmt1 = NULL, *stmt2 = NULL;
5108   tree last_rhs1 = gimple_assign_rhs1 (stmt);
5109 
5110   /* We start expression rewriting from the top statements.
5111      So, in this loop we create a full list of statements
5112      we will work with.  */
5113   stmts[stmt_num - 1] = stmt;
5114   for (i = stmt_num - 2; i >= 0; i--)
5115     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5116 
5117   for (i = 0; i < stmt_num; i++)
5118     {
5119       tree op1, op2;
5120 
5121       /* Determine whether we should use results of
5122 	 already handled statements or not.  */
5123       if (ready_stmts_end == 0
5124 	  && (i - stmt_index >= width || op_index < 1))
5125 	ready_stmts_end = i;
5126 
5127       /* Now we choose operands for the next statement.  A nonzero
5128 	 value in ready_stmts_end means that we should use
5129 	 the results of already generated statements as new operands.  */
5130       if (ready_stmts_end > 0)
5131 	{
5132 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
5133 	  if (ready_stmts_end > stmt_index)
5134 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
5135 	  else if (op_index >= 0)
5136 	    {
5137 	      operand_entry *oe = ops[op_index--];
5138 	      stmt2 = oe->stmt_to_insert;
5139 	      op2 = oe->op;
5140 	    }
5141 	  else
5142 	    {
5143 	      gcc_assert (stmt_index < i);
5144 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
5145 	    }
5146 
5147 	  if (stmt_index >= ready_stmts_end)
5148 	    ready_stmts_end = 0;
5149 	}
5150       else
5151 	{
5152 	  if (op_index > 1)
5153 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5154 	  operand_entry *oe2 = ops[op_index--];
5155 	  operand_entry *oe1 = ops[op_index--];
5156 	  op2 = oe2->op;
5157 	  stmt2 = oe2->stmt_to_insert;
5158 	  op1 = oe1->op;
5159 	  stmt1 = oe1->stmt_to_insert;
5160 	}
5161 
5162       /* If we have consumed all operands and previously computed
5163 	 results, put the operands into the last statement.  That
5164 	 will also break the loop.  */
5165       if (op_index < 0 && stmt_index == i)
5166 	i = stmt_num - 1;
5167 
5168       if (dump_file && (dump_flags & TDF_DETAILS))
5169 	{
5170 	  fprintf (dump_file, "Transforming ");
5171 	  print_gimple_stmt (dump_file, stmts[i], 0);
5172 	}
5173 
5174       /* If the stmt that defines operand has to be inserted, insert it
5175 	 before the use.  */
5176       if (stmt1)
5177 	insert_stmt_before_use (stmts[i], stmt1);
5178       if (stmt2)
5179 	insert_stmt_before_use (stmts[i], stmt2);
5180       stmt1 = stmt2 = NULL;
5181 
5182       /* We keep the original statement only for the last one.  All
5183 	 others are recreated.  */
5184       if (i == stmt_num - 1)
5185 	{
5186 	  gimple_assign_set_rhs1 (stmts[i], op1);
5187 	  gimple_assign_set_rhs2 (stmts[i], op2);
5188 	  update_stmt (stmts[i]);
5189 	}
5190       else
5191 	{
5192 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5193 	  gimple_set_visited (stmts[i], true);
5194 	}
5195       if (dump_file && (dump_flags & TDF_DETAILS))
5196 	{
5197 	  fprintf (dump_file, " into ");
5198 	  print_gimple_stmt (dump_file, stmts[i], 0);
5199 	}
5200     }
5201 
5202   remove_visited_stmt_chain (last_rhs1);
5203 }
5204 
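/* Illustration: with WIDTH == 2 and OPS = {a, b, c, d} (ignoring the
   rank-based operand swap), the loop above emits
     t1 = c + d;
     t2 = a + b;
   and rewrites the original statement into t1 + t2.  The two new
   additions are independent and may execute in the same cycle.  */
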
5205 /* Transform STMT, which is really (A + B) + (C + D), into the left
5206    linear form, ((A+B)+C)+D.
5207    Recurse on D if necessary.  */
5208 
5209 static void
5210 linearize_expr (gimple *stmt)
5211 {
5212   gimple_stmt_iterator gsi;
5213   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5214   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5215   gimple *oldbinrhs = binrhs;
5216   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5217   gimple *newbinrhs = NULL;
5218   class loop *loop = loop_containing_stmt (stmt);
5219   tree lhs = gimple_assign_lhs (stmt);
5220 
5221   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5222 	      && is_reassociable_op (binrhs, rhscode, loop));
5223 
5224   gsi = gsi_for_stmt (stmt);
5225 
5226   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5227   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5228 				gimple_assign_rhs_code (binrhs),
5229 				gimple_assign_lhs (binlhs),
5230 				gimple_assign_rhs2 (binrhs));
5231   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5232   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5233   gimple_set_uid (binrhs, gimple_uid (stmt));
5234 
5235   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5236     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5237 
5238   if (dump_file && (dump_flags & TDF_DETAILS))
5239     {
5240       fprintf (dump_file, "Linearized: ");
5241       print_gimple_stmt (dump_file, stmt, 0);
5242     }
5243 
5244   reassociate_stats.linearized++;
5245   update_stmt (stmt);
5246 
5247   gsi = gsi_for_stmt (oldbinrhs);
5248   reassoc_remove_stmt (&gsi);
5249   release_defs (oldbinrhs);
5250 
5251   gimple_set_visited (stmt, true);
5252   gimple_set_visited (binlhs, true);
5253   gimple_set_visited (binrhs, true);
5254 
5255   /* Tail recurse on the new rhs if it still needs reassociation.  */
5256   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5257     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5258 	   want to change the algorithm while converting to tuples.  */
5259     linearize_expr (stmt);
5260 }
5261 
5262 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5263    it.  Otherwise, return NULL.  */
5264 
5265 static gimple *
5266 get_single_immediate_use (tree lhs)
5267 {
5268   use_operand_p immuse;
5269   gimple *immusestmt;
5270 
5271   if (TREE_CODE (lhs) == SSA_NAME
5272       && single_imm_use (lhs, &immuse, &immusestmt)
5273       && is_gimple_assign (immusestmt))
5274     return immusestmt;
5275 
5276   return NULL;
5277 }
5278 
5279 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5280    representing the negated value.  Insertions of any necessary
5281    instructions go before GSI.
5282    This function is recursive in that, if you hand it "a_5" as the
5283    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5284    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
5285 
5286 static tree
5287 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5288 {
5289   gimple *negatedefstmt = NULL;
5290   tree resultofnegate;
5291   gimple_stmt_iterator gsi;
5292   unsigned int uid;
5293 
5294   /* If we are trying to negate a name, defined by an add, negate the
5295      add operands instead.  */
5296   if (TREE_CODE (tonegate) == SSA_NAME)
5297     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5298   if (TREE_CODE (tonegate) == SSA_NAME
5299       && is_gimple_assign (negatedefstmt)
5300       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5301       && has_single_use (gimple_assign_lhs (negatedefstmt))
5302       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5303     {
5304       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5305       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5306       tree lhs = gimple_assign_lhs (negatedefstmt);
5307       gimple *g;
5308 
5309       gsi = gsi_for_stmt (negatedefstmt);
5310       rhs1 = negate_value (rhs1, &gsi);
5311 
5312       gsi = gsi_for_stmt (negatedefstmt);
5313       rhs2 = negate_value (rhs2, &gsi);
5314 
5315       gsi = gsi_for_stmt (negatedefstmt);
5316       lhs = make_ssa_name (TREE_TYPE (lhs));
5317       gimple_set_visited (negatedefstmt, true);
5318       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5319       gimple_set_uid (g, gimple_uid (negatedefstmt));
5320       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5321       return lhs;
5322     }
5323 
5324   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5325   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5326 					     NULL_TREE, true, GSI_SAME_STMT);
5327   gsi = *gsip;
5328   uid = gimple_uid (gsi_stmt (gsi));
5329   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5330     {
5331       gimple *stmt = gsi_stmt (gsi);
5332       if (gimple_uid (stmt) != 0)
5333 	break;
5334       gimple_set_uid (stmt, uid);
5335     }
5336   return resultofnegate;
5337 }
5338 
5339 /* Return true if we should break up the subtract in STMT into an add
5340    with negate.  This is true when the subtract operands are really
5341    adds, or the subtract itself is used in an add expression.  In
5342    either case, breaking up the subtract into an add with negate
5343    exposes the adds to reassociation.  */
5344 
5345 static bool
5346 should_break_up_subtract (gimple *stmt)
5347 {
5348   tree lhs = gimple_assign_lhs (stmt);
5349   tree binlhs = gimple_assign_rhs1 (stmt);
5350   tree binrhs = gimple_assign_rhs2 (stmt);
5351   gimple *immusestmt;
5352   class loop *loop = loop_containing_stmt (stmt);
5353 
5354   if (TREE_CODE (binlhs) == SSA_NAME
5355       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5356     return true;
5357 
5358   if (TREE_CODE (binrhs) == SSA_NAME
5359       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5360     return true;
5361 
5362   if (TREE_CODE (lhs) == SSA_NAME
5363       && (immusestmt = get_single_immediate_use (lhs))
5364       && is_gimple_assign (immusestmt)
5365       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5366 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5367 	      && gimple_assign_rhs1 (immusestmt) == lhs)
5368 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5369     return true;
5370   return false;
5371 }
5372 
5373 /* Transform STMT from A - B into A + -B.  */
5374 
5375 static void
5376 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5377 {
5378   tree rhs1 = gimple_assign_rhs1 (stmt);
5379   tree rhs2 = gimple_assign_rhs2 (stmt);
5380 
5381   if (dump_file && (dump_flags & TDF_DETAILS))
5382     {
5383       fprintf (dump_file, "Breaking up subtract ");
5384       print_gimple_stmt (dump_file, stmt, 0);
5385     }
5386 
5387   rhs2 = negate_value (rhs2, gsip);
5388   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5389   update_stmt (stmt);
5390 }
5391 
5392 /* Determine whether STMT is a builtin call that raises an SSA name
5393    to an integer power and has only one use.  If so, and this is early
5394    reassociation and unsafe math optimizations are permitted, place
5395    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5396    If any of these conditions does not hold, return FALSE.  */
5397 
5398 static bool
5399 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5400 {
5401   tree arg1;
5402   REAL_VALUE_TYPE c, cint;
5403 
5404   switch (gimple_call_combined_fn (stmt))
5405     {
5406     CASE_CFN_POW:
5407       if (flag_errno_math)
5408 	return false;
5409 
5410       *base = gimple_call_arg (stmt, 0);
5411       arg1 = gimple_call_arg (stmt, 1);
5412 
5413       if (TREE_CODE (arg1) != REAL_CST)
5414 	return false;
5415 
5416       c = TREE_REAL_CST (arg1);
5417 
5418       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5419 	return false;
5420 
5421       *exponent = real_to_integer (&c);
5422       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5423       if (!real_identical (&c, &cint))
5424 	return false;
5425 
5426       break;
5427 
5428     CASE_CFN_POWI:
5429       *base = gimple_call_arg (stmt, 0);
5430       arg1 = gimple_call_arg (stmt, 1);
5431 
5432       if (!tree_fits_shwi_p (arg1))
5433 	return false;
5434 
5435       *exponent = tree_to_shwi (arg1);
5436       break;
5437 
5438     default:
5439       return false;
5440     }
5441 
5442   /* Expanding negative exponents is generally unproductive, so we don't
5443      complicate matters with those.  Exponents of zero and one should
5444      have been handled by expression folding.  */
5445   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5446     return false;
5447 
5448   return true;
5449 }
5450 
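/* For example, pow (x_1, 3.0) yields *BASE = x_1 and *EXPONENT = 3,
   while pow (x_1, 0.5) fails the integral-exponent check, and any pow
   call compiled with -fmath-errno is rejected outright.  */
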
5451 /* Try to derive and add operand entry for OP to *OPS.  Return false if
5452    unsuccessful.  */
5453 
5454 static bool
5455 try_special_add_to_ops (vec<operand_entry *> *ops,
5456 			enum tree_code code,
5457 			tree op, gimple* def_stmt)
5458 {
5459   tree base = NULL_TREE;
5460   HOST_WIDE_INT exponent = 0;
5461 
5462   if (TREE_CODE (op) != SSA_NAME
5463       || ! has_single_use (op))
5464     return false;
5465 
5466   if (code == MULT_EXPR
5467       && reassoc_insert_powi_p
5468       && flag_unsafe_math_optimizations
5469       && is_gimple_call (def_stmt)
5470       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5471     {
5472       add_repeat_to_ops_vec (ops, base, exponent);
5473       gimple_set_visited (def_stmt, true);
5474       return true;
5475     }
5476   else if (code == MULT_EXPR
5477 	   && is_gimple_assign (def_stmt)
5478 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5479 	   && !HONOR_SNANS (TREE_TYPE (op))
5480 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5481 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5482     {
5483       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5484       tree cst = build_minus_one_cst (TREE_TYPE (op));
5485       add_to_ops_vec (ops, rhs1);
5486       add_to_ops_vec (ops, cst);
5487       gimple_set_visited (def_stmt, true);
5488       return true;
5489     }
5490 
5491   return false;
5492 }
5493 
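/* For example, given t_3 = x_1 * y_2 where the single-use y_2 is
   defined as __builtin_powi (z_4, 3), the factor z_4 is recorded with
   repeat count 3 instead of adding y_2 itself.  Where y_2 = -w_5
   instead, the ops vector gains the entries {w_5, -1}, turning the
   negation into a reassociable multiplication by minus one.  */
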
5494 /* Recursively linearize a binary expression that is the RHS of STMT.
5495    Place the operands of the expression tree in the vector named OPS.  */
5496 
5497 static void
5498 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5499 		     bool is_associative, bool set_visited)
5500 {
5501   tree binlhs = gimple_assign_rhs1 (stmt);
5502   tree binrhs = gimple_assign_rhs2 (stmt);
5503   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5504   bool binlhsisreassoc = false;
5505   bool binrhsisreassoc = false;
5506   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5507   class loop *loop = loop_containing_stmt (stmt);
5508 
5509   if (set_visited)
5510     gimple_set_visited (stmt, true);
5511 
5512   if (TREE_CODE (binlhs) == SSA_NAME)
5513     {
5514       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5515       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5516 			 && !stmt_could_throw_p (cfun, binlhsdef));
5517     }
5518 
5519   if (TREE_CODE (binrhs) == SSA_NAME)
5520     {
5521       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5522       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5523 			 && !stmt_could_throw_p (cfun, binrhsdef));
5524     }
5525 
5526   /* If the LHS is not reassociable, but the RHS is, we need to swap
5527      them.  If neither is reassociable, there is nothing we can do, so
5528      just put them in the ops vector.  If the LHS is reassociable,
5529      linearize it.  If both are reassociable, then linearize the RHS
5530      and the LHS.  */
5531 
5532   if (!binlhsisreassoc)
5533     {
5534       /* If this is not an associative operation, like division, give up.  */
5535       if (!is_associative)
5536 	{
5537 	  add_to_ops_vec (ops, binrhs);
5538 	  return;
5539 	}
5540 
5541       if (!binrhsisreassoc)
5542 	{
5543 	  bool swap = false;
5544 	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5545 	    /* If we add ops for the rhs we expect to be able to recurse
5546 	       to it via the lhs during expression rewrite, so swap
5547 	       operands.  */
5548 	    swap = true;
5549 	  else
5550 	    add_to_ops_vec (ops, binrhs);
5551 
5552 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5553 	    add_to_ops_vec (ops, binlhs);
5554 
5555 	  if (!swap)
5556 	    return;
5557 	}
5558 
5559       if (dump_file && (dump_flags & TDF_DETAILS))
5560 	{
5561 	  fprintf (dump_file, "swapping operands of ");
5562 	  print_gimple_stmt (dump_file, stmt, 0);
5563 	}
5564 
5565       swap_ssa_operands (stmt,
5566 			 gimple_assign_rhs1_ptr (stmt),
5567 			 gimple_assign_rhs2_ptr (stmt));
5568       update_stmt (stmt);
5569 
5570       if (dump_file && (dump_flags & TDF_DETAILS))
5571 	{
5572 	  fprintf (dump_file, " is now ");
5573 	  print_gimple_stmt (dump_file, stmt, 0);
5574 	}
5575       if (!binrhsisreassoc)
5576 	return;
5577 
5578       /* We want to make it so the lhs is always the reassociative op,
5579 	 so swap.  */
5580       std::swap (binlhs, binrhs);
5581     }
5582   else if (binrhsisreassoc)
5583     {
5584       linearize_expr (stmt);
5585       binlhs = gimple_assign_rhs1 (stmt);
5586       binrhs = gimple_assign_rhs2 (stmt);
5587     }
5588 
5589   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5590 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5591 				      rhscode, loop));
5592   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5593 		       is_associative, set_visited);
5594 
5595   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5596     add_to_ops_vec (ops, binrhs);
5597 }
5598 
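/* Illustration: for w_4 = u_2 + v_3 where u_2 = a_0 + b_1 is
   reassociable and v_3 is a leaf, the recursion descends through u_2
   and OPS ends up holding a_0, b_1 and v_3 (their order is normalized
   later by rank sorting).  */
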
5599 /* Repropagate the negates back into subtracts, since no other pass
5600    currently does it.  */
5601 
5602 static void
5603 repropagate_negates (void)
5604 {
5605   unsigned int i = 0;
5606   tree negate;
5607 
5608   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5609     {
5610       gimple *user = get_single_immediate_use (negate);
5611       if (!user || !is_gimple_assign (user))
5612 	continue;
5613 
5614       tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5615       if (TREE_CODE (negateop) == SSA_NAME
5616 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
5617 	continue;
5618 
5619       /* The negate operand can be either operand of a PLUS_EXPR
5620 	 (it can be the LHS if the RHS is a constant for example).
5621 
5622 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5623 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
5624       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5625 	{
5626 	  /* If the negated operand appears on the LHS of the
5627 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5628 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5629 	  if (gimple_assign_rhs1 (user) == negate)
5630 	    {
5631 	      swap_ssa_operands (user,
5632 				 gimple_assign_rhs1_ptr (user),
5633 				 gimple_assign_rhs2_ptr (user));
5634 	    }
5635 
5636 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5637 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5638 	  if (gimple_assign_rhs2 (user) == negate)
5639 	    {
5640 	      tree rhs1 = gimple_assign_rhs1 (user);
5641 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5642 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
5643 					      negateop);
5644 	      update_stmt (user);
5645 	    }
5646 	}
5647       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5648 	{
5649 	  if (gimple_assign_rhs1 (user) == negate)
5650 	    {
5651 	      /* We have
5652 		   x = -negateop
5653 		   y = x - b
5654 		 which we transform into
5655 		   x = negateop + b
5656 		   y = -x .
5657 		 This pushes down the negate which we possibly can merge
5658 		 into some other operation, hence insert it into the
5659 		 plus_negates vector.  */
5660 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5661 	      tree b = gimple_assign_rhs2 (user);
5662 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5663 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5664 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5665 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
5666 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5667 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5668 	      user = gsi_stmt (gsi2);
5669 	      update_stmt (user);
5670 	      reassoc_remove_stmt (&gsi);
5671 	      release_defs (feed);
5672 	      plus_negates.safe_push (gimple_assign_lhs (user));
5673 	    }
5674 	  else
5675 	    {
5676 	      /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
5677 		 getting rid of one operation.  */
5678 	      tree rhs1 = gimple_assign_rhs1 (user);
5679 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5680 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
5681 	      update_stmt (gsi_stmt (gsi));
5682 	    }
5683 	}
5684     }
5685 }
5686 
5687 /* Returns true if OP is of a type for which we can do reassociation.
5688    That is for integral or non-saturating fixed-point types, and for
5689    floating-point types when associative-math is enabled.  */
5690 
5691 static bool
5692 can_reassociate_p (tree op)
5693 {
5694   tree type = TREE_TYPE (op);
5695   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5696     return false;
5697   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5698       || NON_SAT_FIXED_POINT_TYPE_P (type)
5699       || (flag_associative_math && FLOAT_TYPE_P (type)))
5700     return true;
5701   return false;
5702 }
5703 
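/* For example, unsigned integer types qualify because their overflow
   wraps, plain signed int (with undefined overflow) does not, and
   double qualifies only when -fassociative-math is in effect.  */
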
5704 /* Break up subtract operations in block BB.
5705 
5706    We do this top down because we don't know whether the subtract is
5707    part of a possible chain of reassociation except at the top.
5708 
5709    IE given
5710    d = f + g
5711    c = a + e
5712    b = c - d
5713    q = b - r
5714    k = t - q
5715 
5716    we want to break up k = t - q, but we won't until we've transformed q
5717    = b - r, which won't be broken up until we transform b = c - d.
5718 
5719    En passant, clear the GIMPLE visited flag on every statement
5720    and set UIDs within each basic block.  */
5721 
5722 static void
5723 break_up_subtract_bb (basic_block bb)
5724 {
5725   gimple_stmt_iterator gsi;
5726   basic_block son;
5727   unsigned int uid = 1;
5728 
5729   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5730     {
5731       gimple *stmt = gsi_stmt (gsi);
5732       gimple_set_visited (stmt, false);
5733       gimple_set_uid (stmt, uid++);
5734 
5735       if (!is_gimple_assign (stmt)
5736 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
5737 	continue;
5738 
5739       /* Look for simple gimple subtract operations.  */
5740       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5741 	{
5742 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5743 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5744 	    continue;
5745 
5746 	  /* Check for a subtract used only in an addition.  If this
5747 	     is the case, transform it into add of a negate for better
5748 	     reassociation.  IE transform C = A-B into C = A + -B if C
5749 	     is only used in an addition.  */
5750 	  if (should_break_up_subtract (stmt))
5751 	    break_up_subtract (stmt, &gsi);
5752 	}
5753       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5754 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5755 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5756     }
5757   for (son = first_dom_son (CDI_DOMINATORS, bb);
5758        son;
5759        son = next_dom_son (CDI_DOMINATORS, son))
5760     break_up_subtract_bb (son);
5761 }
5762 
5763 /* Used for repeated factor analysis.  */
5764 struct repeat_factor
5765 {
5766   /* An SSA name that occurs in a multiply chain.  */
5767   tree factor;
5768 
5769   /* Cached rank of the factor.  */
5770   unsigned rank;
5771 
5772   /* Number of occurrences of the factor in the chain.  */
5773   HOST_WIDE_INT count;
5774 
5775   /* An SSA name representing the product of this factor and
5776      all factors appearing later in the repeated factor vector.  */
5777   tree repr;
5778 };
5779 
5780 
5781 static vec<repeat_factor> repeat_factor_vec;
5782 
5783 /* Used for sorting the repeat factor vector.  Sort primarily by
5784    ascending occurrence count, secondarily by descending rank.  */
5785 
5786 static int
5787 compare_repeat_factors (const void *x1, const void *x2)
5788 {
5789   const repeat_factor *rf1 = (const repeat_factor *) x1;
5790   const repeat_factor *rf2 = (const repeat_factor *) x2;
5791 
5792   if (rf1->count != rf2->count)
5793     return rf1->count - rf2->count;
5794 
5795   return rf2->rank - rf1->rank;
5796 }
5797 
5798 /* Look for repeated operands in OPS in the multiply tree rooted at
5799    STMT.  Replace them with an optimal sequence of multiplies and powi
5800    builtin calls, and remove the used operands from OPS.  Return an
5801    SSA name representing the value of the replacement sequence.  */
5802 
5803 static tree
5804 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5805 {
5806   unsigned i, j, vec_len;
5807   int ii;
5808   operand_entry *oe;
5809   repeat_factor *rf1, *rf2;
5810   repeat_factor rfnew;
5811   tree result = NULL_TREE;
5812   tree target_ssa, iter_result;
5813   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5814   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5815   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5816   gimple *mul_stmt, *pow_stmt;
5817 
5818   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5819      target.  */
5820   if (!powi_fndecl)
5821     return NULL_TREE;
5822 
5823   /* Allocate the repeated factor vector.  */
5824   repeat_factor_vec.create (10);
5825 
5826   /* Scan the OPS vector for all SSA names in the product and build
5827      up a vector of occurrence counts for each factor.  */
5828   FOR_EACH_VEC_ELT (*ops, i, oe)
5829     {
5830       if (TREE_CODE (oe->op) == SSA_NAME)
5831 	{
5832 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5833 	    {
5834 	      if (rf1->factor == oe->op)
5835 		{
5836 		  rf1->count += oe->count;
5837 		  break;
5838 		}
5839 	    }
5840 
5841 	  if (j >= repeat_factor_vec.length ())
5842 	    {
5843 	      rfnew.factor = oe->op;
5844 	      rfnew.rank = oe->rank;
5845 	      rfnew.count = oe->count;
5846 	      rfnew.repr = NULL_TREE;
5847 	      repeat_factor_vec.safe_push (rfnew);
5848 	    }
5849 	}
5850     }
5851 
5852   /* Sort the repeated factor vector by (a) increasing occurrence count,
5853      and (b) decreasing rank.  */
5854   repeat_factor_vec.qsort (compare_repeat_factors);
5855 
5856   /* It is generally best to combine as many base factors as possible
5857      into a product before applying __builtin_powi to the result.
5858      However, the sort order chosen for the repeated factor vector
5859      allows us to cache partial results for the product of the base
5860      factors for subsequent use.  When we already have a cached partial
5861      result from a previous iteration, it is best to make use of it
5862      before looking for another __builtin_pow opportunity.
5863 
5864      As an example, consider x * x * y * y * y * z * z * z * z.
5865      We want to first compose the product x * y * z, raise it to the
5866      second power, then multiply this by y * z, and finally multiply
5867      by z.  This can be done in 5 multiplies provided we cache y * z
5868      for use in both expressions:
5869 
5870         t1 = y * z
5871 	t2 = t1 * x
5872 	t3 = t2 * t2
5873 	t4 = t1 * t3
5874 	result = t4 * z
5875 
5876      If we instead ignored the cached y * z and first multiplied by
5877      the __builtin_pow opportunity z * z, we would get the inferior:
5878 
5879         t1 = y * z
5880 	t2 = t1 * x
5881 	t3 = t2 * t2
5882 	t4 = z * z
5883 	t5 = t3 * t4
5884         result = t5 * y  */
5885 
5886   vec_len = repeat_factor_vec.length ();
5887 
5888   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5889   while (true)
5890     {
5891       HOST_WIDE_INT power;
5892 
5893       /* First look for the largest cached product of factors from
5894 	 preceding iterations.  If found, create a builtin_powi for
5895 	 it if the minimum occurrence count for its factors is at
5896 	 least 2, or just use this cached product as our next
5897 	 multiplicand if the minimum occurrence count is 1.  */
5898       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5899 	{
5900 	  if (rf1->repr && rf1->count > 0)
5901 	    break;
5902 	}
5903 
5904       if (j < vec_len)
5905 	{
5906 	  power = rf1->count;
5907 
5908 	  if (power == 1)
5909 	    {
5910 	      iter_result = rf1->repr;
5911 
5912 	      if (dump_file && (dump_flags & TDF_DETAILS))
5913 		{
5914 		  unsigned elt;
5915 		  repeat_factor *rf;
5916 		  fputs ("Multiplying by cached product ", dump_file);
5917 		  for (elt = j; elt < vec_len; elt++)
5918 		    {
5919 		      rf = &repeat_factor_vec[elt];
5920 		      print_generic_expr (dump_file, rf->factor);
5921 		      if (elt < vec_len - 1)
5922 			fputs (" * ", dump_file);
5923 		    }
5924 		  fputs ("\n", dump_file);
5925 		}
5926 	    }
5927 	  else
5928 	    {
5929 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5930 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5931 					    build_int_cst (integer_type_node,
5932 							   power));
5933 	      gimple_call_set_lhs (pow_stmt, iter_result);
5934 	      gimple_set_location (pow_stmt, gimple_location (stmt));
5935 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
5936 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5937 
5938 	      if (dump_file && (dump_flags & TDF_DETAILS))
5939 		{
5940 		  unsigned elt;
5941 		  repeat_factor *rf;
5942 		  fputs ("Building __builtin_pow call for cached product (",
5943 			 dump_file);
5944 		  for (elt = j; elt < vec_len; elt++)
5945 		    {
5946 		      rf = &repeat_factor_vec[elt];
5947 		      print_generic_expr (dump_file, rf->factor);
5948 		      if (elt < vec_len - 1)
5949 			fputs (" * ", dump_file);
5950 		    }
5951 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5952 			   power);
5953 		}
5954 	    }
5955 	}
5956       else
5957 	{
5958 	  /* Otherwise, find the first factor in the repeated factor
5959 	     vector whose occurrence count is at least 2.  If no such
5960 	     factor exists, there are no builtin_powi opportunities
5961 	     remaining.  */
5962 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5963 	    {
5964 	      if (rf1->count >= 2)
5965 		break;
5966 	    }
5967 
5968 	  if (j >= vec_len)
5969 	    break;
5970 
5971 	  power = rf1->count;
5972 
5973 	  if (dump_file && (dump_flags & TDF_DETAILS))
5974 	    {
5975 	      unsigned elt;
5976 	      repeat_factor *rf;
5977 	      fputs ("Building __builtin_pow call for (", dump_file);
5978 	      for (elt = j; elt < vec_len; elt++)
5979 		{
5980 		  rf = &repeat_factor_vec[elt];
5981 		  print_generic_expr (dump_file, rf->factor);
5982 		  if (elt < vec_len - 1)
5983 		    fputs (" * ", dump_file);
5984 		}
5985 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5986 	    }
5987 
5988 	  reassociate_stats.pows_created++;
5989 
5990 	  /* Visit each element of the vector in reverse order (so that
5991 	     high-occurrence elements are visited first, and within the
5992 	     same occurrence count, lower-ranked elements are visited
5993 	     first).  Form a linear product of all elements in this order
5994 	     whose occurrence count is at least that of element J.
5995 	     Record the SSA name representing the product of each element
5996 	     with all subsequent elements in the vector.  */
5997 	  if (j == vec_len - 1)
5998 	    rf1->repr = rf1->factor;
5999 	  else
6000 	    {
6001 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
6002 		{
6003 		  tree op1, op2;
6004 
6005 		  rf1 = &repeat_factor_vec[ii];
6006 		  rf2 = &repeat_factor_vec[ii + 1];
6007 
6008 		  /* Init the last factor's representative to be itself.  */
6009 		  if (!rf2->repr)
6010 		    rf2->repr = rf2->factor;
6011 
6012 		  op1 = rf1->factor;
6013 		  op2 = rf2->repr;
6014 
6015 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6016 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6017 						  op1, op2);
6018 		  gimple_set_location (mul_stmt, gimple_location (stmt));
6019 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6020 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6021 		  rf1->repr = target_ssa;
6022 
6023 		  /* Don't reprocess the multiply we just introduced.  */
6024 		  gimple_set_visited (mul_stmt, true);
6025 		}
6026 	    }
6027 
6028 	  /* Form a call to __builtin_powi for the maximum product
6029 	     just formed, raised to the power obtained earlier.  */
6030 	  rf1 = &repeat_factor_vec[j];
6031 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6032 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6033 					build_int_cst (integer_type_node,
6034 						       power));
6035 	  gimple_call_set_lhs (pow_stmt, iter_result);
6036 	  gimple_set_location (pow_stmt, gimple_location (stmt));
6037 	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
6038 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6039 	}
6040 
6041       /* If we previously formed at least one other builtin_powi call,
6042 	 form the product of this one and those others.  */
6043       if (result)
6044 	{
6045 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6046 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6047 					  result, iter_result);
6048 	  gimple_set_location (mul_stmt, gimple_location (stmt));
6049 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6050 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6051 	  gimple_set_visited (mul_stmt, true);
6052 	  result = new_result;
6053 	}
6054       else
6055 	result = iter_result;
6056 
6057       /* Decrement the occurrence count of each element in the product
6058 	 by the count found above, and remove this many copies of each
6059 	 factor from OPS.  */
6060       for (i = j; i < vec_len; i++)
6061 	{
6062 	  unsigned k = power;
6063 	  unsigned n;
6064 
6065 	  rf1 = &repeat_factor_vec[i];
6066 	  rf1->count -= power;
6067 
6068 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6069 	    {
6070 	      if (oe->op == rf1->factor)
6071 		{
6072 		  if (oe->count <= k)
6073 		    {
6074 		      ops->ordered_remove (n);
6075 		      k -= oe->count;
6076 
6077 		      if (k == 0)
6078 			break;
6079 		    }
6080 		  else
6081 		    {
6082 		      oe->count -= k;
6083 		      break;
6084 		    }
6085 		}
6086 	    }
6087 	}
6088     }
6089 
6090   /* At this point all elements in the repeated factor vector have a
6091      remaining occurrence count of 0 or 1, and those with a count of 1
6092      don't have cached representatives.  Re-sort the ops vector and
6093      clean up.  */
6094   ops->qsort (sort_by_operand_rank);
6095   repeat_factor_vec.release ();
6096 
6097   /* Return the final product computed herein.  Note that there may
6098      still be some elements with single occurrence count left in OPS;
6099      those will be handled by the normal reassociation logic.  */
6100   return result;
6101 }
6102 
6103 /* Attempt to optimize
6104    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6105    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
6106 
6107 static void
6108 attempt_builtin_copysign (vec<operand_entry *> *ops)
6109 {
6110   operand_entry *oe;
6111   unsigned int i;
6112   unsigned int length = ops->length ();
6113   tree cst = ops->last ()->op;
6114 
6115   if (length == 1 || TREE_CODE (cst) != REAL_CST)
6116     return;
6117 
6118   FOR_EACH_VEC_ELT (*ops, i, oe)
6119     {
6120       if (TREE_CODE (oe->op) == SSA_NAME
6121 	  && has_single_use (oe->op))
6122 	{
6123 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6124 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6125 	    {
6126 	      tree arg0, arg1;
6127 	      switch (gimple_call_combined_fn (old_call))
6128 		{
6129 		CASE_CFN_COPYSIGN:
6130 		CASE_CFN_COPYSIGN_FN:
6131 		  arg0 = gimple_call_arg (old_call, 0);
6132 		  arg1 = gimple_call_arg (old_call, 1);
6133 		  /* The first argument of copysign must be a constant,
6134 		     otherwise there's nothing to do.  */
6135 		  if (TREE_CODE (arg0) == REAL_CST)
6136 		    {
6137 		      tree type = TREE_TYPE (arg0);
6138 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6139 		      /* If we couldn't fold to a single constant, skip it.
6140 			 That happens e.g. for inexact multiplication when
6141 			 -frounding-math.  */
6142 		      if (mul == NULL_TREE)
6143 			break;
6144 		      /* Instead of adjusting OLD_CALL, let's build a new
6145 			 call to not leak the LHS and prevent keeping bogus
6146 			 debug statements.  DCE will clean up the old call.  */
6147 		      gcall *new_call;
6148 		      if (gimple_call_internal_p (old_call))
6149 			new_call = gimple_build_call_internal
6150 			  (IFN_COPYSIGN, 2, mul, arg1);
6151 		      else
6152 			new_call = gimple_build_call
6153 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
6154 		      tree lhs = make_ssa_name (type);
6155 		      gimple_call_set_lhs (new_call, lhs);
6156 		      gimple_set_location (new_call,
6157 					   gimple_location (old_call));
6158 		      insert_stmt_after (new_call, old_call);
6159 		      /* We've used the constant, get rid of it.  */
6160 		      ops->pop ();
6161 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6162 		      /* Handle the CST1 < 0 case by negating the result.  */
6163 		      if (cst1_neg)
6164 			{
6165 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6166 			  gimple *negate_stmt
6167 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6168 			  insert_stmt_after (negate_stmt, new_call);
6169 			  oe->op = negrhs;
6170 			}
6171 		      else
6172 			oe->op = lhs;
6173 		      if (dump_file && (dump_flags & TDF_DETAILS))
6174 			{
6175 			  fprintf (dump_file, "Optimizing copysign: ");
6176 			  print_generic_expr (dump_file, cst);
6177 			  fprintf (dump_file, " * COPYSIGN (");
6178 			  print_generic_expr (dump_file, arg0);
6179 			  fprintf (dump_file, ", ");
6180 			  print_generic_expr (dump_file, arg1);
6181 			  fprintf (dump_file, ") into %sCOPYSIGN (",
6182 				   cst1_neg ? "-" : "");
6183 			  print_generic_expr (dump_file, mul);
6184 			  fprintf (dump_file, ", ");
6185 			  print_generic_expr (dump_file, arg1);
6186 			  fprintf (dump_file, "\n");
6187 			}
6188 		      return;
6189 		    }
6190 		  break;
6191 		default:
6192 		  break;
6193 		}
6194 	    }
6195 	}
6196     }
6197 }
6198 
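/* For example, with the constant 2.0 last in OPS and a single-use
   copysign (3.0, y_1) among the operands, a new call
   copysign (6.0, y_1) is built, the constant is popped from OPS and
   the operand entry is redirected to the new call's result.  Had the
   constant been negative, the result would additionally be
   negated.  */
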
6199 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
6200 
6201 static void
6202 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6203 {
6204   tree rhs1;
6205 
6206   if (dump_file && (dump_flags & TDF_DETAILS))
6207     {
6208       fprintf (dump_file, "Transforming ");
6209       print_gimple_stmt (dump_file, stmt, 0);
6210     }
6211 
6212   rhs1 = gimple_assign_rhs1 (stmt);
6213   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6214   update_stmt (stmt);
6215   remove_visited_stmt_chain (rhs1);
6216 
6217   if (dump_file && (dump_flags & TDF_DETAILS))
6218     {
6219       fprintf (dump_file, " into ");
6220       print_gimple_stmt (dump_file, stmt, 0);
6221     }
6222 }
6223 
6224 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
6225 
6226 static void
6227 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6228 			    tree rhs1, tree rhs2)
6229 {
6230   if (dump_file && (dump_flags & TDF_DETAILS))
6231     {
6232       fprintf (dump_file, "Transforming ");
6233       print_gimple_stmt (dump_file, stmt, 0);
6234     }
6235 
6236   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6237   update_stmt (gsi_stmt (*gsi));
6238   remove_visited_stmt_chain (rhs1);
6239 
6240   if (dump_file && (dump_flags & TDF_DETAILS))
6241     {
6242       fprintf (dump_file, " into ");
6243       print_gimple_stmt (dump_file, stmt, 0);
6244     }
6245 }
6246 
6247 /* Reassociate expressions in basic block BB and its post-dominator
6248    children.
6249 
6250    Bubble up return status from maybe_optimize_range_tests.  */
6251 
6252 static bool
6253 reassociate_bb (basic_block bb)
6254 {
6255   gimple_stmt_iterator gsi;
6256   basic_block son;
6257   gimple *stmt = last_stmt (bb);
6258   bool cfg_cleanup_needed = false;
6259 
6260   if (stmt && !gimple_visited_p (stmt))
6261     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6262 
6263   bool do_prev = false;
6264   for (gsi = gsi_last_bb (bb);
6265        !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6266     {
6267       do_prev = true;
6268       stmt = gsi_stmt (gsi);
6269 
6270       if (is_gimple_assign (stmt)
6271 	  && !stmt_could_throw_p (cfun, stmt))
6272 	{
6273 	  tree lhs, rhs1, rhs2;
6274 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6275 
6276 	  /* If this was part of an already processed statement,
6277 	     we don't need to touch it again. */
6278 	  if (gimple_visited_p (stmt))
6279 	    {
6280 	      /* This statement might have become dead because of previous
6281 		 reassociations.  */
6282 	      if (has_zero_uses (gimple_get_lhs (stmt)))
6283 		{
6284 		  reassoc_remove_stmt (&gsi);
6285 		  release_defs (stmt);
6286 		  /* We might end up removing the last stmt above which
6287 		     places the iterator to the end of the sequence.
6288 		     Reset it to the last stmt in this case and make sure
6289 		     we don't do gsi_prev in that case.  */
6290 		  if (gsi_end_p (gsi))
6291 		    {
6292 		      gsi = gsi_last_bb (bb);
6293 		      do_prev = false;
6294 		    }
6295 		}
6296 	      continue;
6297 	    }
6298 
6299 	  /* If this is not a gimple binary expression, there is
6300 	     nothing for us to do with it.  */
6301 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6302 	    continue;
6303 
6304 	  lhs = gimple_assign_lhs (stmt);
6305 	  rhs1 = gimple_assign_rhs1 (stmt);
6306 	  rhs2 = gimple_assign_rhs2 (stmt);
6307 
6308 	  /* For non-bit or min/max operations we can't associate
6309 	     all types.  Verify that here.  */
6310 	  if (rhs_code != BIT_IOR_EXPR
6311 	      && rhs_code != BIT_AND_EXPR
6312 	      && rhs_code != BIT_XOR_EXPR
6313 	      && rhs_code != MIN_EXPR
6314 	      && rhs_code != MAX_EXPR
6315 	      && (!can_reassociate_p (lhs)
6316 		  || !can_reassociate_p (rhs1)
6317 		  || !can_reassociate_p (rhs2)))
6318 	    continue;
6319 
6320 	  if (associative_tree_code (rhs_code))
6321 	    {
6322 	      auto_vec<operand_entry *> ops;
6323 	      tree powi_result = NULL_TREE;
6324 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6325 
6326 	      /* There may be no immediate uses left by the time we
6327 		 get here because we may have eliminated them all.  */
6328 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6329 		continue;
6330 
6331 	      gimple_set_visited (stmt, true);
6332 	      linearize_expr_tree (&ops, stmt, true, true);
6333 	      ops.qsort (sort_by_operand_rank);
6334 	      int orig_len = ops.length ();
6335 	      optimize_ops_list (rhs_code, &ops);
6336 	      if (undistribute_ops_list (rhs_code, &ops,
6337 					 loop_containing_stmt (stmt)))
6338 		{
6339 		  ops.qsort (sort_by_operand_rank);
6340 		  optimize_ops_list (rhs_code, &ops);
6341 		}
6342 	      if (undistribute_bitref_for_vector (rhs_code, &ops,
6343 						  loop_containing_stmt (stmt)))
6344 		{
6345 		  ops.qsort (sort_by_operand_rank);
6346 		  optimize_ops_list (rhs_code, &ops);
6347 		}
6348 	      if (rhs_code == PLUS_EXPR
6349 		  && transform_add_to_multiply (&ops))
6350 		ops.qsort (sort_by_operand_rank);
6351 
6352 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6353 		{
6354 		  if (is_vector)
6355 		    optimize_vec_cond_expr (rhs_code, &ops);
6356 		  else
6357 		    optimize_range_tests (rhs_code, &ops, NULL);
6358 	        }
6359 
6360 	      if (rhs_code == MULT_EXPR && !is_vector)
6361 	        {
6362 		  attempt_builtin_copysign (&ops);
6363 
6364 		  if (reassoc_insert_powi_p
6365 		      && flag_unsafe_math_optimizations)
6366 		    powi_result = attempt_builtin_powi (stmt, &ops);
6367 		}
6368 
6369 	      operand_entry *last;
6370 	      bool negate_result = false;
6371 	      if (ops.length () > 1
6372 		  && rhs_code == MULT_EXPR)
6373 		{
6374 		  last = ops.last ();
6375 		  if ((integer_minus_onep (last->op)
6376 		       || real_minus_onep (last->op))
6377 		      && !HONOR_SNANS (TREE_TYPE (lhs))
6378 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6379 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6380 		    {
6381 		      ops.pop ();
6382 		      negate_result = true;
6383 		    }
6384 		}
6385 
6386 	      tree new_lhs = lhs;
6387 	      /* If the operand vector is now empty, all operands were
6388 		 consumed by the __builtin_powi optimization.  */
6389 	      if (ops.length () == 0)
6390 		transform_stmt_to_copy (&gsi, stmt, powi_result);
6391 	      else if (ops.length () == 1)
6392 		{
6393 		  tree last_op = ops.last ()->op;
6394 
6395 		  /* If the stmt that defines operand has to be inserted, insert it
6396 		     before the use.  */
6397 		  if (ops.last ()->stmt_to_insert)
6398 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6399 		  if (powi_result)
6400 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
6401 						powi_result);
6402 		  else
6403 		    transform_stmt_to_copy (&gsi, stmt, last_op);
6404 		}
6405 	      else
6406 		{
6407 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6408 		  int ops_num = ops.length ();
6409 		  int width;
6410 
6411 		  /* For binary bit operations, if there are at least 3
6412 		     operands and the last operand in OPS is a constant,
6413 		     move it to the front.  This helps ensure that we generate
6414 		     (X & Y) & C rather than (X & C) & Y.  The former will
6415 		     often match a canonical bit test when we get to RTL.  */
6416 		  if (ops.length () > 2
6417 		      && (rhs_code == BIT_AND_EXPR
6418 		          || rhs_code == BIT_IOR_EXPR
6419 		          || rhs_code == BIT_XOR_EXPR)
6420 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6421 		    std::swap (*ops[0], *ops[ops_num - 1]);
6422 
6423 		  /* Only rewrite the expression tree into parallel form in the
6424 		     last reassoc pass to avoid useless back-and-forth work
6425 		     with the initial linearization.  */
6426 		  if (!reassoc_insert_powi_p
6427 		      && ops.length () > 3
6428 		      && (width = get_reassociation_width (ops_num, rhs_code,
6429 							   mode)) > 1)
6430 		    {
6431 		      if (dump_file && (dump_flags & TDF_DETAILS))
6432 			fprintf (dump_file,
6433 				 "Width = %d was chosen for reassociation\n",
6434 				 width);
6435 		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6436 						  width, ops);
6437 		    }
6438 		  else
6439                     {
6440                       /* When there are three operands left, we want
6441                          to make sure the ones that get the double
6442                          binary op are chosen wisely.  */
6443                       int len = ops.length ();
6444                       if (len >= 3)
6445                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6446 
6447 		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6448 						   powi_result != NULL
6449 						   || negate_result,
6450 						   len != orig_len);
6451                     }
6452 
6453 		  /* If we combined some repeated factors into a
6454 		     __builtin_powi call, multiply that result by the
6455 		     reassociated operands.  */
6456 		  if (powi_result)
6457 		    {
6458 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6459 		      tree type = TREE_TYPE (lhs);
6460 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6461 							    "reassocpow");
6462 		      gimple_set_lhs (lhs_stmt, target_ssa);
6463 		      update_stmt (lhs_stmt);
6464 		      if (lhs != new_lhs)
6465 			{
6466 			  target_ssa = new_lhs;
6467 			  new_lhs = lhs;
6468 			}
6469 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6470 						      powi_result, target_ssa);
6471 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6472 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6473 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6474 		    }
6475 		}
6476 
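	      /* If a trailing -1 was stripped from a multiply chain above,
		 reapply it now: move the reassociated result to a fresh
		 SSA name and negate that into the original LHS.  */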
6477 	      if (negate_result)
6478 		{
6479 		  stmt = SSA_NAME_DEF_STMT (lhs);
6480 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6481 		  gimple_set_lhs (stmt, tmp);
6482 		  if (lhs != new_lhs)
6483 		    tmp = new_lhs;
6484 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6485 							   tmp);
6486 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6487 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6488 		  update_stmt (stmt);
6489 		}
6490 	    }
6491 	}
6492     }
6493   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6494        son;
6495        son = next_dom_son (CDI_POST_DOMINATORS, son))
6496     cfg_cleanup_needed |= reassociate_bb (son);
6497 
6498   return cfg_cleanup_needed;
6499 }
6500 
6501 /* Add jumps around shifts for range tests turned into bit tests.
6502    For each SSA_NAME VAR we have code like:
6503    VAR = ...; // final stmt of range comparison
6504    // bit test here...;
6505    OTHERVAR = ...; // final stmt of the bit test sequence
6506    RES = VAR | OTHERVAR;
6507    Turn the above into:
6508    <l1>: VAR = ...;
6509    if (VAR != 0)
6510      goto <l3>;
6511    else
6512      goto <l2>;
6513    <l2>:
6514    // bit test here...;
6515    OTHERVAR = ...;
6516    <l3>:
6517    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6518 
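/* Illustrative sketch (hypothetical values): in

     res = y_is_special || x == 10 || x == 12 || x == 26;

   the three X comparisons can be collapsed into a bit test such as
   OTHERVAR = ((1ULL << (x - 10)) & 0x10005) != 0, with VAR holding the
   remaining comparison; the branch inserted below skips the shift
   sequence whenever VAR alone already decides RES.  */
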
6519 static void
6520 branch_fixup (void)
6521 {
6522   tree var;
6523   unsigned int i;
6524 
6525   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6526     {
6527       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6528       gimple *use_stmt;
6529       use_operand_p use;
6530       bool ok = single_imm_use (var, &use, &use_stmt);
6531       gcc_assert (ok
6532 		  && is_gimple_assign (use_stmt)
6533 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6534 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6535 
6536       basic_block cond_bb = gimple_bb (def_stmt);
6537       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6538       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6539 
6540       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6541       gimple *g = gimple_build_cond (NE_EXPR, var,
6542 				     build_zero_cst (TREE_TYPE (var)),
6543 				     NULL_TREE, NULL_TREE);
6544       location_t loc = gimple_location (use_stmt);
6545       gimple_set_location (g, loc);
6546       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6547 
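      /* The new conditional branch is treated as unpredictable: give
	 each outcome an even (guessed 50%) probability and rescale the
	 fall-through edge and block count accordingly.  */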
6548       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6549       etrue->probability = profile_probability::even ();
6550       edge efalse = find_edge (cond_bb, then_bb);
6551       efalse->flags = EDGE_FALSE_VALUE;
6552       efalse->probability -= etrue->probability;
6553       then_bb->count -= etrue->count ();
6554 
6555       tree othervar = NULL_TREE;
6556       if (gimple_assign_rhs1 (use_stmt) == var)
6557 	othervar = gimple_assign_rhs2 (use_stmt);
6558       else if (gimple_assign_rhs2 (use_stmt) == var)
6559 	othervar = gimple_assign_rhs1 (use_stmt);
6560       else
6561 	gcc_unreachable ();
6562       tree lhs = gimple_assign_lhs (use_stmt);
6563       gphi *phi = create_phi_node (lhs, merge_bb);
6564       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6565       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6566       gsi = gsi_for_stmt (use_stmt);
6567       gsi_remove (&gsi, true);
6568 
6569       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6570       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6571     }
6572   reassoc_branch_fixups.release ();
6573 }
6574 
6575 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6576 void debug_ops_vector (vec<operand_entry *> ops);
6577 
6578 /* Dump the operand entry vector OPS to FILE.  */
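/* A minimal sketch of the output, one line per operand entry (the exact
   tree rendering comes from print_generic_expr):
     Op 0 -> rank: 5, tree: x_1
     Op 1 -> rank: 4, tree: y_2  */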
6579 
6580 void
6581 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6582 {
6583   operand_entry *oe;
6584   unsigned int i;
6585 
6586   FOR_EACH_VEC_ELT (ops, i, oe)
6587     {
6588       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6589       print_generic_expr (file, oe->op);
6590       fprintf (file, "\n");
6591     }
6592 }
6593 
6594 /* Dump the operand entry vector OPS to STDERR.  */
6595 
6596 DEBUG_FUNCTION void
6597 debug_ops_vector (vec<operand_entry *> ops)
6598 {
6599   dump_ops_vector (stderr, ops);
6600 }
6601 
6602 /* Break up subtracts walking the dominator tree from the entry block,
   then reassociate walking the post-dominator tree from the exit block,
   bubbling up the return status from reassociate_bb.  */
6603 
6604 static bool
6605 do_reassoc (void)
6606 {
6607   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6608   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6609 }
6610 
6611 /* Initialize the reassociation pass.  */
6612 
6613 static void
6614 init_reassoc (void)
6615 {
6616   int i;
6617   int64_t rank = 2;
6618   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6619 
6620   /* Find the loops, so that we can prevent moving calculations
6621      into them.  */
6622   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6623 
6624   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6625 
6626   next_operand_entry_id = 0;
6627 
6628   /* A reverse post-order (RPO) walk will give us an order in which
6629      deeper loops come later.  */
6630   pre_and_rev_post_order_compute (NULL, bbs, false);
6631   bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6632   operand_rank = new hash_map<tree, int64_t>;
6633 
6634   /* Give each default definition a distinct rank.  This includes
6635      parameters and the static chain.  Walk backwards over all
6636      SSA names so that we get proper rank ordering according
6637      to tree_swap_operands_p.  */
6638   for (i = num_ssa_names - 1; i > 0; --i)
6639     {
6640       tree name = ssa_name (i);
6641       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6642 	insert_operand_rank (name, ++rank);
6643     }
6644 
6645   /* Set up rank for each BB  */
6646   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6647     bb_rank[bbs[i]] = ++rank << 16;
6648 
6649   free (bbs);
6650   calculate_dominance_info (CDI_POST_DOMINATORS);
6651   plus_negates = vNULL;
6652 }
6653 
6654 /* Cleanup after the reassociation pass, and print stats if
6655    requested.  */
6656 
6657 static void
6658 fini_reassoc (void)
6659 {
6660   statistics_counter_event (cfun, "Linearized",
6661 			    reassociate_stats.linearized);
6662   statistics_counter_event (cfun, "Constants eliminated",
6663 			    reassociate_stats.constants_eliminated);
6664   statistics_counter_event (cfun, "Ops eliminated",
6665 			    reassociate_stats.ops_eliminated);
6666   statistics_counter_event (cfun, "Statements rewritten",
6667 			    reassociate_stats.rewritten);
6668   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6669 			    reassociate_stats.pows_encountered);
6670   statistics_counter_event (cfun, "Built-in powi calls created",
6671 			    reassociate_stats.pows_created);
6672 
6673   delete operand_rank;
6674   operand_entry_pool.release ();
6675   free (bb_rank);
6676   plus_negates.release ();
6677   free_dominance_info (CDI_POST_DOMINATORS);
6678   loop_optimizer_finalize ();
6679 }
6680 
6681 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6682    insertion of __builtin_powi calls.
6683 
6684    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6685    optimization of a gimple conditional.  Otherwise returns zero.  */
6686 
6687 static unsigned int
6688 execute_reassoc (bool insert_powi_p)
6689 {
6690   reassoc_insert_powi_p = insert_powi_p;
6691 
6692   init_reassoc ();
6693 
6694   bool cfg_cleanup_needed;
6695   cfg_cleanup_needed = do_reassoc ();
6696   repropagate_negates ();
6697   branch_fixup ();
6698 
6699   fini_reassoc ();
6700   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6701 }
6702 
6703 namespace {
6704 
6705 const pass_data pass_data_reassoc =
6706 {
6707   GIMPLE_PASS, /* type */
6708   "reassoc", /* name */
6709   OPTGROUP_NONE, /* optinfo_flags */
6710   TV_TREE_REASSOC, /* tv_id */
6711   ( PROP_cfg | PROP_ssa ), /* properties_required */
6712   0, /* properties_provided */
6713   0, /* properties_destroyed */
6714   0, /* todo_flags_start */
6715   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6716 };
6717 
6718 class pass_reassoc : public gimple_opt_pass
6719 {
6720 public:
6721   pass_reassoc (gcc::context *ctxt)
6722     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6723   {}
6724 
6725   /* opt_pass methods: */
6726   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6727   void set_pass_param (unsigned int n, bool param)
6728     {
6729       gcc_assert (n == 0);
6730       insert_powi_p = param;
6731     }
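  /* This parameter distinguishes the two instances of the pass in
     passes.def: the early run enables __builtin_powi insertion, the
     late run disables it and is the one that may rewrite chains into
     parallel form (see reassociate_bb).  */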
6732   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6733   virtual unsigned int execute (function *)
6734     { return execute_reassoc (insert_powi_p); }
6735 
6736  private:
6737   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6738      point 3a in the pass header comment.  */
6739   bool insert_powi_p;
6740 }; // class pass_reassoc
6741 
6742 } // anon namespace
6743 
6744 gimple_opt_pass *
6745 make_pass_reassoc (gcc::context *ctxt)
6746 {
6747   return new pass_reassoc (ctxt);
6748 }
6749