/* Reassociation for trees.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"

/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_*

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    I.e., if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)


    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op. So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b  and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or fewer ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
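
/* As a small illustration of the steps above (an illustrative sketch,
   not output lifted from an actual dump), given source like

     x = (a + b) + (c - a)

   step 1 turns the subtraction into c + (-a), step 2 linearizes the
   tree into the operand list {a, b, c, -a}, step 3 cancels the a / -a
   pair, and step 4 rebuilds the statement as x = b + c with the
   operands in rank order.  */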

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs for which, after reassociate_bb has processed all
   basic blocks, the CFG should be adjusted: each basic block is split
   right after the SSA_NAME's definition statement and before its only
   use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
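
/* (Basic-block ranks are spaced 1 << 16 apart, as the >> 16 comparison
   in sort_by_operand_rank below suggests, so 1 << 15 satisfies both
   constraints.)  */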

/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias in code
   like this:

     x_1 = phi(x_0, x_2)
     a = x_1 | 1
     b = a ^ 2
     .MEM = b
     c = b + d
     x_2 = c + e

   That is, we need to preserve bias along single-use chains originating from
   loop-carried phis.  Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
   uses, because only they participate in rank propagation.  */
static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
	  && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
	{
	  if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
	    return false;
	  single_use_stmt = current_use_stmt;
	}
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
	return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAMEs have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  rank = phi_rank (stmt);
	  if (rank != bb_rank[gimple_bb (stmt)->index])
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      else if (!is_gimple_assign (stmt))
	rank = bb_rank[gimple_bb (stmt)->index];

      else
	{
	  bool biased_p = false;
	  bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

	  /* Otherwise, find the maximum rank for the operands.  As an
	     exception, remove the bias from loop-carried phis when propagating
	     the rank so that dependent operations are not also biased.  */
	  /* Simply walk over all SSA uses - this takes advantage of the
	     fact that non-SSA operands are is_gimple_min_invariant and
	     thus have rank 0.  */
	  rank = 0;
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    rank = propagate_rank (rank, op, maybe_biased_p);

	  rank += 1;
	  if (biased_p)
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %" PRId64 "\n", rank);
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
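
/* Given the values above and the constant_type subtraction in
   sort_by_operand_rank, rank-0 constants end up ordered as
   other < generic float < 1.0/-1.0 float < integer, i.e. integer
   constants always sort last.  */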

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while const_binop can't
	 optimize some inexact operations, multiplication by -1.0 or
	 1.0 can always be merged with other operands.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Returns true if we can associate the SSA def OP.  */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}

/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point types when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
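
/* E.g. unsigned arithmetic always qualifies, plain signed integers
   qualify only when signed overflow wraps (-fwrapv), and float/double
   only under -fassociative-math.  */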

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
	  || (rhs2 && !can_reassociate_op_p (rhs2)))
	return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value when cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */
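
  /* E.g. OPS {x, x, y} with opcode & becomes {x, y}, while with ^ it
     becomes {y}; if the two x's were the only operands, the whole
     expression folds to 0.  */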

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
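
/* E.g. for OPS {-x, x, y} under PLUS_EXPR the -x and x entries cancel,
   leaving {y, 0}.  */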

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, replace the whole operand list with
   -1 or 0, respectively, and return true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, 0, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};
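
/* Values stored in the hash table are cvec indices biased by 42, since
   int_hash <int, 0, 1> reserves 0 and 1 for empty and deleted slots.  */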

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
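
/* E.g. for __builtin_pow (x, 4.0) the call is rewritten in place to
   __builtin_pow (x, 3.0) and 3 is returned.  */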

/* Replace the SSA name defined by STMT with a new SSA name, replace
   all uses of the old name with the new one, and return the new SSA
   name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSA names defined by the statements in STMTS_TO_FIX and
   replace their uses with new SSA names.  Also do this for the stmt
   that defines *DEF if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */
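/* E.g. if *DEF is d = a * b * c (a chain of two MULT_EXPRs), then
   zero_one_operation (&d, MULT_EXPR, b) removes the multiplication by
   b and leaves a chain computing a * c.  */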

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours, simply propagate the other operand into the stmt's
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
     definition (D) SSA_NAME.  Assume it lives at the beginning of the
     function and thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM
	   && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Build one statement of type TYPE performing OP1 OPCODE OP2 using a
   new SSA name for the result.  Places the statement after the
   definition of either OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
	  && is_gimple_call (gsi_stmt (gsi))
	  && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
	{
	  /* Don't add statements before a returns_twice call at the start
	     of a function.  */
	  split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	}
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b) * x * y in one invocation of the reassociation pass.  */
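
/* For example, for a*x + b*x + c*x the counting table ends up with
   (x, *) -> 3 and (a, *), (b, *), (c, *) -> 1 each, so the (x, *) pair
   is processed first and the sum is rewritten as (a + b + c) * x.  */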
1637 
1638 static bool
undistribute_ops_list(enum tree_code opcode,vec<operand_entry * > * ops,class loop * loop)1639 undistribute_ops_list (enum tree_code opcode,
1640 		       vec<operand_entry *> *ops, class loop *loop)
1641 {
1642   unsigned int length = ops->length ();
1643   operand_entry *oe1;
1644   unsigned i, j;
1645   unsigned nr_candidates, nr_candidates2;
1646   sbitmap_iterator sbi0;
1647   vec<operand_entry *> *subops;
1648   bool changed = false;
1649   unsigned int next_oecount_id = 0;
1650 
1651   if (length <= 1
1652       || opcode != PLUS_EXPR)
1653     return false;
1654 
1655   /* Build a list of candidates to process.  */
1656   auto_sbitmap candidates (length);
1657   bitmap_clear (candidates);
1658   nr_candidates = 0;
1659   FOR_EACH_VEC_ELT (*ops, i, oe1)
1660     {
1661       enum tree_code dcode;
1662       gimple *oe1def;
1663 
1664       if (TREE_CODE (oe1->op) != SSA_NAME)
1665 	continue;
1666       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1667       if (!is_gimple_assign (oe1def))
1668 	continue;
1669       dcode = gimple_assign_rhs_code (oe1def);
1670       if ((dcode != MULT_EXPR
1671 	   && dcode != RDIV_EXPR)
1672 	  || !is_reassociable_op (oe1def, dcode, loop))
1673 	continue;
1674 
1675       bitmap_set_bit (candidates, i);
1676       nr_candidates++;
1677     }
1678 
1679   if (nr_candidates < 2)
1680     return false;
1681 
1682   if (dump_file && (dump_flags & TDF_DETAILS))
1683     {
1684       fprintf (dump_file, "searching for un-distribute opportunities ");
1685       print_generic_expr (dump_file,
1686 	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1687       fprintf (dump_file, " %d\n", nr_candidates);
1688     }
1689 
1690   /* Build linearized sub-operand lists and the counting table.  */
1691   cvec.create (0);
1692 
1693   hash_table<oecount_hasher> ctable (15);
1694 
1695   /* ??? Macro arguments cannot have multi-argument template types in
1696      them.  This typedef is needed to work around that limitation.  */
1697   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1698   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1699   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1700     {
1701       gimple *oedef;
1702       enum tree_code oecode;
1703       unsigned j;
1704 
1705       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1706       oecode = gimple_assign_rhs_code (oedef);
1707       linearize_expr_tree (&subops[i], oedef,
1708 			   associative_tree_code (oecode), false);
1709 
1710       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1711 	{
1712 	  oecount c;
1713 	  int *slot;
1714 	  int idx;
1715 	  c.oecode = oecode;
1716 	  c.cnt = 1;
1717 	  c.id = next_oecount_id++;
1718 	  c.op = oe1->op;
1719 	  cvec.safe_push (c);
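	  /* Bias the stored index by 42 so it never collides with the hash
	     table's reserved empty (0) and deleted (1) slot values: after
	     the push, cvec.length () is the new element's index + 1, so
	     idx == index + 42, undone below with *slot - 42.  */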
1720 	  idx = cvec.length () + 41;
1721 	  slot = ctable.find_slot (idx, INSERT);
1722 	  if (!*slot)
1723 	    {
1724 	      *slot = idx;
1725 	    }
1726 	  else
1727 	    {
1728 	      cvec.pop ();
1729 	      cvec[*slot - 42].cnt++;
1730 	    }
1731 	}
1732     }
1733 
1734   /* Sort the counting table.  */
1735   cvec.qsort (oecount_cmp);
1736 
1737   if (dump_file && (dump_flags & TDF_DETAILS))
1738     {
1739       oecount *c;
1740       fprintf (dump_file, "Candidates:\n");
1741       FOR_EACH_VEC_ELT (cvec, j, c)
1742 	{
1743 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1744 		   c->oecode == MULT_EXPR
1745 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1746 	  print_generic_expr (dump_file, c->op);
1747 	  fprintf (dump_file, "\n");
1748 	}
1749     }
1750 
1751   /* Process the (operand, code) pairs in decreasing order of occurrence
1752      count.  */
1752   auto_sbitmap candidates2 (length);
1753   while (!cvec.is_empty ())
1754     {
1755       oecount *c = &cvec.last ();
1756       if (c->cnt < 2)
1757 	break;
1758 
1759       /* Now collect the operands in the outer chain that contain
1760          the common operand in their inner chain.  */
1761       bitmap_clear (candidates2);
1762       nr_candidates2 = 0;
1763       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1764 	{
1765 	  gimple *oedef;
1766 	  enum tree_code oecode;
1767 	  unsigned j;
1768 	  tree op = (*ops)[i]->op;
1769 
1770 	  /* If we undistributed in this chain already, this may be
1771 	     a constant.  */
1772 	  if (TREE_CODE (op) != SSA_NAME)
1773 	    continue;
1774 
1775 	  oedef = SSA_NAME_DEF_STMT (op);
1776 	  oecode = gimple_assign_rhs_code (oedef);
1777 	  if (oecode != c->oecode)
1778 	    continue;
1779 
1780 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1781 	    {
1782 	      if (oe1->op == c->op)
1783 		{
1784 		  bitmap_set_bit (candidates2, i);
1785 		  ++nr_candidates2;
1786 		  break;
1787 		}
1788 	    }
1789 	}
1790 
1791       if (nr_candidates2 >= 2)
1792 	{
1793 	  operand_entry *oe1, *oe2;
1794 	  gimple *prod;
1795 	  int first = bitmap_first_set_bit (candidates2);
1796 
1797 	  /* Build the new addition chain.  */
1798 	  oe1 = (*ops)[first];
1799 	  if (dump_file && (dump_flags & TDF_DETAILS))
1800 	    {
1801 	      fprintf (dump_file, "Building (");
1802 	      print_generic_expr (dump_file, oe1->op);
1803 	    }
1804 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1805 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1806 	    {
1807 	      gimple *sum;
1808 	      oe2 = (*ops)[i];
1809 	      if (dump_file && (dump_flags & TDF_DETAILS))
1810 		{
1811 		  fprintf (dump_file, " + ");
1812 		  print_generic_expr (dump_file, oe2->op);
1813 		}
1814 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1815 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1816 				       oe1->op, oe2->op, opcode);
1817 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1818 	      oe2->rank = 0;
1819 	      oe1->op = gimple_get_lhs (sum);
1820 	    }
1821 
1822 	  /* Apply the multiplication/division.  */
1823 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1824 				    oe1->op, c->op, c->oecode);
1825 	  if (dump_file && (dump_flags & TDF_DETAILS))
1826 	    {
1827 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1828 	      print_generic_expr (dump_file, c->op);
1829 	      fprintf (dump_file, "\n");
1830 	    }
1831 
1832 	  /* Record it in the addition chain and disable further
1833 	     undistribution with this op.  */
1834 	  oe1->op = gimple_assign_lhs (prod);
1835 	  oe1->rank = get_rank (oe1->op);
1836 	  subops[first].release ();
1837 
1838 	  changed = true;
1839 	}
1840 
1841       cvec.pop ();
1842     }
1843 
1844   for (i = 0; i < ops->length (); ++i)
1845     subops[i].release ();
1846   free (subops);
1847   cvec.release ();
1848 
1849   return changed;
1850 }
1851 
1852 /* Pair holding the information for one relevant BIT_FIELD_REF of a
1853    specific VECTOR_TYPE SSA_NAME: first is the element index within the
1854    vector; second is the index into the *ops vec.  */
1855 typedef std::pair<unsigned, unsigned> v_info_elem;
1856 struct v_info {
1857   tree vec_type;
1858   auto_vec<v_info_elem, 32> vec;
1859 };
1860 typedef v_info *v_info_ptr;
1861 
1862 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
1863 static int
1864 sort_by_mach_mode (const void *p_i, const void *p_j)
1865 {
1866   const tree tr1 = *((const tree *) p_i);
1867   const tree tr2 = *((const tree *) p_j);
1868   unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1869   unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1870   if (mode1 > mode2)
1871     return 1;
1872   else if (mode1 < mode2)
1873     return -1;
1874   if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1875     return -1;
1876   else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1877     return 1;
1878   return 0;
1879 }
1880 
1881 /* Cleanup hash map for VECTOR information.  */
1882 static void
1883 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1884 {
1885   for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1886        it != info_map.end (); ++it)
1887     {
1888       v_info_ptr info = (*it).second;
1889       delete info;
1890       (*it).second = NULL;
1891     }
1892 }
1893 
1894 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1895      V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1896    is transformed to
1897      Vs = (V1 + V2 + ... + Vn)
1898      Vs[0] + Vs[1] + ... + Vs[k]
1899 
1900    The basic steps are listed below:
1901 
1902     1) Walk the addition chain *OPS looking for summands that come from a
1903        BIT_FIELD_REF on a VECTOR-typed SSA_NAME.  For each qualifying
1904        summand, record the information in v_info_map, keyed by that SSA_NAME.
1905 
1906     2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1907        contiguous and cover the whole VECTOR with no holes.  Collect the
1908        candidates that pass this check into one VECTOR list.
1909 
1910     3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1911        group of candidates with the same mode, build the addition statements
1912        for them and generate BIT_FIELD_REFs accordingly.
1913 
1914    TODO:
1915        The current implementation requires each VECTOR to be fully covered.
1916        It could be extended to support partial coverage (adjacent elements
1917        that do not fill the whole VECTOR), which would likely need a cost
1918        model to decide when the transformation pays off.
1919 */
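
/* For illustration only (hypothetical GNU vector-extension types): on a
   target with V4SF support, given

     typedef float v4sf __attribute__ ((vector_size (16)));

   the chain

     s = v1[0] + v1[1] + v1[2] + v1[3]
	 + v2[0] + v2[1] + v2[2] + v2[3];

   becomes

     vs = v1 + v2;
     s = vs[0] + vs[1] + vs[2] + vs[3];

   replacing seven scalar additions with one vector addition plus three
   scalar additions.  */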
1920 static bool
1921 undistribute_bitref_for_vector (enum tree_code opcode,
1922 				vec<operand_entry *> *ops, struct loop *loop)
1923 {
1924   if (ops->length () <= 1)
1925     return false;
1926 
1927   if (opcode != PLUS_EXPR
1928       && opcode != MULT_EXPR
1929       && opcode != BIT_XOR_EXPR
1930       && opcode != BIT_IOR_EXPR
1931       && opcode != BIT_AND_EXPR)
1932     return false;
1933 
1934   hash_map<tree, v_info_ptr> v_info_map;
1935   operand_entry *oe1;
1936   unsigned i;
1937 
1938   /* Find the summands in the addition chain that come from a VECTOR
1939      BIT_FIELD_REF and record their information in the map.  */
1940   FOR_EACH_VEC_ELT (*ops, i, oe1)
1941     {
1942       enum tree_code dcode;
1943       gimple *oe1def;
1944 
1945       if (TREE_CODE (oe1->op) != SSA_NAME)
1946 	continue;
1947       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1948       if (!is_gimple_assign (oe1def))
1949 	continue;
1950       dcode = gimple_assign_rhs_code (oe1def);
1951       if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1952 	continue;
1953 
1954       tree rhs = gimple_assign_rhs1 (oe1def);
1955       tree vec = TREE_OPERAND (rhs, 0);
1956       tree vec_type = TREE_TYPE (vec);
1957 
1958       if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1959 	continue;
1960 
1961       /* Ignore it if target machine can't support this VECTOR type.  */
1962       if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1963 	continue;
1964 
1965       /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
1966       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1967 	continue;
1968 
1969       if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1970 	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1971 	continue;
1972 
1973       /* The type of BIT_FIELD_REF might not be equal to the element type of
1974 	 the vector.  We want to use a vector type with element type the
1975 	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
1976       if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1977 	{
1978 	  machine_mode simd_mode;
1979 	  unsigned HOST_WIDE_INT size, nunits;
1980 	  unsigned HOST_WIDE_INT elem_size
1981 	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1982 	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1983 	    continue;
1984 	  if (size <= elem_size || (size % elem_size) != 0)
1985 	    continue;
1986 	  nunits = size / elem_size;
1987 	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1988 				nunits).exists (&simd_mode))
1989 	    continue;
1990 	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1991 
1992 	  /* Ignore it if target machine can't support this VECTOR type.  */
1993 	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1994 	    continue;
1995 
1996 	  /* Check const vector type, constrain BIT_FIELD_REF offset and
1997 	     size.  */
1998 	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1999 	    continue;
2000 
2001 	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2002 			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2003 	    continue;
2004 	}
2005 
2006       tree elem_type = TREE_TYPE (vec_type);
2007       unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2008       if (maybe_ne (bit_field_size (rhs), elem_size))
2009 	continue;
2010 
2011       unsigned idx;
2012       if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2013 	continue;
2014 
2015       /* Ignore it if target machine can't support this type of VECTOR
2016          operation.  */
2017       optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2018       if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2019 	continue;
2020 
2021       bool existed;
2022       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2023       if (!existed)
2024 	{
2025 	  info = new v_info;
2026 	  info->vec_type = vec_type;
2027 	}
2028       else if (!types_compatible_p (vec_type, info->vec_type))
2029 	continue;
2030       info->vec.safe_push (std::make_pair (idx, i));
2031     }
2032 
2033   /* We need at least two VECTORs to combine.  */
2034   if (v_info_map.elements () <= 1)
2035     {
2036       cleanup_vinfo_map (v_info_map);
2037       return false;
2038     }
2039 
2040   /* Verify all VECTOR candidates by checking two conditions:
2041        1) sorted offsets are adjacent, no holes.
2042        2) can fill the whole VECTOR perfectly.
2043      And add the valid candidates to a vector for further handling.  */
2044   auto_vec<tree> valid_vecs (v_info_map.elements ());
2045   for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2046        it != v_info_map.end (); ++it)
2047     {
2048       tree cand_vec = (*it).first;
2049       v_info_ptr cand_info = (*it).second;
2050       unsigned int num_elems
2051 	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2052       if (cand_info->vec.length () != num_elems)
2053 	continue;
2054       sbitmap holes = sbitmap_alloc (num_elems);
2055       bitmap_ones (holes);
2056       bool valid = true;
2057       v_info_elem *curr;
2058       FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2059 	{
2060 	  if (!bitmap_bit_p (holes, curr->first))
2061 	    {
2062 	      valid = false;
2063 	      break;
2064 	    }
2065 	  else
2066 	    bitmap_clear_bit (holes, curr->first);
2067 	}
2068       if (valid && bitmap_empty_p (holes))
2069 	valid_vecs.quick_push (cand_vec);
2070       sbitmap_free (holes);
2071     }
2072 
2073   /* We need at least two VECTORs to combine.  */
2074   if (valid_vecs.length () <= 1)
2075     {
2076       cleanup_vinfo_map (v_info_map);
2077       return false;
2078     }
2079 
2080   valid_vecs.qsort (sort_by_mach_mode);
2081   /* Go through all candidates in machine mode order; modes with only a
2082      single candidate are skipped inside the loop.  */
2083   for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2084     {
2085       tree tvec = valid_vecs[i];
2086       enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2087 
2088       /* Skip modes with only a single candidate.  */
2089       if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2090 	continue;
2091 
2092       unsigned int idx, j;
2093       gimple *sum = NULL;
2094       tree sum_vec = tvec;
2095       v_info_ptr info_ptr = *(v_info_map.get (tvec));
2096       v_info_elem *elem;
2097       tree vec_type = info_ptr->vec_type;
2098 
2099       /* Build the sum for all candidates with the same mode.  */
2100       do
2101 	{
2102 	  sum = build_and_add_sum (vec_type, sum_vec,
2103 				   valid_vecs[i + 1], opcode);
2104 	  /* Update the operands only after build_and_add_sum,
2105 	     so that we don't have to repeat the placement algorithm
2106 	     of build_and_add_sum.  */
2107 	  if (sum_vec == tvec
2108 	      && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2109 	    {
2110 	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2111 	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2112 	      tree lhs = make_ssa_name (vec_type);
2113 	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2114 	      gimple_set_uid (g, gimple_uid (sum));
2115 	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2116 	      gimple_assign_set_rhs1 (sum, lhs);
2117 	      update_stmt (sum);
2118 	    }
2119 	  if (!useless_type_conversion_p (vec_type,
2120 					  TREE_TYPE (valid_vecs[i + 1])))
2121 	    {
2122 	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2123 	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2124 				 valid_vecs[i + 1]);
2125 	      tree lhs = make_ssa_name (vec_type);
2126 	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2127 	      gimple_set_uid (g, gimple_uid (sum));
2128 	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2129 	      gimple_assign_set_rhs2 (sum, lhs);
2130 	      update_stmt (sum);
2131 	    }
2132 	  sum_vec = gimple_get_lhs (sum);
2133 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2134 	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2135 	  /* Update the related ops of the current candidate VECTOR.  */
2136 	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2137 	    {
2138 	      idx = elem->second;
2139 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2140 	      /* Set this so that the op's definition will get DCEd later.  */
2141 	      gimple_set_visited (def, true);
2142 	      if (opcode == PLUS_EXPR
2143 		  || opcode == BIT_XOR_EXPR
2144 		  || opcode == BIT_IOR_EXPR)
2145 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2146 	      else if (opcode == MULT_EXPR)
2147 		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2148 	      else
2149 		{
2150 		  gcc_assert (opcode == BIT_AND_EXPR);
2151 		  (*ops)[idx]->op
2152 		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2153 		}
2154 	      (*ops)[idx]->rank = 0;
2155 	    }
2156 	  if (dump_file && (dump_flags & TDF_DETAILS))
2157 	    {
2158 	      fprintf (dump_file, "Generating addition -> ");
2159 	      print_gimple_stmt (dump_file, sum, 0);
2160 	    }
2161 	  i++;
2162 	}
2163       while ((i < valid_vecs.length () - 1)
2164 	     && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2165 
2166       /* Referring to the first valid VECTOR with this mode, generate the
2167          BIT_FIELD_REF statements accordingly.  */
2168       info_ptr = *(v_info_map.get (tvec));
2169       gcc_assert (sum);
2170       tree elem_type = TREE_TYPE (vec_type);
2171       FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2172 	{
2173 	  idx = elem->second;
2174 	  tree dst = make_ssa_name (elem_type);
2175 	  tree pos = bitsize_int (elem->first
2176 				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
2177 	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2178 			     TYPE_SIZE (elem_type), pos);
2179 	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2180 	  insert_stmt_after (gs, sum);
2181 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2182 	  /* Set this so that the op's definition will get DCEd later.  */
2183 	  gimple_set_visited (def, true);
2184 	  (*ops)[idx]->op = gimple_assign_lhs (gs);
2185 	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2186 	  if (dump_file && (dump_flags & TDF_DETAILS))
2187 	    {
2188 	      fprintf (dump_file, "Generating bit_field_ref -> ");
2189 	      print_gimple_stmt (dump_file, gs, 0);
2190 	    }
2191 	}
2192     }
2193 
2194   if (dump_file && (dump_flags & TDF_DETAILS))
2195     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2196 
2197   cleanup_vinfo_map (v_info_map);
2198 
2199   return true;
2200 }
2201 
2202 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2203    expression, examine the other OPS to see if any of them are comparisons
2204    of the same values, which we may be able to combine or eliminate.
2205    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
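
/* For illustration only (hypothetical function; integer operands, so no
   NaN concerns):

     int le (int a, int b)
     {
       return (a < b) | (a == b);   // combined into a <= b
     }

   The BIT_AND_EXPR direction works analogously, e.g. folding
   (a <= b) & (a != b) into a < b.  */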
2206 
2207 static bool
2208 eliminate_redundant_comparison (enum tree_code opcode,
2209 				vec<operand_entry *> *ops,
2210 				unsigned int currindex,
2211 				operand_entry *curr)
2212 {
2213   tree op1, op2;
2214   enum tree_code lcode, rcode;
2215   gimple *def1, *def2;
2216   int i;
2217   operand_entry *oe;
2218 
2219   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2220     return false;
2221 
2222   /* Check that CURR is a comparison.  */
2223   if (TREE_CODE (curr->op) != SSA_NAME)
2224     return false;
2225   def1 = SSA_NAME_DEF_STMT (curr->op);
2226   if (!is_gimple_assign (def1))
2227     return false;
2228   lcode = gimple_assign_rhs_code (def1);
2229   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2230     return false;
2231   op1 = gimple_assign_rhs1 (def1);
2232   op2 = gimple_assign_rhs2 (def1);
2233 
2234   /* Now look for a similar comparison in the remaining OPS.  */
2235   for (i = currindex + 1; ops->iterate (i, &oe); i++)
2236     {
2237       tree t;
2238 
2239       if (TREE_CODE (oe->op) != SSA_NAME)
2240 	continue;
2241       def2 = SSA_NAME_DEF_STMT (oe->op);
2242       if (!is_gimple_assign (def2))
2243 	continue;
2244       rcode = gimple_assign_rhs_code (def2);
2245       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2246 	continue;
2247 
2248       /* If we got here, we have a match.  See if we can combine the
2249 	 two comparisons.  */
2250       tree type = TREE_TYPE (gimple_assign_lhs (def1));
2251       if (opcode == BIT_IOR_EXPR)
2252 	t = maybe_fold_or_comparisons (type,
2253 				       lcode, op1, op2,
2254 				       rcode, gimple_assign_rhs1 (def2),
2255 				       gimple_assign_rhs2 (def2));
2256       else
2257 	t = maybe_fold_and_comparisons (type,
2258 					lcode, op1, op2,
2259 					rcode, gimple_assign_rhs1 (def2),
2260 					gimple_assign_rhs2 (def2));
2261       if (!t)
2262 	continue;
2263 
2264       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2265 	 always give us a boolean_type_node value back.  If the original
2266 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2267 	 we need to convert.  */
2268       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2269 	{
2270 	  if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2271 	    continue;
2272 	  t = fold_convert (TREE_TYPE (curr->op), t);
2273 	}
2274 
2275       if (TREE_CODE (t) != INTEGER_CST
2276 	  && !operand_equal_p (t, curr->op, 0))
2277 	{
2278 	  enum tree_code subcode;
2279 	  tree newop1, newop2;
2280 	  if (!COMPARISON_CLASS_P (t))
2281 	    continue;
2282 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2283 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2284 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2285 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2286 	    continue;
2287 	}
2288 
2289       if (dump_file && (dump_flags & TDF_DETAILS))
2290 	{
2291 	  fprintf (dump_file, "Equivalence: ");
2292 	  print_generic_expr (dump_file, curr->op);
2293 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
2294 	  print_generic_expr (dump_file, oe->op);
2295 	  fprintf (dump_file, " -> ");
2296 	  print_generic_expr (dump_file, t);
2297 	  fprintf (dump_file, "\n");
2298 	}
2299 
2300       /* Now we can delete oe, as it has been subsumed by the new combined
2301          expression t.  */
2302       ops->ordered_remove (i);
2303       reassociate_stats.ops_eliminated ++;
2304 
2305       /* If t is the same as curr->op, we're done.  Otherwise we must
2306 	 replace curr->op with t.  Special case is if we got a constant
2307 	 back, in which case we add it to the end instead of in place of
2308 	 the current entry.  */
2309       if (TREE_CODE (t) == INTEGER_CST)
2310 	{
2311 	  ops->ordered_remove (currindex);
2312 	  add_to_ops_vec (ops, t);
2313 	}
2314       else if (!operand_equal_p (t, curr->op, 0))
2315 	{
2316 	  gimple *sum;
2317 	  enum tree_code subcode;
2318 	  tree newop1;
2319 	  tree newop2;
2320 	  gcc_assert (COMPARISON_CLASS_P (t));
2321 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2322 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2323 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2324 	  gcc_checking_assert (is_gimple_val (newop1)
2325 			       && is_gimple_val (newop2));
2326 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2327 	  curr->op = gimple_get_lhs (sum);
2328 	}
2329       return true;
2330     }
2331 
2332   return false;
2333 }
2334 
2335 
2336 /* Transform repeated addition of the same value into a multiplication
2337    by a constant.  */
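
/* For illustration only (hypothetical function name).  Integer types are
   handled unconditionally; floating point requires
   -funsafe-math-optimizations:

     int quad (int x)
     {
       return x + x + x + x;   // rewritten as x * 4
     }
*/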
2338 static bool
2339 transform_add_to_multiply (vec<operand_entry *> *ops)
2340 {
2341   operand_entry *oe;
2342   tree op = NULL_TREE;
2343   int j;
2344   int i, start = -1, end = 0, count = 0;
2345   auto_vec<std::pair <int, int> > indxs;
2346   bool changed = false;
2347 
2348   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2349       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2350 	  || !flag_unsafe_math_optimizations))
2351     return false;
2352 
2353   /* Look for repeated operands.  */
2354   FOR_EACH_VEC_ELT (*ops, i, oe)
2355     {
2356       if (start == -1)
2357 	{
2358 	  count = 1;
2359 	  op = oe->op;
2360 	  start = i;
2361 	}
2362       else if (operand_equal_p (oe->op, op, 0))
2363 	{
2364 	  count++;
2365 	  end = i;
2366 	}
2367       else
2368 	{
2369 	  if (count > 1)
2370 	    indxs.safe_push (std::make_pair (start, end));
2371 	  count = 1;
2372 	  op = oe->op;
2373 	  start = i;
2374 	}
2375     }
2376 
2377   if (count > 1)
2378     indxs.safe_push (std::make_pair (start, end));
2379 
2380   for (j = indxs.length () - 1; j >= 0; --j)
2381     {
2382       /* Convert repeated operand addition to multiplication.  */
2383       start = indxs[j].first;
2384       end = indxs[j].second;
2385       op = (*ops)[start]->op;
2386       count = end - start + 1;
2387       for (i = end; i >= start; --i)
2388 	ops->unordered_remove (i);
2389       tree tmp = make_ssa_name (TREE_TYPE (op));
2390       tree cst = build_int_cst (integer_type_node, count);
2391       gassign *mul_stmt
2392 	= gimple_build_assign (tmp, MULT_EXPR,
2393 			       op, fold_convert (TREE_TYPE (op), cst));
2394       gimple_set_visited (mul_stmt, true);
2395       add_to_ops_vec (ops, tmp, mul_stmt);
2396       changed = true;
2397     }
2398 
2399   return changed;
2400 }
2401 
2402 
2403 /* Apply various identities and other optimizations to the list of
2404    operand entries stored in OPS.  The tree code for the binary
2405    operation between all the operands is OPCODE.  */
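
/* For illustration only: for a PLUS_EXPR chain linearized as {a, 3, 5},
   the two trailing constants fold to 8, yielding {a, 8}; the list is then
   re-optimized so further foldings and eliminations can cascade.  */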
2406 
2407 static void
2408 optimize_ops_list (enum tree_code opcode,
2409 		   vec<operand_entry *> *ops)
2410 {
2411   unsigned int length = ops->length ();
2412   unsigned int i;
2413   operand_entry *oe;
2414   operand_entry *oelast = NULL;
2415   bool iterate = false;
2416 
2417   if (length == 1)
2418     return;
2419 
2420   oelast = ops->last ();
2421 
2422   /* If the last two are constants, pop the constants off, merge them
2423      and try the next two.  */
2424   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2425     {
2426       operand_entry *oelm1 = (*ops)[length - 2];
2427 
2428       if (oelm1->rank == 0
2429 	  && is_gimple_min_invariant (oelm1->op)
2430 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2431 				       TREE_TYPE (oelast->op)))
2432 	{
2433 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2434 				     oelm1->op, oelast->op);
2435 
2436 	  if (folded && is_gimple_min_invariant (folded))
2437 	    {
2438 	      if (dump_file && (dump_flags & TDF_DETAILS))
2439 		fprintf (dump_file, "Merging constants\n");
2440 
2441 	      ops->pop ();
2442 	      ops->pop ();
2443 
2444 	      add_to_ops_vec (ops, folded);
2445 	      reassociate_stats.constants_eliminated++;
2446 
2447 	      optimize_ops_list (opcode, ops);
2448 	      return;
2449 	    }
2450 	}
2451     }
2452 
2453   eliminate_using_constants (opcode, ops);
2454   oelast = NULL;
2455 
2456   for (i = 0; ops->iterate (i, &oe);)
2457     {
2458       bool done = false;
2459 
2460       if (eliminate_not_pairs (opcode, ops, i, oe))
2461 	return;
2462       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2463 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2464 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2465 	{
2466 	  if (done)
2467 	    return;
2468 	  iterate = true;
2469 	  oelast = NULL;
2470 	  continue;
2471 	}
2472       oelast = oe;
2473       i++;
2474     }
2475 
2476   if (iterate)
2477     optimize_ops_list (opcode, ops);
2478 }
2479 
2480 /* The following functions are subroutines to optimize_range_tests and allow
2481    it to try to change a logical combination of comparisons into a range
2482    test.
2483 
2484    For example, both
2485 	X == 2 || X == 5 || X == 3 || X == 4
2486    and
2487 	X >= 2 && X <= 5
2488    are converted to
2489 	(unsigned) (X - 2) <= 3
2490 
2491    For more information see comments above fold_range_test in fold-const.cc,
2492    this implementation is for GIMPLE.  */
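
/* A worked instance of the example above: 2, 3, 4 and 5 form the
   contiguous range [2, 5], so (unsigned) (X - 2) <= 3 holds exactly for
   those values; any X < 2 wraps to a huge unsigned value, and any X > 5
   gives a difference greater than 3.  */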
2493 
2494 
2495 
2496 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP.  */
2497 
2498 void
2499 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2500 {
2501   if (!skip_exp)
2502     print_generic_expr (file, r->exp);
2503   fprintf (file, " %c[", r->in_p ? '+' : '-');
2504   print_generic_expr (file, r->low);
2505   fputs (", ", file);
2506   print_generic_expr (file, r->high);
2507   fputc (']', file);
2508 }
2509 
2510 /* Dump the range entry R to STDERR.  */
2511 
2512 DEBUG_FUNCTION void
2513 debug_range_entry (struct range_entry *r)
2514 {
2515   dump_range_entry (stderr, r, false);
2516   fputc ('\n', stderr);
2517 }
2518 
2519 /* This is similar to make_range in fold-const.cc, but on top of
2520    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2521    an SSA_NAME and STMT argument is ignored, otherwise STMT
2522    argument should be a GIMPLE_COND.  */
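
/* For illustration: given boolean _1 = x > 5 and _2 = ~_1, starting from
   "_2 != 0" the loop below first inverts the range through the
   BIT_NOT_EXPR (giving _1 == 0), then steps through the comparison via
   make_range_step, ending with exp == x, in_p == true, low == NULL
   (minimum) and high == 5, i.e. x <= 5.  */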
2523 
2524 void
2525 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2526 {
2527   int in_p;
2528   tree low, high;
2529   bool is_bool, strict_overflow_p;
2530 
2531   r->exp = NULL_TREE;
2532   r->in_p = false;
2533   r->strict_overflow_p = false;
2534   r->low = NULL_TREE;
2535   r->high = NULL_TREE;
2536   if (exp != NULL_TREE
2537       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2538     return;
2539 
2540   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2541      and see if we can refine the range.  Some of the cases below may not
2542      happen, but it doesn't seem worth worrying about this.  We "continue"
2543      the outer loop when we've changed something; otherwise we "break"
2544      the switch, which will "break" the while.  */
2545   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2546   high = low;
2547   in_p = 0;
2548   strict_overflow_p = false;
2549   is_bool = false;
2550   if (exp == NULL_TREE)
2551     is_bool = true;
2552   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2553     {
2554       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2555 	is_bool = true;
2556       else
2557 	return;
2558     }
2559   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2560     is_bool = true;
2561 
2562   while (1)
2563     {
2564       enum tree_code code;
2565       tree arg0, arg1, exp_type;
2566       tree nexp;
2567       location_t loc;
2568 
2569       if (exp != NULL_TREE)
2570 	{
2571 	  if (TREE_CODE (exp) != SSA_NAME
2572 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2573 	    break;
2574 
2575 	  stmt = SSA_NAME_DEF_STMT (exp);
2576 	  if (!is_gimple_assign (stmt))
2577 	    break;
2578 
2579 	  code = gimple_assign_rhs_code (stmt);
2580 	  arg0 = gimple_assign_rhs1 (stmt);
2581 	  arg1 = gimple_assign_rhs2 (stmt);
2582 	  exp_type = TREE_TYPE (exp);
2583 	}
2584       else
2585 	{
2586 	  code = gimple_cond_code (stmt);
2587 	  arg0 = gimple_cond_lhs (stmt);
2588 	  arg1 = gimple_cond_rhs (stmt);
2589 	  exp_type = boolean_type_node;
2590 	}
2591 
2592       if (TREE_CODE (arg0) != SSA_NAME
2593 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2594 	break;
2595       loc = gimple_location (stmt);
2596       switch (code)
2597 	{
2598 	case BIT_NOT_EXPR:
2599 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2600 	      /* Ensure the range is either +[-,0], +[0,0],
2601 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2602 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2603 		 or similar expression of unconditional true or
2604 		 false, it should not be negated.  */
2605 	      && ((high && integer_zerop (high))
2606 		  || (low && integer_onep (low))))
2607 	    {
2608 	      in_p = !in_p;
2609 	      exp = arg0;
2610 	      continue;
2611 	    }
2612 	  break;
2613 	case SSA_NAME:
2614 	  exp = arg0;
2615 	  continue;
2616 	CASE_CONVERT:
2617 	  if (is_bool)
2618 	    {
2619 	      if ((TYPE_PRECISION (exp_type) == 1
2620 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2621 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2622 		return;
2623 	    }
2624 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2625 	    {
2626 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2627 		is_bool = true;
2628 	      else
2629 		return;
2630 	    }
2631 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2632 	    is_bool = true;
2633 	  goto do_default;
2634 	case EQ_EXPR:
2635 	case NE_EXPR:
2636 	case LT_EXPR:
2637 	case LE_EXPR:
2638 	case GE_EXPR:
2639 	case GT_EXPR:
2640 	  is_bool = true;
2641 	  /* FALLTHRU */
2642 	default:
2643 	  if (!is_bool)
2644 	    return;
2645 	do_default:
2646 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2647 				  &low, &high, &in_p,
2648 				  &strict_overflow_p);
2649 	  if (nexp != NULL_TREE)
2650 	    {
2651 	      exp = nexp;
2652 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2653 	      continue;
2654 	    }
2655 	  break;
2656 	}
2657       break;
2658     }
2659   if (is_bool)
2660     {
2661       r->exp = exp;
2662       r->in_p = in_p;
2663       r->low = low;
2664       r->high = high;
2665       r->strict_overflow_p = strict_overflow_p;
2666     }
2667 }
2668 
2669 /* Comparison function for qsort.  Sort entries
2670    without SSA_NAME exp first, then with SSA_NAMEs sorted
2671    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2672    by increasing ->low and if ->low is the same, by increasing
2673    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2674    maximum.  */
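
/* For illustration: entries whose exp is not an SSA_NAME sort first; for
   a common SSA_NAME, e.g. +[2, 3] sorts before +[10, 11], so mergeable
   ranges end up adjacent for the scans in optimize_range_tests.  */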
2675 
2676 static int
2677 range_entry_cmp (const void *a, const void *b)
2678 {
2679   const struct range_entry *p = (const struct range_entry *) a;
2680   const struct range_entry *q = (const struct range_entry *) b;
2681 
2682   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2683     {
2684       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2685 	{
2686 	  /* Group range_entries for the same SSA_NAME together.  */
2687 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2688 	    return -1;
2689 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2690 	    return 1;
2691 	  /* If ->low is different, NULL low goes first, then by
2692 	     ascending low.  */
2693 	  if (p->low != NULL_TREE)
2694 	    {
2695 	      if (q->low != NULL_TREE)
2696 		{
2697 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2698 					  p->low, q->low);
2699 		  if (tem && integer_onep (tem))
2700 		    return -1;
2701 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2702 				     p->low, q->low);
2703 		  if (tem && integer_onep (tem))
2704 		    return 1;
2705 		}
2706 	      else
2707 		return 1;
2708 	    }
2709 	  else if (q->low != NULL_TREE)
2710 	    return -1;
2711 	  /* If ->high is different, NULL high goes last, before that by
2712 	     ascending high.  */
2713 	  if (p->high != NULL_TREE)
2714 	    {
2715 	      if (q->high != NULL_TREE)
2716 		{
2717 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2718 					  p->high, q->high);
2719 		  if (tem && integer_onep (tem))
2720 		    return -1;
2721 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2722 				     p->high, q->high);
2723 		  if (tem && integer_onep (tem))
2724 		    return 1;
2725 		}
2726 	      else
2727 		return -1;
2728 	    }
2729 	  else if (q->high != NULL_TREE)
2730 	    return 1;
2731 	  /* If both ranges are the same, sort below by ascending idx.  */
2732 	}
2733       else
2734 	return 1;
2735     }
2736   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2737     return -1;
2738 
2739   if (p->idx < q->idx)
2740     return -1;
2741   else
2742     {
2743       gcc_checking_assert (p->idx > q->idx);
2744       return 1;
2745     }
2746 }
2747 
2748 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2749    inserting any needed statements before or after GSI, as BEFORE says.  */
2750 
2751 static tree
2752 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2753 {
2754   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2755   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2756   if (TREE_CODE (ret) != SSA_NAME)
2757     {
2758       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2759       if (before)
2760 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2761       else
2762 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2763       ret = gimple_assign_lhs (g);
2764     }
2765   return ret;
2766 }
2767 
2768 /* Helper routine of optimize_range_tests.
2769    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2770    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2771    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2772    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2773    an array of COUNT pointers to other ranges.  Return
2774    true if the range merge has been successful.
2775    If OPCODE is ERROR_MARK, this is called from within
2776    maybe_optimize_range_tests and is performing inter-bb range optimization.
2777    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2778    oe->rank.  */
2779 
2780 static bool
2781 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2782 		   struct range_entry **otherrangep,
2783 		   unsigned int count, enum tree_code opcode,
2784 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2785 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2786 {
2787   unsigned int idx = range->idx;
2788   struct range_entry *swap_with = NULL;
2789   basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2790   if (opcode == ERROR_MARK)
2791     {
2792       /* For inter-bb range test optimization, pick from the range tests
2793 	 the one which is tested in the earliest condition (one dominating
2794 	 the others), because otherwise there could be some UB (e.g. signed
2795 	 overflow) in following bbs that we'd expose which wasn't there in
2796 	 the original program.  See PR104196.  */
2797       basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2798       basic_block range_bb = orig_range_bb;
2799       for (unsigned int i = 0; i < count; i++)
2800 	{
2801 	  struct range_entry *this_range;
2802 	  if (otherrange)
2803 	    this_range = otherrange + i;
2804 	  else
2805 	    this_range = otherrangep[i];
2806 	  operand_entry *oe = (*ops)[this_range->idx];
2807 	  basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2808 	  if (range_bb != this_bb
2809 	      && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2810 	    {
2811 	      swap_with = this_range;
2812 	      range_bb = this_bb;
2813 	      idx = this_range->idx;
2814 	    }
2815 	}
2816       /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2817 	 only defined in later blocks.  In this case we can't move the
2818 	 merged comparison earlier, so instead check if there are any stmts
2819 	 that might trigger signed integer overflow in between and rewrite
2820 	 them.  But only after we check if the optimization is possible.  */
2821       if (seq && swap_with)
2822 	{
2823 	  rewrite_bb_first = range_bb;
2824 	  rewrite_bb_last = orig_range_bb;
2825 	  idx = range->idx;
2826 	  swap_with = NULL;
2827 	}
2828     }
2829   operand_entry *oe = (*ops)[idx];
2830   tree op = oe->op;
2831   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2832 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2833   location_t loc = gimple_location (stmt);
2834   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2835   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2836 				in_p, low, high);
2837   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2838   gimple_stmt_iterator gsi;
2839   unsigned int i, uid;
2840 
2841   if (tem == NULL_TREE)
2842     return false;
2843 
2844   /* If op is default def SSA_NAME, there is no place to insert the
2845      new comparison.  Give up, unless we can use OP itself as the
2846      range test.  */
2847   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2848     {
2849       if (op == range->exp
2850 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2851 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2852 	  && (op == tem
2853 	      || (TREE_CODE (tem) == EQ_EXPR
2854 		  && TREE_OPERAND (tem, 0) == op
2855 		  && integer_onep (TREE_OPERAND (tem, 1))))
2856 	  && opcode != BIT_IOR_EXPR
2857 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2858 	{
2859 	  stmt = NULL;
2860 	  tem = op;
2861 	}
2862       else
2863 	return false;
2864     }
2865 
2866   if (swap_with)
2867     std::swap (range->idx, swap_with->idx);
2868 
2869   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2870     warning_at (loc, OPT_Wstrict_overflow,
2871 		"assuming signed overflow does not occur "
2872 		"when simplifying range test");
2873 
2874   if (dump_file && (dump_flags & TDF_DETAILS))
2875     {
2876       struct range_entry *r;
2877       fprintf (dump_file, "Optimizing range tests ");
2878       dump_range_entry (dump_file, range, false);
2879       for (i = 0; i < count; i++)
2880 	{
2881 	  if (otherrange)
2882 	    r = otherrange + i;
2883 	  else
2884 	    r = otherrangep[i];
2885 	  if (r->exp
2886 	      && r->exp != range->exp
2887 	      && TREE_CODE (r->exp) == SSA_NAME)
2888 	    {
2889 	      fprintf (dump_file, " and ");
2890 	      dump_range_entry (dump_file, r, false);
2891 	    }
2892 	  else
2893 	    {
2894 	      fprintf (dump_file, " and");
2895 	      dump_range_entry (dump_file, r, true);
2896 	    }
2897 	}
2898       fprintf (dump_file, "\n into ");
2899       print_generic_expr (dump_file, tem);
2900       fprintf (dump_file, "\n");
2901     }
2902 
2903   /* In inter-bb range optimization mode, if we have a seq, we can't
2904      move the merged comparison to the earliest bb from the comparisons
2905      being replaced, so instead rewrite stmts that could trigger signed
2906      integer overflow.  */
2907   for (basic_block bb = rewrite_bb_last;
2908        bb != rewrite_bb_first; bb = single_pred (bb))
2909     for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2910 	 !gsi_end_p (gsi); gsi_next (&gsi))
2911       {
2912 	gimple *stmt = gsi_stmt (gsi);
2913 	if (is_gimple_assign (stmt))
2914 	  if (tree lhs = gimple_assign_lhs (stmt))
2915 	    if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2916 		 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2917 		&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2918 	      {
2919 		enum tree_code code = gimple_assign_rhs_code (stmt);
2920 		if (arith_code_with_undefined_signed_overflow (code))
2921 		  {
2922 		    gimple_stmt_iterator gsip = gsi;
2923 		    gimple_stmt_iterator gsin = gsi;
2924 		    gsi_prev (&gsip);
2925 		    gsi_next (&gsin);
2926 		    rewrite_to_defined_overflow (stmt, true);
2927 		    unsigned uid = gimple_uid (stmt);
2928 		    if (gsi_end_p (gsip))
2929 		      gsip = gsi_after_labels (bb);
2930 		    else
2931 		      gsi_next (&gsip);
2932 		    for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2933 			 gsi_next (&gsip))
2934 		      gimple_set_uid (gsi_stmt (gsip), uid);
2935 		  }
2936 	      }
2937       }
2938 
2939   if (opcode == BIT_IOR_EXPR
2940       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2941     tem = invert_truthvalue_loc (loc, tem);
2942 
2943   tem = fold_convert_loc (loc, optype, tem);
2944   if (stmt)
2945     {
2946       gsi = gsi_for_stmt (stmt);
2947       uid = gimple_uid (stmt);
2948     }
2949   else
2950     {
2951       gsi = gsi_none ();
2952       uid = 0;
2953     }
2954   if (stmt == NULL)
2955     gcc_checking_assert (tem == op);
2956   /* In rare cases range->exp can be equal to lhs of stmt.
2957      In that case we have to insert after the stmt rather than before
2958      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2959   else if (op != range->exp)
2960     {
2961       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2962       tem = force_into_ssa_name (&gsi, tem, true);
2963       gsi_prev (&gsi);
2964     }
2965   else if (gimple_code (stmt) != GIMPLE_PHI)
2966     {
2967       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2968       tem = force_into_ssa_name (&gsi, tem, false);
2969     }
2970   else
2971     {
2972       gsi = gsi_after_labels (gimple_bb (stmt));
2973       if (!gsi_end_p (gsi))
2974 	uid = gimple_uid (gsi_stmt (gsi));
2975       else
2976 	{
2977 	  gsi = gsi_start_bb (gimple_bb (stmt));
2978 	  uid = 1;
2979 	  while (!gsi_end_p (gsi))
2980 	    {
2981 	      uid = gimple_uid (gsi_stmt (gsi));
2982 	      gsi_next (&gsi);
2983 	    }
2984 	}
2985       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2986       tem = force_into_ssa_name (&gsi, tem, true);
2987       if (gsi_end_p (gsi))
2988 	gsi = gsi_last_bb (gimple_bb (stmt));
2989       else
2990 	gsi_prev (&gsi);
2991     }
2992   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2993     if (gimple_uid (gsi_stmt (gsi)))
2994       break;
2995     else
2996       gimple_set_uid (gsi_stmt (gsi), uid);
2997 
2998   oe->op = tem;
2999   range->exp = exp;
3000   range->low = low;
3001   range->high = high;
3002   range->in_p = in_p;
3003   range->strict_overflow_p = false;
3004 
3005   for (i = 0; i < count; i++)
3006     {
3007       if (otherrange)
3008 	range = otherrange + i;
3009       else
3010 	range = otherrangep[i];
3011       oe = (*ops)[range->idx];
3012       /* Now change all the other range test immediate uses, so that
3013 	 those tests will be optimized away.  */
3014       if (opcode == ERROR_MARK)
3015 	{
3016 	  if (oe->op)
3017 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3018 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3019 	  else
3020 	    oe->op = (oe->rank == BIT_IOR_EXPR
3021 		      ? boolean_false_node : boolean_true_node);
3022 	}
3023       else
3024 	oe->op = error_mark_node;
3025       range->exp = NULL_TREE;
3026       range->low = NULL_TREE;
3027       range->high = NULL_TREE;
3028     }
3029   return true;
3030 }
3031 
3032 /* Optimize X == CST1 || X == CST2
3033    if popcount (CST1 ^ CST2) == 1 into
3034    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3035    Similarly for ranges.  E.g.
3036    X != 2 && X != 3 && X != 10 && X != 11
3037    will be transformed by the previous optimization into
3038    !((X - 2U) <= 1U || (X - 10U) <= 1U)
3039    and this loop can transform that into
3040    !(((X & ~8) - 2U) <= 1U).  */
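
/* A worked instance with concrete constants: for X == 4 || X == 6,
   CST1 ^ CST2 == 2 has popcount 1, so the test becomes
   (X & ~2) == (4 & ~2), i.e. (X & ~2) == 4; X == 4 and X == 6 both
   yield 4, while e.g. X == 5 yields 5.  */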
3041 
3042 static bool
3043 optimize_range_tests_xor (enum tree_code opcode, tree type,
3044 			  tree lowi, tree lowj, tree highi, tree highj,
3045 			  vec<operand_entry *> *ops,
3046 			  struct range_entry *rangei,
3047 			  struct range_entry *rangej)
3048 {
3049   tree lowxor, highxor, tem, exp;
3050   /* Check lowi ^ lowj == highi ^ highj and
3051      popcount (lowi ^ lowj) == 1.  */
3052   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3053   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3054     return false;
3055   if (!integer_pow2p (lowxor))
3056     return false;
3057   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3058   if (!tree_int_cst_equal (lowxor, highxor))
3059     return false;
3060 
3061   exp = rangei->exp;
3062   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3063   int prec = GET_MODE_PRECISION (mode);
3064   if (TYPE_PRECISION (type) < prec
3065       || (wi::to_wide (TYPE_MIN_VALUE (type))
3066 	  != wi::min_value (prec, TYPE_SIGN (type)))
3067       || (wi::to_wide (TYPE_MAX_VALUE (type))
3068 	  != wi::max_value (prec, TYPE_SIGN (type))))
3069     {
3070       type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3071       exp = fold_convert (type, exp);
3072       lowxor = fold_convert (type, lowxor);
3073       lowi = fold_convert (type, lowi);
3074       highi = fold_convert (type, highi);
3075     }
3076   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3077   exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3078   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3079   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3080   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3081 			 NULL, rangei->in_p, lowj, highj,
3082 			 rangei->strict_overflow_p
3083 			 || rangej->strict_overflow_p))
3084     return true;
3085   return false;
3086 }
3087 
3088 /* Optimize X == CST1 || X == CST2
3089    if popcount (CST2 - CST1) == 1 into
3090    ((X - CST1) & ~(CST2 - CST1)) == 0.
3091    Similarly for ranges.  E.g.
3092    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3093    || X == 75 || X == 45
3094    will be transformed by the previous optimization into
3095    (X - 43U) <= 3U || (X - 75U) <= 3U
3096    and this loop can transform that into
3097    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
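
/* A worked instance with concrete constants: for X == 3 || X == 11,
   CST2 - CST1 == 8 has popcount 1, so the test becomes
   ((X - 3) & ~8) == 0; X == 3 gives 0 & ~8 == 0, X == 11 gives
   8 & ~8 == 0, while e.g. X == 7 gives 4 & ~8 == 4.  */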
3098 static bool
3099 optimize_range_tests_diff (enum tree_code opcode, tree type,
3100 			   tree lowi, tree lowj, tree highi, tree highj,
3101 			   vec<operand_entry *> *ops,
3102 			   struct range_entry *rangei,
3103 			   struct range_entry *rangej)
3104 {
3105   tree tem1, tem2, mask;
3106   /* Check highi - lowi == highj - lowj.  */
3107   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3108   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3109     return false;
3110   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3111   if (!tree_int_cst_equal (tem1, tem2))
3112     return false;
3113   /* Check popcount (lowj - lowi) == 1.  */
3114   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3115   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3116     return false;
3117   if (!integer_pow2p (tem1))
3118     return false;
3119 
3120   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3121   int prec = GET_MODE_PRECISION (mode);
3122   if (TYPE_PRECISION (type) < prec
3123       || (wi::to_wide (TYPE_MIN_VALUE (type))
3124 	  != wi::min_value (prec, TYPE_SIGN (type)))
3125       || (wi::to_wide (TYPE_MAX_VALUE (type))
3126 	  != wi::max_value (prec, TYPE_SIGN (type))))
3127     type = build_nonstandard_integer_type (prec, 1);
3128   else
3129     type = unsigned_type_for (type);
3130   tem1 = fold_convert (type, tem1);
3131   tem2 = fold_convert (type, tem2);
3132   lowi = fold_convert (type, lowi);
3133   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3134   tem1 = fold_build2 (MINUS_EXPR, type,
3135 		      fold_convert (type, rangei->exp), lowi);
3136   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3137   lowj = build_int_cst (type, 0);
3138   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3139 			 NULL, rangei->in_p, lowj, tem2,
3140 			 rangei->strict_overflow_p
3141 			 || rangej->strict_overflow_p))
3142     return true;
3143   return false;
3144 }
3145 
3146 /* Perform the checks common to optimize_range_tests_xor and
3147    optimize_range_tests_diff on pairs of ranges.
3148    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3149    else call optimize_range_tests_diff.  */
3150 
3151 static bool
3152 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3153 			bool optimize_xor, vec<operand_entry *> *ops,
3154 			struct range_entry *ranges)
3155 {
3156   int i, j;
3157   bool any_changes = false;
3158   for (i = first; i < length; i++)
3159     {
3160       tree lowi, highi, lowj, highj, type, tem;
3161 
3162       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3163 	continue;
3164       type = TREE_TYPE (ranges[i].exp);
3165       if (!INTEGRAL_TYPE_P (type))
3166 	continue;
3167       lowi = ranges[i].low;
3168       if (lowi == NULL_TREE)
3169 	lowi = TYPE_MIN_VALUE (type);
3170       highi = ranges[i].high;
3171       if (highi == NULL_TREE)
3172 	continue;
3173       for (j = i + 1; j < length && j < i + 64; j++)
3174 	{
3175 	  bool changes;
3176 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3177 	    continue;
3178 	  lowj = ranges[j].low;
3179 	  if (lowj == NULL_TREE)
3180 	    continue;
3181 	  highj = ranges[j].high;
3182 	  if (highj == NULL_TREE)
3183 	    highj = TYPE_MAX_VALUE (type);
3184 	  /* Check lowj > highi.  */
3185 	  tem = fold_binary (GT_EXPR, boolean_type_node,
3186 			     lowj, highi);
3187 	  if (tem == NULL_TREE || !integer_onep (tem))
3188 	    continue;
3189 	  if (optimize_xor)
3190 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3191 						highi, highj, ops,
3192 						ranges + i, ranges + j);
3193 	  else
3194 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3195 						 highi, highj, ops,
3196 						 ranges + i, ranges + j);
3197 	  if (changes)
3198 	    {
3199 	      any_changes = true;
3200 	      break;
3201 	    }
3202 	}
3203     }
3204   return any_changes;
3205 }
3206 
3207 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
3208    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3209    EXP on success, NULL otherwise.  */
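
/* For illustration: with prec == 64, low == 43 and high == 46, the
   computed mask is 0xF (bits 0 through 3 set); the caller later shifts it
   so each bit k stands for the value TOTALLOW + k.  */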
3210 
3211 static tree
3212 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3213 		       wide_int *mask, tree *totallowp)
3214 {
3215   tree tem = int_const_binop (MINUS_EXPR, high, low);
3216   if (tem == NULL_TREE
3217       || TREE_CODE (tem) != INTEGER_CST
3218       || TREE_OVERFLOW (tem)
3219       || tree_int_cst_sgn (tem) == -1
3220       || compare_tree_int (tem, prec) != -1)
3221     return NULL_TREE;
3222 
3223   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3224   *mask = wi::shifted_mask (0, max, false, prec);
3225   if (TREE_CODE (exp) == BIT_AND_EXPR
3226       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3227     {
3228       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3229       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3230       if (wi::popcount (msk) == 1
3231 	  && wi::ltu_p (msk, prec - max))
3232 	{
3233 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3234 	  max += msk.to_uhwi ();
3235 	  exp = TREE_OPERAND (exp, 0);
3236 	  if (integer_zerop (low)
3237 	      && TREE_CODE (exp) == PLUS_EXPR
3238 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3239 	    {
3240 	      tree ret = TREE_OPERAND (exp, 0);
3241 	      STRIP_NOPS (ret);
3242 	      widest_int bias
3243 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3244 				     TYPE_PRECISION (TREE_TYPE (low))));
3245 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3246 	      if (totallowp)
3247 		{
3248 		  *totallowp = tbias;
3249 		  return ret;
3250 		}
3251 	      else if (!tree_int_cst_lt (totallow, tbias))
3252 		return NULL_TREE;
3253 	      bias = wi::to_widest (tbias);
3254 	      bias -= wi::to_widest (totallow);
3255 	      if (bias >= 0 && bias < prec - max)
3256 		{
3257 		  *mask = wi::lshift (*mask, bias);
3258 		  return ret;
3259 		}
3260 	    }
3261 	}
3262     }
3263   if (totallowp)
3264     return exp;
3265   if (!tree_int_cst_lt (totallow, low))
3266     return exp;
3267   tem = int_const_binop (MINUS_EXPR, low, totallow);
3268   if (tem == NULL_TREE
3269       || TREE_CODE (tem) != INTEGER_CST
3270       || TREE_OVERFLOW (tem)
3271       || compare_tree_int (tem, prec - max) == 1)
3272     return NULL_TREE;
3273 
3274   *mask = wi::lshift (*mask, wi::to_widest (tem));
3275   return exp;
3276 }
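
/* Worked example for the helper above (made-up values, prec == 64).
   For a plain EXP == x with TOTALLOW == LOW == 43, HIGH == 46:
   tem == 3, so max == 4 and *mask == 0xf.  A later call with
   TOTALLOW == 43, LOW == 75, HIGH == 78 reaches the final shift:
   tem == 75 - 43 == 32 is below prec - max == 60, so the returned
   mask is 0xf << 32 == 0xf00000000.  The BIT_AND_EXPR/PLUS_EXPR
   branch handles the already folded form (x - 43U) & ~32U in [0, 3]:
   the complement of ~32U is the single bit 32, widening *mask to
   0xf0000000f, x - 43U is peeled back to x and 43 is returned via
   *TOTALLOWP.  OR-ing such per-range masks is what yields
   0x8F0000004FULL in the example below.  */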
3277 
3278 /* Attempt to optimize small range tests using bit test.
3279    E.g.
3280    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3281    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3282    has been optimized by earlier passes into:
3283    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3284    As the whole 43 through 82 range spans fewer than 64 values,
3285    for 64-bit word targets optimize that into:
3286    (X - 43U) > 39U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
3287 
3288 static bool
3289 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3290 				  vec<operand_entry *> *ops,
3291 				  struct range_entry *ranges)
3292 {
3293   int i, j;
3294   bool any_changes = false;
3295   int prec = GET_MODE_BITSIZE (word_mode);
3296   auto_vec<struct range_entry *, 64> candidates;
3297 
3298   for (i = first; i < length - 1; i++)
3299     {
3300       tree lowi, highi, lowj, highj, type;
3301 
3302       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3303 	continue;
3304       type = TREE_TYPE (ranges[i].exp);
3305       if (!INTEGRAL_TYPE_P (type))
3306 	continue;
3307       lowi = ranges[i].low;
3308       if (lowi == NULL_TREE)
3309 	lowi = TYPE_MIN_VALUE (type);
3310       highi = ranges[i].high;
3311       if (highi == NULL_TREE)
3312 	continue;
3313       wide_int mask;
3314       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3315 					highi, &mask, &lowi);
3316       if (exp == NULL_TREE)
3317 	continue;
3318       bool strict_overflow_p = ranges[i].strict_overflow_p;
3319       candidates.truncate (0);
3320       int end = MIN (i + 64, length);
3321       for (j = i + 1; j < end; j++)
3322 	{
3323 	  tree exp2;
3324 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3325 	    continue;
3326 	  if (ranges[j].exp == exp)
3327 	    ;
3328 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3329 	    {
3330 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
3331 	      if (exp2 == exp)
3332 		;
3333 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
3334 		{
3335 		  exp2 = TREE_OPERAND (exp2, 0);
3336 		  STRIP_NOPS (exp2);
3337 		  if (exp2 != exp)
3338 		    continue;
3339 		}
3340 	      else
3341 		continue;
3342 	    }
3343 	  else
3344 	    continue;
3345 	  lowj = ranges[j].low;
3346 	  if (lowj == NULL_TREE)
3347 	    continue;
3348 	  highj = ranges[j].high;
3349 	  if (highj == NULL_TREE)
3350 	    highj = TYPE_MAX_VALUE (type);
3351 	  wide_int mask2;
3352 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3353 					highj, &mask2, NULL);
3354 	  if (exp2 != exp)
3355 	    continue;
3356 	  mask |= mask2;
3357 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3358 	  candidates.safe_push (&ranges[j]);
3359 	}
3360 
3361       /* If every possible relative value of the expression is a valid shift
3362 	 amount, then we can merge the entry test into the bit test.  In this
3363 	 case, if we would otherwise need 2 or more comparisons, then use
3364 	 the bit test; in the other cases, the threshold is 3 comparisons.  */
3365       bool entry_test_needed;
3366       value_range r;
3367       if (TREE_CODE (exp) == SSA_NAME
3368 	  && get_range_query (cfun)->range_of_expr (r, exp)
3369 	  && r.kind () == VR_RANGE
3370 	  && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3371 	{
3372 	  wide_int min = r.lower_bound ();
3373 	  wide_int ilowi = wi::to_wide (lowi);
3374 	  if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3375 	    {
3376 	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3377 	      mask = wi::lshift (mask, ilowi - min);
3378 	    }
3379 	  else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3380 	    {
3381 	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3382 	      mask = wi::lrshift (mask, min - ilowi);
3383 	    }
3384 	  entry_test_needed = false;
3385 	}
3386       else
3387 	entry_test_needed = true;
3388       if (candidates.length () >= (entry_test_needed ? 2 : 1))
3389 	{
3390 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
3391 					wi::to_widest (lowi)
3392 					+ prec - 1 - wi::clz (mask));
3393 	  operand_entry *oe = (*ops)[ranges[i].idx];
3394 	  tree op = oe->op;
3395 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3396 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3397 	  location_t loc = gimple_location (stmt);
3398 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3399 
3400 	  /* See if it is cheaper to pretend the minimum value of the
3401 	     range is 0, if the maximum value is small enough.
3402 	     We can then avoid the subtraction of the minimum value, but
3403 	     the mask constant could perhaps be more expensive.  */
3404 	  if (compare_tree_int (lowi, 0) > 0
3405 	      && compare_tree_int (high, prec) < 0)
3406 	    {
3407 	      int cost_diff;
3408 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
3409 	      rtx reg = gen_raw_REG (word_mode, 10000);
3410 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3411 	      cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3412 						      GEN_INT (-m)),
3413 					word_mode, speed_p);
3414 	      rtx r = immed_wide_int_const (mask, word_mode);
3415 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3416 					 word_mode, speed_p);
3417 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3418 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3419 					 word_mode, speed_p);
3420 	      if (cost_diff > 0)
3421 		{
3422 		  mask = wi::lshift (mask, m);
3423 		  lowi = build_zero_cst (TREE_TYPE (lowi));
3424 		}
3425 	    }
3426 
3427 	  tree tem;
3428 	  if (entry_test_needed)
3429 	    {
3430 	      tem = build_range_check (loc, optype, unshare_expr (exp),
3431 				       false, lowi, high);
3432 	      if (tem == NULL_TREE || is_gimple_val (tem))
3433 		continue;
3434 	    }
3435 	  else
3436 	    tem = NULL_TREE;
3437 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
3438 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3439 				 fold_convert_loc (loc, etype, exp),
3440 				 fold_convert_loc (loc, etype, lowi));
3441 	  exp = fold_convert_loc (loc, integer_type_node, exp);
3442 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3443 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3444 				 build_int_cst (word_type, 1), exp);
3445 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3446 				 wide_int_to_tree (word_type, mask));
3447 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3448 				 build_zero_cst (word_type));
3449 	  if (is_gimple_val (exp))
3450 	    continue;
3451 
3452 	  /* The shift might have undefined behavior if TEM is true,
3453 	     but reassociate_bb isn't prepared to have basic blocks
3454 	     split when it is running.  So, temporarily emit the code
3455 	     with BIT_IOR_EXPR instead of &&, and fix it up in
3456 	     branch_fixup.  */
3457 	  gimple_seq seq = NULL;
3458 	  if (tem)
3459 	    {
3460 	      tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3461 	      gcc_assert (TREE_CODE (tem) == SSA_NAME);
3462 	      gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3463 	    }
3464 	  gimple_seq seq2;
3465 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3466 	  gimple_seq_add_seq_without_update (&seq, seq2);
3467 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
3468 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3469 	  if (tem)
3470 	    {
3471 	      gimple *g = gimple_build_assign (make_ssa_name (optype),
3472 					       BIT_IOR_EXPR, tem, exp);
3473 	      gimple_set_location (g, loc);
3474 	      gimple_seq_add_stmt_without_update (&seq, g);
3475 	      exp = gimple_assign_lhs (g);
3476 	    }
3477 	  tree val = build_zero_cst (optype);
3478 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
3479 				 candidates.length (), opcode, ops, exp,
3480 				 seq, false, val, val, strict_overflow_p))
3481 	    {
3482 	      any_changes = true;
3483 	      if (tem)
3484 		reassoc_branch_fixups.safe_push (tem);
3485 	    }
3486 	  else
3487 	    gimple_seq_discard (seq);
3488 	}
3489     }
3490   return any_changes;
3491 }
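
/* Sketch of the statements built above for the introductory example
   (SSA names invented, casts omitted):

     _1 = x_10 - 43;
     _2 = _1 > 39;                 entry test, emitted only when the
                                   recorded range of x doesn't already
                                   fit in [0, prec - 1]
     _3 = (int) _1;
     _4 = 1 << _3;                 in word_mode
     _5 = _4 & 0x8F0000004F;
     _6 = _5 == 0;
     _7 = _2 | _6;

   update_range_test then replaces the merged entries with a test on
   _7.  BIT_IOR_EXPR stands in for || here; reassoc_branch_fixups
   later splits the block so the shift is not executed when _2 is
   true.  */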
3492 
3493 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3494    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3495    Also, handle x < C && y < C && z < C where C is a power of two as
3496    (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
3497    as (x | y | z) < 0.  */
3498 
3499 static bool
3500 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3501 				  vec<operand_entry *> *ops,
3502 				  struct range_entry *ranges)
3503 {
3504   int i;
3505   unsigned int b;
3506   bool any_changes = false;
3507   auto_vec<int, 128> buckets;
3508   auto_vec<int, 32> chains;
3509   auto_vec<struct range_entry *, 32> candidates;
3510 
3511   for (i = first; i < length; i++)
3512     {
3513       int idx;
3514 
3515       if (ranges[i].exp == NULL_TREE
3516 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3517 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3518 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3519 	continue;
3520 
3521       if (ranges[i].low != NULL_TREE
3522 	  && ranges[i].high != NULL_TREE
3523 	  && ranges[i].in_p
3524 	  && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3525 	{
3526 	  idx = !integer_zerop (ranges[i].low);
3527 	  if (idx && !integer_all_onesp (ranges[i].low))
3528 	    continue;
3529 	}
3530       else if (ranges[i].high != NULL_TREE
3531 	       && TREE_CODE (ranges[i].high) == INTEGER_CST
3532 	       && ranges[i].in_p)
3533 	{
3534 	  wide_int w = wi::to_wide (ranges[i].high);
3535 	  int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3536 	  int l = wi::clz (w);
3537 	  idx = 2;
3538 	  if (l <= 0
3539 	      || l >= prec
3540 	      || w != wi::mask (prec - l, false, prec))
3541 	    continue;
3542 	  if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3543 		 && ranges[i].low == NULL_TREE)
3544 		|| (ranges[i].low
3545 		    && integer_zerop (ranges[i].low))))
3546 	    continue;
3547 	}
3548       else if (ranges[i].high == NULL_TREE
3549 	       && ranges[i].low != NULL_TREE
3550 	       /* Perform this optimization only in the last
3551 		  reassoc pass, as it interferes with the reassociation
3552 		  itself and could also confuse VRP etc., which might not
3553 		  be able to virtually undo the optimization.  */
3554 	       && !reassoc_insert_powi_p
3555 	       && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3556 	       && integer_zerop (ranges[i].low))
3557 	idx = 3;
3558       else
3559 	continue;
3560 
3561       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3562       if (buckets.length () <= b)
3563 	buckets.safe_grow_cleared (b + 1, true);
3564       if (chains.length () <= (unsigned) i)
3565 	chains.safe_grow (i + 1, true);
3566       chains[i] = buckets[b];
3567       buckets[b] = i + 1;
3568     }
3569 
3570   FOR_EACH_VEC_ELT (buckets, b, i)
3571     if (i && chains[i - 1])
3572       {
3573 	int j, k = i;
3574 	if ((b % 4) == 2)
3575 	  {
3576 	    /* When ranges[X - 1].high + 1 is a power of two,
3577 	       we need to process the same bucket up to
3578 	       precision - 1 times, each time splitting the entries
3579 	       with the same high bound into one chain and the
3580 	       rest into another one to be processed later.  */
3581 	    int this_prev = i;
3582 	    int other_prev = 0;
3583 	    for (j = chains[i - 1]; j; j = chains[j - 1])
3584 	      {
3585 		if (tree_int_cst_equal (ranges[i - 1].high,
3586 					ranges[j - 1].high))
3587 		  {
3588 		    chains[this_prev - 1] = j;
3589 		    this_prev = j;
3590 		  }
3591 		else if (other_prev == 0)
3592 		  {
3593 		    buckets[b] = j;
3594 		    other_prev = j;
3595 		  }
3596 		else
3597 		  {
3598 		    chains[other_prev - 1] = j;
3599 		    other_prev = j;
3600 		  }
3601 	      }
3602 	    chains[this_prev - 1] = 0;
3603 	    if (other_prev)
3604 	      chains[other_prev - 1] = 0;
3605 	    if (chains[i - 1] == 0)
3606 	      {
3607 		if (other_prev)
3608 		  b--;
3609 		continue;
3610 	      }
3611 	  }
3612 	for (j = chains[i - 1]; j; j = chains[j - 1])
3613 	  {
3614 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3615 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3616 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
3617 	      k = j;
3618 	  }
3619 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
3620 	tree type2 = NULL_TREE;
3621 	bool strict_overflow_p = false;
3622 	candidates.truncate (0);
3623 	for (j = i; j; j = chains[j - 1])
3624 	  {
3625 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3626 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3627 	    if ((b % 4) == 3)
3628 	      {
3629 		/* For the signed < 0 cases, the types should be
3630 		   really compatible (all signed with the same
3631 		   precision); instead put ranges that have a different
3632 		   in_p from k first.  */
3633 		if (!useless_type_conversion_p (type1, type))
3634 		  continue;
3635 		if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3636 		  candidates.safe_push (&ranges[j - 1]);
3637 		type2 = type1;
3638 		continue;
3639 	      }
3640 	    if (j == k
3641 		|| useless_type_conversion_p (type1, type))
3642 	      ;
3643 	    else if (type2 == NULL_TREE
3644 		     || useless_type_conversion_p (type2, type))
3645 	      {
3646 		if (type2 == NULL_TREE)
3647 		  type2 = type;
3648 		candidates.safe_push (&ranges[j - 1]);
3649 	      }
3650 	  }
3651 	unsigned l = candidates.length ();
3652 	for (j = i; j; j = chains[j - 1])
3653 	  {
3654 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3655 	    if (j == k)
3656 	      continue;
3657 	    if ((b % 4) == 3)
3658 	      {
3659 		if (!useless_type_conversion_p (type1, type))
3660 		  continue;
3661 		if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3662 		  candidates.safe_push (&ranges[j - 1]);
3663 		continue;
3664 	      }
3665 	    if (useless_type_conversion_p (type1, type))
3666 	      ;
3667 	    else if (type2 == NULL_TREE
3668 		     || useless_type_conversion_p (type2, type))
3669 	      continue;
3670 	    candidates.safe_push (&ranges[j - 1]);
3671 	  }
3672 	gimple_seq seq = NULL;
3673 	tree op = NULL_TREE;
3674 	unsigned int id;
3675 	struct range_entry *r;
3676 	candidates.safe_push (&ranges[k - 1]);
3677 	FOR_EACH_VEC_ELT (candidates, id, r)
3678 	  {
3679 	    gimple *g;
3680 	    enum tree_code code;
3681 	    if (id == 0)
3682 	      {
3683 		op = r->exp;
3684 		continue;
3685 	      }
3686 	    if (id == l)
3687 	      {
3688 		code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3689 		g = gimple_build_assign (make_ssa_name (type1), code, op);
3690 		gimple_seq_add_stmt_without_update (&seq, g);
3691 		op = gimple_assign_lhs (g);
3692 	      }
3693 	    tree type = TREE_TYPE (r->exp);
3694 	    tree exp = r->exp;
3695 	    if (id >= l && !useless_type_conversion_p (type1, type))
3696 	      {
3697 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3698 		gimple_seq_add_stmt_without_update (&seq, g);
3699 		exp = gimple_assign_lhs (g);
3700 	      }
3701 	    if ((b % 4) == 3)
3702 	      code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3703 	    else
3704 	      code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3705 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3706 				     code, op, exp);
3707 	    gimple_seq_add_stmt_without_update (&seq, g);
3708 	    op = gimple_assign_lhs (g);
3709 	  }
3710 	candidates.pop ();
3711 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3712 			       candidates.length (), opcode, ops, op,
3713 			       seq, ranges[k - 1].in_p, ranges[k - 1].low,
3714 			       ranges[k - 1].high, strict_overflow_p))
3715 	  any_changes = true;
3716 	else
3717 	  gimple_seq_discard (seq);
3718 	if ((b % 4) == 2 && buckets[b] != i)
3719 	  /* There is more work to do for this bucket.  */
3720 	  b--;
3721       }
3722 
3723   return any_changes;
3724 }
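
/* Bucket numbering used above, shown for a made-up 32-bit int x:
   b == TYPE_PRECISION * 4 + idx, hence
     x == 0    (idx 0)  bucket 128, combined as (x | y | ...) == 0
     x == -1   (idx 1)  bucket 129, combined as (x & y & ...) == -1
     x < 16U   (idx 2)  bucket 130, combined as (x | y | ...) < 16U
     x < 0     (idx 3)  bucket 131, combined as (x | y | ...) < 0
   Only entries in the same bucket, i.e. with the same precision and
   the same kind of test, are considered for merging.  */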
3725 
3726 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3727    a >= 0 && a < b into (unsigned) a < (unsigned) b
3728    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
3729 
3730 static bool
3731 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3732 				vec<operand_entry *> *ops,
3733 				struct range_entry *ranges,
3734 				basic_block first_bb)
3735 {
3736   int i;
3737   bool any_changes = false;
3738   hash_map<tree, int> *map = NULL;
3739 
3740   for (i = first; i < length; i++)
3741     {
3742       if (ranges[i].exp == NULL_TREE
3743 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3744 	  || !ranges[i].in_p)
3745 	continue;
3746 
3747       tree type = TREE_TYPE (ranges[i].exp);
3748       if (!INTEGRAL_TYPE_P (type)
3749 	  || TYPE_UNSIGNED (type)
3750 	  || ranges[i].low == NULL_TREE
3751 	  || !integer_zerop (ranges[i].low)
3752 	  || ranges[i].high != NULL_TREE)
3753 	continue;
3754       /* EXP >= 0 here.  */
3755       if (map == NULL)
3756 	map = new hash_map <tree, int>;
3757       map->put (ranges[i].exp, i);
3758     }
3759 
3760   if (map == NULL)
3761     return false;
3762 
3763   for (i = 0; i < length; i++)
3764     {
3765       bool in_p = ranges[i].in_p;
3766       if (ranges[i].low == NULL_TREE
3767 	  || ranges[i].high == NULL_TREE)
3768 	continue;
3769       if (!integer_zerop (ranges[i].low)
3770 	  || !integer_zerop (ranges[i].high))
3771 	{
3772 	  if (ranges[i].exp
3773 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3774 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3775 	      && integer_onep (ranges[i].low)
3776 	      && integer_onep (ranges[i].high))
3777 	    in_p = !in_p;
3778 	  else
3779 	    continue;
3780 	}
3781 
3782       gimple *stmt;
3783       tree_code ccode;
3784       tree rhs1, rhs2;
3785       if (ranges[i].exp)
3786 	{
3787 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3788 	    continue;
3789 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3790 	  if (!is_gimple_assign (stmt))
3791 	    continue;
3792 	  ccode = gimple_assign_rhs_code (stmt);
3793 	  rhs1 = gimple_assign_rhs1 (stmt);
3794 	  rhs2 = gimple_assign_rhs2 (stmt);
3795 	}
3796       else
3797 	{
3798 	  operand_entry *oe = (*ops)[ranges[i].idx];
3799 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3800 	  if (gimple_code (stmt) != GIMPLE_COND)
3801 	    continue;
3802 	  ccode = gimple_cond_code (stmt);
3803 	  rhs1 = gimple_cond_lhs (stmt);
3804 	  rhs2 = gimple_cond_rhs (stmt);
3805 	}
3806 
3807       if (TREE_CODE (rhs1) != SSA_NAME
3808 	  || rhs2 == NULL_TREE
3809 	  || TREE_CODE (rhs2) != SSA_NAME)
3810 	continue;
3811 
3812       switch (ccode)
3813 	{
3814 	case GT_EXPR:
3815 	case GE_EXPR:
3816 	case LT_EXPR:
3817 	case LE_EXPR:
3818 	  break;
3819 	default:
3820 	  continue;
3821 	}
3822       if (in_p)
3823 	ccode = invert_tree_comparison (ccode, false);
3824       switch (ccode)
3825 	{
3826 	case GT_EXPR:
3827 	case GE_EXPR:
3828 	  std::swap (rhs1, rhs2);
3829 	  ccode = swap_tree_comparison (ccode);
3830 	  break;
3831 	case LT_EXPR:
3832 	case LE_EXPR:
3833 	  break;
3834 	default:
3835 	  gcc_unreachable ();
3836 	}
3837 
3838       int *idx = map->get (rhs1);
3839       if (idx == NULL)
3840 	continue;
3841 
3842       /* maybe_optimize_range_tests allows statements without side-effects
3843 	 in the basic blocks as long as they are consumed in the same bb.
3844 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3845 	 safely use get_nonzero_bits on it.  E.g. in:
3846 	  # RANGE [-83, 1] NONZERO 173
3847 	  # k_32 = PHI <k_47(13), k_12(9)>
3848 	 ...
3849 	  if (k_32 >= 0)
3850 	    goto <bb 5>; [26.46%]
3851 	  else
3852 	    goto <bb 9>; [73.54%]
3853 
3854 	  <bb 5> [local count: 140323371]:
3855 	  # RANGE [0, 1] NONZERO 1
3856 	  _5 = (int) k_32;
3857 	  # RANGE [0, 4] NONZERO 4
3858 	  _21 = _5 << 2;
3859 	  # RANGE [0, 4] NONZERO 4
3860 	  iftmp.0_44 = (char) _21;
3861 	  if (k_32 < iftmp.0_44)
3862 	    goto <bb 6>; [84.48%]
3863 	  else
3864 	    goto <bb 9>; [15.52%]
3865 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3866 	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3867 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3868 	 those stmts even for negative k_32 and the value ranges would no
3869 	 longer be guaranteed and so the optimization would be invalid.  */
3870       while (opcode == ERROR_MARK)
3871 	{
3872 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3873 	  basic_block bb2 = gimple_bb (g);
3874 	  if (bb2
3875 	      && bb2 != first_bb
3876 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3877 	    {
3878 	      /* As an exception, handle a few common cases.  */
3879 	      if (gimple_assign_cast_p (g)
3880 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3881 		{
3882 		  tree op0 = gimple_assign_rhs1 (g);
3883 		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
3884 		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
3885 			  > TYPE_PRECISION (TREE_TYPE (op0))))
3886 		    /* Zero-extension is always ok.  */
3887 		    break;
3888 		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3889 			   == TYPE_PRECISION (TREE_TYPE (op0))
3890 			   && TREE_CODE (op0) == SSA_NAME)
3891 		    {
3892 		      /* Cast from signed to unsigned or vice versa.  Retry
3893 			 with the op0 as new rhs2.  */
3894 		      rhs2 = op0;
3895 		      continue;
3896 		    }
3897 		}
3898 	      else if (is_gimple_assign (g)
3899 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3900 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3901 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3902 		/* Masking with INTEGER_CST with MSB clear is always ok
3903 		   too.  */
3904 		break;
3905 	      rhs2 = NULL_TREE;
3906 	    }
3907 	  break;
3908 	}
3909       if (rhs2 == NULL_TREE)
3910 	continue;
3911 
3912       wide_int nz = get_nonzero_bits (rhs2);
3913       if (wi::neg_p (nz))
3914 	continue;
3915 
3916       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3917 	 and RHS2 is known to be >= 0.  */
3918       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3919 
3920       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3921       if ((ranges[*idx].strict_overflow_p
3922 	   || ranges[i].strict_overflow_p)
3923 	  && issue_strict_overflow_warning (wc))
3924 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3925 		    "assuming signed overflow does not occur "
3926 		    "when simplifying range test");
3927 
3928       if (dump_file && (dump_flags & TDF_DETAILS))
3929 	{
3930 	  struct range_entry *r = &ranges[*idx];
3931 	  fprintf (dump_file, "Optimizing range test ");
3932 	  print_generic_expr (dump_file, r->exp);
3933 	  fprintf (dump_file, " +[");
3934 	  print_generic_expr (dump_file, r->low);
3935 	  fprintf (dump_file, ", ");
3936 	  print_generic_expr (dump_file, r->high);
3937 	  fprintf (dump_file, "] and comparison ");
3938 	  print_generic_expr (dump_file, rhs1);
3939 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3940 	  print_generic_expr (dump_file, rhs2);
3941 	  fprintf (dump_file, "\n into (");
3942 	  print_generic_expr (dump_file, utype);
3943 	  fprintf (dump_file, ") ");
3944 	  print_generic_expr (dump_file, rhs1);
3945 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3946 	  print_generic_expr (dump_file, utype);
3947 	  fprintf (dump_file, ") ");
3948 	  print_generic_expr (dump_file, rhs2);
3949 	  fprintf (dump_file, "\n");
3950 	}
3951 
3952       operand_entry *oe = (*ops)[ranges[i].idx];
3953       ranges[i].in_p = 0;
3954       if (opcode == BIT_IOR_EXPR
3955 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3956 	{
3957 	  ranges[i].in_p = 1;
3958 	  ccode = invert_tree_comparison (ccode, false);
3959 	}
3960 
3961       unsigned int uid = gimple_uid (stmt);
3962       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3963       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3964       gimple_set_uid (g, uid);
3965       rhs1 = gimple_assign_lhs (g);
3966       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3967       if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3968 	{
3969 	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3970 	  gimple_set_uid (g, uid);
3971 	  rhs2 = gimple_assign_lhs (g);
3972 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3973 	}
3974       if (tree_swap_operands_p (rhs1, rhs2))
3975 	{
3976 	  std::swap (rhs1, rhs2);
3977 	  ccode = swap_tree_comparison (ccode);
3978 	}
3979       if (gimple_code (stmt) == GIMPLE_COND)
3980 	{
3981 	  gcond *c = as_a <gcond *> (stmt);
3982 	  gimple_cond_set_code (c, ccode);
3983 	  gimple_cond_set_lhs (c, rhs1);
3984 	  gimple_cond_set_rhs (c, rhs2);
3985 	  update_stmt (stmt);
3986 	}
3987       else
3988 	{
3989 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3990 	  if (!INTEGRAL_TYPE_P (ctype)
3991 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3992 		  && TYPE_PRECISION (ctype) != 1))
3993 	    ctype = boolean_type_node;
3994 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3995 	  gimple_set_uid (g, uid);
3996 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3997 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3998 	    {
3999 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4000 				       NOP_EXPR, gimple_assign_lhs (g));
4001 	      gimple_set_uid (g, uid);
4002 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4003 	    }
4004 	  ranges[i].exp = gimple_assign_lhs (g);
4005 	  oe->op = ranges[i].exp;
4006 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4007 	  ranges[i].high = ranges[i].low;
4008 	}
4009       ranges[i].strict_overflow_p = false;
4010       oe = (*ops)[ranges[*idx].idx];
4011       /* Now change all the other range test immediate uses, so that
4012 	 those tests will be optimized away.  */
4013       if (opcode == ERROR_MARK)
4014 	{
4015 	  if (oe->op)
4016 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
4017 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
4018 	  else
4019 	    oe->op = (oe->rank == BIT_IOR_EXPR
4020 		      ? boolean_false_node : boolean_true_node);
4021 	}
4022       else
4023 	oe->op = error_mark_node;
4024       ranges[*idx].exp = NULL_TREE;
4025       ranges[*idx].low = NULL_TREE;
4026       ranges[*idx].high = NULL_TREE;
4027       any_changes = true;
4028     }
4029 
4030   delete map;
4031   return any_changes;
4032 }
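
/* Informal argument for the rewrite above: RHS2 is only accepted when
   get_nonzero_bits proves its sign bit clear, so (unsigned) RHS2
   equals RHS2 and stays below 2^(prec-1).  For a made-up 32-bit
   a == -5, (unsigned) a == 0xfffffffbU >= 2^31 exceeds any such RHS2,
   making the unsigned compare false exactly as a >= 0 && a < b is;
   for a >= 0 the signed and unsigned comparisons agree bit for
   bit.  */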
4033 
4034 /* Optimize range tests, similarly to how fold_range_test optimizes
4035    them on trees.  The tree code for the binary
4036    operation between all the operands is OPCODE.
4037    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4038    maybe_optimize_range_tests for inter-bb range optimization.
4039    In that case if oe->op is NULL, oe->id is bb->index whose
4040    GIMPLE_COND is && or ||ed into the test, and oe->rank says
4041    the actual opcode.
4042    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
4043 
4044 static bool
4045 optimize_range_tests (enum tree_code opcode,
4046 		      vec<operand_entry *> *ops, basic_block first_bb)
4047 {
4048   unsigned int length = ops->length (), i, j, first;
4049   operand_entry *oe;
4050   struct range_entry *ranges;
4051   bool any_changes = false;
4052 
4053   if (length == 1)
4054     return false;
4055 
4056   ranges = XNEWVEC (struct range_entry, length);
4057   for (i = 0; i < length; i++)
4058     {
4059       oe = (*ops)[i];
4060       ranges[i].idx = i;
4061       init_range_entry (ranges + i, oe->op,
4062 			oe->op
4063 			? NULL
4064 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4065       /* For | invert it now, we will invert it again before emitting
4066 	 the optimized expression.  */
4067       if (opcode == BIT_IOR_EXPR
4068 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4069 	ranges[i].in_p = !ranges[i].in_p;
4070     }
4071 
4072   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4073   for (i = 0; i < length; i++)
4074     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4075       break;
4076 
4077   /* Try to merge ranges.  */
4078   for (first = i; i < length; i++)
4079     {
4080       tree low = ranges[i].low;
4081       tree high = ranges[i].high;
4082       int in_p = ranges[i].in_p;
4083       bool strict_overflow_p = ranges[i].strict_overflow_p;
4084       int update_fail_count = 0;
4085 
4086       for (j = i + 1; j < length; j++)
4087 	{
4088 	  if (ranges[i].exp != ranges[j].exp)
4089 	    break;
4090 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4091 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
4092 	    break;
4093 	  strict_overflow_p |= ranges[j].strict_overflow_p;
4094 	}
4095 
4096       if (j == i + 1)
4097 	continue;
4098 
4099       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4100 			     opcode, ops, ranges[i].exp, NULL, in_p,
4101 			     low, high, strict_overflow_p))
4102 	{
4103 	  i = j - 1;
4104 	  any_changes = true;
4105 	}
4106       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4107 	 while update_range_test would fail.  */
4108       else if (update_fail_count == 64)
4109 	i = j - 1;
4110       else
4111 	++update_fail_count;
4112     }
4113 
4114   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4115 					 ops, ranges);
4116 
4117   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4118     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4119 					   ops, ranges);
4120   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4121     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4122 						     ops, ranges);
4123   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4124 						 ranges, first_bb);
4125   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4126 						   ops, ranges);
4127 
4128   if (any_changes && opcode != ERROR_MARK)
4129     {
4130       j = 0;
4131       FOR_EACH_VEC_ELT (*ops, i, oe)
4132 	{
4133 	  if (oe->op == error_mark_node)
4134 	    continue;
4135 	  else if (i != j)
4136 	    (*ops)[j] = oe;
4137 	  j++;
4138 	}
4139       ops->truncate (j);
4140     }
4141 
4142   XDELETEVEC (ranges);
4143   return any_changes;
4144 }
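
/* Small end-to-end illustration of the driver above (made-up IL),
   for OPCODE == BIT_IOR_EXPR over

     t_1 = x_9 == 4;  t_2 = x_9 == 5;  t_3 = x_9 == 6;

   Each operand becomes a single-value range, inverted on entry
   because of the |; after sorting, merge_ranges collapses the three
   entries into one range covering [4, 6], update_range_test emits
   roughly x_9 - 4U <= 2U for it, and the superseded operand_entries
   get error_mark_node as op so the loop above truncates them out of
   OPS.  */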
4145 
4146 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4147    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
4148    otherwise the comparison code.  TYPE is a return value that is set
4149    to type of comparison.  */
4150 
4151 static tree_code
4152 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4153 		  tree *lhs, tree *rhs, gassign **vcond)
4154 {
4155   if (TREE_CODE (var) != SSA_NAME)
4156     return ERROR_MARK;
4157 
4158   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4159   if (stmt == NULL)
4160     return ERROR_MARK;
4161   if (vcond)
4162     *vcond = stmt;
4163 
4164   /* ??? If we start creating more COND_EXPR, we could perform
4165      this same optimization with them.  For now, simplify.  */
4166   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4167     return ERROR_MARK;
4168 
4169   tree cond = gimple_assign_rhs1 (stmt);
4170   tree_code cmp = TREE_CODE (cond);
4171   if (cmp != SSA_NAME)
4172     return ERROR_MARK;
4173 
4174   gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4175   if (assign == NULL
4176       || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4177     return ERROR_MARK;
4178 
4179   cmp = gimple_assign_rhs_code (assign);
4180   if (lhs)
4181     *lhs = gimple_assign_rhs1 (assign);
4182   if (rhs)
4183     *rhs = gimple_assign_rhs2 (assign);
4184 
4185   /* ??? For now, allow only canonical true and false result vectors.
4186      We could expand this to other constants should the need arise,
4187      but at the moment we don't create them.  */
4188   tree t = gimple_assign_rhs2 (stmt);
4189   tree f = gimple_assign_rhs3 (stmt);
4190   bool inv;
4191   if (integer_all_onesp (t))
4192     inv = false;
4193   else if (integer_all_onesp (f))
4194     {
4195       cmp = invert_tree_comparison (cmp, false);
4196       inv = true;
4197     }
4198   else
4199     return ERROR_MARK;
4200   if (!integer_zerop (f))
4201     return ERROR_MARK;
4202 
4203   /* Success!  */
4204   if (rets)
4205     *rets = assign;
4206   if (reti)
4207     *reti = inv;
4208   if (type)
4209     *type = TREE_TYPE (cond);
4210   return cmp;
4211 }
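
/* Shape recognized above (invented SSA names):

     _3 = a_1 < b_2;
     x_4 = VEC_COND_EXPR <_3, { -1, ... }, { 0, ... }>;

   For VAR == x_4 this returns LT_EXPR, with *RETS set to the
   comparison statement, *RETI to false, *TYPE to the type of _3 and
   *LHS/*RHS to a_1/b_2; anything but the canonical all-ones/zero
   result vectors is rejected.  */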
4212 
4213 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4214    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
4215 
4216 static bool
4217 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4218 {
4219   unsigned int length = ops->length (), i, j;
4220   bool any_changes = false;
4221 
4222   if (length == 1)
4223     return false;
4224 
4225   for (i = 0; i < length; ++i)
4226     {
4227       tree elt0 = (*ops)[i]->op;
4228 
4229       gassign *stmt0, *vcond0;
4230       bool invert;
4231       tree type, lhs0, rhs0;
4232       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4233 					 &rhs0, &vcond0);
4234       if (cmp0 == ERROR_MARK)
4235 	continue;
4236 
4237       for (j = i + 1; j < length; ++j)
4238 	{
4239 	  tree &elt1 = (*ops)[j]->op;
4240 
4241 	  gassign *stmt1, *vcond1;
4242 	  tree lhs1, rhs1;
4243 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4244 					     &rhs1, &vcond1);
4245 	  if (cmp1 == ERROR_MARK)
4246 	    continue;
4247 
4248 	  tree comb;
4249 	  if (opcode == BIT_AND_EXPR)
4250 	    comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4251 					       cmp1, lhs1, rhs1);
4252 	  else if (opcode == BIT_IOR_EXPR)
4253 	    comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4254 					      cmp1, lhs1, rhs1);
4255 	  else
4256 	    gcc_unreachable ();
4257 	  if (comb == NULL)
4258 	    continue;
4259 
4260 	  /* Success! */
4261 	  if (dump_file && (dump_flags & TDF_DETAILS))
4262 	    {
4263 	      fprintf (dump_file, "Transforming ");
4264 	      print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4265 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4266 	      print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4267 	      fprintf (dump_file, " into ");
4268 	      print_generic_expr (dump_file, comb);
4269 	      fputc ('\n', dump_file);
4270 	    }
4271 
4272 	  gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4273 	  tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4274 					       true, GSI_SAME_STMT);
4275 	  if (invert)
4276 	    swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4277 			       gimple_assign_rhs3_ptr (vcond0));
4278 	  gimple_assign_set_rhs1 (vcond0, exp);
4279 	  update_stmt (vcond0);
4280 
4281 	  elt1 = error_mark_node;
4282 	  any_changes = true;
4283 	}
4284     }
4285 
4286   if (any_changes)
4287     {
4288       operand_entry *oe;
4289       j = 0;
4290       FOR_EACH_VEC_ELT (*ops, i, oe)
4291 	{
4292 	  if (oe->op == error_mark_node)
4293 	    continue;
4294 	  else if (i != j)
4295 	    (*ops)[j] = oe;
4296 	  j++;
4297 	}
4298       ops->truncate (j);
4299     }
4300 
4301   return any_changes;
4302 }
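
/* Made-up example of the combining above:

     _3 = x_7 <= y_8;
     c_1 = VEC_COND_EXPR <_3, { -1, ... }, { 0, ... }>;
     _4 = x_7 < y_8;
     c_2 = VEC_COND_EXPR <_4, { -1, ... }, { 0, ... }>;

   Under OPCODE == BIT_AND_EXPR, maybe_fold_and_comparisons folds
   x <= y && x < y into x < y, the first VEC_COND_EXPR's condition is
   replaced with the folded comparison, and c_2's entry is dropped
   from OPS via error_mark_node.  */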
4303 
4304 /* Return true if STMT is a cast like:
4305    <bb N>:
4306    ...
4307    _123 = (int) _234;
4308 
4309    <bb M>:
4310    # _345 = PHI <_123(N), 1(...), 1(...)>
4311    where _234 has bool type, _123 has a single use and
4312    bb N has a single successor M.  This is commonly used in
4313    the last block of a range test.
4314 
4315    Also return true if STMT is a tcc_comparison like:
4316    <bb N>:
4317    ...
4318    _234 = a_2(D) == 2;
4319 
4320    <bb M>:
4321    # _345 = PHI <_234(N), 1(...), 1(...)>
4322    _346 = (int) _345;
4323    where _234 has bool type, a single use and
4324    bb N has a single successor M.  This is commonly used in
4325    the last block of a range test.  */
4326 
4327 static bool
4328 final_range_test_p (gimple *stmt)
4329 {
4330   basic_block bb, rhs_bb, lhs_bb;
4331   edge e;
4332   tree lhs, rhs;
4333   use_operand_p use_p;
4334   gimple *use_stmt;
4335 
4336   if (!gimple_assign_cast_p (stmt)
4337       && (!is_gimple_assign (stmt)
4338 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4339 	      != tcc_comparison)))
4340     return false;
4341   bb = gimple_bb (stmt);
4342   if (!single_succ_p (bb))
4343     return false;
4344   e = single_succ_edge (bb);
4345   if (e->flags & EDGE_COMPLEX)
4346     return false;
4347 
4348   lhs = gimple_assign_lhs (stmt);
4349   rhs = gimple_assign_rhs1 (stmt);
4350   if (gimple_assign_cast_p (stmt)
4351       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4352 	  || TREE_CODE (rhs) != SSA_NAME
4353 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4354     return false;
4355 
4356   if (!gimple_assign_cast_p (stmt)
4357       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4358     return false;
4359 
4360   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
4361   if (!single_imm_use (lhs, &use_p, &use_stmt))
4362     return false;
4363 
4364   if (gimple_code (use_stmt) != GIMPLE_PHI
4365       || gimple_bb (use_stmt) != e->dest)
4366     return false;
4367 
4368   /* And that the rhs is defined in the same loop.  */
4369   if (gimple_assign_cast_p (stmt))
4370     {
4371       if (TREE_CODE (rhs) != SSA_NAME
4372 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4373 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4374 	return false;
4375     }
4376   else
4377     {
4378       if (TREE_CODE (lhs) != SSA_NAME
4379 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4380 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4381 	return false;
4382     }
4383 
4384   return true;
4385 }
4386 
4387 /* Return true if BB is a suitable basic block for inter-bb range test
4388    optimization.  If BACKWARD is true, BB should be the only predecessor
4389    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4390    or compared against to find a common basic block to which all conditions
4391    branch if true resp. false.  If BACKWARD is false, TEST_BB should
4392    be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
4393    TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4394    block points to an empty block that falls through into *OTHER_BB and
4395    the phi args match that path.  */
4396 
4397 static bool
4398 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4399 		  bool *test_swapped_p, bool backward)
4400 {
4401   edge_iterator ei, ei2;
4402   edge e, e2;
4403   gimple *stmt;
4404   gphi_iterator gsi;
4405   bool other_edge_seen = false;
4406   bool is_cond;
4407 
4408   if (test_bb == bb)
4409     return false;
4410   /* Check last stmt first.  */
4411   stmt = last_stmt (bb);
4412   if (stmt == NULL
4413       || (gimple_code (stmt) != GIMPLE_COND
4414 	  && (backward || !final_range_test_p (stmt)))
4415       || gimple_visited_p (stmt)
4416       || stmt_could_throw_p (cfun, stmt)
4417       || *other_bb == bb)
4418     return false;
4419   is_cond = gimple_code (stmt) == GIMPLE_COND;
4420   if (is_cond)
4421     {
4422       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4423 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4424 	 to *OTHER_BB (if not set yet, try to find it out).  */
4425       if (EDGE_COUNT (bb->succs) != 2)
4426 	return false;
4427       FOR_EACH_EDGE (e, ei, bb->succs)
4428 	{
4429 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4430 	    return false;
4431 	  if (e->dest == test_bb)
4432 	    {
4433 	      if (backward)
4434 		continue;
4435 	      else
4436 		return false;
4437 	    }
4438 	  if (e->dest == bb)
4439 	    return false;
4440 	  if (*other_bb == NULL)
4441 	    {
4442 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4443 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4444 		  return false;
4445 		else if (e->dest == e2->dest)
4446 		  *other_bb = e->dest;
4447 	      if (*other_bb == NULL)
4448 		return false;
4449 	    }
4450 	  if (e->dest == *other_bb)
4451 	    other_edge_seen = true;
4452 	  else if (backward)
4453 	    return false;
4454 	}
4455       if (*other_bb == NULL || !other_edge_seen)
4456 	return false;
4457     }
4458   else if (single_succ (bb) != *other_bb)
4459     return false;
4460 
4461   /* Now check all PHIs of *OTHER_BB.  */
4462   e = find_edge (bb, *other_bb);
4463   e2 = find_edge (test_bb, *other_bb);
4464  retry:;
4465   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4466     {
4467       gphi *phi = gsi.phi ();
4468       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4469 	 corresponding to BB and TEST_BB predecessor must be the same.  */
4470       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4471 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
4472 	{
4473 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4474 	     one of the PHIs should have the lhs of the last stmt in
4475 	     that block as PHI arg and that PHI should have 0 or 1
4476 	     corresponding to it in all other range test basic blocks
4477 	     considered.  */
4478 	  if (!is_cond)
4479 	    {
4480 	      if (gimple_phi_arg_def (phi, e->dest_idx)
4481 		  == gimple_assign_lhs (stmt)
4482 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4483 		      || integer_onep (gimple_phi_arg_def (phi,
4484 							   e2->dest_idx))))
4485 		continue;
4486 	    }
4487 	  else
4488 	    {
4489 	      gimple *test_last = last_stmt (test_bb);
4490 	      if (gimple_code (test_last) == GIMPLE_COND)
4491 		{
4492 		  if (backward ? e2->src != test_bb : e->src != bb)
4493 		    return false;
4494 
4495 		  /* For last_bb, handle also:
4496 		     if (x_3(D) == 3)
4497 		       goto <bb 6>; [34.00%]
4498 		     else
4499 		       goto <bb 7>; [66.00%]
4500 
4501 		     <bb 6> [local count: 79512730]:
4502 
4503 		     <bb 7> [local count: 1073741824]:
4504 		     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4505 		     where bb 7 is *OTHER_BB, but the PHI values from the
4506 		     earlier bbs match the path through the empty bb
4507 		     in between.  */
4508 		  edge e3;
4509 		  if (backward)
4510 		    e3 = EDGE_SUCC (test_bb,
4511 				    e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4512 		  else
4513 		    e3 = EDGE_SUCC (bb,
4514 				    e == EDGE_SUCC (bb, 0) ? 1 : 0);
4515 		  if (empty_block_p (e3->dest)
4516 		      && single_succ_p (e3->dest)
4517 		      && single_succ (e3->dest) == *other_bb
4518 		      && single_pred_p (e3->dest)
4519 		      && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4520 		    {
4521 		      if (backward)
4522 			e2 = single_succ_edge (e3->dest);
4523 		      else
4524 			e = single_succ_edge (e3->dest);
4525 		      if (test_swapped_p)
4526 			*test_swapped_p = true;
4527 		      goto retry;
4528 		    }
4529 		}
4530 	      else if (gimple_phi_arg_def (phi, e2->dest_idx)
4531 		       == gimple_assign_lhs (test_last)
4532 		       && (integer_zerop (gimple_phi_arg_def (phi,
4533 							      e->dest_idx))
4534 			   || integer_onep (gimple_phi_arg_def (phi,
4535 								e->dest_idx))))
4536 		continue;
4537 	    }
4538 
4539 	  return false;
4540 	}
4541     }
4542   return true;
4543 }
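
/* CFG shape accepted with BACKWARD == true (invented indices):

     <bb 2>: if (a_1 == 0) goto <bb 3>; else goto <bb 5>;
     <bb 3>: if (b_2 == 0) goto <bb 4>; else goto <bb 5>;

   Here BB == bb2, TEST_BB == bb3: bb2 has one edge to the next test
   and one to the common join bb5, which becomes *OTHER_BB, and the
   PHI arguments in bb5 on the edges from bb2 and bb3 must agree.  */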
4544 
4545 /* Return true if BB doesn't have side-effects that would disallow
4546    range test optimization, all SSA_NAMEs set in the bb are consumed
4547    in the bb and there are no PHIs.  */
4548 
4549 bool
4550 no_side_effect_bb (basic_block bb)
4551 {
4552   gimple_stmt_iterator gsi;
4553   gimple *last;
4554 
4555   if (!gimple_seq_empty_p (phi_nodes (bb)))
4556     return false;
4557   last = last_stmt (bb);
4558   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4559     {
4560       gimple *stmt = gsi_stmt (gsi);
4561       tree lhs;
4562       imm_use_iterator imm_iter;
4563       use_operand_p use_p;
4564 
4565       if (is_gimple_debug (stmt))
4566 	continue;
4567       if (gimple_has_side_effects (stmt))
4568 	return false;
4569       if (stmt == last)
4570 	return true;
4571       if (!is_gimple_assign (stmt))
4572 	return false;
4573       lhs = gimple_assign_lhs (stmt);
4574       if (TREE_CODE (lhs) != SSA_NAME)
4575 	return false;
4576       if (gimple_assign_rhs_could_trap_p (stmt))
4577 	return false;
4578       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4579 	{
4580 	  gimple *use_stmt = USE_STMT (use_p);
4581 	  if (is_gimple_debug (use_stmt))
4582 	    continue;
4583 	  if (gimple_bb (use_stmt) != bb)
4584 	    return false;
4585 	}
4586     }
4587   return false;
4588 }
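
/* Example of a bb the predicate above accepts (made up):

     _1 = x_4(D) + 1;
     _2 = _1 * 3;
     if (_2 > y_5(D)) goto <bb 4>; else goto <bb 5>;

   Every lhs is an SSA_NAME consumed within the bb itself.  A PHI
   node, a call, a store, a potentially trapping rhs or an SSA_NAME
   used in another bb makes it return false.  */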
4589 
4590 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4591    return true and fill in *OPS recursively.  */
4592 
4593 static bool
4594 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4595 	 class loop *loop)
4596 {
4597   gimple *stmt = SSA_NAME_DEF_STMT (var);
4598   tree rhs[2];
4599   int i;
4600 
4601   if (!is_reassociable_op (stmt, code, loop))
4602     return false;
4603 
4604   rhs[0] = gimple_assign_rhs1 (stmt);
4605   rhs[1] = gimple_assign_rhs2 (stmt);
4606   gimple_set_visited (stmt, true);
4607   for (i = 0; i < 2; i++)
4608     if (TREE_CODE (rhs[i]) == SSA_NAME
4609 	&& !get_ops (rhs[i], code, ops, loop)
4610 	&& has_single_use (rhs[i]))
4611       {
4612 	operand_entry *oe = operand_entry_pool.allocate ();
4613 
4614 	oe->op = rhs[i];
4615 	oe->rank = code;
4616 	oe->id = 0;
4617 	oe->count = 1;
4618 	oe->stmt_to_insert = NULL;
4619 	ops->safe_push (oe);
4620       }
4621   return true;
4622 }
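
/* Example for get_ops (invented SSA names): with CODE ==
   BIT_IOR_EXPR and VAR == t_5 defined as

     t_3 = a_1 | b_2;
     t_5 = t_3 | c_4;

   OPS receives entries for a_1, b_2 and c_4, assuming the
   intermediate results are single-use and reassociable.  */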
4623 
4624 /* Find the ops that were added by get_ops starting from VAR, see if
4625    they were changed during update_range_test and if yes, create new
4626    stmts.  */
4627 
4628 static tree
4629 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4630 	    unsigned int *pidx, class loop *loop)
4631 {
4632   gimple *stmt = SSA_NAME_DEF_STMT (var);
4633   tree rhs[4];
4634   int i;
4635 
4636   if (!is_reassociable_op (stmt, code, loop))
4637     return NULL;
4638 
4639   rhs[0] = gimple_assign_rhs1 (stmt);
4640   rhs[1] = gimple_assign_rhs2 (stmt);
4641   rhs[2] = rhs[0];
4642   rhs[3] = rhs[1];
4643   for (i = 0; i < 2; i++)
4644     if (TREE_CODE (rhs[i]) == SSA_NAME)
4645       {
4646 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4647 	if (rhs[2 + i] == NULL_TREE)
4648 	  {
4649 	    if (has_single_use (rhs[i]))
4650 	      rhs[2 + i] = ops[(*pidx)++]->op;
4651 	    else
4652 	      rhs[2 + i] = rhs[i];
4653 	  }
4654       }
4655   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4656       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4657     {
4658       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4659       var = make_ssa_name (TREE_TYPE (var));
4660       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4661 					rhs[2], rhs[3]);
4662       gimple_set_uid (g, gimple_uid (stmt));
4663       gimple_set_visited (g, true);
4664       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4665       gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4666       if (fold_stmt_inplace (&gsi2))
4667 	update_stmt (g);
4668     }
4669   return var;
4670 }
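
/* Continuing the get_ops example: if update_range_test replaced the
   entry that held b_2 by a new name n_7, update_ops rebuilds the
   chain as

     t_8 = a_1 | n_7;
     t_9 = t_8 | c_4;

   inserting the new statements before the old ones; the superseded
   chain is left for later dead code elimination.  */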
4671 
4672 /* Structure to track the initial value passed to get_ops and
4673    the range in the ops vector for each basic block.  */
4674 
4675 struct inter_bb_range_test_entry
4676 {
4677   tree op;
4678   unsigned int first_idx, last_idx;
4679 };
4680 
4681 /* Inter-bb range test optimization.
4682 
4683    Returns TRUE if a gimple conditional is optimized to a true/false,
4684    otherwise returns FALSE.
4685 
4686    This indicates to the caller that it should run a CFG cleanup pass
4687    once reassociation is completed.  */
4688 
4689 static bool
4690 maybe_optimize_range_tests (gimple *stmt)
4691 {
4692   basic_block first_bb = gimple_bb (stmt);
4693   basic_block last_bb = first_bb;
4694   basic_block other_bb = NULL;
4695   basic_block bb;
4696   edge_iterator ei;
4697   edge e;
4698   auto_vec<operand_entry *> ops;
4699   auto_vec<inter_bb_range_test_entry> bbinfo;
4700   bool any_changes = false;
4701   bool cfg_cleanup_needed = false;
4702 
4703   /* Consider only basic blocks that end with GIMPLE_COND or
4704      a cast statement satisfying final_range_test_p.  All
4705      but the last bb in the first_bb .. last_bb range
4706      should end with GIMPLE_COND.  */
4707   if (gimple_code (stmt) == GIMPLE_COND)
4708     {
4709       if (EDGE_COUNT (first_bb->succs) != 2)
4710 	return cfg_cleanup_needed;
4711     }
4712   else if (final_range_test_p (stmt))
4713     other_bb = single_succ (first_bb);
4714   else
4715     return cfg_cleanup_needed;
4716 
4717   if (stmt_could_throw_p (cfun, stmt))
4718     return cfg_cleanup_needed;
4719 
4720   /* As relative ordering of post-dominator sons isn't fixed,
4721      maybe_optimize_range_tests can be called first on any
4722      bb in the range we want to optimize.  So, start searching
4723      backwards, moving first_bb to a predecessor where possible.  */
4724   while (single_pred_p (first_bb))
4725     {
4726       basic_block pred_bb = single_pred (first_bb);
4727       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4728 	break;
4729       if (!no_side_effect_bb (first_bb))
4730 	break;
4731       first_bb = pred_bb;
4732     }
4733   /* If first_bb is last_bb, other_bb hasn't been computed yet.
4734      Before starting forward search in last_bb successors, find
4735      out the other_bb.  */
4736   if (first_bb == last_bb)
4737     {
4738       other_bb = NULL;
4739       /* As a non-GIMPLE_COND last stmt always terminates the range,
4740 	 if the backward search didn't discover anything, just give up.  */
4741       if (gimple_code (stmt) != GIMPLE_COND)
4742 	return cfg_cleanup_needed;
4743       /* Look at both successors.  Either it ends with a GIMPLE_COND
4744 	 and satisfies suitable_cond_bb, or ends with a cast and
4745 	 other_bb is that cast's successor.  */
4746       FOR_EACH_EDGE (e, ei, first_bb->succs)
4747 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4748 	    || e->dest == first_bb)
4749 	  return cfg_cleanup_needed;
4750 	else if (single_pred_p (e->dest))
4751 	  {
4752 	    stmt = last_stmt (e->dest);
4753 	    if (stmt
4754 		&& gimple_code (stmt) == GIMPLE_COND
4755 		&& EDGE_COUNT (e->dest->succs) == 2)
4756 	      {
4757 		if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4758 				      NULL, true))
4759 		  break;
4760 		else
4761 		  other_bb = NULL;
4762 	      }
4763 	    else if (stmt
4764 		     && final_range_test_p (stmt)
4765 		     && find_edge (first_bb, single_succ (e->dest)))
4766 	      {
4767 		other_bb = single_succ (e->dest);
4768 		if (other_bb == first_bb)
4769 		  other_bb = NULL;
4770 	      }
4771 	  }
4772       if (other_bb == NULL)
4773 	return cfg_cleanup_needed;
4774     }
4775   /* Now do the forward search, moving last_bb to successor bbs
4776      that aren't other_bb.  */
4777   while (EDGE_COUNT (last_bb->succs) == 2)
4778     {
4779       FOR_EACH_EDGE (e, ei, last_bb->succs)
4780 	if (e->dest != other_bb)
4781 	  break;
4782       if (e == NULL)
4783 	break;
4784       if (!single_pred_p (e->dest))
4785 	break;
4786       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4787 	break;
4788       if (!no_side_effect_bb (e->dest))
4789 	break;
4790       last_bb = e->dest;
4791     }
4792   if (first_bb == last_bb)
4793     return cfg_cleanup_needed;
4794   /* Here basic blocks first_bb through last_bb's predecessor
4795      end with GIMPLE_COND, all of them have one of the edges to
4796      other_bb and another to another block in the range,
4797      all blocks except first_bb don't have side-effects and
4798      last_bb ends with either GIMPLE_COND, or cast satisfying
4799      final_range_test_p.  */
4800   for (bb = last_bb; ; bb = single_pred (bb))
4801     {
4802       enum tree_code code;
4803       tree lhs, rhs;
4804       inter_bb_range_test_entry bb_ent;
4805 
4806       bb_ent.op = NULL_TREE;
4807       bb_ent.first_idx = ops.length ();
4808       bb_ent.last_idx = bb_ent.first_idx;
4809       e = find_edge (bb, other_bb);
4810       stmt = last_stmt (bb);
4811       gimple_set_visited (stmt, true);
4812       if (gimple_code (stmt) != GIMPLE_COND)
4813 	{
4814 	  use_operand_p use_p;
4815 	  gimple *phi;
4816 	  edge e2;
4817 	  unsigned int d;
4818 
4819 	  lhs = gimple_assign_lhs (stmt);
4820 	  rhs = gimple_assign_rhs1 (stmt);
4821 	  gcc_assert (bb == last_bb);
4822 
4823 	  /* stmt is
4824 	     _123 = (int) _234;
4825 	     OR
4826 	     _234 = a_2(D) == 2;
4827 
4828 	     followed by:
4829 	     <bb M>:
4830 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4831 
4832 	     or 0 instead of 1.  If it is 0, the _234
4833 	     range test is anded together with all the
4834 	     other range tests; if it is 1, it is ored with
4835 	     them.  */
4836 	  single_imm_use (lhs, &use_p, &phi);
4837 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4838 	  e2 = find_edge (first_bb, other_bb);
4839 	  d = e2->dest_idx;
4840 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4841 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4842 	    code = BIT_AND_EXPR;
4843 	  else
4844 	    {
4845 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4846 	      code = BIT_IOR_EXPR;
4847 	    }
4848 
4849 	  /* If _234 SSA_NAME_DEF_STMT is
4850 	     _234 = _567 | _789;
4851 	     (or &, corresponding to 1/0 in the phi arguments),
4852 	     push into ops the individual range test arguments
4853 	     of the bitwise or resp. and, recursively.  */
4854 	  if (TREE_CODE (rhs) == SSA_NAME
4855 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4856 		  != tcc_comparison)
4857 	      && !get_ops (rhs, code, &ops,
4858 			   loop_containing_stmt (stmt))
4859 	      && has_single_use (rhs))
4860 	    {
4861 	      /* Otherwise, push the _234 range test itself.  */
4862 	      operand_entry *oe = operand_entry_pool.allocate ();
4863 
4864 	      oe->op = rhs;
4865 	      oe->rank = code;
4866 	      oe->id = 0;
4867 	      oe->count = 1;
4868 	      oe->stmt_to_insert = NULL;
4869 	      ops.safe_push (oe);
4870 	      bb_ent.last_idx++;
4871 	      bb_ent.op = rhs;
4872 	    }
4873 	  else if (is_gimple_assign (stmt)
4874 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4875 		       == tcc_comparison)
4876 		   && !get_ops (lhs, code, &ops,
4877 				loop_containing_stmt (stmt))
4878 		   && has_single_use (lhs))
4879 	    {
4880 	      operand_entry *oe = operand_entry_pool.allocate ();
4881 	      oe->op = lhs;
4882 	      oe->rank = code;
4883 	      oe->id = 0;
4884 	      oe->count = 1;
4885 	      ops.safe_push (oe);
4886 	      bb_ent.last_idx++;
4887 	      bb_ent.op = lhs;
4888 	    }
4889 	  else
4890 	    {
4891 	      bb_ent.last_idx = ops.length ();
4892 	      bb_ent.op = rhs;
4893 	    }
4894 	  bbinfo.safe_push (bb_ent);
4895 	  for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4896 	    ops[i]->id = bb->index;
4897 	  continue;
4898 	}
4899       else if (bb == last_bb)
4900 	{
4901 	  /* For last_bb, handle also:
4902 	     if (x_3(D) == 3)
4903 	       goto <bb 6>; [34.00%]
4904 	     else
4905 	       goto <bb 7>; [66.00%]
4906 
4907 	     <bb 6> [local count: 79512730]:
4908 
4909 	     <bb 7> [local count: 1073741824]:
4910 	     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4911 	     where bb 7 is OTHER_BB, but the PHI values from the
4912 	     earlier bbs match the path through the empty bb
4913 	     in between.  */
4914 	  bool test_swapped_p = false;
4915 	  bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4916 				      &other_bb, &test_swapped_p, true);
4917 	  gcc_assert (ok);
4918 	  if (test_swapped_p)
4919 	    e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4920 	}
4921       /* Otherwise stmt is GIMPLE_COND.  */
4922       code = gimple_cond_code (stmt);
4923       lhs = gimple_cond_lhs (stmt);
4924       rhs = gimple_cond_rhs (stmt);
4925       if (TREE_CODE (lhs) == SSA_NAME
4926 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4927 	  && ((code != EQ_EXPR && code != NE_EXPR)
4928 	      || rhs != boolean_false_node
4929 		 /* Either push into ops the individual bitwise
4930 		    or resp. and operands, depending on which
4931 		    edge is other_bb.  */
4932 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4933 				 ^ (code == EQ_EXPR))
4934 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4935 			   loop_containing_stmt (stmt))))
4936 	{
4937 	  /* Or push the GIMPLE_COND stmt itself.  */
4938 	  operand_entry *oe = operand_entry_pool.allocate ();
4939 
4940 	  oe->op = NULL;
4941 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4942 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4943 	  /* oe->op = NULL signals that there is no SSA_NAME
4944 	     for the range test; oe->id instead is the number
4945 	     of the basic block at whose end the GIMPLE_COND
4946 	     is.  */
4947 	  oe->id = bb->index;
4948 	  oe->count = 1;
4949 	  oe->stmt_to_insert = NULL;
4950 	  ops.safe_push (oe);
4951 	  bb_ent.op = NULL;
4952 	  bb_ent.last_idx++;
4953 	}
4954       else if (ops.length () > bb_ent.first_idx)
4955 	{
4956 	  bb_ent.op = lhs;
4957 	  bb_ent.last_idx = ops.length ();
4958 	}
4959       bbinfo.safe_push (bb_ent);
4960       for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4961 	ops[i]->id = bb->index;
4962       if (bb == first_bb)
4963 	break;
4964     }
4965   if (ops.length () > 1)
4966     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4967   if (any_changes)
4968     {
4969       unsigned int idx, max_idx = 0;
4970       /* update_ops relies on the has_single_use predicates returning the
4971 	 same values as they did during get_ops earlier.  Additionally it
4972 	 never removes statements, only adds new ones, and it should walk
4973 	 from the single imm use and check the predicate already before
4974 	 making those changes.
4975 	 On the other hand, the handling of GIMPLE_COND directly can turn
4976 	 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
4977 	 it needs to be done in a separate loop afterwards.  */
4978       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4979 	{
4980 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4981 	      && bbinfo[idx].op != NULL_TREE)
4982 	    {
4983 	      tree new_op;
4984 
4985 	      max_idx = idx;
4986 	      stmt = last_stmt (bb);
4987 	      new_op = update_ops (bbinfo[idx].op,
4988 				   (enum tree_code)
4989 				   ops[bbinfo[idx].first_idx]->rank,
4990 				   ops, &bbinfo[idx].first_idx,
4991 				   loop_containing_stmt (stmt));
4992 	      if (new_op == NULL_TREE)
4993 		{
4994 		  gcc_assert (bb == last_bb);
4995 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4996 		}
4997 	      if (bbinfo[idx].op != new_op)
4998 		{
4999 		  imm_use_iterator iter;
5000 		  use_operand_p use_p;
5001 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5002 
5003 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5004 		    if (is_gimple_debug (use_stmt))
5005 		      continue;
5006 		    else if (gimple_code (use_stmt) == GIMPLE_COND
5007 			     || gimple_code (use_stmt) == GIMPLE_PHI)
5008 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5009 			SET_USE (use_p, new_op);
5010 		    else if ((is_gimple_assign (use_stmt)
5011 			      && (TREE_CODE_CLASS
5012 				  (gimple_assign_rhs_code (use_stmt))
5013 				  == tcc_comparison)))
5014 		      cast_or_tcc_cmp_stmt = use_stmt;
5015 		    else if (gimple_assign_cast_p (use_stmt))
5016 		      cast_or_tcc_cmp_stmt = use_stmt;
5017 		    else
5018 		      gcc_unreachable ();
5019 
5020 		  if (cast_or_tcc_cmp_stmt)
5021 		    {
5022 		      gcc_assert (bb == last_bb);
5023 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5024 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5025 		      enum tree_code rhs_code
5026 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5027 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5028 			: CONVERT_EXPR;
5029 		      gassign *g;
5030 		      if (is_gimple_min_invariant (new_op))
5031 			{
5032 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
5033 			  g = gimple_build_assign (new_lhs, new_op);
5034 			}
5035 		      else
5036 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
5037 		      gimple_stmt_iterator gsi
5038 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
5039 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5040 		      gimple_set_visited (g, true);
5041 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5042 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5043 			if (is_gimple_debug (use_stmt))
5044 			  continue;
5045 			else if (gimple_code (use_stmt) == GIMPLE_COND
5046 				 || gimple_code (use_stmt) == GIMPLE_PHI)
5047 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5048 			    SET_USE (use_p, new_lhs);
5049 			else
5050 			  gcc_unreachable ();
5051 		    }
5052 		}
5053 	    }
5054 	  if (bb == first_bb)
5055 	    break;
5056 	}
5057       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5058 	{
5059 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5060 	      && bbinfo[idx].op == NULL_TREE
5061 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5062 	    {
5063 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
5064 
5065 	      if (idx > max_idx)
5066 		max_idx = idx;
5067 
5068 	      /* If we collapse the conditional to a true/false
5069 		 condition, then bubble that knowledge up to our caller.  */
5070 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5071 		{
5072 		  gimple_cond_make_false (cond_stmt);
5073 		  cfg_cleanup_needed = true;
5074 		}
5075 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5076 		{
5077 		  gimple_cond_make_true (cond_stmt);
5078 		  cfg_cleanup_needed = true;
5079 		}
5080 	      else
5081 		{
5082 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
5083 		  gimple_cond_set_lhs (cond_stmt,
5084 				       ops[bbinfo[idx].first_idx]->op);
5085 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5086 		}
5087 	      update_stmt (cond_stmt);
5088 	    }
5089 	  if (bb == first_bb)
5090 	    break;
5091 	}
5092 
5093       /* The above changes could result in basic blocks after the first
5094 	 modified one, up to and including last_bb, to be executed even if
5095 	 they would not be in the original program.  If the value ranges of
5096 	 assignment lhs' in those bbs were dependent on the conditions
5097 	 guarding those basic blocks which now can change, the VRs might
5098 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
5099 	 are only used within the same bb, it should not be a big deal if
5100 	 we just reset all the VRs in those bbs.  See PR68671.  */
5101       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5102 	reset_flow_sensitive_info_in_bb (bb);
5103     }
5104   return cfg_cleanup_needed;
5105 }
5106 
5107 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5108    of STMT in its operands.  This is also known as a "destructive
5109    update" operation.  */
5110 
5111 static bool
5112 is_phi_for_stmt (gimple *stmt, tree operand)
5113 {
5114   gimple *def_stmt;
5115   gphi *def_phi;
5116   tree lhs;
5117   use_operand_p arg_p;
5118   ssa_op_iter i;
5119 
5120   if (TREE_CODE (operand) != SSA_NAME)
5121     return false;
5122 
5123   lhs = gimple_assign_lhs (stmt);
5124 
5125   def_stmt = SSA_NAME_DEF_STMT (operand);
5126   def_phi = dyn_cast <gphi *> (def_stmt);
5127   if (!def_phi)
5128     return false;
5129 
5130   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5131     if (lhs == USE_FROM_PTR (arg_p))
5132       return true;
5133   return false;
5134 }
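
/* A hedged, illustrative example (SSA names invented): for the
   reduction pattern

     <bb L>:
     # sum_1 = PHI <sum_2(L), 0(preheader)>
     sum_2 = x_3 + sum_1;

   is_phi_for_stmt returns true when STMT is the definition of sum_2
   and OPERAND is sum_1, because sum_1's defining PHI uses sum_2.  */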
5135 
5136 /* Remove def stmt of VAR if VAR has zero uses and recurse
5137    on rhs1 operand if so.  */
5138 
5139 static void
5140 remove_visited_stmt_chain (tree var)
5141 {
5142   gimple *stmt;
5143   gimple_stmt_iterator gsi;
5144 
5145   while (1)
5146     {
5147       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5148 	return;
5149       stmt = SSA_NAME_DEF_STMT (var);
5150       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5151 	{
5152 	  var = gimple_assign_rhs1 (stmt);
5153 	  gsi = gsi_for_stmt (stmt);
5154 	  reassoc_remove_stmt (&gsi);
5155 	  release_defs (stmt);
5156 	}
5157       else
5158 	return;
5159     }
5160 }
5161 
5162 /* This function checks three consecutive operands in
5163    the passed operand vector OPS starting from OPINDEX and
5164    swaps two operands if it is profitable for the binary operation
5165    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5166 
5167    We pair ops with the same rank if possible.
5168 
5169    The alternative we try is to see if STMT is a destructive
5170    update style statement, which is like:
5171    b = phi (a, ...)
5172    a = c + b;
5173    In that case, we want to use the destructive update form to
5174    expose the possible vectorizer sum reduction opportunity.
5175    In that case, the third operand will be the phi node. This
5176    check is not performed if STMT is null.
5177 
5178    We could, of course, try to be better as noted above, and do a
5179    lot of work to try to find these opportunities in >3 operand
5180    cases, but it is unlikely to be worth it.  */
5181 
5182 static void
5183 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5184 			  unsigned int opindex, gimple *stmt)
5185 {
5186   operand_entry *oe1, *oe2, *oe3;
5187 
5188   oe1 = ops[opindex];
5189   oe2 = ops[opindex + 1];
5190   oe3 = ops[opindex + 2];
5191 
5192   if ((oe1->rank == oe2->rank
5193        && oe2->rank != oe3->rank)
5194       || (stmt && is_phi_for_stmt (stmt, oe3->op)
5195 	  && !is_phi_for_stmt (stmt, oe1->op)
5196 	  && !is_phi_for_stmt (stmt, oe2->op)))
5197     std::swap (*oe1, *oe3);
5198   else if ((oe1->rank == oe3->rank
5199 	    && oe2->rank != oe3->rank)
5200 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
5201 	       && !is_phi_for_stmt (stmt, oe1->op)
5202 	       && !is_phi_for_stmt (stmt, oe3->op)))
5203     std::swap (*oe1, *oe2);
5204 }
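
/* A minimal sketch of the rank-based swap above (ranks invented):
   given ops {a (rank 3), b (rank 3), c (rank 2)} at OPINDEX, a and c
   are swapped so that the two rank-3 entries end up as the pair
   consumed by one binary operation, which gives the redundancy
   eliminator a better chance to match them.  */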
5205 
5206 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5207    two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
5208    whether RHS1 op RHS2 can be inserted before or needs to be inserted
5209    after the returned stmt.  */
5210 
5211 static inline gimple *
5212 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5213 {
5214   insert_before = true;
5215   if (TREE_CODE (rhs1) == SSA_NAME
5216       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5217     {
5218       stmt = SSA_NAME_DEF_STMT (rhs1);
5219       insert_before = false;
5220     }
5221   if (TREE_CODE (rhs2) == SSA_NAME
5222       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5223     {
5224       stmt = SSA_NAME_DEF_STMT (rhs2);
5225       insert_before = false;
5226     }
5227   return stmt;
5228 }
5229 
5230 /* If the stmt that defines operand has to be inserted, insert it
5231    before the use.  */
5232 static void
5233 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5234 {
5235   gcc_assert (is_gimple_assign (stmt_to_insert));
5236   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5237   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5238   bool insert_before;
5239   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5240   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5241   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5242 
5243   /* If the insert point is not stmt, then insert_point would be
5244      the point where operand rhs1 or rhs2 is defined. In this case,
5245      stmt_to_insert has to be inserted afterwards. This would
5246      only happen when the stmt insertion point is flexible. */
5247   if (insert_before)
5248     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5249   else
5250     insert_stmt_after (stmt_to_insert, insert_point);
5251 }
5252 
5253 
5254 /* Recursively rewrite our linearized statements so that the operators
5255    match those in OPS[OPINDEX], putting the computation in rank
5256    order.  Return new lhs.
5257    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5258    the current stmt and during recursive invocations.
5259    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5260    recursive invocations.  */
5261 
5262 static tree
5263 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5264 		   const vec<operand_entry *> &ops, bool changed,
5265 		   bool next_changed)
5266 {
5267   tree rhs1 = gimple_assign_rhs1 (stmt);
5268   tree rhs2 = gimple_assign_rhs2 (stmt);
5269   tree lhs = gimple_assign_lhs (stmt);
5270   operand_entry *oe;
5271 
5272   /* The final recursion case for this function is that you have
5273      exactly two operations left.
5274      If we had exactly one op in the entire list to start with, we
5275      would have never called this function, and the tail recursion
5276      rewrites them one at a time.  */
5277   if (opindex + 2 == ops.length ())
5278     {
5279       operand_entry *oe1, *oe2;
5280 
5281       oe1 = ops[opindex];
5282       oe2 = ops[opindex + 1];
5283 
5284       if (rhs1 != oe1->op || rhs2 != oe2->op)
5285 	{
5286 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5287 	  unsigned int uid = gimple_uid (stmt);
5288 
5289 	  if (dump_file && (dump_flags & TDF_DETAILS))
5290 	    {
5291 	      fprintf (dump_file, "Transforming ");
5292 	      print_gimple_stmt (dump_file, stmt, 0);
5293 	    }
5294 
5295 	  /* If the stmt that defines operand has to be inserted, insert it
5296 	     before the use.  */
5297 	  if (oe1->stmt_to_insert)
5298 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5299 	  if (oe2->stmt_to_insert)
5300 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5301 	  /* Even when changed is false, reassociation could have e.g. removed
5302 	     some redundant operations, so unless we are just swapping the
5303 	     arguments or unless there is no change at all (then we just
5304 	     return lhs), force creation of a new SSA_NAME.  */
5305 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5306 	    {
5307 	      bool insert_before;
5308 	      gimple *insert_point
5309 		= find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5310 	      lhs = make_ssa_name (TREE_TYPE (lhs));
5311 	      stmt
5312 		= gimple_build_assign (lhs, rhs_code,
5313 				       oe1->op, oe2->op);
5314 	      gimple_set_uid (stmt, uid);
5315 	      gimple_set_visited (stmt, true);
5316 	      if (insert_before)
5317 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5318 	      else
5319 		insert_stmt_after (stmt, insert_point);
5320 	    }
5321 	  else
5322 	    {
5323 	      bool insert_before;
5324 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5325 						      insert_before)
5326 				   == stmt);
5327 	      gimple_assign_set_rhs1 (stmt, oe1->op);
5328 	      gimple_assign_set_rhs2 (stmt, oe2->op);
5329 	      update_stmt (stmt);
5330 	    }
5331 
5332 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
5333 	    remove_visited_stmt_chain (rhs1);
5334 
5335 	  if (dump_file && (dump_flags & TDF_DETAILS))
5336 	    {
5337 	      fprintf (dump_file, " into ");
5338 	      print_gimple_stmt (dump_file, stmt, 0);
5339 	    }
5340 	}
5341       return lhs;
5342     }
5343 
5344   /* If we hit here, we should have 3 or more ops left.  */
5345   gcc_assert (opindex + 2 < ops.length ());
5346 
5347   /* Rewrite the next operator.  */
5348   oe = ops[opindex];
5349 
5350   /* If the stmt that defines operand has to be inserted, insert it
5351      before the use.  */
5352   if (oe->stmt_to_insert)
5353     insert_stmt_before_use (stmt, oe->stmt_to_insert);
5354 
5355   /* Recurse on the LHS of the binary operator, which is guaranteed to
5356      be the non-leaf side.  */
5357   tree new_rhs1
5358     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5359 			 changed || oe->op != rhs2 || next_changed,
5360 			 false);
5361 
5362   if (oe->op != rhs2 || new_rhs1 != rhs1)
5363     {
5364       if (dump_file && (dump_flags & TDF_DETAILS))
5365 	{
5366 	  fprintf (dump_file, "Transforming ");
5367 	  print_gimple_stmt (dump_file, stmt, 0);
5368 	}
5369 
5370       /* If changed is false, this is either opindex == 0
5371 	 or all outer rhs2's were equal to corresponding oe->op,
5372 	 and powi_result is NULL.
5373 	 That means lhs is equivalent before and after reassociation.
5374 	 Otherwise ensure the old lhs SSA_NAME is not reused and
5375 	 create a new stmt as well, so that any debug stmts will be
5376 	 properly adjusted.  */
5377       if (changed)
5378 	{
5379 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5380 	  unsigned int uid = gimple_uid (stmt);
5381 	  bool insert_before;
5382 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5383 						    insert_before);
5384 
5385 	  lhs = make_ssa_name (TREE_TYPE (lhs));
5386 	  stmt = gimple_build_assign (lhs, rhs_code,
5387 				      new_rhs1, oe->op);
5388 	  gimple_set_uid (stmt, uid);
5389 	  gimple_set_visited (stmt, true);
5390 	  if (insert_before)
5391 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5392 	  else
5393 	    insert_stmt_after (stmt, insert_point);
5394 	}
5395       else
5396 	{
5397 	  bool insert_before;
5398 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5399 						  insert_before)
5400 			       == stmt);
5401 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
5402 	  gimple_assign_set_rhs2 (stmt, oe->op);
5403 	  update_stmt (stmt);
5404 	}
5405 
5406       if (dump_file && (dump_flags & TDF_DETAILS))
5407 	{
5408 	  fprintf (dump_file, " into ");
5409 	  print_gimple_stmt (dump_file, stmt, 0);
5410 	}
5411     }
5412   return lhs;
5413 }
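
/* As a rough, hedged sketch of the rewrite above (SSA names
   invented): given the linearized chain

     t1_1 = a_2 + b_3;
     t2_4 = t1_1 + c_5;
     t3_6 = t2_4 + d_7;

   and OPS sorted into rank order, rewrite_expr_tree walks down the
   rhs1 chain placing one ranked operand per statement, and creates
   fresh SSA names whenever CHANGED forces it so that debug stmts
   can be adjusted properly.  */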
5414 
5415 /* Find out how many cycles we need to compute the statement chain.
5416    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is
5417    the maximum number of independent statements we may execute per cycle.  */
5418 
5419 static int
5420 get_required_cycles (int ops_num, int cpu_width)
5421 {
5422   int res;
5423   int elog;
5424   unsigned int rest;
5425 
5426   /* While we have more than 2 * cpu_width operands
5427      we may reduce the number of operands by cpu_width
5428      per cycle.  */
5429   res = ops_num / (2 * cpu_width);
5430 
5431   /* The remaining operand count may be halved each cycle
5432      until we have only one operand.  */
5433   rest = (unsigned)(ops_num - res * cpu_width);
5434   elog = exact_log2 (rest);
5435   if (elog >= 0)
5436     res += elog;
5437   else
5438     res += floor_log2 (rest) + 1;
5439 
5440   return res;
5441 }
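
/* A worked example of the computation above (numbers illustrative
   only): for ops_num = 12 and cpu_width = 2,
     res  = 12 / (2 * 2) = 3 width-limited reduction cycles,
     rest = 12 - 3 * 2   = 6 remaining operands,
   and since 6 is not a power of two, floor_log2 (6) + 1 = 3 further
   cycles are added, for a total of 6.  */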
5442 
5443 /* Returns an optimal number of registers to use for computation of
5444    given statements.  */
5445 
5446 static int
5447 get_reassociation_width (int ops_num, enum tree_code opc,
5448 			 machine_mode mode)
5449 {
5450   int param_width = param_tree_reassoc_width;
5451   int width;
5452   int width_min;
5453   int cycles_best;
5454 
5455   if (param_width > 0)
5456     width = param_width;
5457   else
5458     width = targetm.sched.reassociation_width (opc, mode);
5459 
5460   if (width == 1)
5461     return width;
5462 
5463   /* Get the minimal time required for sequence computation.  */
5464   cycles_best = get_required_cycles (ops_num, width);
5465 
5466   /* Check if we may use less width and still compute sequence for
5467      the same time.  It will allow us to reduce registers usage.
5468      get_required_cycles is monotonically increasing with lower width
5469      so we can perform a binary search for the minimal width that still
5470      results in the optimal cycle count.  */
5471   width_min = 1;
5472   while (width > width_min)
5473     {
5474       int width_mid = (width + width_min) / 2;
5475 
5476       if (get_required_cycles (ops_num, width_mid) == cycles_best)
5477 	width = width_mid;
5478       else if (width_min < width_mid)
5479 	width_min = width_mid;
5480       else
5481 	break;
5482     }
5483 
5484   return width;
5485 }
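
/* A sketch of the search above with invented numbers: for
   ops_num = 4 and a target-provided width of 4, cycles_best is 2;
   width_mid = 2 still achieves 2 cycles, so width shrinks to 2,
   while width 1 would need 3 cycles, so the loop stops and 2 is
   returned, reducing register pressure at no cost in latency.  */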
5486 
5487 /* Recursively rewrite our linearized statements so that the operators
5488    match those in OPS[OPINDEX], putting the computation in rank
5489    order and trying to allow operations to be executed in
5490    parallel.  */
5491 
5492 static void
5493 rewrite_expr_tree_parallel (gassign *stmt, int width,
5494 			    const vec<operand_entry *> &ops)
5495 {
5496   enum tree_code opcode = gimple_assign_rhs_code (stmt);
5497   int op_num = ops.length ();
5498   gcc_assert (op_num > 0);
5499   int stmt_num = op_num - 1;
5500   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5501   int op_index = op_num - 1;
5502   int stmt_index = 0;
5503   int ready_stmts_end = 0;
5504   int i = 0;
5505   gimple *stmt1 = NULL, *stmt2 = NULL;
5506   tree last_rhs1 = gimple_assign_rhs1 (stmt);
5507 
5508   /* We start expression rewriting from the top statements.
5509      So, in this loop we create a full list of statements
5510      we will work with.  */
5511   stmts[stmt_num - 1] = stmt;
5512   for (i = stmt_num - 2; i >= 0; i--)
5513     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5514 
5515   for (i = 0; i < stmt_num; i++)
5516     {
5517       tree op1, op2;
5518 
5519       /* Determine whether we should use results of
5520 	 already handled statements or not.  */
5521       if (ready_stmts_end == 0
5522 	  && (i - stmt_index >= width || op_index < 1))
5523 	ready_stmts_end = i;
5524 
5525       /* Now we choose operands for the next statement.  A non-zero
5526 	 value in ready_stmts_end means that we should use
5527 	 the results of already generated statements as new operands.  */
5528       if (ready_stmts_end > 0)
5529 	{
5530 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
5531 	  if (ready_stmts_end > stmt_index)
5532 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
5533 	  else if (op_index >= 0)
5534 	    {
5535 	      operand_entry *oe = ops[op_index--];
5536 	      stmt2 = oe->stmt_to_insert;
5537 	      op2 = oe->op;
5538 	    }
5539 	  else
5540 	    {
5541 	      gcc_assert (stmt_index < i);
5542 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
5543 	    }
5544 
5545 	  if (stmt_index >= ready_stmts_end)
5546 	    ready_stmts_end = 0;
5547 	}
5548       else
5549 	{
5550 	  if (op_index > 1)
5551 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5552 	  operand_entry *oe2 = ops[op_index--];
5553 	  operand_entry *oe1 = ops[op_index--];
5554 	  op2 = oe2->op;
5555 	  stmt2 = oe2->stmt_to_insert;
5556 	  op1 = oe1->op;
5557 	  stmt1 = oe1->stmt_to_insert;
5558 	}
5559 
5560       /* If we emit the last statement then we should put
5561 	 operands into the last statement.  It will also
5562 	 break the loop.  */
5563       if (op_index < 0 && stmt_index == i)
5564 	i = stmt_num - 1;
5565 
5566       if (dump_file && (dump_flags & TDF_DETAILS))
5567 	{
5568 	  fprintf (dump_file, "Transforming ");
5569 	  print_gimple_stmt (dump_file, stmts[i], 0);
5570 	}
5571 
5572       /* If the stmt that defines operand has to be inserted, insert it
5573 	 before the use.  */
5574       if (stmt1)
5575 	insert_stmt_before_use (stmts[i], stmt1);
5576       if (stmt2)
5577 	insert_stmt_before_use (stmts[i], stmt2);
5578       stmt1 = stmt2 = NULL;
5579 
5580       /* We keep original statement only for the last one.  All
5581 	 others are recreated.  */
5582       if (i == stmt_num - 1)
5583 	{
5584 	  gimple_assign_set_rhs1 (stmts[i], op1);
5585 	  gimple_assign_set_rhs2 (stmts[i], op2);
5586 	  update_stmt (stmts[i]);
5587 	}
5588       else
5589 	{
5590 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5591 	  gimple_set_visited (stmts[i], true);
5592 	}
5593       if (dump_file && (dump_flags & TDF_DETAILS))
5594 	{
5595 	  fprintf (dump_file, " into ");
5596 	  print_gimple_stmt (dump_file, stmts[i], 0);
5597 	}
5598     }
5599 
5600   remove_visited_stmt_chain (last_rhs1);
5601 }
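
/* As a hedged illustration (names invented), for WIDTH == 2 the
   chain a + b + c + d + e is re-emitted roughly as

     t1 = a + b;   t2 = c + d;    <- independent, may issue together
     t3 = t1 + t2;
     t4 = t3 + e;

   instead of the fully serial ((((a + b) + c) + d) + e).  */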
5602 
5603 /* Transform STMT, which is really (A + B) + (C + D), into the left
5604    linear form, ((A+B)+C)+D.
5605    Recurse on D if necessary.  */
5606 
5607 static void
5608 linearize_expr (gimple *stmt)
5609 {
5610   gimple_stmt_iterator gsi;
5611   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5612   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5613   gimple *oldbinrhs = binrhs;
5614   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5615   gimple *newbinrhs = NULL;
5616   class loop *loop = loop_containing_stmt (stmt);
5617   tree lhs = gimple_assign_lhs (stmt);
5618 
5619   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5620 	      && is_reassociable_op (binrhs, rhscode, loop));
5621 
5622   gsi = gsi_for_stmt (stmt);
5623 
5624   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5625   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5626 				gimple_assign_rhs_code (binrhs),
5627 				gimple_assign_lhs (binlhs),
5628 				gimple_assign_rhs2 (binrhs));
5629   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5630   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5631   gimple_set_uid (binrhs, gimple_uid (stmt));
5632 
5633   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5634     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5635 
5636   if (dump_file && (dump_flags & TDF_DETAILS))
5637     {
5638       fprintf (dump_file, "Linearized: ");
5639       print_gimple_stmt (dump_file, stmt, 0);
5640     }
5641 
5642   reassociate_stats.linearized++;
5643   update_stmt (stmt);
5644 
5645   gsi = gsi_for_stmt (oldbinrhs);
5646   reassoc_remove_stmt (&gsi);
5647   release_defs (oldbinrhs);
5648 
5649   gimple_set_visited (stmt, true);
5650   gimple_set_visited (binlhs, true);
5651   gimple_set_visited (binrhs, true);
5652 
5653   /* Tail recurse on the new rhs if it still needs reassociation.  */
5654   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5655     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5656 	   want to change the algorithm while converting to tuples.  */
5657     linearize_expr (stmt);
5658 }
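
/* A concrete sketch (SSA names invented): given

     x_1 = a_2 + b_3;   y_4 = c_5 + d_6;   z_7 = x_1 + y_4;

   the code above emits t_8 = x_1 + d_6, rewrites the root to
   z_7 = t_8 + c_5 and removes the now-dead definition of y_4,
   leaving a left-linear chain over {a, b, c, d}.  */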
5659 
5660 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5661    it.  Otherwise, return NULL.  */
5662 
5663 static gimple *
5664 get_single_immediate_use (tree lhs)
5665 {
5666   use_operand_p immuse;
5667   gimple *immusestmt;
5668 
5669   if (TREE_CODE (lhs) == SSA_NAME
5670       && single_imm_use (lhs, &immuse, &immusestmt)
5671       && is_gimple_assign (immusestmt))
5672     return immusestmt;
5673 
5674   return NULL;
5675 }
5676 
5677 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5678    representing the negated value.  Insertions of any necessary
5679    instructions go before GSI.
5680    This function is recursive in that, if you hand it "a_5" as the
5681    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5682    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
5683 
5684 static tree
5685 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5686 {
5687   gimple *negatedefstmt = NULL;
5688   tree resultofnegate;
5689   gimple_stmt_iterator gsi;
5690   unsigned int uid;
5691 
5692   /* If we are trying to negate a name, defined by an add, negate the
5693      add operands instead.  */
5694   if (TREE_CODE (tonegate) == SSA_NAME)
5695     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5696   if (TREE_CODE (tonegate) == SSA_NAME
5697       && is_gimple_assign (negatedefstmt)
5698       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5699       && has_single_use (gimple_assign_lhs (negatedefstmt))
5700       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5701     {
5702       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5703       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5704       tree lhs = gimple_assign_lhs (negatedefstmt);
5705       gimple *g;
5706 
5707       gsi = gsi_for_stmt (negatedefstmt);
5708       rhs1 = negate_value (rhs1, &gsi);
5709 
5710       gsi = gsi_for_stmt (negatedefstmt);
5711       rhs2 = negate_value (rhs2, &gsi);
5712 
5713       gsi = gsi_for_stmt (negatedefstmt);
5714       lhs = make_ssa_name (TREE_TYPE (lhs));
5715       gimple_set_visited (negatedefstmt, true);
5716       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5717       gimple_set_uid (g, gimple_uid (negatedefstmt));
5718       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5719       return lhs;
5720     }
5721 
5722   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5723   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5724 					     NULL_TREE, true, GSI_SAME_STMT);
5725   gsi = *gsip;
5726   uid = gimple_uid (gsi_stmt (gsi));
5727   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5728     {
5729       gimple *stmt = gsi_stmt (gsi);
5730       if (gimple_uid (stmt) != 0)
5731 	break;
5732       gimple_set_uid (stmt, uid);
5733     }
5734   return resultofnegate;
5735 }
5736 
5737 /* Return true if we should break up the subtract in STMT into an add
5738    with negate.  This is true when the subtract operands are really
5739    adds, or the subtract itself is used in an add expression.  In
5740    either case, breaking up the subtract into an add with negate
5741    exposes the adds to reassociation.  */
5742 
5743 static bool
5744 should_break_up_subtract (gimple *stmt)
5745 {
5746   tree lhs = gimple_assign_lhs (stmt);
5747   tree binlhs = gimple_assign_rhs1 (stmt);
5748   tree binrhs = gimple_assign_rhs2 (stmt);
5749   gimple *immusestmt;
5750   class loop *loop = loop_containing_stmt (stmt);
5751 
5752   if (TREE_CODE (binlhs) == SSA_NAME
5753       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5754     return true;
5755 
5756   if (TREE_CODE (binrhs) == SSA_NAME
5757       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5758     return true;
5759 
5760   if (TREE_CODE (lhs) == SSA_NAME
5761       && (immusestmt = get_single_immediate_use (lhs))
5762       && is_gimple_assign (immusestmt)
5763       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5764 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5765 	      && gimple_assign_rhs1 (immusestmt) == lhs)
5766 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5767     return true;
5768   return false;
5769 }
5770 
5771 /* Transform STMT from A - B into A + -B.  */
5772 
5773 static void
5774 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5775 {
5776   tree rhs1 = gimple_assign_rhs1 (stmt);
5777   tree rhs2 = gimple_assign_rhs2 (stmt);
5778 
5779   if (dump_file && (dump_flags & TDF_DETAILS))
5780     {
5781       fprintf (dump_file, "Breaking up subtract ");
5782       print_gimple_stmt (dump_file, stmt, 0);
5783     }
5784 
5785   rhs2 = negate_value (rhs2, gsip);
5786   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5787   update_stmt (stmt);
5788 }
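
/* E.g. (names invented), x_1 = a_2 - b_3 becomes

     t_4 = -b_3;
     x_1 = a_2 + t_4;

   where negate_value may push the negation further down if b_3 is
   itself defined by a single-use PLUS_EXPR.  */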
5789 
5790 /* Determine whether STMT is a builtin call that raises an SSA name
5791    to an integer power and has only one use.  If so, and this is early
5792    reassociation and unsafe math optimizations are permitted, place
5793    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5794    If any of these conditions does not hold, return FALSE.  */
5795 
5796 static bool
5797 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5798 {
5799   tree arg1;
5800   REAL_VALUE_TYPE c, cint;
5801 
5802   switch (gimple_call_combined_fn (stmt))
5803     {
5804     CASE_CFN_POW:
5805       if (flag_errno_math)
5806 	return false;
5807 
5808       *base = gimple_call_arg (stmt, 0);
5809       arg1 = gimple_call_arg (stmt, 1);
5810 
5811       if (TREE_CODE (arg1) != REAL_CST)
5812 	return false;
5813 
5814       c = TREE_REAL_CST (arg1);
5815 
5816       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5817 	return false;
5818 
5819       *exponent = real_to_integer (&c);
5820       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5821       if (!real_identical (&c, &cint))
5822 	return false;
5823 
5824       break;
5825 
5826     CASE_CFN_POWI:
5827       *base = gimple_call_arg (stmt, 0);
5828       arg1 = gimple_call_arg (stmt, 1);
5829 
5830       if (!tree_fits_shwi_p (arg1))
5831 	return false;
5832 
5833       *exponent = tree_to_shwi (arg1);
5834       break;
5835 
5836     default:
5837       return false;
5838     }
5839 
5840   /* Expanding negative exponents is generally unproductive, so we don't
5841      complicate matters with those.  Exponents of zero and one should
5842      have been handled by expression folding.  */
5843   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5844     return false;
5845 
5846   return true;
5847 }
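
/* Illustrative calls (hypothetical IL): _1 = __builtin_pow (x_2, 4.0)
   is accepted with *BASE = x_2 and *EXPONENT = 4, whereas
   __builtin_pow (x_2, 0.5) and __builtin_powi (x_2, -2) are rejected
   (non-integral resp. negative exponent).  */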
5848 
5849 /* Try to derive and add operand entry for OP to *OPS.  Return false if
5850    unsuccessful.  */
5851 
5852 static bool
5853 try_special_add_to_ops (vec<operand_entry *> *ops,
5854 			enum tree_code code,
5855 			tree op, gimple* def_stmt)
5856 {
5857   tree base = NULL_TREE;
5858   HOST_WIDE_INT exponent = 0;
5859 
5860   if (TREE_CODE (op) != SSA_NAME
5861       || ! has_single_use (op))
5862     return false;
5863 
5864   if (code == MULT_EXPR
5865       && reassoc_insert_powi_p
5866       && flag_unsafe_math_optimizations
5867       && is_gimple_call (def_stmt)
5868       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5869     {
5870       add_repeat_to_ops_vec (ops, base, exponent);
5871       gimple_set_visited (def_stmt, true);
5872       return true;
5873     }
5874   else if (code == MULT_EXPR
5875 	   && is_gimple_assign (def_stmt)
5876 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5877 	   && !HONOR_SNANS (TREE_TYPE (op))
5878 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5879 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5880 	   && (!FLOAT_TYPE_P (TREE_TYPE (op))
5881 	       || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5882     {
5883       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5884       tree cst = build_minus_one_cst (TREE_TYPE (op));
5885       add_to_ops_vec (ops, rhs1);
5886       add_to_ops_vec (ops, cst);
5887       gimple_set_visited (def_stmt, true);
5888       return true;
5889     }
5890 
5891   return false;
5892 }
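
/* A sketch of the NEGATE_EXPR case above (names invented): given
   t_1 = -y_2;  z_3 = x_4 * t_1;  the multiplication operand t_1 is
   replaced in OPS by the pair { y_2, -1 }, exposing the constant
   factor to the rest of the reassociation machinery.  */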
5893 
5894 /* Recursively linearize a binary expression that is the RHS of STMT.
5895    Place the operands of the expression tree in the vector named OPS.  */
5896 
5897 static void
5898 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5899 		     bool is_associative, bool set_visited)
5900 {
5901   tree binlhs = gimple_assign_rhs1 (stmt);
5902   tree binrhs = gimple_assign_rhs2 (stmt);
5903   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5904   bool binlhsisreassoc = false;
5905   bool binrhsisreassoc = false;
5906   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5907   class loop *loop = loop_containing_stmt (stmt);
5908 
5909   if (set_visited)
5910     gimple_set_visited (stmt, true);
5911 
5912   if (TREE_CODE (binlhs) == SSA_NAME)
5913     {
5914       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5915       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5916 			 && !stmt_could_throw_p (cfun, binlhsdef));
5917     }
5918 
5919   if (TREE_CODE (binrhs) == SSA_NAME)
5920     {
5921       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5922       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5923 			 && !stmt_could_throw_p (cfun, binrhsdef));
5924     }
5925 
5926   /* If the LHS is not reassociable, but the RHS is, we need to swap
5927      them.  If neither is reassociable, there is nothing we can do, so
5928      just put them in the ops vector.  If the LHS is reassociable,
5929      linearize it.  If both are reassociable, then linearize the RHS
5930      and the LHS.  */
5931 
5932   if (!binlhsisreassoc)
5933     {
5934       /* If this is not an associative operation, like division, give up.  */
5935       if (!is_associative)
5936 	{
5937 	  add_to_ops_vec (ops, binrhs);
5938 	  return;
5939 	}
5940 
5941       if (!binrhsisreassoc)
5942 	{
5943 	  bool swap = false;
5944 	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5945 	    /* If we add ops for the rhs we expect to be able to recurse
5946 	       to it via the lhs during expression rewrite so swap
5947 	       operands.  */
5948 	    swap = true;
5949 	  else
5950 	    add_to_ops_vec (ops, binrhs);
5951 
5952 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5953 	    add_to_ops_vec (ops, binlhs);
5954 
5955 	  if (!swap)
5956 	    return;
5957 	}
5958 
5959       if (dump_file && (dump_flags & TDF_DETAILS))
5960 	{
5961 	  fprintf (dump_file, "swapping operands of ");
5962 	  print_gimple_stmt (dump_file, stmt, 0);
5963 	}
5964 
5965       swap_ssa_operands (stmt,
5966 			 gimple_assign_rhs1_ptr (stmt),
5967 			 gimple_assign_rhs2_ptr (stmt));
5968       update_stmt (stmt);
5969 
5970       if (dump_file && (dump_flags & TDF_DETAILS))
5971 	{
5972 	  fprintf (dump_file, " is now ");
5973 	  print_gimple_stmt (dump_file, stmt, 0);
5974 	}
5975       if (!binrhsisreassoc)
5976 	return;
5977 
5978       /* We want to make it so the lhs is always the reassociative op,
5979 	 so swap.  */
5980       std::swap (binlhs, binrhs);
5981     }
5982   else if (binrhsisreassoc)
5983     {
5984       linearize_expr (stmt);
5985       binlhs = gimple_assign_rhs1 (stmt);
5986       binrhs = gimple_assign_rhs2 (stmt);
5987     }
5988 
5989   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5990 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5991 				      rhscode, loop));
5992   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5993 		       is_associative, set_visited);
5994 
5995   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5996     add_to_ops_vec (ops, binrhs);
5997 }
5998 
5999 /* Repropagate the negates back into subtracts, since no other pass
6000    currently does it.  */
6001 
6002 static void
6003 repropagate_negates (void)
6004 {
6005   unsigned int i = 0;
6006   tree negate;
6007 
6008   FOR_EACH_VEC_ELT (plus_negates, i, negate)
6009     {
6010       gimple *user = get_single_immediate_use (negate);
6011       if (!user || !is_gimple_assign (user))
6012 	continue;
6013 
6014       tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6015       if (TREE_CODE (negateop) == SSA_NAME
6016 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6017 	continue;
6018 
6019       /* The negate operand can be either operand of a PLUS_EXPR
6020 	 (it can be the LHS if the RHS is a constant for example).
6021 
6022 	 Force the negate operand to the RHS of the PLUS_EXPR, then
6023 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
6024       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6025 	{
6026 	  /* If the negated operand appears on the LHS of the
6027 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
6028 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
6029 	  if (gimple_assign_rhs1 (user) == negate)
6030 	    {
6031 	      swap_ssa_operands (user,
6032 				 gimple_assign_rhs1_ptr (user),
6033 				 gimple_assign_rhs2_ptr (user));
6034 	    }
6035 
6036 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6037 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
6038 	  if (gimple_assign_rhs2 (user) == negate)
6039 	    {
6040 	      tree rhs1 = gimple_assign_rhs1 (user);
6041 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
6042 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6043 					      negateop);
6044 	      update_stmt (user);
6045 	    }
6046 	}
6047       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6048 	{
6049 	  if (gimple_assign_rhs1 (user) == negate)
6050 	    {
6051 	      /* We have
6052 		   x = -negateop
6053 		   y = x - b
6054 		 which we transform into
6055 		   x = negateop + b
6056 		   y = -x .
6057 		 This pushes down the negate which we possibly can merge
6058 		 into some other operation, hence insert it into the
6059 		 plus_negates vector.  */
6060 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
6061 	      tree b = gimple_assign_rhs2 (user);
6062 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6063 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6064 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6065 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6066 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6067 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6068 	      user = gsi_stmt (gsi2);
6069 	      update_stmt (user);
6070 	      reassoc_remove_stmt (&gsi);
6071 	      release_defs (feed);
6072 	      plus_negates.safe_push (gimple_assign_lhs (user));
6073 	    }
6074 	  else
6075 	    {
6076 	      /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6077 		 getting rid of one operation.  */
6078 	      tree rhs1 = gimple_assign_rhs1 (user);
6079 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
6080 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6081 	      update_stmt (gsi_stmt (gsi));
6082 	    }
6083 	}
6084     }
6085 }
6086 
6087 /* Break up subtract operations in block BB.
6088 
6089    We do this top down because we don't know whether the subtract is
6090    part of a possible chain of reassociation except at the top.
6091 
6092    IE given
6093    d = f + g
6094    c = a + e
6095    b = c - d
6096    q = b - r
6097    k = t - q
6098 
6099    we want to break up k = t - q, but we won't until we've transformed q
6100    = b - r, which won't be broken up until we transform b = c - d.
6101 
6102    En passant, clear the GIMPLE visited flag on every statement
6103    and set UIDs within each basic block.  */
6104 
6105 static void
6106 break_up_subtract_bb (basic_block bb)
6107 {
6108   gimple_stmt_iterator gsi;
6109   basic_block son;
6110   unsigned int uid = 1;
6111 
6112   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6113     {
6114       gimple *stmt = gsi_stmt (gsi);
6115       gimple_set_visited (stmt, false);
6116       gimple_set_uid (stmt, uid++);
6117 
6118       if (!is_gimple_assign (stmt)
6119 	  || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6120 	  || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6121 	continue;
6122 
6123       /* Look for simple gimple subtract operations.  */
6124       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6125 	{
6126 	  if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6127 	      || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6128 	    continue;
6129 
6130 	  /* Check for a subtract used only in an addition.  If this
6131 	     is the case, transform it into add of a negate for better
6132 	     reassociation.  IE transform C = A-B into C = A + -B if C
6133 	     is only used in an addition.  */
6134 	  if (should_break_up_subtract (stmt))
6135 	    break_up_subtract (stmt, &gsi);
6136 	}
6137       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6138 	       && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6139 	plus_negates.safe_push (gimple_assign_lhs (stmt));
6140     }
6141   for (son = first_dom_son (CDI_DOMINATORS, bb);
6142        son;
6143        son = next_dom_son (CDI_DOMINATORS, son))
6144     break_up_subtract_bb (son);
6145 }
6146 
6147 /* Used for repeated factor analysis.  */
6148 struct repeat_factor
6149 {
6150   /* An SSA name that occurs in a multiply chain.  */
6151   tree factor;
6152 
6153   /* Cached rank of the factor.  */
6154   unsigned rank;
6155 
6156   /* Number of occurrences of the factor in the chain.  */
6157   HOST_WIDE_INT count;
6158 
6159   /* An SSA name representing the product of this factor and
6160      all factors appearing later in the repeated factor vector.  */
6161   tree repr;
6162 };
6163 
6164 
6165 static vec<repeat_factor> repeat_factor_vec;
6166 
6167 /* Used for sorting the repeat factor vector.  Sort primarily by
6168    ascending occurrence count, secondarily by descending rank.  */
6169 
6170 static int
6171 compare_repeat_factors (const void *x1, const void *x2)
6172 {
6173   const repeat_factor *rf1 = (const repeat_factor *) x1;
6174   const repeat_factor *rf2 = (const repeat_factor *) x2;
6175 
6176   if (rf1->count != rf2->count)
6177     return rf1->count - rf2->count;
6178 
6179   return rf2->rank - rf1->rank;
6180 }
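
/* For instance, factors {x: count 2, rank 5}, {y: count 3, rank 1}
   and {z: count 3, rank 4} (invented numbers) sort as x, z, y:
   x comes first on the lower count, then z precedes y because ties
   are broken by descending rank.  */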
6181 
6182 /* Look for repeated operands in OPS in the multiply tree rooted at
6183    STMT.  Replace them with an optimal sequence of multiplies and powi
6184    builtin calls, and remove the used operands from OPS.  Return an
6185    SSA name representing the value of the replacement sequence.  */
6186 
6187 static tree
6188 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6189 {
6190   unsigned i, j, vec_len;
6191   int ii;
6192   operand_entry *oe;
6193   repeat_factor *rf1, *rf2;
6194   repeat_factor rfnew;
6195   tree result = NULL_TREE;
6196   tree target_ssa, iter_result;
6197   tree type = TREE_TYPE (gimple_get_lhs (stmt));
6198   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6199   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6200   gimple *mul_stmt, *pow_stmt;
6201 
6202   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6203      target, unless type is integral.  */
6204   if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6205     return NULL_TREE;
6206 
6207   /* Allocate the repeated factor vector.  */
6208   repeat_factor_vec.create (10);
6209 
6210   /* Scan the OPS vector for all SSA names in the product and build
6211      up a vector of occurrence counts for each factor.  */
6212   FOR_EACH_VEC_ELT (*ops, i, oe)
6213     {
6214       if (TREE_CODE (oe->op) == SSA_NAME)
6215 	{
6216 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6217 	    {
6218 	      if (rf1->factor == oe->op)
6219 		{
6220 		  rf1->count += oe->count;
6221 		  break;
6222 		}
6223 	    }
6224 
6225 	  if (j >= repeat_factor_vec.length ())
6226 	    {
6227 	      rfnew.factor = oe->op;
6228 	      rfnew.rank = oe->rank;
6229 	      rfnew.count = oe->count;
6230 	      rfnew.repr = NULL_TREE;
6231 	      repeat_factor_vec.safe_push (rfnew);
6232 	    }
6233 	}
6234     }
6235 
6236   /* Sort the repeated factor vector by (a) increasing occurrence count,
6237      and (b) decreasing rank.  */
6238   repeat_factor_vec.qsort (compare_repeat_factors);
6239 
6240   /* It is generally best to combine as many base factors as possible
6241      into a product before applying __builtin_powi to the result.
6242      However, the sort order chosen for the repeated factor vector
6243      allows us to cache partial results for the product of the base
6244      factors for subsequent use.  When we already have a cached partial
6245      result from a previous iteration, it is best to make use of it
6246      before looking for another __builtin_pow opportunity.
6247 
6248      As an example, consider x * x * y * y * y * z * z * z * z.
6249      We want to first compose the product x * y * z, raise it to the
6250      second power, then multiply this by y * z, and finally multiply
6251      by z.  This can be done in 5 multiplies provided we cache y * z
6252      for use in both expressions:
6253 
6254         t1 = y * z
6255 	t2 = t1 * x
6256 	t3 = t2 * t2
6257 	t4 = t1 * t3
6258 	result = t4 * z
6259 
6260      If we instead ignored the cached y * z and first multiplied by
6261      the __builtin_pow opportunity z * z, we would get the inferior:
6262 
6263         t1 = y * z
6264 	t2 = t1 * x
6265 	t3 = t2 * t2
6266 	t4 = z * z
6267 	t5 = t3 * t4
6268         result = t5 * y  */
6269 
6270   vec_len = repeat_factor_vec.length ();
6271 
6272   /* Repeatedly look for opportunities to create a builtin_powi call.  */
6273   while (true)
6274     {
6275       HOST_WIDE_INT power;
6276 
6277       /* First look for the largest cached product of factors from
6278 	 preceding iterations.  If found, create a builtin_powi for
6279 	 it if the minimum occurrence count for its factors is at
6280 	 least 2, or just use this cached product as our next
6281 	 multiplicand if the minimum occurrence count is 1.  */
6282       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6283 	{
6284 	  if (rf1->repr && rf1->count > 0)
6285 	    break;
6286 	}
6287 
6288       if (j < vec_len)
6289 	{
6290 	  power = rf1->count;
6291 
6292 	  if (power == 1)
6293 	    {
6294 	      iter_result = rf1->repr;
6295 
6296 	      if (dump_file && (dump_flags & TDF_DETAILS))
6297 		{
6298 		  unsigned elt;
6299 		  repeat_factor *rf;
6300 		  fputs ("Multiplying by cached product ", dump_file);
6301 		  for (elt = j; elt < vec_len; elt++)
6302 		    {
6303 		      rf = &repeat_factor_vec[elt];
6304 		      print_generic_expr (dump_file, rf->factor);
6305 		      if (elt < vec_len - 1)
6306 			fputs (" * ", dump_file);
6307 		    }
6308 		  fputs ("\n", dump_file);
6309 		}
6310 	    }
6311 	  else
6312 	    {
6313 	      if (INTEGRAL_TYPE_P (type))
6314 		{
6315 		  gcc_assert (power > 1);
6316 		  gimple_stmt_iterator gsip = gsi;
6317 		  gsi_prev (&gsip);
6318 		  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6319 					       rf1->repr, power);
6320 		  gimple_stmt_iterator gsic = gsi;
6321 		  while (gsi_stmt (gsic) != gsi_stmt (gsip))
6322 		    {
6323 		      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6324 		      gimple_set_visited (gsi_stmt (gsic), true);
6325 		      gsi_prev (&gsic);
6326 		    }
6327 		}
6328 	      else
6329 		{
6330 		  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6331 		  pow_stmt
6332 		    = gimple_build_call (powi_fndecl, 2, rf1->repr,
6333 					 build_int_cst (integer_type_node,
6334 							power));
6335 		  gimple_call_set_lhs (pow_stmt, iter_result);
6336 		  gimple_set_location (pow_stmt, gimple_location (stmt));
6337 		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
6338 		  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6339 		}
6340 
6341 	      if (dump_file && (dump_flags & TDF_DETAILS))
6342 		{
6343 		  unsigned elt;
6344 		  repeat_factor *rf;
6345 		  fputs ("Building __builtin_pow call for cached product (",
6346 			 dump_file);
6347 		  for (elt = j; elt < vec_len; elt++)
6348 		    {
6349 		      rf = &repeat_factor_vec[elt];
6350 		      print_generic_expr (dump_file, rf->factor);
6351 		      if (elt < vec_len - 1)
6352 			fputs (" * ", dump_file);
6353 		    }
6354 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6355 			   power);
6356 		}
6357 	    }
6358 	}
6359       else
6360 	{
6361 	  /* Otherwise, find the first factor in the repeated factor
6362 	     vector whose occurrence count is at least 2.  If no such
6363 	     factor exists, there are no builtin_powi opportunities
6364 	     remaining.  */
6365 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6366 	    {
6367 	      if (rf1->count >= 2)
6368 		break;
6369 	    }
6370 
6371 	  if (j >= vec_len)
6372 	    break;
6373 
6374 	  power = rf1->count;
6375 
6376 	  if (dump_file && (dump_flags & TDF_DETAILS))
6377 	    {
6378 	      unsigned elt;
6379 	      repeat_factor *rf;
6380 	      fputs ("Building __builtin_pow call for (", dump_file);
6381 	      for (elt = j; elt < vec_len; elt++)
6382 		{
6383 		  rf = &repeat_factor_vec[elt];
6384 		  print_generic_expr (dump_file, rf->factor);
6385 		  if (elt < vec_len - 1)
6386 		    fputs (" * ", dump_file);
6387 		}
6388 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6389 	    }
6390 
6391 	  reassociate_stats.pows_created++;
6392 
6393 	  /* Visit each element of the vector in reverse order (so that
6394 	     high-occurrence elements are visited first, and within the
6395 	     same occurrence count, lower-ranked elements are visited
6396 	     first).  Form a linear product of all elements in this order
6397 	     whose occurrence count is at least that of element J.
6398 	     Record the SSA name representing the product of each element
6399 	     with all subsequent elements in the vector.  */
6400 	  if (j == vec_len - 1)
6401 	    rf1->repr = rf1->factor;
6402 	  else
6403 	    {
6404 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
6405 		{
6406 		  tree op1, op2;
6407 
6408 		  rf1 = &repeat_factor_vec[ii];
6409 		  rf2 = &repeat_factor_vec[ii + 1];
6410 
6411 		  /* Init the last factor's representative to be itself.  */
6412 		  if (!rf2->repr)
6413 		    rf2->repr = rf2->factor;
6414 
6415 		  op1 = rf1->factor;
6416 		  op2 = rf2->repr;
6417 
6418 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6419 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6420 						  op1, op2);
6421 		  gimple_set_location (mul_stmt, gimple_location (stmt));
6422 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6423 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6424 		  rf1->repr = target_ssa;
6425 
6426 		  /* Don't reprocess the multiply we just introduced.  */
6427 		  gimple_set_visited (mul_stmt, true);
6428 		}
6429 	    }
6430 
6431 	  /* Form a call to __builtin_powi for the maximum product
6432 	     just formed, raised to the power obtained earlier.  */
6433 	  rf1 = &repeat_factor_vec[j];
6434 	  if (INTEGRAL_TYPE_P (type))
6435 	    {
6436 	      gcc_assert (power > 1);
6437 	      gimple_stmt_iterator gsip = gsi;
6438 	      gsi_prev (&gsip);
6439 	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6440 					   rf1->repr, power);
6441 	      gimple_stmt_iterator gsic = gsi;
6442 	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
6443 		{
6444 		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6445 		  gimple_set_visited (gsi_stmt (gsic), true);
6446 		  gsi_prev (&gsic);
6447 		}
6448 	    }
6449 	  else
6450 	    {
6451 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6452 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6453 					    build_int_cst (integer_type_node,
6454 							   power));
6455 	      gimple_call_set_lhs (pow_stmt, iter_result);
6456 	      gimple_set_location (pow_stmt, gimple_location (stmt));
6457 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
6458 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6459 	    }
6460 	}
6461 
6462       /* If we previously formed at least one other builtin_powi call,
6463 	 form the product of this one and those others.  */
6464       if (result)
6465 	{
6466 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6467 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6468 					  result, iter_result);
6469 	  gimple_set_location (mul_stmt, gimple_location (stmt));
6470 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6471 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6472 	  gimple_set_visited (mul_stmt, true);
6473 	  result = new_result;
6474 	}
6475       else
6476 	result = iter_result;
6477 
6478       /* Decrement the occurrence count of each element in the product
6479 	 by the count found above, and remove this many copies of each
6480 	 factor from OPS.  */
6481       for (i = j; i < vec_len; i++)
6482 	{
6483 	  unsigned k = power;
6484 	  unsigned n;
6485 
6486 	  rf1 = &repeat_factor_vec[i];
6487 	  rf1->count -= power;
6488 
6489 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6490 	    {
6491 	      if (oe->op == rf1->factor)
6492 		{
6493 		  if (oe->count <= k)
6494 		    {
6495 		      ops->ordered_remove (n);
6496 		      k -= oe->count;
6497 
6498 		      if (k == 0)
6499 			break;
6500 		    }
6501 		  else
6502 		    {
6503 		      oe->count -= k;
6504 		      break;
6505 		    }
6506 		}
6507 	    }
6508 	}
6509     }
6510 
6511   /* At this point all elements in the repeated factor vector have a
6512      remaining occurrence count of 0 or 1, and those with a count of 1
6513      don't have cached representatives.  Re-sort the ops vector and
6514      clean up.  */
6515   ops->qsort (sort_by_operand_rank);
6516   repeat_factor_vec.release ();
6517 
6518   /* Return the final product computed herein.  Note that there may
6519      still be some elements with single occurrence count left in OPS;
6520      those will be handled by the normal reassociation logic.  */
6521   return result;
6522 }
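
/* Editor's illustrative sketch (an addition, not part of the original
   sources): for the linearized product x * x * y * y * z, the repeated
   factors x and y both have occurrence count 2, so the code above forms
   the product t = x * y and emits the equivalent of

     iter_result = __builtin_powi (t, 2);

   The single occurrence of z stays in OPS and is consumed by the normal
   reassociation logic, yielding iter_result * z overall.  */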
6523 
6524 /* Attempt to optimize
6525    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6526    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
6527 
6528 static void
6529 attempt_builtin_copysign (vec<operand_entry *> *ops)
6530 {
6531   operand_entry *oe;
6532   unsigned int i;
6533   unsigned int length = ops->length ();
6534   tree cst = ops->last ()->op;
6535 
6536   if (length == 1 || TREE_CODE (cst) != REAL_CST)
6537     return;
6538 
6539   FOR_EACH_VEC_ELT (*ops, i, oe)
6540     {
6541       if (TREE_CODE (oe->op) == SSA_NAME
6542 	  && has_single_use (oe->op))
6543 	{
6544 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6545 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6546 	    {
6547 	      tree arg0, arg1;
6548 	      switch (gimple_call_combined_fn (old_call))
6549 		{
6550 		CASE_CFN_COPYSIGN:
6551 		CASE_CFN_COPYSIGN_FN:
6552 		  arg0 = gimple_call_arg (old_call, 0);
6553 		  arg1 = gimple_call_arg (old_call, 1);
6554 		  /* The first argument of copysign must be a constant,
6555 		     otherwise there's nothing to do.  */
6556 		  if (TREE_CODE (arg0) == REAL_CST)
6557 		    {
6558 		      tree type = TREE_TYPE (arg0);
6559 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6560 		      /* If we couldn't fold to a single constant, skip it.
6561 			 That happens e.g. for inexact multiplication when
6562 			 -frounding-math.  */
6563 		      if (mul == NULL_TREE)
6564 			break;
6565 		      /* Instead of adjusting OLD_CALL, build a new call so
6566 			 that we neither leak the LHS nor keep bogus debug
6567 			 statements.  DCE will clean up the old call.  */
6568 		      gcall *new_call;
6569 		      if (gimple_call_internal_p (old_call))
6570 			new_call = gimple_build_call_internal
6571 			  (IFN_COPYSIGN, 2, mul, arg1);
6572 		      else
6573 			new_call = gimple_build_call
6574 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
6575 		      tree lhs = make_ssa_name (type);
6576 		      gimple_call_set_lhs (new_call, lhs);
6577 		      gimple_set_location (new_call,
6578 					   gimple_location (old_call));
6579 		      insert_stmt_after (new_call, old_call);
6580 		      /* We've used the constant, get rid of it.  */
6581 		      ops->pop ();
6582 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6583 		      /* Handle the CST1 < 0 case by negating the result.  */
6584 		      if (cst1_neg)
6585 			{
6586 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6587 			  gimple *negate_stmt
6588 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6589 			  insert_stmt_after (negate_stmt, new_call);
6590 			  oe->op = negrhs;
6591 			}
6592 		      else
6593 			oe->op = lhs;
6594 		      if (dump_file && (dump_flags & TDF_DETAILS))
6595 			{
6596 			  fprintf (dump_file, "Optimizing copysign: ");
6597 			  print_generic_expr (dump_file, cst);
6598 			  fprintf (dump_file, " * COPYSIGN (");
6599 			  print_generic_expr (dump_file, arg0);
6600 			  fprintf (dump_file, ", ");
6601 			  print_generic_expr (dump_file, arg1);
6602 			  fprintf (dump_file, ") into %sCOPYSIGN (",
6603 				   cst1_neg ? "-" : "");
6604 			  print_generic_expr (dump_file, mul);
6605 			  fprintf (dump_file, ", ");
6606 			  print_generic_expr (dump_file, arg1);
6607 			  fprintf (dump_file, ")\n");
6608 			}
6609 		      return;
6610 		    }
6611 		  break;
6612 		default:
6613 		  break;
6614 		}
6615 	    }
6616 	}
6617     }
6618 }
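
/* Editor's illustrative sketch: given -2.5 * copysign (3.0, y), the
   constant product CST1 * CST2 folds to -7.5 and the call above is
   rebuilt as copysign (-7.5, y), which equals copysign (7.5, y) since
   copysign takes only the magnitude of its first argument.  Because
   CST1 is negative, a NEGATE_EXPR of the new result then replaces the
   operand entry, matching the -copysign (CST1 * CST2, y) form in the
   header comment.  */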
6619 
6620 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
6621 
6622 static void
6623 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6624 {
6625   tree rhs1;
6626 
6627   if (dump_file && (dump_flags & TDF_DETAILS))
6628     {
6629       fprintf (dump_file, "Transforming ");
6630       print_gimple_stmt (dump_file, stmt, 0);
6631     }
6632 
6633   rhs1 = gimple_assign_rhs1 (stmt);
6634   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6635   update_stmt (stmt);
6636   remove_visited_stmt_chain (rhs1);
6637 
6638   if (dump_file && (dump_flags & TDF_DETAILS))
6639     {
6640       fprintf (dump_file, " into ");
6641       print_gimple_stmt (dump_file, stmt, 0);
6642     }
6643 }
6644 
6645 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
6646 
6647 static void
6648 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6649 			    tree rhs1, tree rhs2)
6650 {
6651   if (dump_file && (dump_flags & TDF_DETAILS))
6652     {
6653       fprintf (dump_file, "Transforming ");
6654       print_gimple_stmt (dump_file, stmt, 0);
6655     }
6656 
6657   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6658   update_stmt (gsi_stmt (*gsi));
6659   remove_visited_stmt_chain (rhs1);
6660 
6661   if (dump_file && (dump_flags & TDF_DETAILS))
6662     {
6663       fprintf (dump_file, " into ");
6664       print_gimple_stmt (dump_file, stmt, 0);
6665     }
6666 }
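
/* Editor's note: the helper above is used by reassociate_bb when the
   __builtin_powi optimization has left exactly one operand in OPS; the
   original statement is then rewritten as that operand multiplied by
   the powi result.  */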
6667 
6668 /* Reassociate expressions in basic block BB and, recursively, in the
6669    blocks that are its children in the post-dominator tree.
6670 
6671    Bubble up return status from maybe_optimize_range_tests.  */
6672 
6673 static bool
6674 reassociate_bb (basic_block bb)
6675 {
6676   gimple_stmt_iterator gsi;
6677   basic_block son;
6678   gimple *stmt = last_stmt (bb);
6679   bool cfg_cleanup_needed = false;
6680 
6681   if (stmt && !gimple_visited_p (stmt))
6682     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6683 
6684   bool do_prev = false;
6685   for (gsi = gsi_last_bb (bb);
6686        !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6687     {
6688       do_prev = true;
6689       stmt = gsi_stmt (gsi);
6690 
6691       if (is_gimple_assign (stmt)
6692 	  && !stmt_could_throw_p (cfun, stmt))
6693 	{
6694 	  tree lhs, rhs1, rhs2;
6695 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6696 
6697 	  /* If this was part of an already processed statement,
6698 	     we don't need to touch it again. */
6699 	  if (gimple_visited_p (stmt))
6700 	    {
6701 	      /* This statement might have become dead because of previous
6702 		 reassociations.  */
6703 	      if (has_zero_uses (gimple_get_lhs (stmt)))
6704 		{
6705 		  reassoc_remove_stmt (&gsi);
6706 		  release_defs (stmt);
6707 		  /* We might end up removing the last stmt above, which
6708 		     leaves the iterator at the end of the sequence.
6709 		     If so, reset it to the last stmt of the block and
6710 		     make sure we don't do gsi_prev on it.  */
6711 		  if (gsi_end_p (gsi))
6712 		    {
6713 		      gsi = gsi_last_bb (bb);
6714 		      do_prev = false;
6715 		    }
6716 		}
6717 	      continue;
6718 	    }
6719 
6720 	  /* If this is not a gimple binary expression, there is
6721 	     nothing for us to do with it.  */
6722 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6723 	    continue;
6724 
6725 	  lhs = gimple_assign_lhs (stmt);
6726 	  rhs1 = gimple_assign_rhs1 (stmt);
6727 	  rhs2 = gimple_assign_rhs2 (stmt);
6728 
6729 	  /* For operations other than bit-ops and min/max we cannot
6730 	     associate all types.  Verify that here.  */
6731 	  if ((rhs_code != BIT_IOR_EXPR
6732 	       && rhs_code != BIT_AND_EXPR
6733 	       && rhs_code != BIT_XOR_EXPR
6734 	       && rhs_code != MIN_EXPR
6735 	       && rhs_code != MAX_EXPR
6736 	       && !can_reassociate_type_p (TREE_TYPE (lhs)))
6737 	      || !can_reassociate_op_p (rhs1)
6738 	      || !can_reassociate_op_p (rhs2))
6739 	    continue;
6740 
6741 	  if (associative_tree_code (rhs_code))
6742 	    {
6743 	      auto_vec<operand_entry *> ops;
6744 	      tree powi_result = NULL_TREE;
6745 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6746 
6747 	      /* There may be no immediate uses left by the time we
6748 		 get here because we may have eliminated them all.  */
6749 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6750 		continue;
6751 
6752 	      gimple_set_visited (stmt, true);
6753 	      linearize_expr_tree (&ops, stmt, true, true);
6754 	      ops.qsort (sort_by_operand_rank);
6755 	      int orig_len = ops.length ();
6756 	      optimize_ops_list (rhs_code, &ops);
6757 	      if (undistribute_ops_list (rhs_code, &ops,
6758 					 loop_containing_stmt (stmt)))
6759 		{
6760 		  ops.qsort (sort_by_operand_rank);
6761 		  optimize_ops_list (rhs_code, &ops);
6762 		}
6763 	      if (undistribute_bitref_for_vector (rhs_code, &ops,
6764 						  loop_containing_stmt (stmt)))
6765 		{
6766 		  ops.qsort (sort_by_operand_rank);
6767 		  optimize_ops_list (rhs_code, &ops);
6768 		}
6769 	      if (rhs_code == PLUS_EXPR
6770 		  && transform_add_to_multiply (&ops))
6771 		ops.qsort (sort_by_operand_rank);
6772 
6773 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6774 		{
6775 		  if (is_vector)
6776 		    optimize_vec_cond_expr (rhs_code, &ops);
6777 		  else
6778 		    optimize_range_tests (rhs_code, &ops, NULL);
6779 	        }
6780 
6781 	      if (rhs_code == MULT_EXPR && !is_vector)
6782 	        {
6783 		  attempt_builtin_copysign (&ops);
6784 
6785 		  if (reassoc_insert_powi_p
6786 		      && (flag_unsafe_math_optimizations
6787 			  || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6788 		    powi_result = attempt_builtin_powi (stmt, &ops);
6789 		}
6790 
6791 	      operand_entry *last;
6792 	      bool negate_result = false;
6793 	      if (ops.length () > 1
6794 		  && rhs_code == MULT_EXPR)
6795 		{
6796 		  last = ops.last ();
6797 		  if ((integer_minus_onep (last->op)
6798 		       || real_minus_onep (last->op))
6799 		      && !HONOR_SNANS (TREE_TYPE (lhs))
6800 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6801 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6802 		    {
6803 		      ops.pop ();
6804 		      negate_result = true;
6805 		    }
6806 		}
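	      /* Editor's note (an added example): for lhs = a * b * -1.0
		 the -1.0 entry is popped above and NEGATE_RESULT is set;
		 the negation is re-applied as a NEGATE_EXPR once the
		 remaining product has been rewritten further below.  */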
6807 
6808 	      tree new_lhs = lhs;
6809 	      /* If the operand vector is now empty, all operands were
6810 		 consumed by the __builtin_powi optimization.  */
6811 	      if (ops.length () == 0)
6812 		transform_stmt_to_copy (&gsi, stmt, powi_result);
6813 	      else if (ops.length () == 1)
6814 		{
6815 		  tree last_op = ops.last ()->op;
6816 
6817 		  /* If the stmt that defines the operand has to be inserted,
6818 		     insert it before the use.  */
6819 		  if (ops.last ()->stmt_to_insert)
6820 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6821 		  if (powi_result)
6822 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
6823 						powi_result);
6824 		  else
6825 		    transform_stmt_to_copy (&gsi, stmt, last_op);
6826 		}
6827 	      else
6828 		{
6829 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6830 		  int ops_num = ops.length ();
6831 		  int width;
6832 
6833 		  /* For binary bit operations, if there are at least 3
6834 		     operands and the last operand in OPS is a constant,
6835 		     move it to the front.  This helps ensure that we generate
6836 		     (X & Y) & C rather than (X & C) & Y.  The former will
6837 		     often match a canonical bit test when we get to RTL.  */
6838 		  if (ops.length () > 2
6839 		      && (rhs_code == BIT_AND_EXPR
6840 		          || rhs_code == BIT_IOR_EXPR
6841 		          || rhs_code == BIT_XOR_EXPR)
6842 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6843 		    std::swap (*ops[0], *ops[ops_num - 1]);
6844 
6845 		  /* Only rewrite the expression tree into a parallel form
6846 		     in the last reassoc pass, to avoid useless back-and-forth
6847 		     with the initial linearization.  */
6848 		  if (!reassoc_insert_powi_p
6849 		      && ops.length () > 3
6850 		      && (width = get_reassociation_width (ops_num, rhs_code,
6851 							   mode)) > 1)
6852 		    {
6853 		      if (dump_file && (dump_flags & TDF_DETAILS))
6854 			fprintf (dump_file,
6855 				 "Width = %d was chosen for reassociation\n",
6856 				 width);
6857 		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6858 						  width, ops);
6859 		    }
6860 		  else
6861                     {
6862                       /* When there are three operands left, we want
6863                          to make sure the ones that get the double
6864                          binary op are chosen wisely.  */
6865                       int len = ops.length ();
6866                       if (len >= 3)
6867                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6868 
6869 		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6870 						   powi_result != NULL
6871 						   || negate_result,
6872 						   len != orig_len);
6873                     }
6874 
6875 		  /* If we combined some repeated factors into a
6876 		     __builtin_powi call, multiply that result by the
6877 		     reassociated operands.  */
6878 		  if (powi_result)
6879 		    {
6880 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6881 		      tree type = TREE_TYPE (lhs);
6882 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6883 							    "reassocpow");
6884 		      gimple_set_lhs (lhs_stmt, target_ssa);
6885 		      update_stmt (lhs_stmt);
6886 		      if (lhs != new_lhs)
6887 			{
6888 			  target_ssa = new_lhs;
6889 			  new_lhs = lhs;
6890 			}
6891 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6892 						      powi_result, target_ssa);
6893 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6894 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6895 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6896 		    }
6897 		}
6898 
6899 	      if (negate_result)
6900 		{
6901 		  stmt = SSA_NAME_DEF_STMT (lhs);
6902 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6903 		  gimple_set_lhs (stmt, tmp);
6904 		  if (lhs != new_lhs)
6905 		    tmp = new_lhs;
6906 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6907 							   tmp);
6908 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6909 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6910 		  update_stmt (stmt);
6911 		}
6912 	    }
6913 	}
6914     }
6915   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6916        son;
6917        son = next_dom_son (CDI_POST_DOMINATORS, son))
6918     cfg_cleanup_needed |= reassociate_bb (son);
6919 
6920   return cfg_cleanup_needed;
6921 }
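
/* Editor's illustrative walk-through of reassociate_bb (a sketch under
   the assumption that _1 has a single use):

     _1 = a + b;
     _2 = _1 + a;

   The tree rooted at _2 is linearized into the operand list {a, b, a},
   which is sorted by rank and optimized; transform_add_to_multiply can
   then replace the repeated addend with a * 2 before the tree is
   rewritten in rank order.  */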
6922 
6923 /* Add jumps around shifts for range tests turned into bit tests.
6924    For each SSA_NAME VAR we have code like:
6925    VAR = ...; // final stmt of range comparison
6926    // bit test here...;
6927    OTHERVAR = ...; // final stmt of the bit test sequence
6928    RES = VAR | OTHERVAR;
6929    Turn the above into:
6930    VAR = ...;
6931    if (VAR != 0)
6932      goto <l3>;
6933    else
6934      goto <l2>;
6935    <l2>:
6936    // bit test here...;
6937    OTHERVAR = ...;
6938    <l3>:
6939    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6940 
6941 static void
6942 branch_fixup (void)
6943 {
6944   tree var;
6945   unsigned int i;
6946 
6947   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6948     {
6949       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6950       gimple *use_stmt;
6951       use_operand_p use;
6952       bool ok = single_imm_use (var, &use, &use_stmt);
6953       gcc_assert (ok
6954 		  && is_gimple_assign (use_stmt)
6955 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6956 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6957 
6958       basic_block cond_bb = gimple_bb (def_stmt);
6959       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6960       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6961 
6962       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6963       gimple *g = gimple_build_cond (NE_EXPR, var,
6964 				     build_zero_cst (TREE_TYPE (var)),
6965 				     NULL_TREE, NULL_TREE);
6966       location_t loc = gimple_location (use_stmt);
6967       gimple_set_location (g, loc);
6968       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6969 
6970       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6971       etrue->probability = profile_probability::even ();
6972       edge efalse = find_edge (cond_bb, then_bb);
6973       efalse->flags = EDGE_FALSE_VALUE;
6974       efalse->probability -= etrue->probability;
6975       then_bb->count -= etrue->count ();
6976 
6977       tree othervar = NULL_TREE;
6978       if (gimple_assign_rhs1 (use_stmt) == var)
6979 	othervar = gimple_assign_rhs2 (use_stmt);
6980       else if (gimple_assign_rhs2 (use_stmt) == var)
6981 	othervar = gimple_assign_rhs1 (use_stmt);
6982       else
6983 	gcc_unreachable ();
6984       tree lhs = gimple_assign_lhs (use_stmt);
6985       gphi *phi = create_phi_node (lhs, merge_bb);
6986       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6987       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6988       gsi = gsi_for_stmt (use_stmt);
6989       gsi_remove (&gsi, true);
6990 
6991       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6992       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6993     }
6994   reassoc_branch_fixups.release ();
6995 }
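
/* Editor's note on the design choice above: the new conditional jump is
   given an even (50%) probability on its true edge, the fall-through
   edge keeps the remaining probability, and THEN_BB's count is reduced
   accordingly, so the profile stays consistent after the split.  */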
6996 
6997 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6998 void debug_ops_vector (vec<operand_entry *> ops);
6999 
7000 /* Dump the operand entry vector OPS to FILE.  */
7001 
7002 void
7003 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7004 {
7005   operand_entry *oe;
7006   unsigned int i;
7007 
7008   FOR_EACH_VEC_ELT (ops, i, oe)
7009     {
7010       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7011       print_generic_expr (file, oe->op);
7012       fprintf (file, "\n");
7013     }
7014 }
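
/* Editor's example of the dump format above (the ranks and SSA names
   are hypothetical):

     Op 0 -> rank: 196608, tree: a_1
     Op 1 -> rank: 196609, tree: b_2
     Op 2 -> rank: 0, tree: 5  */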
7015 
7016 /* Dump the operand entry vector OPS to STDERR.  */
7017 
7018 DEBUG_FUNCTION void
7019 debug_ops_vector (vec<operand_entry *> ops)
7020 {
7021   dump_ops_vector (stderr, ops);
7022 }
7023 
7024 /* Bubble up return status from reassociate_bb.  */
7025 
7026 static bool
7027 do_reassoc (void)
7028 {
7029   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7030   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7031 }
7032 
7033 /* Initialize the reassociation pass.  */
7034 
7035 static void
7036 init_reassoc (void)
7037 {
7038   int i;
7039   int64_t rank = 2;
7040   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7041 
7042   /* Find the loops, so that we can prevent moving calculations in
7043      them.  */
7044   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7045 
7046   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7047 
7048   next_operand_entry_id = 0;
7049 
7050   /* Reverse RPO (Reverse Post Order) will give us something where
7051      deeper loops come later.  */
7052   pre_and_rev_post_order_compute (NULL, bbs, false);
7053   bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7054   operand_rank = new hash_map<tree, int64_t>;
7055 
7056   /* Give each default definition a distinct rank.  This includes
7057      parameters and the static chain.  Walk backwards over all
7058      SSA names so that we get proper rank ordering according
7059      to tree_swap_operands_p.  */
7060   for (i = num_ssa_names - 1; i > 0; --i)
7061     {
7062       tree name = ssa_name (i);
7063       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7064 	insert_operand_rank (name, ++rank);
7065     }
7066 
7067   /* Set up the rank for each BB.  */
7068   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7069     bb_rank[bbs[i]] = ++rank << 16;
7070 
7071   free (bbs);
7072   calculate_dominance_info (CDI_POST_DOMINATORS);
7073   plus_negates = vNULL;
7074 }
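
/* Editor's note on the rank encoding above (a sketch assuming
   within-block ranks stay below 1 << 16): shifting each basic block's
   rank left by 16 bits leaves the low bits free for ordering names
   within a block, so a name defined in a later block of the reverse
   post order always outranks names defined in earlier blocks.  */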
7075 
7076 /* Cleanup after the reassociation pass, and print stats if
7077    requested.  */
7078 
7079 static void
7080 fini_reassoc (void)
7081 {
7082   statistics_counter_event (cfun, "Linearized",
7083 			    reassociate_stats.linearized);
7084   statistics_counter_event (cfun, "Constants eliminated",
7085 			    reassociate_stats.constants_eliminated);
7086   statistics_counter_event (cfun, "Ops eliminated",
7087 			    reassociate_stats.ops_eliminated);
7088   statistics_counter_event (cfun, "Statements rewritten",
7089 			    reassociate_stats.rewritten);
7090   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7091 			    reassociate_stats.pows_encountered);
7092   statistics_counter_event (cfun, "Built-in powi calls created",
7093 			    reassociate_stats.pows_created);
7094 
7095   delete operand_rank;
7096   bitmap_clear (biased_names);
7097   operand_entry_pool.release ();
7098   free (bb_rank);
7099   plus_negates.release ();
7100   free_dominance_info (CDI_POST_DOMINATORS);
7101   loop_optimizer_finalize ();
7102 }
7103 
7104 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
7105    insertion of __builtin_powi calls.
7106 
7107    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7108    optimization of a gimple conditional.  Otherwise returns zero.  */
7109 
7110 static unsigned int
7111 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7112 {
7113   reassoc_insert_powi_p = insert_powi_p;
7114   reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7115 
7116   init_reassoc ();
7117 
7118   bool cfg_cleanup_needed;
7119   cfg_cleanup_needed = do_reassoc ();
7120   repropagate_negates ();
7121   branch_fixup ();
7122 
7123   fini_reassoc ();
7124   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7125 }
7126 
7127 namespace {
7128 
7129 const pass_data pass_data_reassoc =
7130 {
7131   GIMPLE_PASS, /* type */
7132   "reassoc", /* name */
7133   OPTGROUP_NONE, /* optinfo_flags */
7134   TV_TREE_REASSOC, /* tv_id */
7135   ( PROP_cfg | PROP_ssa ), /* properties_required */
7136   0, /* properties_provided */
7137   0, /* properties_destroyed */
7138   0, /* todo_flags_start */
7139   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7140 };
7141 
7142 class pass_reassoc : public gimple_opt_pass
7143 {
7144 public:
7145   pass_reassoc (gcc::context *ctxt)
7146     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7147   {}
7148 
7149   /* opt_pass methods: */
7150   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
7151   void set_pass_param (unsigned int n, bool param)
7152     {
7153       gcc_assert (n == 0);
7154       insert_powi_p = param;
7155       bias_loop_carried_phi_ranks_p = !param;
7156     }
7157   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
7158   virtual unsigned int execute (function *)
7159   {
7160     return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7161   }
7162 
7163  private:
7164   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
7165      point 3a in the pass header comment.  */
7166   bool insert_powi_p;
7167   bool bias_loop_carried_phi_ranks_p;
7168 }; // class pass_reassoc
7169 
7170 } // anon namespace
7171 
7172 gimple_opt_pass *
7173 make_pass_reassoc (gcc::context *ctxt)
7174 {
7175   return new pass_reassoc (ctxt);
7176 }
7177
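/* Editor's usage note: the two reassoc instances in the pass pipeline
   differ only in the parameter passed to set_pass_param above.  The
   instance created with INSERT_POWI_P true performs the __builtin_powi
   formation; the one created with it false instead biases loop-carried
   PHI ranks and, being the last reassoc pass, may rewrite expression
   trees in parallel form.  */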