1 /* Reassociation for trees.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
61
62 It consists of five steps:
63
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
66
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
71
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
74
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
78
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
81
82 5. Repropagate negates, as nothing else will clean it up ATM.
83
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
86
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
89
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
94
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
99
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
102
103 a (1), b (1), c (1), d (2), e (2)
104
105
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
108
109 You want to first merge all leaves of the same rank, as much as
110 possible.
111
112 So first build a binary op of
113
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
115
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
118
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
123
124 Then build a binary op of d + e
125 mergetmp2 = d + e
126
127 and put mergetmp2 on the merge worklist.
128
129 so merge worklist = {mergetmp, c, mergetmp2}
130
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
133
134 So we have
135
136 build binary op
137 mergetmp3 = mergetmp + c
138
139 worklist = {mergetmp2, mergetmp3}
140
141 mergetmp4 = mergetmp2 + mergetmp3
142
143 worklist = {mergetmp4}
144
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
147
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
150
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
153
154 So why don't we do this?
155
156 Because it's expensive, and rarely will help. Most trees we are
157      reassociating have 3 or fewer ops.  If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
161
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
164
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
168
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
172
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
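/* As a rough illustration of steps 1-3 (a sketch, not taken from an
   actual dump): given the single-use chain

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 + a;
     t4 = t3 + b;

   step 1 rewrites the subtraction as t1 = a + (-b), step 2 linearizes
   everything into the PLUS_EXPR operand list { a, -b, c, a, b }, and
   step 3 cancels the -b/b pair, leaving { a, c, a } to be put into
   rank order and written back into binary statements by step 4.  */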
178
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
182
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
186
187 /* Statistics */
188 static struct
189 {
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
197
198
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
201
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
205
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
210
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
213
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
217
218 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
219 all basic blocks the CFG should be adjusted - basic blocks
220 split right after that SSA_NAME's definition statement and before
221 the only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
223
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
230
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 {
234 gimple *stmt = gsi_stmt (*gsi);
235
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
238
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 {
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
254 }
255 return ret;
256 }
257
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
262
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
266
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
273
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
279 {
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
283
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
286
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
288 {
289 gimple *current_use_stmt = USE_STMT (use);
290
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
293 {
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
297 }
298 }
299
300 if (single_use_stmt == NULL)
301 return false;
302
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
306
307 return true;
308 }
309
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
319 {
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
326
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
329
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
333
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
338
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
343
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
349
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
352 {
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
356 {
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
360 }
361 }
362
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
365 }
366
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
373 {
374 int64_t op_rank;
375
376 op_rank = get_rank (op);
377
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
382 {
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
386 }
387
388 return MAX (rank, op_rank);
389 }
390
391 /* Look up the operand rank structure for expression E. */
392
393 static inline int64_t
394 find_operand_rank (tree e)
395 {
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
398 }
399
400 /* Insert {E,RANK} into the operand rank hashtable. */
401
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
404 {
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
407 }
408
409 /* Given an expression E, return the rank of the expression. */
410
411 static int64_t
412 get_rank (tree e)
413 {
414   /* SSA_NAMEs have the rank of the expression they are the result
415 of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422      I make no claims that this is optimal; however, it gives good
423 results. */
424
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
428
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
433
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
437
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
442
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
445
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
450
451 if (TREE_CODE (e) == SSA_NAME)
452 {
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
457
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
462
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
465 {
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
469 }
470
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
473
474 else
475 {
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
478
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 	  /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
488
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
492 }
493
494 if (dump_file && (dump_flags & TDF_DETAILS))
495 {
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
499 }
500
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
504 }
505
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
508 }
509
510
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
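/* For instance, if a MULT_EXPR operand list ends up with the rank-0
   entries 7, 2.5 and -1.0, sorting by these classes orders its tail
   as ..., 2.5, -1.0, 7: generic float constants first, then the
   +/-1.0 floats, with the integer constant last where we can do the
   most with it (see sort_by_operand_rank below).  */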
517
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
522 {
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
526 {
527       /* Sort -1.0 and 1.0 constants last: while in some cases
528 	 const_binop can't optimize some inexact operations, multiplication
529 	 by -1.0 or 1.0 can always be merged with other operands.  */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
533 }
534 else
535 return OTHER_CONST_TYPE;
536 }
537
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
542 {
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
545
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
548
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
553 {
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
560 }
561
562 if (TREE_CODE (oea->op) != SSA_NAME)
563 {
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
568 }
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
571
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
575 {
576       /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
585 {
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
595 }
596
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
601
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
603 }
604
605 return oeb->id > oea->id ? 1 : -1;
606 }
607
608 /* Add an operand entry to *OPS for the tree operand OP. */
609
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
612 {
613 operand_entry *oe = operand_entry_pool.allocate ();
614
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
621 }
622
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
625
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
629 {
630 operand_entry *oe = operand_entry_pool.allocate ();
631
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
638
639 reassociate_stats.pows_encountered++;
640 }
641
642 /* Returns true if we can associate the SSA def OP. */
643
644 static bool
645 can_reassociate_op_p (tree op)
646 {
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
656 }
657
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating point type when associative-math is enabled. */
661
662 static bool
663 can_reassociate_type_p (tree type)
664 {
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
670 }
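/* For example, arithmetic in a wrapping type such as unsigned int can
   always be reassociated, while signed int (undefined overflow) and
   double only qualify under -fwrapv resp. -fassociative-math.  */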
671
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
674
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
677 {
678 basic_block bb = gimple_bb (stmt);
679
680 if (gimple_bb (stmt) == NULL)
681 return false;
682
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
685
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
689 {
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
696 }
697
698 return false;
699 }
700
701
702 /* Return true if STMT is a nop-conversion. */
703
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
706 {
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
708 {
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
713 }
714 return false;
715 }
716
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718    operand of that operation.  Otherwise, return NULL_TREE.  */
719
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
722 {
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
724
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
729
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
732
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
736 }
737
738 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
739
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
742 {
743 if (op1 == op2)
744 return true;
745
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
748 {
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
751 {
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
755 }
756 }
757
758 if (TREE_CODE (op2) == SSA_NAME)
759 {
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
762 {
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
767 }
768 }
769
770 return false;
771 }
772
773
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
777
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
785 {
786
787 /* If we have two of the same op, and the opcode is & |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
791
792 if (last && last->op == curr->op)
793 {
794 switch (opcode)
795 {
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
801 {
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
808 }
809
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
812
813 return true;
814
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
817 {
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
823 }
824
825 reassociate_stats.ops_eliminated += 2;
826
827 if (ops->length () == 2)
828 {
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
832 }
833 else
834 {
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
837 }
838
839 return true;
840
841 default:
842 break;
843 }
844 }
845 return false;
846 }
847
848 static vec<tree> plus_negates;
849
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
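/* For example, with a PLUS_EXPR operand list { t, x, -x } the pair
   cancels and the list becomes { t, 0 }, while { t, x, ~x } becomes
   { t, -1 } because x + ~x == -1.  The 0 or -1 entry is cleaned up
   later by eliminate_using_constants.  */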
855
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
861 {
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
866
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
869
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
874
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
878
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
883 {
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
886 {
887 if (dump_file && (dump_flags & TDF_DETAILS))
888 {
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
894 }
895
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
900
901 return true;
902 }
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
905 {
906 tree op_type = TREE_TYPE (oe->op);
907
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 {
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
915 }
916
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
921
922 return true;
923 }
924 }
925
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
931
932 return false;
933 }
934
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, remove the other from OPS, replace
938 OPS[CURRINDEX] with 0, and return true. Otherwise, return
939 false. */
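/* For example, x & y & ~x collapses to 0 and x | y | ~x collapses to
   all ones; in both cases the whole operand list is replaced by that
   single constant.  */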
940
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
946 {
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
950
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
954
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
958
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
962
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
967 {
968 if (oe->op == notop)
969 {
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 {
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
983 }
984
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
989
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
994 }
995 }
996
997 return false;
998 }
999
1000 /* Use a constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
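/* For example, once constants have been merged, an operand list such
   as { a, b, 0 } for BIT_AND_EXPR collapses to { 0 }, while { a, b, 1 }
   for MULT_EXPR simply drops the 1.  */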
1006
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1010 {
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1013
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1016 {
1017 switch (opcode)
1018 {
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1021 {
1022 if (ops->length () != 1)
1023 {
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1026
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1028
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1032 }
1033 }
1034 else if (integer_all_onesp (oelast->op))
1035 {
1036 if (ops->length () != 1)
1037 {
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1042 }
1043 }
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1047 {
1048 if (ops->length () != 1)
1049 {
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1052
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1054
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1058 }
1059 }
1060 else if (integer_zerop (oelast->op))
1061 {
1062 if (ops->length () != 1)
1063 {
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1068 }
1069 }
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1077 {
1078 if (ops->length () != 1)
1079 {
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1082
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1087 }
1088 }
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1093 {
1094 if (ops->length () != 1)
1095 {
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1101 }
1102 }
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1112 {
1113 if (ops->length () != 1)
1114 {
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1120 }
1121 }
1122 break;
1123 default:
1124 break;
1125 }
1126 }
1127 }
1128
1129
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1132
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1139 };
1140
1141
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1144
1145
1146 /* Oecount hashtable helpers. */
1147
1148 struct oecount_hasher : int_hash <int, 0, 1>
1149 {
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1152 };
1153
1154 /* Hash function for oecount. */
1155
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1158 {
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1161 }
1162
1163 /* Comparison function for oecount. */
1164
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1167 {
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1171 }
1172
1173 /* Comparison function for qsort sorting oecount elements by count. */
1174
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1177 {
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1185 }
1186
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1189
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1192 {
1193 if (!is_gimple_call (stmt))
1194 return false;
1195
1196 switch (gimple_call_combined_fn (stmt))
1197 {
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1201
1202 default:
1203 return false;
1204 }
1205 }
1206
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
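/* For example, __builtin_pow (x, 5.0) is rewritten in place to
   __builtin_pow (x, 4.0) and 4 is returned; __builtin_powi (x, 3)
   becomes __builtin_powi (x, 2) and 2 is returned.  */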
1210
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1213 {
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1217
1218 switch (gimple_call_combined_fn (stmt))
1219 {
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1227
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1233
1234 default:
1235 gcc_unreachable ();
1236 }
1237 }
1238
1239 /* Replace the SSA name defined by STMT with a new SSA name, replace
1240    all uses of the old name with the new one, and return the new SSA name.  */
1241
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1244 {
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1250
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1253
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1256 {
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1259 {
1260 if (new_debug_lhs == NULL_TREE)
1261 {
1262 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 gimple_set_uid (def_temp, gimple_uid (stmt));
1269 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1270 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1271 }
1272 repl = new_debug_lhs;
1273 }
1274 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1275 SET_USE (use, repl);
1276 update_stmt (use_stmt);
1277 }
1278 return new_lhs;
1279 }
1280
1281 /* Replace all SSA names defined in STMTS_TO_FIX with new SSA names and
1282    replace their uses.  Also do this for the stmt that defines *DEF
1283    if *DEF is not OP.  */
1284
1285 static void
1286 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1287 vec<gimple *> &stmts_to_fix)
1288 {
1289 unsigned i;
1290 gimple *stmt;
1291
1292 if (*def != op
1293 && TREE_CODE (*def) == SSA_NAME
1294 && (stmt = SSA_NAME_DEF_STMT (*def))
1295 && gimple_code (stmt) != GIMPLE_NOP)
1296 *def = make_new_ssa_for_def (stmt, opcode, op);
1297
1298 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1299 make_new_ssa_for_def (stmt, opcode, op);
1300 }
1301
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1305
1306 static void
1307 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1308 {
1309 tree lhs;
1310 gimple *use_stmt;
1311 use_operand_p use;
1312 gimple_stmt_iterator gsi;
1313
1314 if (is_gimple_call (stmt))
1315 lhs = gimple_call_lhs (stmt);
1316 else
1317 lhs = gimple_assign_lhs (stmt);
1318
1319 gcc_assert (has_single_use (lhs));
1320 single_imm_use (lhs, &use, &use_stmt);
1321 if (lhs == *def)
1322 *def = op;
1323 SET_USE (use, op);
1324 if (TREE_CODE (op) != SSA_NAME)
1325 update_stmt (use_stmt);
1326 gsi = gsi_for_stmt (stmt);
1327 unlink_stmt_vdef (stmt);
1328 reassoc_remove_stmt (&gsi);
1329 release_defs (stmt);
1330 }
1331
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
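/* As a sketch: for the single-use chain

     t1 = x * y;
     t2 = t1 * z;    <-- *DEF is t2

   zero_one_operation (&t2, MULT_EXPR, y) strips one factor of y,
   leaving a chain that computes x * z (under fresh SSA names, see
   make_new_ssa_for_all_defs).  */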
1335
1336 static void
1337 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1338 {
1339 tree orig_def = *def;
1340 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1341 /* PR72835 - Record the stmt chain that has to be updated such that
1342      we don't use the same LHS when the values computed are different.  */
1343 auto_vec<gimple *, 64> stmts_to_fix;
1344
1345 do
1346 {
1347 tree name;
1348
1349 if (opcode == MULT_EXPR)
1350 {
1351 if (stmt_is_power_of_op (stmt, op))
1352 {
1353 if (decrement_power (stmt) == 1)
1354 {
1355 if (stmts_to_fix.length () > 0)
1356 stmts_to_fix.pop ();
1357 propagate_op_to_single_use (op, stmt, def);
1358 }
1359 break;
1360 }
1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1362 {
1363 if (gimple_assign_rhs1 (stmt) == op)
1364 {
1365 tree cst = build_minus_one_cst (TREE_TYPE (op));
1366 if (stmts_to_fix.length () > 0)
1367 stmts_to_fix.pop ();
1368 propagate_op_to_single_use (cst, stmt, def);
1369 break;
1370 }
1371 else if (integer_minus_onep (op)
1372 || real_minus_onep (op))
1373 {
1374 gimple_assign_set_rhs_code
1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1376 break;
1377 }
1378 }
1379 }
1380
1381 name = gimple_assign_rhs1 (stmt);
1382
1383 /* If this is the operation we look for and one of the operands
1384 	 is ours, simply propagate the other operand into the stmt's
1385 single use. */
1386 if (gimple_assign_rhs_code (stmt) == opcode
1387 && (name == op
1388 || gimple_assign_rhs2 (stmt) == op))
1389 {
1390 if (name == op)
1391 name = gimple_assign_rhs2 (stmt);
1392 if (stmts_to_fix.length () > 0)
1393 stmts_to_fix.pop ();
1394 propagate_op_to_single_use (name, stmt, def);
1395 break;
1396 }
1397
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode == MULT_EXPR
1402 && gimple_assign_rhs_code (stmt) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt)))
1405 {
1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1407 if (stmt_is_power_of_op (stmt2, op))
1408 {
1409 if (decrement_power (stmt2) == 1)
1410 propagate_op_to_single_use (op, stmt2, def);
1411 else
1412 stmts_to_fix.safe_push (stmt2);
1413 break;
1414 }
1415 else if (is_gimple_assign (stmt2)
1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1417 {
1418 if (gimple_assign_rhs1 (stmt2) == op)
1419 {
1420 tree cst = build_minus_one_cst (TREE_TYPE (op));
1421 propagate_op_to_single_use (cst, stmt2, def);
1422 break;
1423 }
1424 else if (integer_minus_onep (op)
1425 || real_minus_onep (op))
1426 {
1427 stmts_to_fix.safe_push (stmt2);
1428 gimple_assign_set_rhs_code
1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1430 break;
1431 }
1432 }
1433 }
1434
1435 /* Continue walking the chain. */
1436 gcc_assert (name != op
1437 && TREE_CODE (name) == SSA_NAME);
1438 stmt = SSA_NAME_DEF_STMT (name);
1439 stmts_to_fix.safe_push (stmt);
1440 }
1441 while (1);
1442
1443 if (stmts_to_fix.length () > 0 || *def == orig_def)
1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1445 }
1446
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1449
1450 static bool
1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1452 {
1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1454
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME. Assume it lives at the beginning of function and
1457 thus dominates everything. */
1458 if (!bb1 || s1 == s2)
1459 return true;
1460
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1462 if (!bb2)
1463 return false;
1464
1465 if (bb1 == bb2)
1466 {
1467 /* PHIs in the same basic block are assumed to be
1468 	 executed all in parallel; if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1) == GIMPLE_PHI)
1471 return true;
1472
1473 if (gimple_code (s2) == GIMPLE_PHI)
1474 return false;
1475
1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1477
1478 if (gimple_uid (s1) < gimple_uid (s2))
1479 return true;
1480
1481 if (gimple_uid (s1) > gimple_uid (s2))
1482 return false;
1483
1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1485 unsigned int uid = gimple_uid (s1);
1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1487 {
1488 gimple *s = gsi_stmt (gsi);
1489 if (gimple_uid (s) != uid)
1490 break;
1491 if (s == s2)
1492 return true;
1493 }
1494
1495 return false;
1496 }
1497
1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1499 }
1500
1501 /* Insert STMT after INSERT_POINT. */
1502
1503 static void
1504 insert_stmt_after (gimple *stmt, gimple *insert_point)
1505 {
1506 gimple_stmt_iterator gsi;
1507 basic_block bb;
1508
1509 if (gimple_code (insert_point) == GIMPLE_PHI)
1510 bb = gimple_bb (insert_point);
1511 else if (!stmt_ends_bb_p (insert_point))
1512 {
1513 gsi = gsi_for_stmt (insert_point);
1514 gimple_set_uid (stmt, gimple_uid (insert_point));
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 return;
1517 }
1518 else if (gimple_code (insert_point) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1522 gcc_unreachable ();
1523 else
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1530 gsi = gsi_after_labels (bb);
1531 if (gsi_end_p (gsi))
1532 {
1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1534 gimple_set_uid (stmt,
1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1536 }
1537 else
1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1540 }
1541
1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1543 the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1545
1546 static gimple *
1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1548 {
1549 gimple *op1def = NULL, *op2def = NULL;
1550 gimple_stmt_iterator gsi;
1551 tree op;
1552 gassign *sum;
1553
1554 /* Create the addition statement. */
1555 op = make_ssa_name (type);
1556 sum = gimple_build_assign (op, opcode, op1, op2);
1557
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1) == SSA_NAME)
1560 op1def = SSA_NAME_DEF_STMT (op1);
1561 if (TREE_CODE (op2) == SSA_NAME)
1562 op2def = SSA_NAME_DEF_STMT (op2);
1563 if ((!op1def || gimple_nop_p (op1def))
1564 && (!op2def || gimple_nop_p (op2def)))
1565 {
1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1567 if (!gsi_end_p (gsi)
1568 && is_gimple_call (gsi_stmt (gsi))
1569 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1570 {
1571 /* Don't add statements before a returns_twice call at the start
1572 of a function. */
1573 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1574 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1575 }
1576 if (gsi_end_p (gsi))
1577 {
1578 gimple_stmt_iterator gsi2
1579 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1580 gimple_set_uid (sum,
1581 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1582 }
1583 else
1584 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1585 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1586 }
1587 else
1588 {
1589 gimple *insert_point;
1590 if ((!op1def || gimple_nop_p (op1def))
1591 || (op2def && !gimple_nop_p (op2def)
1592 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1593 insert_point = op2def;
1594 else
1595 insert_point = op1def;
1596 insert_stmt_after (sum, insert_point);
1597 }
1598 update_stmt (sum);
1599
1600 return sum;
1601 }
1602
1603 /* Perform un-distribution of divisions and multiplications.
1604 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1605 to (A + B) / X for real X.
1606
1607 The algorithm is organized as follows.
1608
1609 - First we walk the addition chain *OPS looking for summands that
1610 are defined by a multiplication or a real division. This results
1611 in the candidates bitmap with relevant indices into *OPS.
1612
1613 - Second we build the chains of multiplications or divisions for
1614 these candidates, counting the number of occurrences of (operand, code)
1615 pairs in all of the candidates chains.
1616
1617 - Third we sort the (operand, code) pairs by number of occurrence and
1618 process them starting with the pair with the most uses.
1619
1620 * For each such pair we walk the candidates again to build a
1621 second candidate bitmap noting all multiplication/division chains
1622 that have at least one occurrence of (operand, code).
1623
1624 * We build an alternate addition chain only covering these
1625 candidates with one (operand, code) operation removed from their
1626 multiplication/division chain.
1627
1628 * The first candidate gets replaced by the alternate addition chain
1629 multiplied/divided by the operand.
1630
1631 * All candidate chains get disabled for further processing and
1632 processing of (operand, code) pairs continues.
1633
1634 The alternate addition chains built are re-processed by the main
1635 reassociation algorithm which allows optimizing a * x * y + b * y * x
1636    to (a + b) * x * y in one invocation of the reassociation pass.  */
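/* As a small worked example (a sketch): for the PLUS_EXPR operand list
   { a*x, b*x, c } the pair (x, MULT_EXPR) occurs twice, so both
   candidates are stripped of the factor x, summed as a + b, and
   multiplied by x again; the operand list becomes { (a+b)*x, 0, c }
   and is then handed back to the normal reassociation machinery.  */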
1637
1638 static bool
1639 undistribute_ops_list (enum tree_code opcode,
1640 vec<operand_entry *> *ops, class loop *loop)
1641 {
1642 unsigned int length = ops->length ();
1643 operand_entry *oe1;
1644 unsigned i, j;
1645 unsigned nr_candidates, nr_candidates2;
1646 sbitmap_iterator sbi0;
1647 vec<operand_entry *> *subops;
1648 bool changed = false;
1649 unsigned int next_oecount_id = 0;
1650
1651 if (length <= 1
1652 || opcode != PLUS_EXPR)
1653 return false;
1654
1655 /* Build a list of candidates to process. */
1656 auto_sbitmap candidates (length);
1657 bitmap_clear (candidates);
1658 nr_candidates = 0;
1659 FOR_EACH_VEC_ELT (*ops, i, oe1)
1660 {
1661 enum tree_code dcode;
1662 gimple *oe1def;
1663
1664 if (TREE_CODE (oe1->op) != SSA_NAME)
1665 continue;
1666 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1667 if (!is_gimple_assign (oe1def))
1668 continue;
1669 dcode = gimple_assign_rhs_code (oe1def);
1670 if ((dcode != MULT_EXPR
1671 && dcode != RDIV_EXPR)
1672 || !is_reassociable_op (oe1def, dcode, loop))
1673 continue;
1674
1675 bitmap_set_bit (candidates, i);
1676 nr_candidates++;
1677 }
1678
1679 if (nr_candidates < 2)
1680 return false;
1681
1682 if (dump_file && (dump_flags & TDF_DETAILS))
1683 {
1684 fprintf (dump_file, "searching for un-distribute opportunities ");
1685 print_generic_expr (dump_file,
1686 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1687 fprintf (dump_file, " %d\n", nr_candidates);
1688 }
1689
1690 /* Build linearized sub-operand lists and the counting table. */
1691 cvec.create (0);
1692
1693 hash_table<oecount_hasher> ctable (15);
1694
1695 /* ??? Macro arguments cannot have multi-argument template types in
1696      them.  This typedef is needed to work around that limitation.  */
1697 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1698 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1699 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1700 {
1701 gimple *oedef;
1702 enum tree_code oecode;
1703 unsigned j;
1704
1705 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1706 oecode = gimple_assign_rhs_code (oedef);
1707 linearize_expr_tree (&subops[i], oedef,
1708 associative_tree_code (oecode), false);
1709
1710 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1711 {
1712 oecount c;
1713 int *slot;
1714 int idx;
1715 c.oecode = oecode;
1716 c.cnt = 1;
1717 c.id = next_oecount_id++;
1718 c.op = oe1->op;
1719 cvec.safe_push (c);
1720 idx = cvec.length () + 41;
1721 slot = ctable.find_slot (idx, INSERT);
1722 if (!*slot)
1723 {
1724 *slot = idx;
1725 }
1726 else
1727 {
1728 cvec.pop ();
1729 cvec[*slot - 42].cnt++;
1730 }
1731 }
1732 }
1733
1734 /* Sort the counting table. */
1735 cvec.qsort (oecount_cmp);
1736
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1738 {
1739 oecount *c;
1740 fprintf (dump_file, "Candidates:\n");
1741 FOR_EACH_VEC_ELT (cvec, j, c)
1742 {
1743 fprintf (dump_file, " %u %s: ", c->cnt,
1744 c->oecode == MULT_EXPR
1745 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1746 print_generic_expr (dump_file, c->op);
1747 fprintf (dump_file, "\n");
1748 }
1749 }
1750
1751 /* Process the (operand, code) pairs in order of most occurrence. */
1752 auto_sbitmap candidates2 (length);
1753 while (!cvec.is_empty ())
1754 {
1755 oecount *c = &cvec.last ();
1756 if (c->cnt < 2)
1757 break;
1758
1759 /* Now collect the operands in the outer chain that contain
1760 the common operand in their inner chain. */
1761 bitmap_clear (candidates2);
1762 nr_candidates2 = 0;
1763 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1764 {
1765 gimple *oedef;
1766 enum tree_code oecode;
1767 unsigned j;
1768 tree op = (*ops)[i]->op;
1769
1770 /* If we undistributed in this chain already this may be
1771 a constant. */
1772 if (TREE_CODE (op) != SSA_NAME)
1773 continue;
1774
1775 oedef = SSA_NAME_DEF_STMT (op);
1776 oecode = gimple_assign_rhs_code (oedef);
1777 if (oecode != c->oecode)
1778 continue;
1779
1780 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1781 {
1782 if (oe1->op == c->op)
1783 {
1784 bitmap_set_bit (candidates2, i);
1785 ++nr_candidates2;
1786 break;
1787 }
1788 }
1789 }
1790
1791 if (nr_candidates2 >= 2)
1792 {
1793 operand_entry *oe1, *oe2;
1794 gimple *prod;
1795 int first = bitmap_first_set_bit (candidates2);
1796
1797 /* Build the new addition chain. */
1798 oe1 = (*ops)[first];
1799 if (dump_file && (dump_flags & TDF_DETAILS))
1800 {
1801 fprintf (dump_file, "Building (");
1802 print_generic_expr (dump_file, oe1->op);
1803 }
1804 zero_one_operation (&oe1->op, c->oecode, c->op);
1805 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1806 {
1807 gimple *sum;
1808 oe2 = (*ops)[i];
1809 if (dump_file && (dump_flags & TDF_DETAILS))
1810 {
1811 fprintf (dump_file, " + ");
1812 print_generic_expr (dump_file, oe2->op);
1813 }
1814 zero_one_operation (&oe2->op, c->oecode, c->op);
1815 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1816 oe1->op, oe2->op, opcode);
1817 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1818 oe2->rank = 0;
1819 oe1->op = gimple_get_lhs (sum);
1820 }
1821
1822 /* Apply the multiplication/division. */
1823 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1824 oe1->op, c->op, c->oecode);
1825 if (dump_file && (dump_flags & TDF_DETAILS))
1826 {
1827 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1828 print_generic_expr (dump_file, c->op);
1829 fprintf (dump_file, "\n");
1830 }
1831
1832 /* Record it in the addition chain and disable further
1833 undistribution with this op. */
1834 oe1->op = gimple_assign_lhs (prod);
1835 oe1->rank = get_rank (oe1->op);
1836 subops[first].release ();
1837
1838 changed = true;
1839 }
1840
1841 cvec.pop ();
1842 }
1843
1844 for (i = 0; i < ops->length (); ++i)
1845 subops[i].release ();
1846 free (subops);
1847 cvec.release ();
1848
1849 return changed;
1850 }
1851
1852 /* Pair describing one BIT_FIELD_REF of a specific VECTOR_TYPE SSA_NAME:
1853    first: the element index within the vector for the BIT_FIELD_REF.
1854    second: the index into the *OPS vec for that BIT_FIELD_REF.  */
1855 typedef std::pair<unsigned, unsigned> v_info_elem;
1856 struct v_info {
1857 tree vec_type;
1858 auto_vec<v_info_elem, 32> vec;
1859 };
1860 typedef v_info *v_info_ptr;
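/* For illustration only (hypothetical indices): if (*ops)[7] is defined by
   a BIT_FIELD_REF extracting element 2 of the vector SSA_NAME v_4, then the
   v_info stored for key v_4 records the v_info_elem (2, 7).  */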
1861
1862 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1863 static int
1864 sort_by_mach_mode (const void *p_i, const void *p_j)
1865 {
1866 const tree tr1 = *((const tree *) p_i);
1867 const tree tr2 = *((const tree *) p_j);
1868 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1869 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1870 if (mode1 > mode2)
1871 return 1;
1872 else if (mode1 < mode2)
1873 return -1;
1874 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1875 return -1;
1876 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1877 return 1;
1878 return 0;
1879 }
1880
1881 /* Cleanup hash map for VECTOR information. */
1882 static void
1883 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1884 {
1885 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1886 it != info_map.end (); ++it)
1887 {
1888 v_info_ptr info = (*it).second;
1889 delete info;
1890 (*it).second = NULL;
1891 }
1892 }
1893
1894 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1895 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1896 is transformed to
1897 Vs = (V1 + V2 + ... + Vn)
1898 Vs[0] + Vs[1] + ... + Vs[k]
1899
1900 The basic steps are listed below:
1901
1902      1) Check the addition chain *OPS, looking for summands that come from
1903      a BIT_FIELD_REF on a VECTOR type.  Record the information in
1904      v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1905
1906      2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1907      contiguous and cover the whole VECTOR without any holes.
1908      Collect the VECTORs that qualify into a list of candidates to transform.
1909
1910      3) Sort the VECTOR list by the machine mode of the VECTOR type.  For each
1911      group of candidates with the same mode, build the addition statements
1912      for them and generate BIT_FIELD_REFs accordingly.
1913
1914 TODO:
1915      The current implementation requires each VECTOR to be fully covered.
1916      It could be extended to support partial coverage (adjacent elements
1917      that do not fill the whole VECTOR), but that would need a cost model
1918      to decide when the transformation is worthwhile.
1919 */
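/* A sketch of the transformation on hypothetical GIMPLE, assuming two V4SF
   vectors v1_1 and v2_2 whose four lanes each feed a PLUS_EXPR chain
   (all SSA names below are illustrative only):

     before:  a_3 = BIT_FIELD_REF <v1_1, 32, 0>;   ... likewise lanes 1-3 ...
	      e_7 = BIT_FIELD_REF <v2_2, 32, 0>;   ... likewise lanes 1-3 ...
	      sum = a_3 + b_4 + c_5 + d_6 + e_7 + f_8 + g_9 + h_10;

     after:   vs_11 = v1_1 + v2_2;
	      s0_12 = BIT_FIELD_REF <vs_11, 32, 0>;  ... likewise lanes 1-3 ...
	      sum = s0_12 + s1_13 + s2_14 + s3_15;  */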
1920 static bool
1921 undistribute_bitref_for_vector (enum tree_code opcode,
1922 vec<operand_entry *> *ops, struct loop *loop)
1923 {
1924 if (ops->length () <= 1)
1925 return false;
1926
1927 if (opcode != PLUS_EXPR
1928 && opcode != MULT_EXPR
1929 && opcode != BIT_XOR_EXPR
1930 && opcode != BIT_IOR_EXPR
1931 && opcode != BIT_AND_EXPR)
1932 return false;
1933
1934 hash_map<tree, v_info_ptr> v_info_map;
1935 operand_entry *oe1;
1936 unsigned i;
1937
1938 /* Find the summands in the addition chain that come from a VECTOR
1939 BIT_FIELD_REF and record their information in the map. */
1940 FOR_EACH_VEC_ELT (*ops, i, oe1)
1941 {
1942 enum tree_code dcode;
1943 gimple *oe1def;
1944
1945 if (TREE_CODE (oe1->op) != SSA_NAME)
1946 continue;
1947 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1948 if (!is_gimple_assign (oe1def))
1949 continue;
1950 dcode = gimple_assign_rhs_code (oe1def);
1951 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1952 continue;
1953
1954 tree rhs = gimple_assign_rhs1 (oe1def);
1955 tree vec = TREE_OPERAND (rhs, 0);
1956 tree vec_type = TREE_TYPE (vec);
1957
1958 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1959 continue;
1960
1961 /* Ignore it if target machine can't support this VECTOR type. */
1962 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1963 continue;
1964
1965 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1966 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1967 continue;
1968
1969 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1970 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1971 continue;
1972
1973 /* The type of BIT_FIELD_REF might not be equal to the element type of
1974 the vector. We want to use a vector type with element type the
1975 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1976 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1977 {
1978 machine_mode simd_mode;
1979 unsigned HOST_WIDE_INT size, nunits;
1980 unsigned HOST_WIDE_INT elem_size
1981 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1982 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1983 continue;
1984 if (size <= elem_size || (size % elem_size) != 0)
1985 continue;
1986 nunits = size / elem_size;
1987 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1988 nunits).exists (&simd_mode))
1989 continue;
1990 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1991
1992 /* Ignore it if target machine can't support this VECTOR type. */
1993 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1994 continue;
1995
1996 /* Check const vector type, constrain BIT_FIELD_REF offset and
1997 size. */
1998 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1999 continue;
2000
2001 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2002 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2003 continue;
2004 }
2005
2006 tree elem_type = TREE_TYPE (vec_type);
2007 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2008 if (maybe_ne (bit_field_size (rhs), elem_size))
2009 continue;
2010
2011 unsigned idx;
2012 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2013 continue;
2014
2015 /* Ignore it if target machine can't support this type of VECTOR
2016 operation. */
2017 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2018 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2019 continue;
2020
2021 bool existed;
2022 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2023 if (!existed)
2024 {
2025 info = new v_info;
2026 info->vec_type = vec_type;
2027 }
2028 else if (!types_compatible_p (vec_type, info->vec_type))
2029 continue;
2030 info->vec.safe_push (std::make_pair (idx, i));
2031 }
2032
2033 /* We need at least two VECTORs to combine. */
2034 if (v_info_map.elements () <= 1)
2035 {
2036 cleanup_vinfo_map (v_info_map);
2037 return false;
2038 }
2039
2040 /* Verify all VECTOR candidates by checking two conditions:
2041 1) sorted offsets are adjacent, no holes.
2042 2) can fill the whole VECTOR perfectly.
2043 And add the valid candidates to a vector for further handling. */
2044 auto_vec<tree> valid_vecs (v_info_map.elements ());
2045 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2046 it != v_info_map.end (); ++it)
2047 {
2048 tree cand_vec = (*it).first;
2049 v_info_ptr cand_info = (*it).second;
2050 unsigned int num_elems
2051 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2052 if (cand_info->vec.length () != num_elems)
2053 continue;
2054 sbitmap holes = sbitmap_alloc (num_elems);
2055 bitmap_ones (holes);
2056 bool valid = true;
2057 v_info_elem *curr;
2058 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2059 {
2060 if (!bitmap_bit_p (holes, curr->first))
2061 {
2062 valid = false;
2063 break;
2064 }
2065 else
2066 bitmap_clear_bit (holes, curr->first);
2067 }
2068 if (valid && bitmap_empty_p (holes))
2069 valid_vecs.quick_push (cand_vec);
2070 sbitmap_free (holes);
2071 }
2072
2073 /* We need at least two VECTORs to combine. */
2074 if (valid_vecs.length () <= 1)
2075 {
2076 cleanup_vinfo_map (v_info_map);
2077 return false;
2078 }
2079
2080 valid_vecs.qsort (sort_by_mach_mode);
2081 /* Go through all candidates in machine mode order and skip any mode
2082 that has only a single candidate (checked against the next sorted entry). */
2083 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2084 {
2085 tree tvec = valid_vecs[i];
2086 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2087
2088 /* Skip modes with only a single candidate. */
2089 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2090 continue;
2091
2092 unsigned int idx, j;
2093 gimple *sum = NULL;
2094 tree sum_vec = tvec;
2095 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2096 v_info_elem *elem;
2097 tree vec_type = info_ptr->vec_type;
2098
2099 /* Build the sum for all candidates with same mode. */
2100 do
2101 {
2102 sum = build_and_add_sum (vec_type, sum_vec,
2103 valid_vecs[i + 1], opcode);
2104 /* Update the operands only after build_and_add_sum,
2105 so that we don't have to repeat the placement algorithm
2106 of build_and_add_sum. */
2107 if (sum_vec == tvec
2108 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2109 {
2110 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2111 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2112 tree lhs = make_ssa_name (vec_type);
2113 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2114 gimple_set_uid (g, gimple_uid (sum));
2115 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2116 gimple_assign_set_rhs1 (sum, lhs);
2117 update_stmt (sum);
2118 }
2119 if (!useless_type_conversion_p (vec_type,
2120 TREE_TYPE (valid_vecs[i + 1])))
2121 {
2122 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2123 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2124 valid_vecs[i + 1]);
2125 tree lhs = make_ssa_name (vec_type);
2126 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2127 gimple_set_uid (g, gimple_uid (sum));
2128 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2129 gimple_assign_set_rhs2 (sum, lhs);
2130 update_stmt (sum);
2131 }
2132 sum_vec = gimple_get_lhs (sum);
2133 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2134 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2135 /* Update those related ops of current candidate VECTOR. */
2136 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2137 {
2138 idx = elem->second;
2139 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2140 /* Set this so that the op definition will get DCEd later. */
2141 gimple_set_visited (def, true);
2142 if (opcode == PLUS_EXPR
2143 || opcode == BIT_XOR_EXPR
2144 || opcode == BIT_IOR_EXPR)
2145 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2146 else if (opcode == MULT_EXPR)
2147 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2148 else
2149 {
2150 gcc_assert (opcode == BIT_AND_EXPR);
2151 (*ops)[idx]->op
2152 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2153 }
2154 (*ops)[idx]->rank = 0;
2155 }
2156 if (dump_file && (dump_flags & TDF_DETAILS))
2157 {
2158 fprintf (dump_file, "Generating addition -> ");
2159 print_gimple_stmt (dump_file, sum, 0);
2160 }
2161 i++;
2162 }
2163 while ((i < valid_vecs.length () - 1)
2164 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2165
2166 /* Referring to first valid VECTOR with this mode, generate the
2167 BIT_FIELD_REF statements accordingly. */
2168 info_ptr = *(v_info_map.get (tvec));
2169 gcc_assert (sum);
2170 tree elem_type = TREE_TYPE (vec_type);
2171 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2172 {
2173 idx = elem->second;
2174 tree dst = make_ssa_name (elem_type);
2175 tree pos = bitsize_int (elem->first
2176 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2177 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2178 TYPE_SIZE (elem_type), pos);
2179 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2180 insert_stmt_after (gs, sum);
2181 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2182 /* Set this so that the op definition will get DCEd later. */
2183 gimple_set_visited (def, true);
2184 (*ops)[idx]->op = gimple_assign_lhs (gs);
2185 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 {
2188 fprintf (dump_file, "Generating bit_field_ref -> ");
2189 print_gimple_stmt (dump_file, gs, 0);
2190 }
2191 }
2192 }
2193
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2195 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2196
2197 cleanup_vinfo_map (v_info_map);
2198
2199 return true;
2200 }
2201
2202 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2203 expression, examine the other OPS to see if any of them are comparisons
2204 of the same values, which we may be able to combine or eliminate.
2205 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2206
2207 static bool
2208 eliminate_redundant_comparison (enum tree_code opcode,
2209 vec<operand_entry *> *ops,
2210 unsigned int currindex,
2211 operand_entry *curr)
2212 {
2213 tree op1, op2;
2214 enum tree_code lcode, rcode;
2215 gimple *def1, *def2;
2216 int i;
2217 operand_entry *oe;
2218
2219 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2220 return false;
2221
2222 /* Check that CURR is a comparison. */
2223 if (TREE_CODE (curr->op) != SSA_NAME)
2224 return false;
2225 def1 = SSA_NAME_DEF_STMT (curr->op);
2226 if (!is_gimple_assign (def1))
2227 return false;
2228 lcode = gimple_assign_rhs_code (def1);
2229 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2230 return false;
2231 op1 = gimple_assign_rhs1 (def1);
2232 op2 = gimple_assign_rhs2 (def1);
2233
2234 /* Now look for a similar comparison in the remaining OPS. */
2235 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2236 {
2237 tree t;
2238
2239 if (TREE_CODE (oe->op) != SSA_NAME)
2240 continue;
2241 def2 = SSA_NAME_DEF_STMT (oe->op);
2242 if (!is_gimple_assign (def2))
2243 continue;
2244 rcode = gimple_assign_rhs_code (def2);
2245 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2246 continue;
2247
2248 /* If we got here, we have a match. See if we can combine the
2249 two comparisons. */
2250 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2251 if (opcode == BIT_IOR_EXPR)
2252 t = maybe_fold_or_comparisons (type,
2253 lcode, op1, op2,
2254 rcode, gimple_assign_rhs1 (def2),
2255 gimple_assign_rhs2 (def2));
2256 else
2257 t = maybe_fold_and_comparisons (type,
2258 lcode, op1, op2,
2259 rcode, gimple_assign_rhs1 (def2),
2260 gimple_assign_rhs2 (def2));
2261 if (!t)
2262 continue;
2263
2264 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2265 always give us a boolean_type_node value back. If the original
2266 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2267 we need to convert. */
2268 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2269 {
2270 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2271 continue;
2272 t = fold_convert (TREE_TYPE (curr->op), t);
2273 }
2274
2275 if (TREE_CODE (t) != INTEGER_CST
2276 && !operand_equal_p (t, curr->op, 0))
2277 {
2278 enum tree_code subcode;
2279 tree newop1, newop2;
2280 if (!COMPARISON_CLASS_P (t))
2281 continue;
2282 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2283 STRIP_USELESS_TYPE_CONVERSION (newop1);
2284 STRIP_USELESS_TYPE_CONVERSION (newop2);
2285 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2286 continue;
2287 }
2288
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2290 {
2291 fprintf (dump_file, "Equivalence: ");
2292 print_generic_expr (dump_file, curr->op);
2293 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2294 print_generic_expr (dump_file, oe->op);
2295 fprintf (dump_file, " -> ");
2296 print_generic_expr (dump_file, t);
2297 fprintf (dump_file, "\n");
2298 }
2299
2300 /* Now we can delete oe, as it has been subsumed by the new combined
2301 expression t. */
2302 ops->ordered_remove (i);
2303 reassociate_stats.ops_eliminated ++;
2304
2305 /* If t is the same as curr->op, we're done. Otherwise we must
2306 replace curr->op with t. Special case is if we got a constant
2307 back, in which case we add it to the end instead of in place of
2308 the current entry. */
2309 if (TREE_CODE (t) == INTEGER_CST)
2310 {
2311 ops->ordered_remove (currindex);
2312 add_to_ops_vec (ops, t);
2313 }
2314 else if (!operand_equal_p (t, curr->op, 0))
2315 {
2316 gimple *sum;
2317 enum tree_code subcode;
2318 tree newop1;
2319 tree newop2;
2320 gcc_assert (COMPARISON_CLASS_P (t));
2321 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2322 STRIP_USELESS_TYPE_CONVERSION (newop1);
2323 STRIP_USELESS_TYPE_CONVERSION (newop2);
2324 gcc_checking_assert (is_gimple_val (newop1)
2325 && is_gimple_val (newop2));
2326 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2327 curr->op = gimple_get_lhs (sum);
2328 }
2329 return true;
2330 }
2331
2332 return false;
2333 }
2334
2335
2336 /* Transform repeated addition of same values into multiply with
2337 constant. */
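/* For illustration (hypothetical operand list): the chain x + x + x + y
   becomes t + y, where t is a new statement t = x * 3 that is marked
   visited and pushed onto the operand vector.  */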
2338 static bool
2339 transform_add_to_multiply (vec<operand_entry *> *ops)
2340 {
2341 operand_entry *oe;
2342 tree op = NULL_TREE;
2343 int j;
2344 int i, start = -1, end = 0, count = 0;
2345 auto_vec<std::pair <int, int> > indxs;
2346 bool changed = false;
2347
2348 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2349 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2350 || !flag_unsafe_math_optimizations))
2351 return false;
2352
2353 /* Look for repeated operands. */
2354 FOR_EACH_VEC_ELT (*ops, i, oe)
2355 {
2356 if (start == -1)
2357 {
2358 count = 1;
2359 op = oe->op;
2360 start = i;
2361 }
2362 else if (operand_equal_p (oe->op, op, 0))
2363 {
2364 count++;
2365 end = i;
2366 }
2367 else
2368 {
2369 if (count > 1)
2370 indxs.safe_push (std::make_pair (start, end));
2371 count = 1;
2372 op = oe->op;
2373 start = i;
2374 }
2375 }
2376
2377 if (count > 1)
2378 indxs.safe_push (std::make_pair (start, end));
2379
2380 for (j = indxs.length () - 1; j >= 0; --j)
2381 {
2382 /* Convert repeated operand addition to multiplication. */
2383 start = indxs[j].first;
2384 end = indxs[j].second;
2385 op = (*ops)[start]->op;
2386 count = end - start + 1;
2387 for (i = end; i >= start; --i)
2388 ops->unordered_remove (i);
2389 tree tmp = make_ssa_name (TREE_TYPE (op));
2390 tree cst = build_int_cst (integer_type_node, count);
2391 gassign *mul_stmt
2392 = gimple_build_assign (tmp, MULT_EXPR,
2393 op, fold_convert (TREE_TYPE (op), cst));
2394 gimple_set_visited (mul_stmt, true);
2395 add_to_ops_vec (ops, tmp, mul_stmt);
2396 changed = true;
2397 }
2398
2399 return changed;
2400 }
2401
2402
2403 /* Perform various identities and other optimizations on the list of
2404 operand entries, stored in OPS. The tree code for the binary
2405 operation between all the operands is OPCODE. */
2406
2407 static void
2408 optimize_ops_list (enum tree_code opcode,
2409 vec<operand_entry *> *ops)
2410 {
2411 unsigned int length = ops->length ();
2412 unsigned int i;
2413 operand_entry *oe;
2414 operand_entry *oelast = NULL;
2415 bool iterate = false;
2416
2417 if (length == 1)
2418 return;
2419
2420 oelast = ops->last ();
2421
2422 /* If the last two are constants, pop the constants off, merge them
2423 and try the next two. */
2424 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2425 {
2426 operand_entry *oelm1 = (*ops)[length - 2];
2427
2428 if (oelm1->rank == 0
2429 && is_gimple_min_invariant (oelm1->op)
2430 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2431 TREE_TYPE (oelast->op)))
2432 {
2433 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2434 oelm1->op, oelast->op);
2435
2436 if (folded && is_gimple_min_invariant (folded))
2437 {
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 fprintf (dump_file, "Merging constants\n");
2440
2441 ops->pop ();
2442 ops->pop ();
2443
2444 add_to_ops_vec (ops, folded);
2445 reassociate_stats.constants_eliminated++;
2446
2447 optimize_ops_list (opcode, ops);
2448 return;
2449 }
2450 }
2451 }
2452
2453 eliminate_using_constants (opcode, ops);
2454 oelast = NULL;
2455
2456 for (i = 0; ops->iterate (i, &oe);)
2457 {
2458 bool done = false;
2459
2460 if (eliminate_not_pairs (opcode, ops, i, oe))
2461 return;
2462 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2463 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2464 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2465 {
2466 if (done)
2467 return;
2468 iterate = true;
2469 oelast = NULL;
2470 continue;
2471 }
2472 oelast = oe;
2473 i++;
2474 }
2475
2476 if (iterate)
2477 optimize_ops_list (opcode, ops);
2478 }
2479
2480 /* The following functions are subroutines to optimize_range_tests and allow
2481 it to try to change a logical combination of comparisons into a range
2482 test.
2483
2484 For example, both
2485 X == 2 || X == 5 || X == 3 || X == 4
2486 and
2487 X >= 2 && X <= 5
2488 are converted to
2489 (unsigned) (X - 2) <= 3
2490
2491 For more information see comments above fold_test_range in fold-const.cc,
2492 this implementation is for GIMPLE. */
2493
2494
2495
2496 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2497
2498 void
2499 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2500 {
2501 if (!skip_exp)
2502 print_generic_expr (file, r->exp);
2503 fprintf (file, " %c[", r->in_p ? '+' : '-');
2504 print_generic_expr (file, r->low);
2505 fputs (", ", file);
2506 print_generic_expr (file, r->high);
2507 fputc (']', file);
2508 }
2509
2510 /* Dump the range entry R to STDERR. */
2511
2512 DEBUG_FUNCTION void
2513 debug_range_entry (struct range_entry *r)
2514 {
2515 dump_range_entry (stderr, r, false);
2516 fputc ('\n', stderr);
2517 }
2518
2519 /* This is similar to make_range in fold-const.cc, but on top of
2520 GIMPLE instead of trees. If EXP is non-NULL, it should be
2521 an SSA_NAME and STMT argument is ignored, otherwise STMT
2522 argument should be a GIMPLE_COND. */
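/* For illustration (hypothetical SSA names): given _5 = x_2 > 7 with a
   signed integer x_2, init_range_entry (&r, _5, NULL) yields roughly
   exp = x_2, in_p = true, low = 8, high = NULL_TREE, i.e. x_2 in
   [8, +INF].  */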
2523
2524 void
2525 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2526 {
2527 int in_p;
2528 tree low, high;
2529 bool is_bool, strict_overflow_p;
2530
2531 r->exp = NULL_TREE;
2532 r->in_p = false;
2533 r->strict_overflow_p = false;
2534 r->low = NULL_TREE;
2535 r->high = NULL_TREE;
2536 if (exp != NULL_TREE
2537 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2538 return;
2539
2540 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2541 and see if we can refine the range. Some of the cases below may not
2542 happen, but it doesn't seem worth worrying about this. We "continue"
2543 the outer loop when we've changed something; otherwise we "break"
2544 the switch, which will "break" the while. */
2545 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2546 high = low;
2547 in_p = 0;
2548 strict_overflow_p = false;
2549 is_bool = false;
2550 if (exp == NULL_TREE)
2551 is_bool = true;
2552 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2553 {
2554 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2555 is_bool = true;
2556 else
2557 return;
2558 }
2559 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2560 is_bool = true;
2561
2562 while (1)
2563 {
2564 enum tree_code code;
2565 tree arg0, arg1, exp_type;
2566 tree nexp;
2567 location_t loc;
2568
2569 if (exp != NULL_TREE)
2570 {
2571 if (TREE_CODE (exp) != SSA_NAME
2572 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2573 break;
2574
2575 stmt = SSA_NAME_DEF_STMT (exp);
2576 if (!is_gimple_assign (stmt))
2577 break;
2578
2579 code = gimple_assign_rhs_code (stmt);
2580 arg0 = gimple_assign_rhs1 (stmt);
2581 arg1 = gimple_assign_rhs2 (stmt);
2582 exp_type = TREE_TYPE (exp);
2583 }
2584 else
2585 {
2586 code = gimple_cond_code (stmt);
2587 arg0 = gimple_cond_lhs (stmt);
2588 arg1 = gimple_cond_rhs (stmt);
2589 exp_type = boolean_type_node;
2590 }
2591
2592 if (TREE_CODE (arg0) != SSA_NAME
2593 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2594 break;
2595 loc = gimple_location (stmt);
2596 switch (code)
2597 {
2598 case BIT_NOT_EXPR:
2599 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2600 /* Ensure the range is either +[-,0], +[0,0],
2601 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2602 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2603 or similar expression of unconditional true or
2604 false, it should not be negated. */
2605 && ((high && integer_zerop (high))
2606 || (low && integer_onep (low))))
2607 {
2608 in_p = !in_p;
2609 exp = arg0;
2610 continue;
2611 }
2612 break;
2613 case SSA_NAME:
2614 exp = arg0;
2615 continue;
2616 CASE_CONVERT:
2617 if (is_bool)
2618 {
2619 if ((TYPE_PRECISION (exp_type) == 1
2620 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2621 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2622 return;
2623 }
2624 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2625 {
2626 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2627 is_bool = true;
2628 else
2629 return;
2630 }
2631 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2632 is_bool = true;
2633 goto do_default;
2634 case EQ_EXPR:
2635 case NE_EXPR:
2636 case LT_EXPR:
2637 case LE_EXPR:
2638 case GE_EXPR:
2639 case GT_EXPR:
2640 is_bool = true;
2641 /* FALLTHRU */
2642 default:
2643 if (!is_bool)
2644 return;
2645 do_default:
2646 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2647 &low, &high, &in_p,
2648 &strict_overflow_p);
2649 if (nexp != NULL_TREE)
2650 {
2651 exp = nexp;
2652 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2653 continue;
2654 }
2655 break;
2656 }
2657 break;
2658 }
2659 if (is_bool)
2660 {
2661 r->exp = exp;
2662 r->in_p = in_p;
2663 r->low = low;
2664 r->high = high;
2665 r->strict_overflow_p = strict_overflow_p;
2666 }
2667 }
2668
2669 /* Comparison function for qsort. Sort entries
2670 without SSA_NAME exp first, then with SSA_NAMEs sorted
2671 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2672 by increasing ->low and if ->low is the same, by increasing
2673 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2674 maximum. */
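/* For illustration (hypothetical entries): an entry whose exp is not an
   SSA_NAME sorts before any SSA_NAME entry; among entries for the same
   SSA_NAME _7, [NULL, 5] sorts before [1, 3], which sorts before [1, 5],
   which sorts before [1, NULL].  */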
2675
2676 static int
2677 range_entry_cmp (const void *a, const void *b)
2678 {
2679 const struct range_entry *p = (const struct range_entry *) a;
2680 const struct range_entry *q = (const struct range_entry *) b;
2681
2682 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2683 {
2684 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2685 {
2686 /* Group range_entries for the same SSA_NAME together. */
2687 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2688 return -1;
2689 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2690 return 1;
2691 /* If ->low is different, NULL low goes first, then by
2692 ascending low. */
2693 if (p->low != NULL_TREE)
2694 {
2695 if (q->low != NULL_TREE)
2696 {
2697 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2698 p->low, q->low);
2699 if (tem && integer_onep (tem))
2700 return -1;
2701 tem = fold_binary (GT_EXPR, boolean_type_node,
2702 p->low, q->low);
2703 if (tem && integer_onep (tem))
2704 return 1;
2705 }
2706 else
2707 return 1;
2708 }
2709 else if (q->low != NULL_TREE)
2710 return -1;
2711 /* If ->high is different, NULL high goes last, before that by
2712 ascending high. */
2713 if (p->high != NULL_TREE)
2714 {
2715 if (q->high != NULL_TREE)
2716 {
2717 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2718 p->high, q->high);
2719 if (tem && integer_onep (tem))
2720 return -1;
2721 tem = fold_binary (GT_EXPR, boolean_type_node,
2722 p->high, q->high);
2723 if (tem && integer_onep (tem))
2724 return 1;
2725 }
2726 else
2727 return -1;
2728 }
2729 else if (q->high != NULL_TREE)
2730 return 1;
2731 /* If both ranges are the same, sort below by ascending idx. */
2732 }
2733 else
2734 return 1;
2735 }
2736 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2737 return -1;
2738
2739 if (p->idx < q->idx)
2740 return -1;
2741 else
2742 {
2743 gcc_checking_assert (p->idx > q->idx);
2744 return 1;
2745 }
2746 }
2747
2748 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2749 inserting any needed statements before or after GSI, depending on BEFORE. */
2750
2751 static tree
2752 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2753 {
2754 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2755 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2756 if (TREE_CODE (ret) != SSA_NAME)
2757 {
2758 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2759 if (before)
2760 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2761 else
2762 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2763 ret = gimple_assign_lhs (g);
2764 }
2765 return ret;
2766 }
2767
2768 /* Helper routine of optimize_range_test.
2769 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2770 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2771 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2772 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2773 an array of COUNT pointers to other ranges. Return
2774 true if the range merge has been successful.
2775 If OPCODE is ERROR_MARK, this is called from within
2776 maybe_optimize_range_tests and is performing inter-bb range optimization.
2777 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2778 oe->rank. */
2779
2780 static bool
2781 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2782 struct range_entry **otherrangep,
2783 unsigned int count, enum tree_code opcode,
2784 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2785 bool in_p, tree low, tree high, bool strict_overflow_p)
2786 {
2787 unsigned int idx = range->idx;
2788 struct range_entry *swap_with = NULL;
2789 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2790 if (opcode == ERROR_MARK)
2791 {
2792 /* For inter-bb range test optimization, pick from the range tests
2793 the one which is tested in the earliest condition (one dominating
2794 the others), because otherwise there could be some UB (e.g. signed
2795 overflow) in following bbs that we'd expose which wasn't there in
2796 the original program. See PR104196. */
2797 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2798 basic_block range_bb = orig_range_bb;
2799 for (unsigned int i = 0; i < count; i++)
2800 {
2801 struct range_entry *this_range;
2802 if (otherrange)
2803 this_range = otherrange + i;
2804 else
2805 this_range = otherrangep[i];
2806 operand_entry *oe = (*ops)[this_range->idx];
2807 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2808 if (range_bb != this_bb
2809 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2810 {
2811 swap_with = this_range;
2812 range_bb = this_bb;
2813 idx = this_range->idx;
2814 }
2815 }
2816 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2817 only defined in later blocks. In this case we can't move the
2818 merged comparison earlier, so instead check if there are any stmts
2819 that might trigger signed integer overflow in between and rewrite
2820 them. But only after we check if the optimization is possible. */
2821 if (seq && swap_with)
2822 {
2823 rewrite_bb_first = range_bb;
2824 rewrite_bb_last = orig_range_bb;
2825 idx = range->idx;
2826 swap_with = NULL;
2827 }
2828 }
2829 operand_entry *oe = (*ops)[idx];
2830 tree op = oe->op;
2831 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2832 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2833 location_t loc = gimple_location (stmt);
2834 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2835 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2836 in_p, low, high);
2837 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2838 gimple_stmt_iterator gsi;
2839 unsigned int i, uid;
2840
2841 if (tem == NULL_TREE)
2842 return false;
2843
2844 /* If op is default def SSA_NAME, there is no place to insert the
2845 new comparison. Give up, unless we can use OP itself as the
2846 range test. */
2847 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2848 {
2849 if (op == range->exp
2850 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2851 || TREE_CODE (optype) == BOOLEAN_TYPE)
2852 && (op == tem
2853 || (TREE_CODE (tem) == EQ_EXPR
2854 && TREE_OPERAND (tem, 0) == op
2855 && integer_onep (TREE_OPERAND (tem, 1))))
2856 && opcode != BIT_IOR_EXPR
2857 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2858 {
2859 stmt = NULL;
2860 tem = op;
2861 }
2862 else
2863 return false;
2864 }
2865
2866 if (swap_with)
2867 std::swap (range->idx, swap_with->idx);
2868
2869 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2870 warning_at (loc, OPT_Wstrict_overflow,
2871 "assuming signed overflow does not occur "
2872 "when simplifying range test");
2873
2874 if (dump_file && (dump_flags & TDF_DETAILS))
2875 {
2876 struct range_entry *r;
2877 fprintf (dump_file, "Optimizing range tests ");
2878 dump_range_entry (dump_file, range, false);
2879 for (i = 0; i < count; i++)
2880 {
2881 if (otherrange)
2882 r = otherrange + i;
2883 else
2884 r = otherrangep[i];
2885 if (r->exp
2886 && r->exp != range->exp
2887 && TREE_CODE (r->exp) == SSA_NAME)
2888 {
2889 fprintf (dump_file, " and ");
2890 dump_range_entry (dump_file, r, false);
2891 }
2892 else
2893 {
2894 fprintf (dump_file, " and");
2895 dump_range_entry (dump_file, r, true);
2896 }
2897 }
2898 fprintf (dump_file, "\n into ");
2899 print_generic_expr (dump_file, tem);
2900 fprintf (dump_file, "\n");
2901 }
2902
2903 /* In inter-bb range optimization mode, if we have a seq, we can't
2904 move the merged comparison to the earliest bb from the comparisons
2905 being replaced, so instead rewrite stmts that could trigger signed
2906 integer overflow. */
2907 for (basic_block bb = rewrite_bb_last;
2908 bb != rewrite_bb_first; bb = single_pred (bb))
2909 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2910 !gsi_end_p (gsi); gsi_next (&gsi))
2911 {
2912 gimple *stmt = gsi_stmt (gsi);
2913 if (is_gimple_assign (stmt))
2914 if (tree lhs = gimple_assign_lhs (stmt))
2915 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2916 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2917 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2918 {
2919 enum tree_code code = gimple_assign_rhs_code (stmt);
2920 if (arith_code_with_undefined_signed_overflow (code))
2921 {
2922 gimple_stmt_iterator gsip = gsi;
2923 gimple_stmt_iterator gsin = gsi;
2924 gsi_prev (&gsip);
2925 gsi_next (&gsin);
2926 rewrite_to_defined_overflow (stmt, true);
2927 unsigned uid = gimple_uid (stmt);
2928 if (gsi_end_p (gsip))
2929 gsip = gsi_after_labels (bb);
2930 else
2931 gsi_next (&gsip);
2932 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2933 gsi_next (&gsip))
2934 gimple_set_uid (gsi_stmt (gsip), uid);
2935 }
2936 }
2937 }
2938
2939 if (opcode == BIT_IOR_EXPR
2940 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2941 tem = invert_truthvalue_loc (loc, tem);
2942
2943 tem = fold_convert_loc (loc, optype, tem);
2944 if (stmt)
2945 {
2946 gsi = gsi_for_stmt (stmt);
2947 uid = gimple_uid (stmt);
2948 }
2949 else
2950 {
2951 gsi = gsi_none ();
2952 uid = 0;
2953 }
2954 if (stmt == NULL)
2955 gcc_checking_assert (tem == op);
2956 /* In rare cases range->exp can be equal to lhs of stmt.
2957 In that case we have to insert after the stmt rather than before
2958 it. If stmt is a PHI, insert it at the start of the basic block. */
2959 else if (op != range->exp)
2960 {
2961 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2962 tem = force_into_ssa_name (&gsi, tem, true);
2963 gsi_prev (&gsi);
2964 }
2965 else if (gimple_code (stmt) != GIMPLE_PHI)
2966 {
2967 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2968 tem = force_into_ssa_name (&gsi, tem, false);
2969 }
2970 else
2971 {
2972 gsi = gsi_after_labels (gimple_bb (stmt));
2973 if (!gsi_end_p (gsi))
2974 uid = gimple_uid (gsi_stmt (gsi));
2975 else
2976 {
2977 gsi = gsi_start_bb (gimple_bb (stmt));
2978 uid = 1;
2979 while (!gsi_end_p (gsi))
2980 {
2981 uid = gimple_uid (gsi_stmt (gsi));
2982 gsi_next (&gsi);
2983 }
2984 }
2985 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2986 tem = force_into_ssa_name (&gsi, tem, true);
2987 if (gsi_end_p (gsi))
2988 gsi = gsi_last_bb (gimple_bb (stmt));
2989 else
2990 gsi_prev (&gsi);
2991 }
2992 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2993 if (gimple_uid (gsi_stmt (gsi)))
2994 break;
2995 else
2996 gimple_set_uid (gsi_stmt (gsi), uid);
2997
2998 oe->op = tem;
2999 range->exp = exp;
3000 range->low = low;
3001 range->high = high;
3002 range->in_p = in_p;
3003 range->strict_overflow_p = false;
3004
3005 for (i = 0; i < count; i++)
3006 {
3007 if (otherrange)
3008 range = otherrange + i;
3009 else
3010 range = otherrangep[i];
3011 oe = (*ops)[range->idx];
3012 /* Now change all the other range test immediate uses, so that
3013 those tests will be optimized away. */
3014 if (opcode == ERROR_MARK)
3015 {
3016 if (oe->op)
3017 oe->op = build_int_cst (TREE_TYPE (oe->op),
3018 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3019 else
3020 oe->op = (oe->rank == BIT_IOR_EXPR
3021 ? boolean_false_node : boolean_true_node);
3022 }
3023 else
3024 oe->op = error_mark_node;
3025 range->exp = NULL_TREE;
3026 range->low = NULL_TREE;
3027 range->high = NULL_TREE;
3028 }
3029 return true;
3030 }
3031
3032 /* Optimize X == CST1 || X == CST2
3033 if popcount (CST1 ^ CST2) == 1 into
3034 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3035 Similarly for ranges. E.g.
3036 X != 2 && X != 3 && X != 10 && X != 11
3037 will be transformed by the previous optimization into
3038 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3039 and this loop can transform that into
3040 !(((X & ~8) - 2U) <= 1U). */
3041
3042 static bool
3043 optimize_range_tests_xor (enum tree_code opcode, tree type,
3044 tree lowi, tree lowj, tree highi, tree highj,
3045 vec<operand_entry *> *ops,
3046 struct range_entry *rangei,
3047 struct range_entry *rangej)
3048 {
3049 tree lowxor, highxor, tem, exp;
3050 /* Check lowi ^ lowj == highi ^ highj and
3051 popcount (lowi ^ lowj) == 1. */
3052 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3053 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3054 return false;
3055 if (!integer_pow2p (lowxor))
3056 return false;
3057 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3058 if (!tree_int_cst_equal (lowxor, highxor))
3059 return false;
3060
3061 exp = rangei->exp;
3062 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3063 int prec = GET_MODE_PRECISION (mode);
3064 if (TYPE_PRECISION (type) < prec
3065 || (wi::to_wide (TYPE_MIN_VALUE (type))
3066 != wi::min_value (prec, TYPE_SIGN (type)))
3067 || (wi::to_wide (TYPE_MAX_VALUE (type))
3068 != wi::max_value (prec, TYPE_SIGN (type))))
3069 {
3070 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3071 exp = fold_convert (type, exp);
3072 lowxor = fold_convert (type, lowxor);
3073 lowi = fold_convert (type, lowi);
3074 highi = fold_convert (type, highi);
3075 }
3076 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3077 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3078 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3079 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3080 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3081 NULL, rangei->in_p, lowj, highj,
3082 rangei->strict_overflow_p
3083 || rangej->strict_overflow_p))
3084 return true;
3085 return false;
3086 }
3087
3088 /* Optimize X == CST1 || X == CST2
3089 if popcount (CST2 - CST1) == 1 into
3090 ((X - CST1) & ~(CST2 - CST1)) == 0.
3091 Similarly for ranges. E.g.
3092 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3093 || X == 75 || X == 45
3094 will be transformed by the previous optimization into
3095 (X - 43U) <= 3U || (X - 75U) <= 3U
3096 and this loop can transform that into
3097 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3098 static bool
3099 optimize_range_tests_diff (enum tree_code opcode, tree type,
3100 tree lowi, tree lowj, tree highi, tree highj,
3101 vec<operand_entry *> *ops,
3102 struct range_entry *rangei,
3103 struct range_entry *rangej)
3104 {
3105 tree tem1, tem2, mask;
3106 /* Check highi - lowi == highj - lowj. */
3107 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3108 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3109 return false;
3110 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3111 if (!tree_int_cst_equal (tem1, tem2))
3112 return false;
3113 /* Check popcount (lowj - lowi) == 1. */
3114 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3115 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3116 return false;
3117 if (!integer_pow2p (tem1))
3118 return false;
3119
3120 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3121 int prec = GET_MODE_PRECISION (mode);
3122 if (TYPE_PRECISION (type) < prec
3123 || (wi::to_wide (TYPE_MIN_VALUE (type))
3124 != wi::min_value (prec, TYPE_SIGN (type)))
3125 || (wi::to_wide (TYPE_MAX_VALUE (type))
3126 != wi::max_value (prec, TYPE_SIGN (type))))
3127 type = build_nonstandard_integer_type (prec, 1);
3128 else
3129 type = unsigned_type_for (type);
3130 tem1 = fold_convert (type, tem1);
3131 tem2 = fold_convert (type, tem2);
3132 lowi = fold_convert (type, lowi);
3133 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3134 tem1 = fold_build2 (MINUS_EXPR, type,
3135 fold_convert (type, rangei->exp), lowi);
3136 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3137 lowj = build_int_cst (type, 0);
3138 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3139 NULL, rangei->in_p, lowj, tem2,
3140 rangei->strict_overflow_p
3141 || rangej->strict_overflow_p))
3142 return true;
3143 return false;
3144 }
3145
3146 /* Perform the common checks for optimize_range_tests_xor and
3147 optimize_range_tests_diff.
3148 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
3149 otherwise call optimize_range_tests_diff. */
3150
3151 static bool
3152 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3153 bool optimize_xor, vec<operand_entry *> *ops,
3154 struct range_entry *ranges)
3155 {
3156 int i, j;
3157 bool any_changes = false;
3158 for (i = first; i < length; i++)
3159 {
3160 tree lowi, highi, lowj, highj, type, tem;
3161
3162 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3163 continue;
3164 type = TREE_TYPE (ranges[i].exp);
3165 if (!INTEGRAL_TYPE_P (type))
3166 continue;
3167 lowi = ranges[i].low;
3168 if (lowi == NULL_TREE)
3169 lowi = TYPE_MIN_VALUE (type);
3170 highi = ranges[i].high;
3171 if (highi == NULL_TREE)
3172 continue;
3173 for (j = i + 1; j < length && j < i + 64; j++)
3174 {
3175 bool changes;
3176 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3177 continue;
3178 lowj = ranges[j].low;
3179 if (lowj == NULL_TREE)
3180 continue;
3181 highj = ranges[j].high;
3182 if (highj == NULL_TREE)
3183 highj = TYPE_MAX_VALUE (type);
3184 /* Check lowj > highi. */
3185 tem = fold_binary (GT_EXPR, boolean_type_node,
3186 lowj, highi);
3187 if (tem == NULL_TREE || !integer_onep (tem))
3188 continue;
3189 if (optimize_xor)
3190 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3191 highi, highj, ops,
3192 ranges + i, ranges + j);
3193 else
3194 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3195 highi, highj, ops,
3196 ranges + i, ranges + j);
3197 if (changes)
3198 {
3199 any_changes = true;
3200 break;
3201 }
3202 }
3203 }
3204 return any_changes;
3205 }
3206
3207 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3208 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3209 EXP on success, NULL otherwise. */
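/* For illustration, using the constants from the example further below:
   with prec == 64, TOTALLOW == 43, LOW == 75 and HIGH == 78, the initial
   mask covers bits 0..3 and is then shifted left by 75 - 43 == 32, so the
   returned mask has bits 32..35 set.  */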
3210
3211 static tree
3212 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3213 wide_int *mask, tree *totallowp)
3214 {
3215 tree tem = int_const_binop (MINUS_EXPR, high, low);
3216 if (tem == NULL_TREE
3217 || TREE_CODE (tem) != INTEGER_CST
3218 || TREE_OVERFLOW (tem)
3219 || tree_int_cst_sgn (tem) == -1
3220 || compare_tree_int (tem, prec) != -1)
3221 return NULL_TREE;
3222
3223 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3224 *mask = wi::shifted_mask (0, max, false, prec);
3225 if (TREE_CODE (exp) == BIT_AND_EXPR
3226 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3227 {
3228 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3229 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3230 if (wi::popcount (msk) == 1
3231 && wi::ltu_p (msk, prec - max))
3232 {
3233 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3234 max += msk.to_uhwi ();
3235 exp = TREE_OPERAND (exp, 0);
3236 if (integer_zerop (low)
3237 && TREE_CODE (exp) == PLUS_EXPR
3238 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3239 {
3240 tree ret = TREE_OPERAND (exp, 0);
3241 STRIP_NOPS (ret);
3242 widest_int bias
3243 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3244 TYPE_PRECISION (TREE_TYPE (low))));
3245 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3246 if (totallowp)
3247 {
3248 *totallowp = tbias;
3249 return ret;
3250 }
3251 else if (!tree_int_cst_lt (totallow, tbias))
3252 return NULL_TREE;
3253 bias = wi::to_widest (tbias);
3254 bias -= wi::to_widest (totallow);
3255 if (bias >= 0 && bias < prec - max)
3256 {
3257 *mask = wi::lshift (*mask, bias);
3258 return ret;
3259 }
3260 }
3261 }
3262 }
3263 if (totallowp)
3264 return exp;
3265 if (!tree_int_cst_lt (totallow, low))
3266 return exp;
3267 tem = int_const_binop (MINUS_EXPR, low, totallow);
3268 if (tem == NULL_TREE
3269 || TREE_CODE (tem) != INTEGER_CST
3270 || TREE_OVERFLOW (tem)
3271 || compare_tree_int (tem, prec - max) == 1)
3272 return NULL_TREE;
3273
3274 *mask = wi::lshift (*mask, wi::to_widest (tem));
3275 return exp;
3276 }
3277
3278 /* Attempt to optimize small range tests using bit test.
3279 E.g.
3280 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3281 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3282 has been optimized by the earlier optimizations into:
3283 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3284 As all the 43 through 82 range is less than 64 numbers,
3285 for 64-bit word targets optimize that into:
3286 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3287
3288 static bool
3289 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3290 vec<operand_entry *> *ops,
3291 struct range_entry *ranges)
3292 {
3293 int i, j;
3294 bool any_changes = false;
3295 int prec = GET_MODE_BITSIZE (word_mode);
3296 auto_vec<struct range_entry *, 64> candidates;
3297
3298 for (i = first; i < length - 1; i++)
3299 {
3300 tree lowi, highi, lowj, highj, type;
3301
3302 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3303 continue;
3304 type = TREE_TYPE (ranges[i].exp);
3305 if (!INTEGRAL_TYPE_P (type))
3306 continue;
3307 lowi = ranges[i].low;
3308 if (lowi == NULL_TREE)
3309 lowi = TYPE_MIN_VALUE (type);
3310 highi = ranges[i].high;
3311 if (highi == NULL_TREE)
3312 continue;
3313 wide_int mask;
3314 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3315 highi, &mask, &lowi);
3316 if (exp == NULL_TREE)
3317 continue;
3318 bool strict_overflow_p = ranges[i].strict_overflow_p;
3319 candidates.truncate (0);
3320 int end = MIN (i + 64, length);
3321 for (j = i + 1; j < end; j++)
3322 {
3323 tree exp2;
3324 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3325 continue;
3326 if (ranges[j].exp == exp)
3327 ;
3328 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3329 {
3330 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3331 if (exp2 == exp)
3332 ;
3333 else if (TREE_CODE (exp2) == PLUS_EXPR)
3334 {
3335 exp2 = TREE_OPERAND (exp2, 0);
3336 STRIP_NOPS (exp2);
3337 if (exp2 != exp)
3338 continue;
3339 }
3340 else
3341 continue;
3342 }
3343 else
3344 continue;
3345 lowj = ranges[j].low;
3346 if (lowj == NULL_TREE)
3347 continue;
3348 highj = ranges[j].high;
3349 if (highj == NULL_TREE)
3350 highj = TYPE_MAX_VALUE (type);
3351 wide_int mask2;
3352 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3353 highj, &mask2, NULL);
3354 if (exp2 != exp)
3355 continue;
3356 mask |= mask2;
3357 strict_overflow_p |= ranges[j].strict_overflow_p;
3358 candidates.safe_push (&ranges[j]);
3359 }
3360
3361 /* If every possible relative value of the expression is a valid shift
3362 amount, then we can merge the entry test in the bit test. In this
3363 case, if we would need otherwise 2 or more comparisons, then use
3364 the bit test; in the other cases, the threshold is 3 comparisons. */
3365 bool entry_test_needed;
3366 value_range r;
3367 if (TREE_CODE (exp) == SSA_NAME
3368 && get_range_query (cfun)->range_of_expr (r, exp)
3369 && r.kind () == VR_RANGE
3370 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3371 {
3372 wide_int min = r.lower_bound ();
3373 wide_int ilowi = wi::to_wide (lowi);
3374 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3375 {
3376 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3377 mask = wi::lshift (mask, ilowi - min);
3378 }
3379 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3380 {
3381 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3382 mask = wi::lrshift (mask, min - ilowi);
3383 }
3384 entry_test_needed = false;
3385 }
3386 else
3387 entry_test_needed = true;
3388 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3389 {
3390 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3391 wi::to_widest (lowi)
3392 + prec - 1 - wi::clz (mask));
3393 operand_entry *oe = (*ops)[ranges[i].idx];
3394 tree op = oe->op;
3395 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3396 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3397 location_t loc = gimple_location (stmt);
3398 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3399
3400 /* See if it isn't cheaper to pretend the minimum value of the
3401 range is 0, if the maximum value is small enough.
3402 We can then avoid the subtraction of the minimum value, but the
3403 mask constant could perhaps be more expensive. */
3404 if (compare_tree_int (lowi, 0) > 0
3405 && compare_tree_int (high, prec) < 0)
3406 {
3407 int cost_diff;
3408 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3409 rtx reg = gen_raw_REG (word_mode, 10000);
3410 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3411 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3412 GEN_INT (-m)),
3413 word_mode, speed_p);
3414 rtx r = immed_wide_int_const (mask, word_mode);
3415 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3416 word_mode, speed_p);
3417 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3418 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3419 word_mode, speed_p);
3420 if (cost_diff > 0)
3421 {
3422 mask = wi::lshift (mask, m);
3423 lowi = build_zero_cst (TREE_TYPE (lowi));
3424 }
3425 }
3426
3427 tree tem;
3428 if (entry_test_needed)
3429 {
3430 tem = build_range_check (loc, optype, unshare_expr (exp),
3431 false, lowi, high);
3432 if (tem == NULL_TREE || is_gimple_val (tem))
3433 continue;
3434 }
3435 else
3436 tem = NULL_TREE;
3437 tree etype = unsigned_type_for (TREE_TYPE (exp));
3438 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3439 fold_convert_loc (loc, etype, exp),
3440 fold_convert_loc (loc, etype, lowi));
3441 exp = fold_convert_loc (loc, integer_type_node, exp);
3442 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3443 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3444 build_int_cst (word_type, 1), exp);
3445 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3446 wide_int_to_tree (word_type, mask));
3447 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3448 build_zero_cst (word_type));
3449 if (is_gimple_val (exp))
3450 continue;
3451
3452 /* The shift might have undefined behavior if TEM is true,
3453 but reassociate_bb isn't prepared to have basic blocks
3454 split when it is running. So, temporarily emit the code
3455 with BIT_IOR_EXPR instead of &&, and fix it up in
3456 branch_fixup. */
3457 gimple_seq seq = NULL;
3458 if (tem)
3459 {
3460 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3461 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3462 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3463 }
3464 gimple_seq seq2;
3465 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3466 gimple_seq_add_seq_without_update (&seq, seq2);
3467 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3468 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3469 if (tem)
3470 {
3471 gimple *g = gimple_build_assign (make_ssa_name (optype),
3472 BIT_IOR_EXPR, tem, exp);
3473 gimple_set_location (g, loc);
3474 gimple_seq_add_stmt_without_update (&seq, g);
3475 exp = gimple_assign_lhs (g);
3476 }
3477 tree val = build_zero_cst (optype);
3478 if (update_range_test (&ranges[i], NULL, candidates.address (),
3479 candidates.length (), opcode, ops, exp,
3480 seq, false, val, val, strict_overflow_p))
3481 {
3482 any_changes = true;
3483 if (tem)
3484 reassoc_branch_fixups.safe_push (tem);
3485 }
3486 else
3487 gimple_seq_discard (seq);
3488 }
3489 }
3490 return any_changes;
3491 }
3492
3493 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3494 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3495 Also, handle x < C && y < C && z < C where C is power of two as
3496 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3497 as (x | y | z) < 0. */
3498
3499 static bool
3500 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3501 vec<operand_entry *> *ops,
3502 struct range_entry *ranges)
3503 {
3504 int i;
3505 unsigned int b;
3506 bool any_changes = false;
3507 auto_vec<int, 128> buckets;
3508 auto_vec<int, 32> chains;
3509 auto_vec<struct range_entry *, 32> candidates;
3510
3511 for (i = first; i < length; i++)
3512 {
3513 int idx;
3514
3515 if (ranges[i].exp == NULL_TREE
3516 || TREE_CODE (ranges[i].exp) != SSA_NAME
3517 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3518 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3519 continue;
3520
3521 if (ranges[i].low != NULL_TREE
3522 && ranges[i].high != NULL_TREE
3523 && ranges[i].in_p
3524 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3525 {
3526 idx = !integer_zerop (ranges[i].low);
3527 if (idx && !integer_all_onesp (ranges[i].low))
3528 continue;
3529 }
3530 else if (ranges[i].high != NULL_TREE
3531 && TREE_CODE (ranges[i].high) == INTEGER_CST
3532 && ranges[i].in_p)
3533 {
3534 wide_int w = wi::to_wide (ranges[i].high);
3535 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3536 int l = wi::clz (w);
3537 idx = 2;
3538 if (l <= 0
3539 || l >= prec
3540 || w != wi::mask (prec - l, false, prec))
3541 continue;
3542 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3543 && ranges[i].low == NULL_TREE)
3544 || (ranges[i].low
3545 && integer_zerop (ranges[i].low))))
3546 continue;
3547 }
3548 else if (ranges[i].high == NULL_TREE
3549 && ranges[i].low != NULL_TREE
3550 /* Perform this optimization only in the last
3551 reassoc pass, as it interferes with the reassociation
3552 itself, and could also interfere with VRP etc., which might not
3553 be able to virtually undo the optimization. */
3554 && !reassoc_insert_powi_p
3555 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3556 && integer_zerop (ranges[i].low))
3557 idx = 3;
3558 else
3559 continue;
3560
3561 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3562 if (buckets.length () <= b)
3563 buckets.safe_grow_cleared (b + 1, true);
3564 if (chains.length () <= (unsigned) i)
3565 chains.safe_grow (i + 1, true);
3566 chains[i] = buckets[b];
3567 buckets[b] = i + 1;
3568 }
3569
3570 FOR_EACH_VEC_ELT (buckets, b, i)
3571 if (i && chains[i - 1])
3572 {
3573 int j, k = i;
3574 if ((b % 4) == 2)
3575 {
3576 /* When ranges[X - 1].high + 1 is a power of two,
3577 we need to process the same bucket up to
3578 	       precision - 1 times, each time splitting the entries
3579 with the same high bound into one chain and the
3580 rest into another one to be processed later. */
3581 int this_prev = i;
3582 int other_prev = 0;
3583 for (j = chains[i - 1]; j; j = chains[j - 1])
3584 {
3585 if (tree_int_cst_equal (ranges[i - 1].high,
3586 ranges[j - 1].high))
3587 {
3588 chains[this_prev - 1] = j;
3589 this_prev = j;
3590 }
3591 else if (other_prev == 0)
3592 {
3593 buckets[b] = j;
3594 other_prev = j;
3595 }
3596 else
3597 {
3598 chains[other_prev - 1] = j;
3599 other_prev = j;
3600 }
3601 }
3602 chains[this_prev - 1] = 0;
3603 if (other_prev)
3604 chains[other_prev - 1] = 0;
3605 if (chains[i - 1] == 0)
3606 {
3607 if (other_prev)
3608 b--;
3609 continue;
3610 }
3611 }
3612 for (j = chains[i - 1]; j; j = chains[j - 1])
3613 {
3614 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3615 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3616 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3617 k = j;
3618 }
3619 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3620 tree type2 = NULL_TREE;
3621 bool strict_overflow_p = false;
3622 candidates.truncate (0);
3623 for (j = i; j; j = chains[j - 1])
3624 {
3625 tree type = TREE_TYPE (ranges[j - 1].exp);
3626 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3627 if ((b % 4) == 3)
3628 {
3629 /* For the signed < 0 cases, the types should be
3630 		 really compatible (all signed with the same precision);
3631 		 instead put ranges that have a different in_p from
3632 		 k first.  */
3633 if (!useless_type_conversion_p (type1, type))
3634 continue;
3635 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3636 candidates.safe_push (&ranges[j - 1]);
3637 type2 = type1;
3638 continue;
3639 }
3640 if (j == k
3641 || useless_type_conversion_p (type1, type))
3642 ;
3643 else if (type2 == NULL_TREE
3644 || useless_type_conversion_p (type2, type))
3645 {
3646 if (type2 == NULL_TREE)
3647 type2 = type;
3648 candidates.safe_push (&ranges[j - 1]);
3649 }
3650 }
3651 unsigned l = candidates.length ();
3652 for (j = i; j; j = chains[j - 1])
3653 {
3654 tree type = TREE_TYPE (ranges[j - 1].exp);
3655 if (j == k)
3656 continue;
3657 if ((b % 4) == 3)
3658 {
3659 if (!useless_type_conversion_p (type1, type))
3660 continue;
3661 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3662 candidates.safe_push (&ranges[j - 1]);
3663 continue;
3664 }
3665 if (useless_type_conversion_p (type1, type))
3666 ;
3667 else if (type2 == NULL_TREE
3668 || useless_type_conversion_p (type2, type))
3669 continue;
3670 candidates.safe_push (&ranges[j - 1]);
3671 }
3672 gimple_seq seq = NULL;
3673 tree op = NULL_TREE;
3674 unsigned int id;
3675 struct range_entry *r;
3676 candidates.safe_push (&ranges[k - 1]);
3677 FOR_EACH_VEC_ELT (candidates, id, r)
3678 {
3679 gimple *g;
3680 enum tree_code code;
3681 if (id == 0)
3682 {
3683 op = r->exp;
3684 continue;
3685 }
3686 if (id == l)
3687 {
3688 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3689 g = gimple_build_assign (make_ssa_name (type1), code, op);
3690 gimple_seq_add_stmt_without_update (&seq, g);
3691 op = gimple_assign_lhs (g);
3692 }
3693 tree type = TREE_TYPE (r->exp);
3694 tree exp = r->exp;
3695 if (id >= l && !useless_type_conversion_p (type1, type))
3696 {
3697 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3698 gimple_seq_add_stmt_without_update (&seq, g);
3699 exp = gimple_assign_lhs (g);
3700 }
3701 if ((b % 4) == 3)
3702 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3703 else
3704 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3705 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3706 code, op, exp);
3707 gimple_seq_add_stmt_without_update (&seq, g);
3708 op = gimple_assign_lhs (g);
3709 }
3710 candidates.pop ();
3711 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3712 candidates.length (), opcode, ops, op,
3713 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3714 ranges[k - 1].high, strict_overflow_p))
3715 any_changes = true;
3716 else
3717 gimple_seq_discard (seq);
3718 if ((b % 4) == 2 && buckets[b] != i)
3719 /* There is more work to do for this bucket. */
3720 b--;
3721 }
3722
3723 return any_changes;
3724 }
3725
3726 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3727 a >= 0 && a < b into (unsigned) a < (unsigned) b
3728 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
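/* An illustrative sketch (signed int a and b, names only for exposition),
   assuming b is already known to be non-negative at this point:

     if (a >= 0 && a < b)
       ...

   can be performed with a single unsigned comparison

     if ((unsigned) a < (unsigned) b)
       ...

   because a negative a wraps to a large unsigned value that can never be
   below the non-negative b.  */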
3729
3730 static bool
3731 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3732 vec<operand_entry *> *ops,
3733 struct range_entry *ranges,
3734 basic_block first_bb)
3735 {
3736 int i;
3737 bool any_changes = false;
3738 hash_map<tree, int> *map = NULL;
3739
3740 for (i = first; i < length; i++)
3741 {
3742 if (ranges[i].exp == NULL_TREE
3743 || TREE_CODE (ranges[i].exp) != SSA_NAME
3744 || !ranges[i].in_p)
3745 continue;
3746
3747 tree type = TREE_TYPE (ranges[i].exp);
3748 if (!INTEGRAL_TYPE_P (type)
3749 || TYPE_UNSIGNED (type)
3750 || ranges[i].low == NULL_TREE
3751 || !integer_zerop (ranges[i].low)
3752 || ranges[i].high != NULL_TREE)
3753 continue;
3754 /* EXP >= 0 here. */
3755 if (map == NULL)
3756 map = new hash_map <tree, int>;
3757 map->put (ranges[i].exp, i);
3758 }
3759
3760 if (map == NULL)
3761 return false;
3762
3763 for (i = 0; i < length; i++)
3764 {
3765 bool in_p = ranges[i].in_p;
3766 if (ranges[i].low == NULL_TREE
3767 || ranges[i].high == NULL_TREE)
3768 continue;
3769 if (!integer_zerop (ranges[i].low)
3770 || !integer_zerop (ranges[i].high))
3771 {
3772 if (ranges[i].exp
3773 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3774 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3775 && integer_onep (ranges[i].low)
3776 && integer_onep (ranges[i].high))
3777 in_p = !in_p;
3778 else
3779 continue;
3780 }
3781
3782 gimple *stmt;
3783 tree_code ccode;
3784 tree rhs1, rhs2;
3785 if (ranges[i].exp)
3786 {
3787 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3788 continue;
3789 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3790 if (!is_gimple_assign (stmt))
3791 continue;
3792 ccode = gimple_assign_rhs_code (stmt);
3793 rhs1 = gimple_assign_rhs1 (stmt);
3794 rhs2 = gimple_assign_rhs2 (stmt);
3795 }
3796 else
3797 {
3798 operand_entry *oe = (*ops)[ranges[i].idx];
3799 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3800 if (gimple_code (stmt) != GIMPLE_COND)
3801 continue;
3802 ccode = gimple_cond_code (stmt);
3803 rhs1 = gimple_cond_lhs (stmt);
3804 rhs2 = gimple_cond_rhs (stmt);
3805 }
3806
3807 if (TREE_CODE (rhs1) != SSA_NAME
3808 || rhs2 == NULL_TREE
3809 || TREE_CODE (rhs2) != SSA_NAME)
3810 continue;
3811
3812 switch (ccode)
3813 {
3814 case GT_EXPR:
3815 case GE_EXPR:
3816 case LT_EXPR:
3817 case LE_EXPR:
3818 break;
3819 default:
3820 continue;
3821 }
3822 if (in_p)
3823 ccode = invert_tree_comparison (ccode, false);
3824 switch (ccode)
3825 {
3826 case GT_EXPR:
3827 case GE_EXPR:
3828 std::swap (rhs1, rhs2);
3829 ccode = swap_tree_comparison (ccode);
3830 break;
3831 case LT_EXPR:
3832 case LE_EXPR:
3833 break;
3834 default:
3835 gcc_unreachable ();
3836 }
3837
3838 int *idx = map->get (rhs1);
3839 if (idx == NULL)
3840 continue;
3841
3842 /* maybe_optimize_range_tests allows statements without side-effects
3843 in the basic blocks as long as they are consumed in the same bb.
3844 Make sure rhs2's def stmt is not among them, otherwise we can't
3845 	 safely use get_nonzero_bits on it.  E.g. in:
3846 # RANGE [-83, 1] NONZERO 173
3847 # k_32 = PHI <k_47(13), k_12(9)>
3848 ...
3849 if (k_32 >= 0)
3850 goto <bb 5>; [26.46%]
3851 else
3852 goto <bb 9>; [73.54%]
3853
3854 <bb 5> [local count: 140323371]:
3855 # RANGE [0, 1] NONZERO 1
3856 _5 = (int) k_32;
3857 # RANGE [0, 4] NONZERO 4
3858 _21 = _5 << 2;
3859 # RANGE [0, 4] NONZERO 4
3860 iftmp.0_44 = (char) _21;
3861 if (k_32 < iftmp.0_44)
3862 goto <bb 6>; [84.48%]
3863 else
3864 goto <bb 9>; [15.52%]
3865 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3866 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3867 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3868 those stmts even for negative k_32 and the value ranges would be no
3869 longer guaranteed and so the optimization would be invalid. */
3870 while (opcode == ERROR_MARK)
3871 {
3872 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3873 basic_block bb2 = gimple_bb (g);
3874 if (bb2
3875 && bb2 != first_bb
3876 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3877 {
3878 /* As an exception, handle a few common cases. */
3879 if (gimple_assign_cast_p (g)
3880 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3881 {
3882 tree op0 = gimple_assign_rhs1 (g);
3883 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3884 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3885 > TYPE_PRECISION (TREE_TYPE (op0))))
3886 /* Zero-extension is always ok. */
3887 break;
3888 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3889 == TYPE_PRECISION (TREE_TYPE (op0))
3890 && TREE_CODE (op0) == SSA_NAME)
3891 {
3892 /* Cast from signed to unsigned or vice versa. Retry
3893 with the op0 as new rhs2. */
3894 rhs2 = op0;
3895 continue;
3896 }
3897 }
3898 else if (is_gimple_assign (g)
3899 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3900 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3901 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3902 /* Masking with INTEGER_CST with MSB clear is always ok
3903 too. */
3904 break;
3905 rhs2 = NULL_TREE;
3906 }
3907 break;
3908 }
3909 if (rhs2 == NULL_TREE)
3910 continue;
3911
3912 wide_int nz = get_nonzero_bits (rhs2);
3913 if (wi::neg_p (nz))
3914 continue;
3915
3916 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3917 	 and RHS2 is known to be >= 0.  */
3918 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3919
3920 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3921 if ((ranges[*idx].strict_overflow_p
3922 || ranges[i].strict_overflow_p)
3923 && issue_strict_overflow_warning (wc))
3924 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3925 "assuming signed overflow does not occur "
3926 "when simplifying range test");
3927
3928 if (dump_file && (dump_flags & TDF_DETAILS))
3929 {
3930 struct range_entry *r = &ranges[*idx];
3931 fprintf (dump_file, "Optimizing range test ");
3932 print_generic_expr (dump_file, r->exp);
3933 fprintf (dump_file, " +[");
3934 print_generic_expr (dump_file, r->low);
3935 fprintf (dump_file, ", ");
3936 print_generic_expr (dump_file, r->high);
3937 fprintf (dump_file, "] and comparison ");
3938 print_generic_expr (dump_file, rhs1);
3939 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3940 print_generic_expr (dump_file, rhs2);
3941 fprintf (dump_file, "\n into (");
3942 print_generic_expr (dump_file, utype);
3943 fprintf (dump_file, ") ");
3944 print_generic_expr (dump_file, rhs1);
3945 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3946 print_generic_expr (dump_file, utype);
3947 fprintf (dump_file, ") ");
3948 print_generic_expr (dump_file, rhs2);
3949 fprintf (dump_file, "\n");
3950 }
3951
3952 operand_entry *oe = (*ops)[ranges[i].idx];
3953 ranges[i].in_p = 0;
3954 if (opcode == BIT_IOR_EXPR
3955 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3956 {
3957 ranges[i].in_p = 1;
3958 ccode = invert_tree_comparison (ccode, false);
3959 }
3960
3961 unsigned int uid = gimple_uid (stmt);
3962 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3963 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3964 gimple_set_uid (g, uid);
3965 rhs1 = gimple_assign_lhs (g);
3966 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3967 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3968 {
3969 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3970 gimple_set_uid (g, uid);
3971 rhs2 = gimple_assign_lhs (g);
3972 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3973 }
3974 if (tree_swap_operands_p (rhs1, rhs2))
3975 {
3976 std::swap (rhs1, rhs2);
3977 ccode = swap_tree_comparison (ccode);
3978 }
3979 if (gimple_code (stmt) == GIMPLE_COND)
3980 {
3981 gcond *c = as_a <gcond *> (stmt);
3982 gimple_cond_set_code (c, ccode);
3983 gimple_cond_set_lhs (c, rhs1);
3984 gimple_cond_set_rhs (c, rhs2);
3985 update_stmt (stmt);
3986 }
3987 else
3988 {
3989 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3990 if (!INTEGRAL_TYPE_P (ctype)
3991 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3992 && TYPE_PRECISION (ctype) != 1))
3993 ctype = boolean_type_node;
3994 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3995 gimple_set_uid (g, uid);
3996 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3997 if (oe->op && ctype != TREE_TYPE (oe->op))
3998 {
3999 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4000 NOP_EXPR, gimple_assign_lhs (g));
4001 gimple_set_uid (g, uid);
4002 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4003 }
4004 ranges[i].exp = gimple_assign_lhs (g);
4005 oe->op = ranges[i].exp;
4006 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4007 ranges[i].high = ranges[i].low;
4008 }
4009 ranges[i].strict_overflow_p = false;
4010 oe = (*ops)[ranges[*idx].idx];
4011 /* Now change all the other range test immediate uses, so that
4012 those tests will be optimized away. */
4013 if (opcode == ERROR_MARK)
4014 {
4015 if (oe->op)
4016 oe->op = build_int_cst (TREE_TYPE (oe->op),
4017 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4018 else
4019 oe->op = (oe->rank == BIT_IOR_EXPR
4020 ? boolean_false_node : boolean_true_node);
4021 }
4022 else
4023 oe->op = error_mark_node;
4024 ranges[*idx].exp = NULL_TREE;
4025 ranges[*idx].low = NULL_TREE;
4026 ranges[*idx].high = NULL_TREE;
4027 any_changes = true;
4028 }
4029
4030 delete map;
4031 return any_changes;
4032 }
4033
4034 /* Optimize range tests, similarly to how fold_range_test optimizes
4035    them on trees.  The tree code for the binary
4036 operation between all the operands is OPCODE.
4037 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4038 maybe_optimize_range_tests for inter-bb range optimization.
4039 In that case if oe->op is NULL, oe->id is bb->index whose
4040 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4041 the actual opcode.
4042 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
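/* A small illustrative example (names only for exposition): the chain

     if (a == 1 || a == 2 || a == 3 || a == 4)
       ...

   describes the single range [1, 4], so after merging the ranges it can
   be tested with roughly one unsigned comparison,

     if ((unsigned) a - 1 <= 3)
       ...

   which is the kind of rewrite update_range_test arranges via
   build_range_check.  */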
4043
4044 static bool
4045 optimize_range_tests (enum tree_code opcode,
4046 vec<operand_entry *> *ops, basic_block first_bb)
4047 {
4048 unsigned int length = ops->length (), i, j, first;
4049 operand_entry *oe;
4050 struct range_entry *ranges;
4051 bool any_changes = false;
4052
4053 if (length == 1)
4054 return false;
4055
4056 ranges = XNEWVEC (struct range_entry, length);
4057 for (i = 0; i < length; i++)
4058 {
4059 oe = (*ops)[i];
4060 ranges[i].idx = i;
4061 init_range_entry (ranges + i, oe->op,
4062 oe->op
4063 ? NULL
4064 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4065 /* For | invert it now, we will invert it again before emitting
4066 the optimized expression. */
4067 if (opcode == BIT_IOR_EXPR
4068 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4069 ranges[i].in_p = !ranges[i].in_p;
4070 }
4071
4072 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4073 for (i = 0; i < length; i++)
4074 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4075 break;
4076
4077 /* Try to merge ranges. */
4078 for (first = i; i < length; i++)
4079 {
4080 tree low = ranges[i].low;
4081 tree high = ranges[i].high;
4082 int in_p = ranges[i].in_p;
4083 bool strict_overflow_p = ranges[i].strict_overflow_p;
4084 int update_fail_count = 0;
4085
4086 for (j = i + 1; j < length; j++)
4087 {
4088 if (ranges[i].exp != ranges[j].exp)
4089 break;
4090 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4091 ranges[j].in_p, ranges[j].low, ranges[j].high))
4092 break;
4093 strict_overflow_p |= ranges[j].strict_overflow_p;
4094 }
4095
4096 if (j == i + 1)
4097 continue;
4098
4099 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4100 opcode, ops, ranges[i].exp, NULL, in_p,
4101 low, high, strict_overflow_p))
4102 {
4103 i = j - 1;
4104 any_changes = true;
4105 }
4106 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4107 while update_range_test would fail. */
4108 else if (update_fail_count == 64)
4109 i = j - 1;
4110 else
4111 ++update_fail_count;
4112 }
4113
4114 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4115 ops, ranges);
4116
4117 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4118 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4119 ops, ranges);
4120 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4121 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4122 ops, ranges);
4123 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4124 ranges, first_bb);
4125 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4126 ops, ranges);
4127
4128 if (any_changes && opcode != ERROR_MARK)
4129 {
4130 j = 0;
4131 FOR_EACH_VEC_ELT (*ops, i, oe)
4132 {
4133 if (oe->op == error_mark_node)
4134 continue;
4135 else if (i != j)
4136 (*ops)[j] = oe;
4137 j++;
4138 }
4139 ops->truncate (j);
4140 }
4141
4142 XDELETEVEC (ranges);
4143 return any_changes;
4144 }
4145
4146 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4147 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4148 otherwise the comparison code. TYPE is a return value that is set
4149    to the type of the comparison.  */
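/* Illustrative sketch of the canonical shape this matches (SSA names
   only for exposition):

     _1 = a_2 < b_3;
     var_4 = VEC_COND_EXPR <_1, { -1, ... }, { 0, ... }>;

   Here the returned code would be LT_EXPR, *LHS/*RHS the comparison
   operands, *RETS the comparison statement and *VCOND the VEC_COND_EXPR
   statement.  */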
4150
4151 static tree_code
4152 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4153 tree *lhs, tree *rhs, gassign **vcond)
4154 {
4155 if (TREE_CODE (var) != SSA_NAME)
4156 return ERROR_MARK;
4157
4158 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4159 if (stmt == NULL)
4160 return ERROR_MARK;
4161 if (vcond)
4162 *vcond = stmt;
4163
4164 /* ??? If we start creating more COND_EXPR, we could perform
4165 this same optimization with them. For now, simplify. */
4166 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4167 return ERROR_MARK;
4168
4169 tree cond = gimple_assign_rhs1 (stmt);
4170 tree_code cmp = TREE_CODE (cond);
4171 if (cmp != SSA_NAME)
4172 return ERROR_MARK;
4173
4174 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4175 if (assign == NULL
4176 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4177 return ERROR_MARK;
4178
4179 cmp = gimple_assign_rhs_code (assign);
4180 if (lhs)
4181 *lhs = gimple_assign_rhs1 (assign);
4182 if (rhs)
4183 *rhs = gimple_assign_rhs2 (assign);
4184
4185 /* ??? For now, allow only canonical true and false result vectors.
4186 We could expand this to other constants should the need arise,
4187 but at the moment we don't create them. */
4188 tree t = gimple_assign_rhs2 (stmt);
4189 tree f = gimple_assign_rhs3 (stmt);
4190 bool inv;
4191 if (integer_all_onesp (t))
4192 inv = false;
4193 else if (integer_all_onesp (f))
4194 {
4195 cmp = invert_tree_comparison (cmp, false);
4196 inv = true;
4197 }
4198 else
4199 return ERROR_MARK;
4200 if (!integer_zerop (f))
4201 return ERROR_MARK;
4202
4203 /* Success! */
4204 if (rets)
4205 *rets = assign;
4206 if (reti)
4207 *reti = inv;
4208 if (type)
4209 *type = TREE_TYPE (cond);
4210 return cmp;
4211 }
4212
4213 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4214 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
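/* For illustration (a sketch, names only for exposition): when two such
   masks over the same operands are |ed together,

     mask1_5 = VEC_COND_EXPR <a < b, { -1, ... }, { 0, ... }>;
     mask2_6 = VEC_COND_EXPR <a == b, { -1, ... }, { 0, ... }>;
     m_7 = mask1_5 | mask2_6;

   maybe_fold_or_comparisons can combine the two comparisons (here into
   a <= b); the condition of the first VEC_COND_EXPR is then replaced by
   the combined one and the second mask is dropped from the ops vector,
   so only

     mask1_5 = VEC_COND_EXPR <a <= b, { -1, ... }, { 0, ... }>;

   feeds the remaining chain.  */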
4215
4216 static bool
4217 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4218 {
4219 unsigned int length = ops->length (), i, j;
4220 bool any_changes = false;
4221
4222 if (length == 1)
4223 return false;
4224
4225 for (i = 0; i < length; ++i)
4226 {
4227 tree elt0 = (*ops)[i]->op;
4228
4229 gassign *stmt0, *vcond0;
4230 bool invert;
4231 tree type, lhs0, rhs0;
4232 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4233 &rhs0, &vcond0);
4234 if (cmp0 == ERROR_MARK)
4235 continue;
4236
4237 for (j = i + 1; j < length; ++j)
4238 {
4239 tree &elt1 = (*ops)[j]->op;
4240
4241 gassign *stmt1, *vcond1;
4242 tree lhs1, rhs1;
4243 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4244 &rhs1, &vcond1);
4245 if (cmp1 == ERROR_MARK)
4246 continue;
4247
4248 tree comb;
4249 if (opcode == BIT_AND_EXPR)
4250 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4251 cmp1, lhs1, rhs1);
4252 else if (opcode == BIT_IOR_EXPR)
4253 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4254 cmp1, lhs1, rhs1);
4255 else
4256 gcc_unreachable ();
4257 if (comb == NULL)
4258 continue;
4259
4260 /* Success! */
4261 if (dump_file && (dump_flags & TDF_DETAILS))
4262 {
4263 fprintf (dump_file, "Transforming ");
4264 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4265 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4266 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4267 fprintf (dump_file, " into ");
4268 print_generic_expr (dump_file, comb);
4269 fputc ('\n', dump_file);
4270 }
4271
4272 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4273 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4274 true, GSI_SAME_STMT);
4275 if (invert)
4276 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4277 gimple_assign_rhs3_ptr (vcond0));
4278 gimple_assign_set_rhs1 (vcond0, exp);
4279 update_stmt (vcond0);
4280
4281 elt1 = error_mark_node;
4282 any_changes = true;
4283 }
4284 }
4285
4286 if (any_changes)
4287 {
4288 operand_entry *oe;
4289 j = 0;
4290 FOR_EACH_VEC_ELT (*ops, i, oe)
4291 {
4292 if (oe->op == error_mark_node)
4293 continue;
4294 else if (i != j)
4295 (*ops)[j] = oe;
4296 j++;
4297 }
4298 ops->truncate (j);
4299 }
4300
4301 return any_changes;
4302 }
4303
4304 /* Return true if STMT is a cast like:
4305 <bb N>:
4306 ...
4307 _123 = (int) _234;
4308
4309 <bb M>:
4310 # _345 = PHI <_123(N), 1(...), 1(...)>
4311    where _234 has bool type, _123 has a single use and
4312 bb N has a single successor M. This is commonly used in
4313 the last block of a range test.
4314
4315    Also return true if STMT is a tcc_comparison like:
4316 <bb N>:
4317 ...
4318 _234 = a_2(D) == 2;
4319
4320 <bb M>:
4321 # _345 = PHI <_234(N), 1(...), 1(...)>
4322 _346 = (int) _345;
4323    where _234 has bool type, a single use and
4324 bb N has a single successor M. This is commonly used in
4325 the last block of a range test. */
4326
4327 static bool
4328 final_range_test_p (gimple *stmt)
4329 {
4330 basic_block bb, rhs_bb, lhs_bb;
4331 edge e;
4332 tree lhs, rhs;
4333 use_operand_p use_p;
4334 gimple *use_stmt;
4335
4336 if (!gimple_assign_cast_p (stmt)
4337 && (!is_gimple_assign (stmt)
4338 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4339 != tcc_comparison)))
4340 return false;
4341 bb = gimple_bb (stmt);
4342 if (!single_succ_p (bb))
4343 return false;
4344 e = single_succ_edge (bb);
4345 if (e->flags & EDGE_COMPLEX)
4346 return false;
4347
4348 lhs = gimple_assign_lhs (stmt);
4349 rhs = gimple_assign_rhs1 (stmt);
4350 if (gimple_assign_cast_p (stmt)
4351 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4352 || TREE_CODE (rhs) != SSA_NAME
4353 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4354 return false;
4355
4356 if (!gimple_assign_cast_p (stmt)
4357 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4358 return false;
4359
4360 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4361 if (!single_imm_use (lhs, &use_p, &use_stmt))
4362 return false;
4363
4364 if (gimple_code (use_stmt) != GIMPLE_PHI
4365 || gimple_bb (use_stmt) != e->dest)
4366 return false;
4367
4368 /* And that the rhs is defined in the same loop. */
4369 if (gimple_assign_cast_p (stmt))
4370 {
4371 if (TREE_CODE (rhs) != SSA_NAME
4372 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4373 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4374 return false;
4375 }
4376 else
4377 {
4378 if (TREE_CODE (lhs) != SSA_NAME
4379 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4380 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4381 return false;
4382 }
4383
4384 return true;
4385 }
4386
4387 /* Return true if BB is suitable basic block for inter-bb range test
4388 optimization. If BACKWARD is true, BB should be the only predecessor
4389 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4390 or compared with to find a common basic block to which all conditions
4391    branch if true resp. false.  If BACKWARD is false, TEST_BB should
4392 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4393 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4394 block points to an empty block that falls through into *OTHER_BB and
4395 the phi args match that path. */
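/* A typical accepted shape, sketched at the source level (names only for
   exposition):

     <BB>:      if (a_1 != 0) goto <TEST_BB>; else goto <OTHER_BB>;
     <TEST_BB>: if (b_2 != 0) goto ...;       else goto <OTHER_BB>;

   i.e. every block in the chain ends in a GIMPLE_COND with one edge into
   the next range-test block and the other edge into the common
   *OTHER_BB, and the PHI arguments in *OTHER_BB agree for the BB and
   TEST_BB edges.  */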
4396
4397 static bool
4398 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4399 bool *test_swapped_p, bool backward)
4400 {
4401 edge_iterator ei, ei2;
4402 edge e, e2;
4403 gimple *stmt;
4404 gphi_iterator gsi;
4405 bool other_edge_seen = false;
4406 bool is_cond;
4407
4408 if (test_bb == bb)
4409 return false;
4410 /* Check last stmt first. */
4411 stmt = last_stmt (bb);
4412 if (stmt == NULL
4413 || (gimple_code (stmt) != GIMPLE_COND
4414 && (backward || !final_range_test_p (stmt)))
4415 || gimple_visited_p (stmt)
4416 || stmt_could_throw_p (cfun, stmt)
4417 || *other_bb == bb)
4418 return false;
4419 is_cond = gimple_code (stmt) == GIMPLE_COND;
4420 if (is_cond)
4421 {
4422 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4423 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4424 to *OTHER_BB (if not set yet, try to find it out). */
4425 if (EDGE_COUNT (bb->succs) != 2)
4426 return false;
4427 FOR_EACH_EDGE (e, ei, bb->succs)
4428 {
4429 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4430 return false;
4431 if (e->dest == test_bb)
4432 {
4433 if (backward)
4434 continue;
4435 else
4436 return false;
4437 }
4438 if (e->dest == bb)
4439 return false;
4440 if (*other_bb == NULL)
4441 {
4442 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4443 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4444 return false;
4445 else if (e->dest == e2->dest)
4446 *other_bb = e->dest;
4447 if (*other_bb == NULL)
4448 return false;
4449 }
4450 if (e->dest == *other_bb)
4451 other_edge_seen = true;
4452 else if (backward)
4453 return false;
4454 }
4455 if (*other_bb == NULL || !other_edge_seen)
4456 return false;
4457 }
4458 else if (single_succ (bb) != *other_bb)
4459 return false;
4460
4461 /* Now check all PHIs of *OTHER_BB. */
4462 e = find_edge (bb, *other_bb);
4463 e2 = find_edge (test_bb, *other_bb);
4464 retry:;
4465 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4466 {
4467 gphi *phi = gsi.phi ();
4468 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4469 corresponding to BB and TEST_BB predecessor must be the same. */
4470 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4471 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4472 {
4473 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4474 one of the PHIs should have the lhs of the last stmt in
4475 that block as PHI arg and that PHI should have 0 or 1
4476 corresponding to it in all other range test basic blocks
4477 considered. */
4478 if (!is_cond)
4479 {
4480 if (gimple_phi_arg_def (phi, e->dest_idx)
4481 == gimple_assign_lhs (stmt)
4482 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4483 || integer_onep (gimple_phi_arg_def (phi,
4484 e2->dest_idx))))
4485 continue;
4486 }
4487 else
4488 {
4489 gimple *test_last = last_stmt (test_bb);
4490 if (gimple_code (test_last) == GIMPLE_COND)
4491 {
4492 if (backward ? e2->src != test_bb : e->src != bb)
4493 return false;
4494
4495 /* For last_bb, handle also:
4496 if (x_3(D) == 3)
4497 goto <bb 6>; [34.00%]
4498 else
4499 goto <bb 7>; [66.00%]
4500
4501 <bb 6> [local count: 79512730]:
4502
4503 <bb 7> [local count: 1073741824]:
4504 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4505 where bb 7 is *OTHER_BB, but the PHI values from the
4506 earlier bbs match the path through the empty bb
4507 in between. */
4508 edge e3;
4509 if (backward)
4510 e3 = EDGE_SUCC (test_bb,
4511 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4512 else
4513 e3 = EDGE_SUCC (bb,
4514 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4515 if (empty_block_p (e3->dest)
4516 && single_succ_p (e3->dest)
4517 && single_succ (e3->dest) == *other_bb
4518 && single_pred_p (e3->dest)
4519 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4520 {
4521 if (backward)
4522 e2 = single_succ_edge (e3->dest);
4523 else
4524 e = single_succ_edge (e3->dest);
4525 if (test_swapped_p)
4526 *test_swapped_p = true;
4527 goto retry;
4528 }
4529 }
4530 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4531 == gimple_assign_lhs (test_last)
4532 && (integer_zerop (gimple_phi_arg_def (phi,
4533 e->dest_idx))
4534 || integer_onep (gimple_phi_arg_def (phi,
4535 e->dest_idx))))
4536 continue;
4537 }
4538
4539 return false;
4540 }
4541 }
4542 return true;
4543 }
4544
4545 /* Return true if BB doesn't have side-effects that would disallow
4546 range test optimization, all SSA_NAMEs set in the bb are consumed
4547 in the bb and there are no PHIs. */
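/* E.g. (an illustrative sketch) a block containing only

     _5 = (int) x_2;
     _6 = _5 << 2;
     if (_6 != 0) goto ...; else goto ...;

   qualifies as long as _5 and _6 are not used outside the block, whereas
   a call, a possibly trapping assignment or a PHI node disqualifies it.  */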
4548
4549 bool
4550 no_side_effect_bb (basic_block bb)
4551 {
4552 gimple_stmt_iterator gsi;
4553 gimple *last;
4554
4555 if (!gimple_seq_empty_p (phi_nodes (bb)))
4556 return false;
4557 last = last_stmt (bb);
4558 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4559 {
4560 gimple *stmt = gsi_stmt (gsi);
4561 tree lhs;
4562 imm_use_iterator imm_iter;
4563 use_operand_p use_p;
4564
4565 if (is_gimple_debug (stmt))
4566 continue;
4567 if (gimple_has_side_effects (stmt))
4568 return false;
4569 if (stmt == last)
4570 return true;
4571 if (!is_gimple_assign (stmt))
4572 return false;
4573 lhs = gimple_assign_lhs (stmt);
4574 if (TREE_CODE (lhs) != SSA_NAME)
4575 return false;
4576 if (gimple_assign_rhs_could_trap_p (stmt))
4577 return false;
4578 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4579 {
4580 gimple *use_stmt = USE_STMT (use_p);
4581 if (is_gimple_debug (use_stmt))
4582 continue;
4583 if (gimple_bb (use_stmt) != bb)
4584 return false;
4585 }
4586 }
4587 return false;
4588 }
4589
4590 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4591 return true and fill in *OPS recursively. */
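/* Illustrative sketch (SSA names only for exposition): for

     _3 = _1 | _2;
     _5 = _3 | _4;

   calling get_ops on _5 with CODE == BIT_IOR_EXPR marks both statements
   visited and pushes operand entries for _1, _2 and _4 (assuming each of
   those has a single use and is not itself defined by a reassociable |),
   flattening the whole |-chain into one operand list.  */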
4592
4593 static bool
4594 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4595 class loop *loop)
4596 {
4597 gimple *stmt = SSA_NAME_DEF_STMT (var);
4598 tree rhs[2];
4599 int i;
4600
4601 if (!is_reassociable_op (stmt, code, loop))
4602 return false;
4603
4604 rhs[0] = gimple_assign_rhs1 (stmt);
4605 rhs[1] = gimple_assign_rhs2 (stmt);
4606 gimple_set_visited (stmt, true);
4607 for (i = 0; i < 2; i++)
4608 if (TREE_CODE (rhs[i]) == SSA_NAME
4609 && !get_ops (rhs[i], code, ops, loop)
4610 && has_single_use (rhs[i]))
4611 {
4612 operand_entry *oe = operand_entry_pool.allocate ();
4613
4614 oe->op = rhs[i];
4615 oe->rank = code;
4616 oe->id = 0;
4617 oe->count = 1;
4618 oe->stmt_to_insert = NULL;
4619 ops->safe_push (oe);
4620 }
4621 return true;
4622 }
4623
4624 /* Find the ops that were added by get_ops starting from VAR, see if
4625 they were changed during update_range_test and if yes, create new
4626 stmts. */
4627
4628 static tree
4629 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4630 unsigned int *pidx, class loop *loop)
4631 {
4632 gimple *stmt = SSA_NAME_DEF_STMT (var);
4633 tree rhs[4];
4634 int i;
4635
4636 if (!is_reassociable_op (stmt, code, loop))
4637 return NULL;
4638
4639 rhs[0] = gimple_assign_rhs1 (stmt);
4640 rhs[1] = gimple_assign_rhs2 (stmt);
4641 rhs[2] = rhs[0];
4642 rhs[3] = rhs[1];
4643 for (i = 0; i < 2; i++)
4644 if (TREE_CODE (rhs[i]) == SSA_NAME)
4645 {
4646 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4647 if (rhs[2 + i] == NULL_TREE)
4648 {
4649 if (has_single_use (rhs[i]))
4650 rhs[2 + i] = ops[(*pidx)++]->op;
4651 else
4652 rhs[2 + i] = rhs[i];
4653 }
4654 }
4655 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4656 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4657 {
4658 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4659 var = make_ssa_name (TREE_TYPE (var));
4660 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4661 rhs[2], rhs[3]);
4662 gimple_set_uid (g, gimple_uid (stmt));
4663 gimple_set_visited (g, true);
4664 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4665 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4666 if (fold_stmt_inplace (&gsi2))
4667 update_stmt (g);
4668 }
4669 return var;
4670 }
4671
4672 /* Structure to track the initial value passed to get_ops and
4673 the range in the ops vector for each basic block. */
4674
4675 struct inter_bb_range_test_entry
4676 {
4677 tree op;
4678 unsigned int first_idx, last_idx;
4679 };
4680
4681 /* Inter-bb range test optimization.
4682
4683 Returns TRUE if a gimple conditional is optimized to a true/false,
4684 otherwise return FALSE.
4685
4686 This indicates to the caller that it should run a CFG cleanup pass
4687 once reassociation is completed. */
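/* Illustrative sketch of the kind of inter-bb chain handled here (source
   level, names only for exposition):

     if (a == 0 || a == 3 || a == 7)
       foo ();

   gimplifies into several basic blocks, each ending in a GIMPLE_COND
   that either branches to the common destination or falls through to
   the next test; the individual conditions are collected into one ops
   vector and passed to optimize_range_tests with opcode ERROR_MARK.  */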
4688
4689 static bool
4690 maybe_optimize_range_tests (gimple *stmt)
4691 {
4692 basic_block first_bb = gimple_bb (stmt);
4693 basic_block last_bb = first_bb;
4694 basic_block other_bb = NULL;
4695 basic_block bb;
4696 edge_iterator ei;
4697 edge e;
4698 auto_vec<operand_entry *> ops;
4699 auto_vec<inter_bb_range_test_entry> bbinfo;
4700 bool any_changes = false;
4701 bool cfg_cleanup_needed = false;
4702
4703 /* Consider only basic blocks that end with GIMPLE_COND or
4704 a cast statement satisfying final_range_test_p. All
4705 but the last bb in the first_bb .. last_bb range
4706 should end with GIMPLE_COND. */
4707 if (gimple_code (stmt) == GIMPLE_COND)
4708 {
4709 if (EDGE_COUNT (first_bb->succs) != 2)
4710 return cfg_cleanup_needed;
4711 }
4712 else if (final_range_test_p (stmt))
4713 other_bb = single_succ (first_bb);
4714 else
4715 return cfg_cleanup_needed;
4716
4717 if (stmt_could_throw_p (cfun, stmt))
4718 return cfg_cleanup_needed;
4719
4720 /* As relative ordering of post-dominator sons isn't fixed,
4721 maybe_optimize_range_tests can be called first on any
4722 bb in the range we want to optimize. So, start searching
4723 backwards, if first_bb can be set to a predecessor. */
4724 while (single_pred_p (first_bb))
4725 {
4726 basic_block pred_bb = single_pred (first_bb);
4727 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4728 break;
4729 if (!no_side_effect_bb (first_bb))
4730 break;
4731 first_bb = pred_bb;
4732 }
4733 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4734 Before starting forward search in last_bb successors, find
4735 out the other_bb. */
4736 if (first_bb == last_bb)
4737 {
4738 other_bb = NULL;
4739 /* As non-GIMPLE_COND last stmt always terminates the range,
4740 if forward search didn't discover anything, just give up. */
4741 if (gimple_code (stmt) != GIMPLE_COND)
4742 return cfg_cleanup_needed;
4743 /* Look at both successors. Either it ends with a GIMPLE_COND
4744 and satisfies suitable_cond_bb, or ends with a cast and
4745 other_bb is that cast's successor. */
4746 FOR_EACH_EDGE (e, ei, first_bb->succs)
4747 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4748 || e->dest == first_bb)
4749 return cfg_cleanup_needed;
4750 else if (single_pred_p (e->dest))
4751 {
4752 stmt = last_stmt (e->dest);
4753 if (stmt
4754 && gimple_code (stmt) == GIMPLE_COND
4755 && EDGE_COUNT (e->dest->succs) == 2)
4756 {
4757 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4758 NULL, true))
4759 break;
4760 else
4761 other_bb = NULL;
4762 }
4763 else if (stmt
4764 && final_range_test_p (stmt)
4765 && find_edge (first_bb, single_succ (e->dest)))
4766 {
4767 other_bb = single_succ (e->dest);
4768 if (other_bb == first_bb)
4769 other_bb = NULL;
4770 }
4771 }
4772 if (other_bb == NULL)
4773 return cfg_cleanup_needed;
4774 }
4775 /* Now do the forward search, moving last_bb to successor bbs
4776 that aren't other_bb. */
4777 while (EDGE_COUNT (last_bb->succs) == 2)
4778 {
4779 FOR_EACH_EDGE (e, ei, last_bb->succs)
4780 if (e->dest != other_bb)
4781 break;
4782 if (e == NULL)
4783 break;
4784 if (!single_pred_p (e->dest))
4785 break;
4786 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4787 break;
4788 if (!no_side_effect_bb (e->dest))
4789 break;
4790 last_bb = e->dest;
4791 }
4792 if (first_bb == last_bb)
4793 return cfg_cleanup_needed;
4794 /* Here basic blocks first_bb through last_bb's predecessor
4795 end with GIMPLE_COND, all of them have one of the edges to
4796 other_bb and another to another block in the range,
4797 all blocks except first_bb don't have side-effects and
4798 last_bb ends with either GIMPLE_COND, or cast satisfying
4799 final_range_test_p. */
4800 for (bb = last_bb; ; bb = single_pred (bb))
4801 {
4802 enum tree_code code;
4803 tree lhs, rhs;
4804 inter_bb_range_test_entry bb_ent;
4805
4806 bb_ent.op = NULL_TREE;
4807 bb_ent.first_idx = ops.length ();
4808 bb_ent.last_idx = bb_ent.first_idx;
4809 e = find_edge (bb, other_bb);
4810 stmt = last_stmt (bb);
4811 gimple_set_visited (stmt, true);
4812 if (gimple_code (stmt) != GIMPLE_COND)
4813 {
4814 use_operand_p use_p;
4815 gimple *phi;
4816 edge e2;
4817 unsigned int d;
4818
4819 lhs = gimple_assign_lhs (stmt);
4820 rhs = gimple_assign_rhs1 (stmt);
4821 gcc_assert (bb == last_bb);
4822
4823 /* stmt is
4824 _123 = (int) _234;
4825 OR
4826 _234 = a_2(D) == 2;
4827
4828 followed by:
4829 <bb M>:
4830 # _345 = PHI <_123(N), 1(...), 1(...)>
4831
4832 or 0 instead of 1. If it is 0, the _234
4833 range test is anded together with all the
4834 other range tests, if it is 1, it is ored with
4835 them. */
4836 single_imm_use (lhs, &use_p, &phi);
4837 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4838 e2 = find_edge (first_bb, other_bb);
4839 d = e2->dest_idx;
4840 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4841 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4842 code = BIT_AND_EXPR;
4843 else
4844 {
4845 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4846 code = BIT_IOR_EXPR;
4847 }
4848
4849 /* If _234 SSA_NAME_DEF_STMT is
4850 _234 = _567 | _789;
4851 	     (or &, corresponding to 1/0 in the phi arguments),
4852 push into ops the individual range test arguments
4853 of the bitwise or resp. and, recursively. */
4854 if (TREE_CODE (rhs) == SSA_NAME
4855 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4856 != tcc_comparison)
4857 && !get_ops (rhs, code, &ops,
4858 loop_containing_stmt (stmt))
4859 && has_single_use (rhs))
4860 {
4861 /* Otherwise, push the _234 range test itself. */
4862 operand_entry *oe = operand_entry_pool.allocate ();
4863
4864 oe->op = rhs;
4865 oe->rank = code;
4866 oe->id = 0;
4867 oe->count = 1;
4868 oe->stmt_to_insert = NULL;
4869 ops.safe_push (oe);
4870 bb_ent.last_idx++;
4871 bb_ent.op = rhs;
4872 }
4873 else if (is_gimple_assign (stmt)
4874 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4875 == tcc_comparison)
4876 && !get_ops (lhs, code, &ops,
4877 loop_containing_stmt (stmt))
4878 && has_single_use (lhs))
4879 {
4880 operand_entry *oe = operand_entry_pool.allocate ();
4881 oe->op = lhs;
4882 oe->rank = code;
4883 oe->id = 0;
4884 oe->count = 1;
4885 ops.safe_push (oe);
4886 bb_ent.last_idx++;
4887 bb_ent.op = lhs;
4888 }
4889 else
4890 {
4891 bb_ent.last_idx = ops.length ();
4892 bb_ent.op = rhs;
4893 }
4894 bbinfo.safe_push (bb_ent);
4895 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4896 ops[i]->id = bb->index;
4897 continue;
4898 }
4899 else if (bb == last_bb)
4900 {
4901 /* For last_bb, handle also:
4902 if (x_3(D) == 3)
4903 goto <bb 6>; [34.00%]
4904 else
4905 goto <bb 7>; [66.00%]
4906
4907 <bb 6> [local count: 79512730]:
4908
4909 <bb 7> [local count: 1073741824]:
4910 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4911 where bb 7 is OTHER_BB, but the PHI values from the
4912 earlier bbs match the path through the empty bb
4913 in between. */
4914 bool test_swapped_p = false;
4915 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4916 &other_bb, &test_swapped_p, true);
4917 gcc_assert (ok);
4918 if (test_swapped_p)
4919 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4920 }
4921 /* Otherwise stmt is GIMPLE_COND. */
4922 code = gimple_cond_code (stmt);
4923 lhs = gimple_cond_lhs (stmt);
4924 rhs = gimple_cond_rhs (stmt);
4925 if (TREE_CODE (lhs) == SSA_NAME
4926 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4927 && ((code != EQ_EXPR && code != NE_EXPR)
4928 || rhs != boolean_false_node
4929 /* Either push into ops the individual bitwise
4930 or resp. and operands, depending on which
4931 edge is other_bb. */
4932 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4933 ^ (code == EQ_EXPR))
4934 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4935 loop_containing_stmt (stmt))))
4936 {
4937 /* Or push the GIMPLE_COND stmt itself. */
4938 operand_entry *oe = operand_entry_pool.allocate ();
4939
4940 oe->op = NULL;
4941 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4942 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4943 	  /* oe->op = NULL signals that there is no SSA_NAME
4944 	     for the range test, and oe->id instead is the
4945 	     basic block number at whose end the GIMPLE_COND
4946 	     is.  */
4947 oe->id = bb->index;
4948 oe->count = 1;
4949 oe->stmt_to_insert = NULL;
4950 ops.safe_push (oe);
4951 bb_ent.op = NULL;
4952 bb_ent.last_idx++;
4953 }
4954 else if (ops.length () > bb_ent.first_idx)
4955 {
4956 bb_ent.op = lhs;
4957 bb_ent.last_idx = ops.length ();
4958 }
4959 bbinfo.safe_push (bb_ent);
4960 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4961 ops[i]->id = bb->index;
4962 if (bb == first_bb)
4963 break;
4964 }
4965 if (ops.length () > 1)
4966 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4967 if (any_changes)
4968 {
4969 unsigned int idx, max_idx = 0;
4970 /* update_ops relies on has_single_use predicates returning the
4971 same values as it did during get_ops earlier. Additionally it
4972 never removes statements, only adds new ones and it should walk
4973 from the single imm use and check the predicate already before
4974 making those changes.
4975 	 On the other hand, the handling of GIMPLE_COND directly can turn
4976 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4977 it needs to be done in a separate loop afterwards. */
4978 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4979 {
4980 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4981 && bbinfo[idx].op != NULL_TREE)
4982 {
4983 tree new_op;
4984
4985 max_idx = idx;
4986 stmt = last_stmt (bb);
4987 new_op = update_ops (bbinfo[idx].op,
4988 (enum tree_code)
4989 ops[bbinfo[idx].first_idx]->rank,
4990 ops, &bbinfo[idx].first_idx,
4991 loop_containing_stmt (stmt));
4992 if (new_op == NULL_TREE)
4993 {
4994 gcc_assert (bb == last_bb);
4995 new_op = ops[bbinfo[idx].first_idx++]->op;
4996 }
4997 if (bbinfo[idx].op != new_op)
4998 {
4999 imm_use_iterator iter;
5000 use_operand_p use_p;
5001 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5002
5003 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5004 if (is_gimple_debug (use_stmt))
5005 continue;
5006 else if (gimple_code (use_stmt) == GIMPLE_COND
5007 || gimple_code (use_stmt) == GIMPLE_PHI)
5008 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5009 SET_USE (use_p, new_op);
5010 else if ((is_gimple_assign (use_stmt)
5011 && (TREE_CODE_CLASS
5012 (gimple_assign_rhs_code (use_stmt))
5013 == tcc_comparison)))
5014 cast_or_tcc_cmp_stmt = use_stmt;
5015 else if (gimple_assign_cast_p (use_stmt))
5016 cast_or_tcc_cmp_stmt = use_stmt;
5017 else
5018 gcc_unreachable ();
5019
5020 if (cast_or_tcc_cmp_stmt)
5021 {
5022 gcc_assert (bb == last_bb);
5023 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5024 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5025 enum tree_code rhs_code
5026 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5027 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5028 : CONVERT_EXPR;
5029 gassign *g;
5030 if (is_gimple_min_invariant (new_op))
5031 {
5032 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5033 g = gimple_build_assign (new_lhs, new_op);
5034 }
5035 else
5036 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5037 gimple_stmt_iterator gsi
5038 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5039 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5040 gimple_set_visited (g, true);
5041 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5042 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5043 if (is_gimple_debug (use_stmt))
5044 continue;
5045 else if (gimple_code (use_stmt) == GIMPLE_COND
5046 || gimple_code (use_stmt) == GIMPLE_PHI)
5047 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5048 SET_USE (use_p, new_lhs);
5049 else
5050 gcc_unreachable ();
5051 }
5052 }
5053 }
5054 if (bb == first_bb)
5055 break;
5056 }
5057 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5058 {
5059 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5060 && bbinfo[idx].op == NULL_TREE
5061 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5062 {
5063 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
5064
5065 if (idx > max_idx)
5066 max_idx = idx;
5067
5068 /* If we collapse the conditional to a true/false
5069 condition, then bubble that knowledge up to our caller. */
5070 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5071 {
5072 gimple_cond_make_false (cond_stmt);
5073 cfg_cleanup_needed = true;
5074 }
5075 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5076 {
5077 gimple_cond_make_true (cond_stmt);
5078 cfg_cleanup_needed = true;
5079 }
5080 else
5081 {
5082 gimple_cond_set_code (cond_stmt, NE_EXPR);
5083 gimple_cond_set_lhs (cond_stmt,
5084 ops[bbinfo[idx].first_idx]->op);
5085 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5086 }
5087 update_stmt (cond_stmt);
5088 }
5089 if (bb == first_bb)
5090 break;
5091 }
5092
5093 /* The above changes could result in basic blocks after the first
5094 modified one, up to and including last_bb, to be executed even if
5095 they would not be in the original program. If the value ranges of
5096 assignment lhs' in those bbs were dependent on the conditions
5097 guarding those basic blocks which now can change, the VRs might
5098 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5099 	 are only used within the same bb, it should not be a big deal if
5100 we just reset all the VRs in those bbs. See PR68671. */
5101 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5102 reset_flow_sensitive_info_in_bb (bb);
5103 }
5104 return cfg_cleanup_needed;
5105 }
5106
5107 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5108    of STMT in its operands.  This is also known as a "destructive
5109 update" operation. */
5110
5111 static bool
5112 is_phi_for_stmt (gimple *stmt, tree operand)
5113 {
5114 gimple *def_stmt;
5115 gphi *def_phi;
5116 tree lhs;
5117 use_operand_p arg_p;
5118 ssa_op_iter i;
5119
5120 if (TREE_CODE (operand) != SSA_NAME)
5121 return false;
5122
5123 lhs = gimple_assign_lhs (stmt);
5124
5125 def_stmt = SSA_NAME_DEF_STMT (operand);
5126 def_phi = dyn_cast <gphi *> (def_stmt);
5127 if (!def_phi)
5128 return false;
5129
5130 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5131 if (lhs == USE_FROM_PTR (arg_p))
5132 return true;
5133 return false;
5134 }
5135
5136 /* Remove def stmt of VAR if VAR has zero uses and recurse
5137 on rhs1 operand if so. */
5138
5139 static void
5140 remove_visited_stmt_chain (tree var)
5141 {
5142 gimple *stmt;
5143 gimple_stmt_iterator gsi;
5144
5145 while (1)
5146 {
5147 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5148 return;
5149 stmt = SSA_NAME_DEF_STMT (var);
5150 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5151 {
5152 var = gimple_assign_rhs1 (stmt);
5153 gsi = gsi_for_stmt (stmt);
5154 reassoc_remove_stmt (&gsi);
5155 release_defs (stmt);
5156 }
5157 else
5158 return;
5159 }
5160 }
5161
5162 /* This function checks three consecutive operands in
5163    the passed operands vector OPS starting from OPINDEX and
5164    swaps two operands if it is profitable for the binary operation
5165    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5166
5167 We pair ops with the same rank if possible.
5168
5169 The alternative we try is to see if STMT is a destructive
5170 update style statement, which is like:
5171 b = phi (a, ...)
5172 a = c + b;
5173 In that case, we want to use the destructive update form to
5174 expose the possible vectorizer sum reduction opportunity.
5175 In that case, the third operand will be the phi node. This
5176 check is not performed if STMT is null.
5177
5178 We could, of course, try to be better as noted above, and do a
5179 lot of work to try to find these opportunities in >3 operand
5180 cases, but it is unlikely to be worth it. */
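/* For instance (an illustrative case): with operand ranks (5, 7, 5) at
   OPINDEX the first two entries are swapped, giving (7, 5, 5), so the
   binary statement consuming the last two operands pairs the two
   equally-ranked ones.  */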
5181
5182 static void
5183 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5184 unsigned int opindex, gimple *stmt)
5185 {
5186 operand_entry *oe1, *oe2, *oe3;
5187
5188 oe1 = ops[opindex];
5189 oe2 = ops[opindex + 1];
5190 oe3 = ops[opindex + 2];
5191
5192 if ((oe1->rank == oe2->rank
5193 && oe2->rank != oe3->rank)
5194 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5195 && !is_phi_for_stmt (stmt, oe1->op)
5196 && !is_phi_for_stmt (stmt, oe2->op)))
5197 std::swap (*oe1, *oe3);
5198 else if ((oe1->rank == oe3->rank
5199 && oe2->rank != oe3->rank)
5200 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5201 && !is_phi_for_stmt (stmt, oe1->op)
5202 && !is_phi_for_stmt (stmt, oe3->op)))
5203 std::swap (*oe1, *oe2);
5204 }
5205
5206 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5207 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5208 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5209 after the returned stmt. */
5210
5211 static inline gimple *
5212 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5213 {
5214 insert_before = true;
5215 if (TREE_CODE (rhs1) == SSA_NAME
5216 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5217 {
5218 stmt = SSA_NAME_DEF_STMT (rhs1);
5219 insert_before = false;
5220 }
5221 if (TREE_CODE (rhs2) == SSA_NAME
5222 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5223 {
5224 stmt = SSA_NAME_DEF_STMT (rhs2);
5225 insert_before = false;
5226 }
5227 return stmt;
5228 }
5229
5230 /* If the stmt that defines operand has to be inserted, insert it
5231 before the use. */
5232 static void
5233 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5234 {
5235 gcc_assert (is_gimple_assign (stmt_to_insert));
5236 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5237 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5238 bool insert_before;
5239 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5240 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5241 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5242
5243 /* If the insert point is not stmt, then insert_point would be
5244 the point where operand rhs1 or rhs2 is defined. In this case,
5245 stmt_to_insert has to be inserted afterwards. This would
5246 only happen when the stmt insertion point is flexible. */
5247 if (insert_before)
5248 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5249 else
5250 insert_stmt_after (stmt_to_insert, insert_point);
5251 }
5252
5253
5254 /* Recursively rewrite our linearized statements so that the operators
5255 match those in OPS[OPINDEX], putting the computation in rank
5256 order. Return new lhs.
5257 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5258 the current stmt and during recursive invocations.
5259 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5260 recursive invocations. */
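/* For example (an illustrative sketch): given the linearized chain

     _6 = x_1 + y_2;
     _7 = _6 + z_3;

   and an OPS vector holding entries {o1, o2, o3} after sorting, the
   recursion first rewrites the inner statement to compute o2 + o3 and
   then the outer one to compute that result plus o1, creating fresh
   SSA names whenever an operand actually changed.  */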
5261
5262 static tree
5263 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5264 const vec<operand_entry *> &ops, bool changed,
5265 bool next_changed)
5266 {
5267 tree rhs1 = gimple_assign_rhs1 (stmt);
5268 tree rhs2 = gimple_assign_rhs2 (stmt);
5269 tree lhs = gimple_assign_lhs (stmt);
5270 operand_entry *oe;
5271
5272 /* The final recursion case for this function is that you have
5273 exactly two operations left.
5274 If we had exactly one op in the entire list to start with, we
5275 would have never called this function, and the tail recursion
5276 rewrites them one at a time. */
5277 if (opindex + 2 == ops.length ())
5278 {
5279 operand_entry *oe1, *oe2;
5280
5281 oe1 = ops[opindex];
5282 oe2 = ops[opindex + 1];
5283
5284 if (rhs1 != oe1->op || rhs2 != oe2->op)
5285 {
5286 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5287 unsigned int uid = gimple_uid (stmt);
5288
5289 if (dump_file && (dump_flags & TDF_DETAILS))
5290 {
5291 fprintf (dump_file, "Transforming ");
5292 print_gimple_stmt (dump_file, stmt, 0);
5293 }
5294
5295 /* If the stmt that defines operand has to be inserted, insert it
5296 before the use. */
5297 if (oe1->stmt_to_insert)
5298 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5299 if (oe2->stmt_to_insert)
5300 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5301 /* Even when changed is false, reassociation could have e.g. removed
5302 some redundant operations, so unless we are just swapping the
5303 arguments or unless there is no change at all (then we just
5304 return lhs), force creation of a new SSA_NAME. */
5305 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5306 {
5307 bool insert_before;
5308 gimple *insert_point
5309 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5310 lhs = make_ssa_name (TREE_TYPE (lhs));
5311 stmt
5312 = gimple_build_assign (lhs, rhs_code,
5313 oe1->op, oe2->op);
5314 gimple_set_uid (stmt, uid);
5315 gimple_set_visited (stmt, true);
5316 if (insert_before)
5317 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5318 else
5319 insert_stmt_after (stmt, insert_point);
5320 }
5321 else
5322 {
5323 bool insert_before;
5324 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5325 insert_before)
5326 == stmt);
5327 gimple_assign_set_rhs1 (stmt, oe1->op);
5328 gimple_assign_set_rhs2 (stmt, oe2->op);
5329 update_stmt (stmt);
5330 }
5331
5332 if (rhs1 != oe1->op && rhs1 != oe2->op)
5333 remove_visited_stmt_chain (rhs1);
5334
5335 if (dump_file && (dump_flags & TDF_DETAILS))
5336 {
5337 fprintf (dump_file, " into ");
5338 print_gimple_stmt (dump_file, stmt, 0);
5339 }
5340 }
5341 return lhs;
5342 }
5343
5344 /* If we hit here, we should have 3 or more ops left. */
5345 gcc_assert (opindex + 2 < ops.length ());
5346
5347 /* Rewrite the next operator. */
5348 oe = ops[opindex];
5349
5350 /* If the stmt that defines operand has to be inserted, insert it
5351 before the use. */
5352 if (oe->stmt_to_insert)
5353 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5354
5355 /* Recurse on the LHS of the binary operator, which is guaranteed to
5356 be the non-leaf side. */
5357 tree new_rhs1
5358 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5359 changed || oe->op != rhs2 || next_changed,
5360 false);
5361
5362 if (oe->op != rhs2 || new_rhs1 != rhs1)
5363 {
5364 if (dump_file && (dump_flags & TDF_DETAILS))
5365 {
5366 fprintf (dump_file, "Transforming ");
5367 print_gimple_stmt (dump_file, stmt, 0);
5368 }
5369
5370 /* If changed is false, this is either opindex == 0
5371 or all outer rhs2's were equal to corresponding oe->op,
5372 and powi_result is NULL.
5373 That means lhs is equivalent before and after reassociation.
5374 Otherwise ensure the old lhs SSA_NAME is not reused and
5375 create a new stmt as well, so that any debug stmts will be
5376 properly adjusted. */
5377 if (changed)
5378 {
5379 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5380 unsigned int uid = gimple_uid (stmt);
5381 bool insert_before;
5382 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5383 insert_before);
5384
5385 lhs = make_ssa_name (TREE_TYPE (lhs));
5386 stmt = gimple_build_assign (lhs, rhs_code,
5387 new_rhs1, oe->op);
5388 gimple_set_uid (stmt, uid);
5389 gimple_set_visited (stmt, true);
5390 if (insert_before)
5391 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5392 else
5393 insert_stmt_after (stmt, insert_point);
5394 }
5395 else
5396 {
5397 bool insert_before;
5398 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5399 insert_before)
5400 == stmt);
5401 gimple_assign_set_rhs1 (stmt, new_rhs1);
5402 gimple_assign_set_rhs2 (stmt, oe->op);
5403 update_stmt (stmt);
5404 }
5405
5406 if (dump_file && (dump_flags & TDF_DETAILS))
5407 {
5408 fprintf (dump_file, " into ");
5409 print_gimple_stmt (dump_file, stmt, 0);
5410 }
5411 }
5412 return lhs;
5413 }
5414
5415 /* Find out how many cycles we need to compute the statement chain.
5416 OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
5417 maximum number of independent statements we may execute per cycle.  */
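/* For instance, with OPS_NUM == 12 and CPU_WIDTH == 2, three cycles of
   width-limited reduction (12 / (2 * 2)) leave 6 operands, which need
   another ceil (log2 (6)) == 3 cycles of pairwise combining, for a total
   of 6 cycles.  */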
5418
5419 static int
5420 get_required_cycles (int ops_num, int cpu_width)
5421 {
5422 int res;
5423 int elog;
5424 unsigned int rest;
5425
5426 /* While we have more than 2 * cpu_width operands
5427 we may reduce number of operands by cpu_width
5428 per cycle. */
5429 res = ops_num / (2 * cpu_width);
5430
5431 /* The count of remaining operands may be halved each cycle
5432 until we have only one operand.  */
5433 rest = (unsigned)(ops_num - res * cpu_width);
5434 elog = exact_log2 (rest);
5435 if (elog >= 0)
5436 res += elog;
5437 else
5438 res += floor_log2 (rest) + 1;
5439
5440 return res;
5441 }
5442
5443 /* Returns an optimal number of registers to use for computation of
5444 given statements. */
5445
5446 static int
5447 get_reassociation_width (int ops_num, enum tree_code opc,
5448 machine_mode mode)
5449 {
5450 int param_width = param_tree_reassoc_width;
5451 int width;
5452 int width_min;
5453 int cycles_best;
5454
5455 if (param_width > 0)
5456 width = param_width;
5457 else
5458 width = targetm.sched.reassociation_width (opc, mode);
5459
5460 if (width == 1)
5461 return width;
5462
5463 /* Get the minimal time required for sequence computation. */
5464 cycles_best = get_required_cycles (ops_num, width);
5465
5466 /* Check whether we may use a smaller width and still compute the
5467 sequence in the same time.  That allows us to reduce register usage.
5468 get_required_cycles never decreases as the width shrinks, so we can
5469 perform a binary search for the minimal width that still results in
5470 the optimal cycle count.  */
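/* For instance, for 7 operands and a target width of 4 the best count is
   3 cycles; width 3 still needs only 3 cycles while width 2 needs 4, so
   the search settles on a width of 3.  */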
5471 width_min = 1;
5472 while (width > width_min)
5473 {
5474 int width_mid = (width + width_min) / 2;
5475
5476 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5477 width = width_mid;
5478 else if (width_min < width_mid)
5479 width_min = width_mid;
5480 else
5481 break;
5482 }
5483
5484 return width;
5485 }
5486
5487 /* Recursively rewrite our linearized statements so that the operators
5488 match those in OPS[OPINDEX], putting the computation in rank
5489 order and trying to allow operations to be executed in
5490 parallel. */
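/* As a rough illustration, with WIDTH == 2 a linearized chain computing
   a + b + c + d can be emitted as two independent additions whose results
   are then combined (t1 and t2 below are just placeholder names):

     t1 = a + b
     t2 = c + d
     lhs = t1 + t2

   so that t1 and t2 may be computed in the same cycle.  */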
5491
5492 static void
5493 rewrite_expr_tree_parallel (gassign *stmt, int width,
5494 const vec<operand_entry *> &ops)
5495 {
5496 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5497 int op_num = ops.length ();
5498 gcc_assert (op_num > 0);
5499 int stmt_num = op_num - 1;
5500 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5501 int op_index = op_num - 1;
5502 int stmt_index = 0;
5503 int ready_stmts_end = 0;
5504 int i = 0;
5505 gimple *stmt1 = NULL, *stmt2 = NULL;
5506 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5507
5508 /* We start expression rewriting from the top statements.
5509 So, in this loop we create a full list of statements
5510 we will work with. */
5511 stmts[stmt_num - 1] = stmt;
5512 for (i = stmt_num - 2; i >= 0; i--)
5513 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5514
5515 for (i = 0; i < stmt_num; i++)
5516 {
5517 tree op1, op2;
5518
5519 /* Determine whether we should use results of
5520 already handled statements or not. */
5521 if (ready_stmts_end == 0
5522 && (i - stmt_index >= width || op_index < 1))
5523 ready_stmts_end = i;
5524
5525 /* Now we choose operands for the next statement.  A nonzero
5526 value in ready_stmts_end means that we should use the results
5527 of already generated statements as the new operands.  */
5528 if (ready_stmts_end > 0)
5529 {
5530 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5531 if (ready_stmts_end > stmt_index)
5532 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5533 else if (op_index >= 0)
5534 {
5535 operand_entry *oe = ops[op_index--];
5536 stmt2 = oe->stmt_to_insert;
5537 op2 = oe->op;
5538 }
5539 else
5540 {
5541 gcc_assert (stmt_index < i);
5542 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5543 }
5544
5545 if (stmt_index >= ready_stmts_end)
5546 ready_stmts_end = 0;
5547 }
5548 else
5549 {
5550 if (op_index > 1)
5551 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5552 operand_entry *oe2 = ops[op_index--];
5553 operand_entry *oe1 = ops[op_index--];
5554 op2 = oe2->op;
5555 stmt2 = oe2->stmt_to_insert;
5556 op1 = oe1->op;
5557 stmt1 = oe1->stmt_to_insert;
5558 }
5559
5560 /* If we emit the last statement then we should put
5561 the operands into it.  Bumping I here also makes this
5562 the final loop iteration.  */
5563 if (op_index < 0 && stmt_index == i)
5564 i = stmt_num - 1;
5565
5566 if (dump_file && (dump_flags & TDF_DETAILS))
5567 {
5568 fprintf (dump_file, "Transforming ");
5569 print_gimple_stmt (dump_file, stmts[i], 0);
5570 }
5571
5572 /* If the stmt that defines operand has to be inserted, insert it
5573 before the use. */
5574 if (stmt1)
5575 insert_stmt_before_use (stmts[i], stmt1);
5576 if (stmt2)
5577 insert_stmt_before_use (stmts[i], stmt2);
5578 stmt1 = stmt2 = NULL;
5579
5580 /* We keep original statement only for the last one. All
5581 others are recreated. */
5582 if (i == stmt_num - 1)
5583 {
5584 gimple_assign_set_rhs1 (stmts[i], op1);
5585 gimple_assign_set_rhs2 (stmts[i], op2);
5586 update_stmt (stmts[i]);
5587 }
5588 else
5589 {
5590 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5591 gimple_set_visited (stmts[i], true);
5592 }
5593 if (dump_file && (dump_flags & TDF_DETAILS))
5594 {
5595 fprintf (dump_file, " into ");
5596 print_gimple_stmt (dump_file, stmts[i], 0);
5597 }
5598 }
5599
5600 remove_visited_stmt_chain (last_rhs1);
5601 }
5602
5603 /* Transform STMT, which is really (A + B) + (C + D), into the left
5604 linear form, ((A + B) + C) + D.
5605 Recurse on D if necessary.  */
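/* At the gimple level this looks roughly like turning

     s1 = A + B
     s2 = C + D
     lhs = s1 + s2

   into

     s1 = A + B
     t  = s1 + <one operand of s2>
     lhs = t + <the other operand of s2>

   where t is a fresh SSA name and the old definition of s2 is removed.  */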
5606
5607 static void
5608 linearize_expr (gimple *stmt)
5609 {
5610 gimple_stmt_iterator gsi;
5611 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5612 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5613 gimple *oldbinrhs = binrhs;
5614 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5615 gimple *newbinrhs = NULL;
5616 class loop *loop = loop_containing_stmt (stmt);
5617 tree lhs = gimple_assign_lhs (stmt);
5618
5619 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5620 && is_reassociable_op (binrhs, rhscode, loop));
5621
5622 gsi = gsi_for_stmt (stmt);
5623
5624 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5625 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5626 gimple_assign_rhs_code (binrhs),
5627 gimple_assign_lhs (binlhs),
5628 gimple_assign_rhs2 (binrhs));
5629 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5630 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5631 gimple_set_uid (binrhs, gimple_uid (stmt));
5632
5633 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5634 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5635
5636 if (dump_file && (dump_flags & TDF_DETAILS))
5637 {
5638 fprintf (dump_file, "Linearized: ");
5639 print_gimple_stmt (dump_file, stmt, 0);
5640 }
5641
5642 reassociate_stats.linearized++;
5643 update_stmt (stmt);
5644
5645 gsi = gsi_for_stmt (oldbinrhs);
5646 reassoc_remove_stmt (&gsi);
5647 release_defs (oldbinrhs);
5648
5649 gimple_set_visited (stmt, true);
5650 gimple_set_visited (binlhs, true);
5651 gimple_set_visited (binrhs, true);
5652
5653 /* Tail recurse on the new rhs if it still needs reassociation. */
5654 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5655 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5656 want to change the algorithm while converting to tuples. */
5657 linearize_expr (stmt);
5658 }
5659
5660 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5661 it. Otherwise, return NULL. */
5662
5663 static gimple *
5664 get_single_immediate_use (tree lhs)
5665 {
5666 use_operand_p immuse;
5667 gimple *immusestmt;
5668
5669 if (TREE_CODE (lhs) == SSA_NAME
5670 && single_imm_use (lhs, &immuse, &immusestmt)
5671 && is_gimple_assign (immusestmt))
5672 return immusestmt;
5673
5674 return NULL;
5675 }
5676
5677 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5678 representing the negated value. Insertions of any necessary
5679 instructions go before GSI.
5680 This function is recursive in that, if you hand it "a_5" as the
5681 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5682 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5683
5684 static tree
5685 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5686 {
5687 gimple *negatedefstmt = NULL;
5688 tree resultofnegate;
5689 gimple_stmt_iterator gsi;
5690 unsigned int uid;
5691
5692 /* If we are trying to negate a name, defined by an add, negate the
5693 add operands instead. */
5694 if (TREE_CODE (tonegate) == SSA_NAME)
5695 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5696 if (TREE_CODE (tonegate) == SSA_NAME
5697 && is_gimple_assign (negatedefstmt)
5698 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5699 && has_single_use (gimple_assign_lhs (negatedefstmt))
5700 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5701 {
5702 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5703 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5704 tree lhs = gimple_assign_lhs (negatedefstmt);
5705 gimple *g;
5706
5707 gsi = gsi_for_stmt (negatedefstmt);
5708 rhs1 = negate_value (rhs1, &gsi);
5709
5710 gsi = gsi_for_stmt (negatedefstmt);
5711 rhs2 = negate_value (rhs2, &gsi);
5712
5713 gsi = gsi_for_stmt (negatedefstmt);
5714 lhs = make_ssa_name (TREE_TYPE (lhs));
5715 gimple_set_visited (negatedefstmt, true);
5716 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5717 gimple_set_uid (g, gimple_uid (negatedefstmt));
5718 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5719 return lhs;
5720 }
5721
5722 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5723 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5724 NULL_TREE, true, GSI_SAME_STMT);
5725 gsi = *gsip;
5726 uid = gimple_uid (gsi_stmt (gsi));
5727 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5728 {
5729 gimple *stmt = gsi_stmt (gsi);
5730 if (gimple_uid (stmt) != 0)
5731 break;
5732 gimple_set_uid (stmt, uid);
5733 }
5734 return resultofnegate;
5735 }
5736
5737 /* Return true if we should break up the subtract in STMT into an add
5738 with negate.  This is true when the subtract operands are really
5739 adds, or the subtract itself is used in an add expression. In
5740 either case, breaking up the subtract into an add with negate
5741 exposes the adds to reassociation. */
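/* For example, given

     c = a + e
     b = c - d

   the first operand of the subtract is defined by a reassociable add, so
   it is worth rewriting the subtract as b = c + -d.  */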
5742
5743 static bool
5744 should_break_up_subtract (gimple *stmt)
5745 {
5746 tree lhs = gimple_assign_lhs (stmt);
5747 tree binlhs = gimple_assign_rhs1 (stmt);
5748 tree binrhs = gimple_assign_rhs2 (stmt);
5749 gimple *immusestmt;
5750 class loop *loop = loop_containing_stmt (stmt);
5751
5752 if (TREE_CODE (binlhs) == SSA_NAME
5753 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5754 return true;
5755
5756 if (TREE_CODE (binrhs) == SSA_NAME
5757 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5758 return true;
5759
5760 if (TREE_CODE (lhs) == SSA_NAME
5761 && (immusestmt = get_single_immediate_use (lhs))
5762 && is_gimple_assign (immusestmt)
5763 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5764 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5765 && gimple_assign_rhs1 (immusestmt) == lhs)
5766 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5767 return true;
5768 return false;
5769 }
5770
5771 /* Transform STMT from A - B into A + -B. */
5772
5773 static void
5774 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5775 {
5776 tree rhs1 = gimple_assign_rhs1 (stmt);
5777 tree rhs2 = gimple_assign_rhs2 (stmt);
5778
5779 if (dump_file && (dump_flags & TDF_DETAILS))
5780 {
5781 fprintf (dump_file, "Breaking up subtract ");
5782 print_gimple_stmt (dump_file, stmt, 0);
5783 }
5784
5785 rhs2 = negate_value (rhs2, gsip);
5786 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5787 update_stmt (stmt);
5788 }
5789
5790 /* Determine whether STMT is a builtin call that raises an SSA name
5791 to an integer power and has only one use. If so, and this is early
5792 reassociation and unsafe math optimizations are permitted, place
5793 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5794 If any of these conditions does not hold, return FALSE. */
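/* For instance, __builtin_pow (x, 4.0) yields *BASE = x and *EXPONENT = 4,
   and __builtin_powi (x, 5) yields *BASE = x and *EXPONENT = 5, whereas
   pow (x, 3.5) is rejected because its exponent is not integral.  */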
5795
5796 static bool
5797 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5798 {
5799 tree arg1;
5800 REAL_VALUE_TYPE c, cint;
5801
5802 switch (gimple_call_combined_fn (stmt))
5803 {
5804 CASE_CFN_POW:
5805 if (flag_errno_math)
5806 return false;
5807
5808 *base = gimple_call_arg (stmt, 0);
5809 arg1 = gimple_call_arg (stmt, 1);
5810
5811 if (TREE_CODE (arg1) != REAL_CST)
5812 return false;
5813
5814 c = TREE_REAL_CST (arg1);
5815
5816 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5817 return false;
5818
5819 *exponent = real_to_integer (&c);
5820 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5821 if (!real_identical (&c, &cint))
5822 return false;
5823
5824 break;
5825
5826 CASE_CFN_POWI:
5827 *base = gimple_call_arg (stmt, 0);
5828 arg1 = gimple_call_arg (stmt, 1);
5829
5830 if (!tree_fits_shwi_p (arg1))
5831 return false;
5832
5833 *exponent = tree_to_shwi (arg1);
5834 break;
5835
5836 default:
5837 return false;
5838 }
5839
5840 /* Expanding negative exponents is generally unproductive, so we don't
5841 complicate matters with those. Exponents of zero and one should
5842 have been handled by expression folding. */
5843 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5844 return false;
5845
5846 return true;
5847 }
5848
5849 /* Try to derive and add operand entry for OP to *OPS. Return false if
5850 unsuccessful. */
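/* For instance, in a MULT_EXPR chain an operand defined by a single-use
   call t = __builtin_powi (x, 3) is recorded as the repeated factor (x, 3),
   while an operand defined by t = -y is replaced by the two operands
   y and -1.  */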
5851
5852 static bool
5853 try_special_add_to_ops (vec<operand_entry *> *ops,
5854 enum tree_code code,
5855 tree op, gimple* def_stmt)
5856 {
5857 tree base = NULL_TREE;
5858 HOST_WIDE_INT exponent = 0;
5859
5860 if (TREE_CODE (op) != SSA_NAME
5861 || ! has_single_use (op))
5862 return false;
5863
5864 if (code == MULT_EXPR
5865 && reassoc_insert_powi_p
5866 && flag_unsafe_math_optimizations
5867 && is_gimple_call (def_stmt)
5868 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5869 {
5870 add_repeat_to_ops_vec (ops, base, exponent);
5871 gimple_set_visited (def_stmt, true);
5872 return true;
5873 }
5874 else if (code == MULT_EXPR
5875 && is_gimple_assign (def_stmt)
5876 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5877 && !HONOR_SNANS (TREE_TYPE (op))
5878 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5879 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5880 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5881 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5882 {
5883 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5884 tree cst = build_minus_one_cst (TREE_TYPE (op));
5885 add_to_ops_vec (ops, rhs1);
5886 add_to_ops_vec (ops, cst);
5887 gimple_set_visited (def_stmt, true);
5888 return true;
5889 }
5890
5891 return false;
5892 }
5893
5894 /* Recursively linearize a binary expression that is the RHS of STMT.
5895 Place the operands of the expression tree in the vector named OPS. */
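/* For example, starting from

     t1 = a + b
     t2 = c + d
     t3 = t1 + t2

   linearizing the RHS of t3 leaves OPS holding the operands a, b, c and d.  */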
5896
5897 static void
5898 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5899 bool is_associative, bool set_visited)
5900 {
5901 tree binlhs = gimple_assign_rhs1 (stmt);
5902 tree binrhs = gimple_assign_rhs2 (stmt);
5903 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5904 bool binlhsisreassoc = false;
5905 bool binrhsisreassoc = false;
5906 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5907 class loop *loop = loop_containing_stmt (stmt);
5908
5909 if (set_visited)
5910 gimple_set_visited (stmt, true);
5911
5912 if (TREE_CODE (binlhs) == SSA_NAME)
5913 {
5914 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5915 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5916 && !stmt_could_throw_p (cfun, binlhsdef));
5917 }
5918
5919 if (TREE_CODE (binrhs) == SSA_NAME)
5920 {
5921 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5922 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5923 && !stmt_could_throw_p (cfun, binrhsdef));
5924 }
5925
5926 /* If the LHS is not reassociable, but the RHS is, we need to swap
5927 them. If neither is reassociable, there is nothing we can do, so
5928 just put them in the ops vector. If the LHS is reassociable,
5929 linearize it. If both are reassociable, then linearize the RHS
5930 and the LHS. */
5931
5932 if (!binlhsisreassoc)
5933 {
5934 /* If this is not an associative operation, like division, give up. */
5935 if (!is_associative)
5936 {
5937 add_to_ops_vec (ops, binrhs);
5938 return;
5939 }
5940
5941 if (!binrhsisreassoc)
5942 {
5943 bool swap = false;
5944 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5945 /* If we add ops for the rhs we expect to be able to recurse
5946 to it via the lhs during expression rewrite so swap
5947 operands. */
5948 swap = true;
5949 else
5950 add_to_ops_vec (ops, binrhs);
5951
5952 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5953 add_to_ops_vec (ops, binlhs);
5954
5955 if (!swap)
5956 return;
5957 }
5958
5959 if (dump_file && (dump_flags & TDF_DETAILS))
5960 {
5961 fprintf (dump_file, "swapping operands of ");
5962 print_gimple_stmt (dump_file, stmt, 0);
5963 }
5964
5965 swap_ssa_operands (stmt,
5966 gimple_assign_rhs1_ptr (stmt),
5967 gimple_assign_rhs2_ptr (stmt));
5968 update_stmt (stmt);
5969
5970 if (dump_file && (dump_flags & TDF_DETAILS))
5971 {
5972 fprintf (dump_file, " is now ");
5973 print_gimple_stmt (dump_file, stmt, 0);
5974 }
5975 if (!binrhsisreassoc)
5976 return;
5977
5978 /* We want to make it so the lhs is always the reassociative op,
5979 so swap. */
5980 std::swap (binlhs, binrhs);
5981 }
5982 else if (binrhsisreassoc)
5983 {
5984 linearize_expr (stmt);
5985 binlhs = gimple_assign_rhs1 (stmt);
5986 binrhs = gimple_assign_rhs2 (stmt);
5987 }
5988
5989 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5990 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5991 rhscode, loop));
5992 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5993 is_associative, set_visited);
5994
5995 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5996 add_to_ops_vec (ops, binrhs);
5997 }
5998
5999 /* Repropagate the negates back into subtracts, since no other pass
6000 currently does it. */
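/* For example, given

     t = -a
     b = c + t

   the addition is turned back into the subtract b = c - a.  */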
6001
6002 static void
6003 repropagate_negates (void)
6004 {
6005 unsigned int i = 0;
6006 tree negate;
6007
6008 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6009 {
6010 gimple *user = get_single_immediate_use (negate);
6011 if (!user || !is_gimple_assign (user))
6012 continue;
6013
6014 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6015 if (TREE_CODE (negateop) == SSA_NAME
6016 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6017 continue;
6018
6019 /* The negate operand can be either operand of a PLUS_EXPR
6020 (it can be the LHS if the RHS is a constant for example).
6021
6022 Force the negate operand to the RHS of the PLUS_EXPR, then
6023 transform the PLUS_EXPR into a MINUS_EXPR. */
6024 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6025 {
6026 /* If the negated operand appears on the LHS of the
6027 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6028 to force the negated operand to the RHS of the PLUS_EXPR. */
6029 if (gimple_assign_rhs1 (user) == negate)
6030 {
6031 swap_ssa_operands (user,
6032 gimple_assign_rhs1_ptr (user),
6033 gimple_assign_rhs2_ptr (user));
6034 }
6035
6036 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6037 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6038 if (gimple_assign_rhs2 (user) == negate)
6039 {
6040 tree rhs1 = gimple_assign_rhs1 (user);
6041 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6042 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6043 negateop);
6044 update_stmt (user);
6045 }
6046 }
6047 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6048 {
6049 if (gimple_assign_rhs1 (user) == negate)
6050 {
6051 /* We have
6052 x = -negateop
6053 y = x - b
6054 which we transform into
6055 x = negateop + b
6056 y = -x .
6057 This pushes down the negate which we possibly can merge
6058 into some other operation, hence insert it into the
6059 plus_negates vector. */
6060 gimple *feed = SSA_NAME_DEF_STMT (negate);
6061 tree b = gimple_assign_rhs2 (user);
6062 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6063 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6064 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6065 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6066 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6067 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6068 user = gsi_stmt (gsi2);
6069 update_stmt (user);
6070 reassoc_remove_stmt (&gsi);
6071 release_defs (feed);
6072 plus_negates.safe_push (gimple_assign_lhs (user));
6073 }
6074 else
6075 {
6076 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6077 getting rid of one operation. */
6078 tree rhs1 = gimple_assign_rhs1 (user);
6079 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6080 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6081 update_stmt (gsi_stmt (gsi));
6082 }
6083 }
6084 }
6085 }
6086
6087 /* Break up subtract operations in block BB.
6088
6089 We do this top down because we don't know whether the subtract is
6090 part of a possible chain of reassociation except at the top.
6091
6092 IE given
6093 d = f + g
6094 c = a + e
6095 b = c - d
6096 q = b - r
6097 k = t - q
6098
6099 we want to break up k = t - q, but we won't until we've transformed q
6100 = b - r, which won't be broken up until we transform b = c - d.
6101
6102 En passant, clear the GIMPLE visited flag on every statement
6103 and set UIDs within each basic block. */
6104
6105 static void
6106 break_up_subtract_bb (basic_block bb)
6107 {
6108 gimple_stmt_iterator gsi;
6109 basic_block son;
6110 unsigned int uid = 1;
6111
6112 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6113 {
6114 gimple *stmt = gsi_stmt (gsi);
6115 gimple_set_visited (stmt, false);
6116 gimple_set_uid (stmt, uid++);
6117
6118 if (!is_gimple_assign (stmt)
6119 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6120 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6121 continue;
6122
6123 /* Look for simple gimple subtract operations. */
6124 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6125 {
6126 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6127 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6128 continue;
6129
6130 /* Check for a subtract used only in an addition. If this
6131 is the case, transform it into add of a negate for better
6132 reassociation. IE transform C = A-B into C = A + -B if C
6133 is only used in an addition. */
6134 if (should_break_up_subtract (stmt))
6135 break_up_subtract (stmt, &gsi);
6136 }
6137 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6138 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6139 plus_negates.safe_push (gimple_assign_lhs (stmt));
6140 }
6141 for (son = first_dom_son (CDI_DOMINATORS, bb);
6142 son;
6143 son = next_dom_son (CDI_DOMINATORS, son))
6144 break_up_subtract_bb (son);
6145 }
6146
6147 /* Used for repeated factor analysis. */
6148 struct repeat_factor
6149 {
6150 /* An SSA name that occurs in a multiply chain. */
6151 tree factor;
6152
6153 /* Cached rank of the factor. */
6154 unsigned rank;
6155
6156 /* Number of occurrences of the factor in the chain. */
6157 HOST_WIDE_INT count;
6158
6159 /* An SSA name representing the product of this factor and
6160 all factors appearing later in the repeated factor vector. */
6161 tree repr;
6162 };
6163
6164
6165 static vec<repeat_factor> repeat_factor_vec;
6166
6167 /* Used for sorting the repeat factor vector. Sort primarily by
6168 ascending occurrence count, secondarily by descending rank. */
6169
6170 static int
6171 compare_repeat_factors (const void *x1, const void *x2)
6172 {
6173 const repeat_factor *rf1 = (const repeat_factor *) x1;
6174 const repeat_factor *rf2 = (const repeat_factor *) x2;
6175
6176 if (rf1->count != rf2->count)
6177 return rf1->count - rf2->count;
6178
6179 return rf2->rank - rf1->rank;
6180 }
6181
6182 /* Look for repeated operands in OPS in the multiply tree rooted at
6183 STMT. Replace them with an optimal sequence of multiplies and powi
6184 builtin calls, and remove the used operands from OPS. Return an
6185 SSA name representing the value of the replacement sequence. */
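/* As a rough end-to-end illustration, for the product x * x * x * y * y the
   repeated factors are y (count 2) and x (count 3), and the sequence built
   is approximately

     t1 = y * x
     t2 = __builtin_powi (t1, 2)
     t3 = t2 * x

   with t3 returned and the consumed copies of x and y removed from OPS.  */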
6186
6187 static tree
6188 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6189 {
6190 unsigned i, j, vec_len;
6191 int ii;
6192 operand_entry *oe;
6193 repeat_factor *rf1, *rf2;
6194 repeat_factor rfnew;
6195 tree result = NULL_TREE;
6196 tree target_ssa, iter_result;
6197 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6198 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6199 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6200 gimple *mul_stmt, *pow_stmt;
6201
6202 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6203 target, unless type is integral. */
6204 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6205 return NULL_TREE;
6206
6207 /* Allocate the repeated factor vector. */
6208 repeat_factor_vec.create (10);
6209
6210 /* Scan the OPS vector for all SSA names in the product and build
6211 up a vector of occurrence counts for each factor. */
6212 FOR_EACH_VEC_ELT (*ops, i, oe)
6213 {
6214 if (TREE_CODE (oe->op) == SSA_NAME)
6215 {
6216 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6217 {
6218 if (rf1->factor == oe->op)
6219 {
6220 rf1->count += oe->count;
6221 break;
6222 }
6223 }
6224
6225 if (j >= repeat_factor_vec.length ())
6226 {
6227 rfnew.factor = oe->op;
6228 rfnew.rank = oe->rank;
6229 rfnew.count = oe->count;
6230 rfnew.repr = NULL_TREE;
6231 repeat_factor_vec.safe_push (rfnew);
6232 }
6233 }
6234 }
6235
6236 /* Sort the repeated factor vector by (a) increasing occurrence count,
6237 and (b) decreasing rank. */
6238 repeat_factor_vec.qsort (compare_repeat_factors);
6239
6240 /* It is generally best to combine as many base factors as possible
6241 into a product before applying __builtin_powi to the result.
6242 However, the sort order chosen for the repeated factor vector
6243 allows us to cache partial results for the product of the base
6244 factors for subsequent use. When we already have a cached partial
6245 result from a previous iteration, it is best to make use of it
6246 before looking for another __builtin_powi opportunity.
6247
6248 As an example, consider x * x * y * y * y * z * z * z * z.
6249 We want to first compose the product x * y * z, raise it to the
6250 second power, then multiply this by y * z, and finally multiply
6251 by z. This can be done in 5 multiplies provided we cache y * z
6252 for use in both expressions:
6253
6254 t1 = y * z
6255 t2 = t1 * x
6256 t3 = t2 * t2
6257 t4 = t1 * t3
6258 result = t4 * z
6259
6260 If we instead ignored the cached y * z and first multiplied by
6261 the __builtin_powi opportunity z * z, we would get the inferior:
6262
6263 t1 = y * z
6264 t2 = t1 * x
6265 t3 = t2 * t2
6266 t4 = z * z
6267 t5 = t3 * t4
6268 result = t5 * y */
6269
6270 vec_len = repeat_factor_vec.length ();
6271
6272 /* Repeatedly look for opportunities to create a builtin_powi call. */
6273 while (true)
6274 {
6275 HOST_WIDE_INT power;
6276
6277 /* First look for the largest cached product of factors from
6278 preceding iterations. If found, create a builtin_powi for
6279 it if the minimum occurrence count for its factors is at
6280 least 2, or just use this cached product as our next
6281 multiplicand if the minimum occurrence count is 1. */
6282 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6283 {
6284 if (rf1->repr && rf1->count > 0)
6285 break;
6286 }
6287
6288 if (j < vec_len)
6289 {
6290 power = rf1->count;
6291
6292 if (power == 1)
6293 {
6294 iter_result = rf1->repr;
6295
6296 if (dump_file && (dump_flags & TDF_DETAILS))
6297 {
6298 unsigned elt;
6299 repeat_factor *rf;
6300 fputs ("Multiplying by cached product ", dump_file);
6301 for (elt = j; elt < vec_len; elt++)
6302 {
6303 rf = &repeat_factor_vec[elt];
6304 print_generic_expr (dump_file, rf->factor);
6305 if (elt < vec_len - 1)
6306 fputs (" * ", dump_file);
6307 }
6308 fputs ("\n", dump_file);
6309 }
6310 }
6311 else
6312 {
6313 if (INTEGRAL_TYPE_P (type))
6314 {
6315 gcc_assert (power > 1);
6316 gimple_stmt_iterator gsip = gsi;
6317 gsi_prev (&gsip);
6318 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6319 rf1->repr, power);
6320 gimple_stmt_iterator gsic = gsi;
6321 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6322 {
6323 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6324 gimple_set_visited (gsi_stmt (gsic), true);
6325 gsi_prev (&gsic);
6326 }
6327 }
6328 else
6329 {
6330 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6331 pow_stmt
6332 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6333 build_int_cst (integer_type_node,
6334 power));
6335 gimple_call_set_lhs (pow_stmt, iter_result);
6336 gimple_set_location (pow_stmt, gimple_location (stmt));
6337 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6338 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6339 }
6340
6341 if (dump_file && (dump_flags & TDF_DETAILS))
6342 {
6343 unsigned elt;
6344 repeat_factor *rf;
6345 fputs ("Building __builtin_pow call for cached product (",
6346 dump_file);
6347 for (elt = j; elt < vec_len; elt++)
6348 {
6349 rf = &repeat_factor_vec[elt];
6350 print_generic_expr (dump_file, rf->factor);
6351 if (elt < vec_len - 1)
6352 fputs (" * ", dump_file);
6353 }
6354 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6355 power);
6356 }
6357 }
6358 }
6359 else
6360 {
6361 /* Otherwise, find the first factor in the repeated factor
6362 vector whose occurrence count is at least 2. If no such
6363 factor exists, there are no builtin_powi opportunities
6364 remaining. */
6365 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6366 {
6367 if (rf1->count >= 2)
6368 break;
6369 }
6370
6371 if (j >= vec_len)
6372 break;
6373
6374 power = rf1->count;
6375
6376 if (dump_file && (dump_flags & TDF_DETAILS))
6377 {
6378 unsigned elt;
6379 repeat_factor *rf;
6380 fputs ("Building __builtin_pow call for (", dump_file);
6381 for (elt = j; elt < vec_len; elt++)
6382 {
6383 rf = &repeat_factor_vec[elt];
6384 print_generic_expr (dump_file, rf->factor);
6385 if (elt < vec_len - 1)
6386 fputs (" * ", dump_file);
6387 }
6388 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6389 }
6390
6391 reassociate_stats.pows_created++;
6392
6393 /* Visit each element of the vector in reverse order (so that
6394 high-occurrence elements are visited first, and within the
6395 same occurrence count, lower-ranked elements are visited
6396 first). Form a linear product of all elements in this order
6397 whose occurrence count is at least that of element J.
6398 Record the SSA name representing the product of each element
6399 with all subsequent elements in the vector. */
6400 if (j == vec_len - 1)
6401 rf1->repr = rf1->factor;
6402 else
6403 {
6404 for (ii = vec_len - 2; ii >= (int)j; ii--)
6405 {
6406 tree op1, op2;
6407
6408 rf1 = &repeat_factor_vec[ii];
6409 rf2 = &repeat_factor_vec[ii + 1];
6410
6411 /* Init the last factor's representative to be itself. */
6412 if (!rf2->repr)
6413 rf2->repr = rf2->factor;
6414
6415 op1 = rf1->factor;
6416 op2 = rf2->repr;
6417
6418 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6419 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6420 op1, op2);
6421 gimple_set_location (mul_stmt, gimple_location (stmt));
6422 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6423 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6424 rf1->repr = target_ssa;
6425
6426 /* Don't reprocess the multiply we just introduced. */
6427 gimple_set_visited (mul_stmt, true);
6428 }
6429 }
6430
6431 /* Form a call to __builtin_powi for the maximum product
6432 just formed, raised to the power obtained earlier. */
6433 rf1 = &repeat_factor_vec[j];
6434 if (INTEGRAL_TYPE_P (type))
6435 {
6436 gcc_assert (power > 1);
6437 gimple_stmt_iterator gsip = gsi;
6438 gsi_prev (&gsip);
6439 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6440 rf1->repr, power);
6441 gimple_stmt_iterator gsic = gsi;
6442 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6443 {
6444 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6445 gimple_set_visited (gsi_stmt (gsic), true);
6446 gsi_prev (&gsic);
6447 }
6448 }
6449 else
6450 {
6451 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6452 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6453 build_int_cst (integer_type_node,
6454 power));
6455 gimple_call_set_lhs (pow_stmt, iter_result);
6456 gimple_set_location (pow_stmt, gimple_location (stmt));
6457 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6458 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6459 }
6460 }
6461
6462 /* If we previously formed at least one other builtin_powi call,
6463 form the product of this one and those others. */
6464 if (result)
6465 {
6466 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6467 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6468 result, iter_result);
6469 gimple_set_location (mul_stmt, gimple_location (stmt));
6470 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6471 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6472 gimple_set_visited (mul_stmt, true);
6473 result = new_result;
6474 }
6475 else
6476 result = iter_result;
6477
6478 /* Decrement the occurrence count of each element in the product
6479 by the count found above, and remove this many copies of each
6480 factor from OPS. */
6481 for (i = j; i < vec_len; i++)
6482 {
6483 unsigned k = power;
6484 unsigned n;
6485
6486 rf1 = &repeat_factor_vec[i];
6487 rf1->count -= power;
6488
6489 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6490 {
6491 if (oe->op == rf1->factor)
6492 {
6493 if (oe->count <= k)
6494 {
6495 ops->ordered_remove (n);
6496 k -= oe->count;
6497
6498 if (k == 0)
6499 break;
6500 }
6501 else
6502 {
6503 oe->count -= k;
6504 break;
6505 }
6506 }
6507 }
6508 }
6509 }
6510
6511 /* At this point all elements in the repeated factor vector have a
6512 remaining occurrence count of 0 or 1, and those with a count of 1
6513 don't have cached representatives. Re-sort the ops vector and
6514 clean up. */
6515 ops->qsort (sort_by_operand_rank);
6516 repeat_factor_vec.release ();
6517
6518 /* Return the final product computed herein. Note that there may
6519 still be some elements with single occurrence count left in OPS;
6520 those will be handled by the normal reassociation logic. */
6521 return result;
6522 }
6523
6524 /* Attempt to optimize
6525 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6526 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
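/* For example, -2.0 * copysign (3.0, y) becomes -copysign (6.0, y), with
   the negation emitted as a separate NEGATE_EXPR statement after the new
   call.  */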
6527
6528 static void
6529 attempt_builtin_copysign (vec<operand_entry *> *ops)
6530 {
6531 operand_entry *oe;
6532 unsigned int i;
6533 unsigned int length = ops->length ();
6534 tree cst = ops->last ()->op;
6535
6536 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6537 return;
6538
6539 FOR_EACH_VEC_ELT (*ops, i, oe)
6540 {
6541 if (TREE_CODE (oe->op) == SSA_NAME
6542 && has_single_use (oe->op))
6543 {
6544 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6545 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6546 {
6547 tree arg0, arg1;
6548 switch (gimple_call_combined_fn (old_call))
6549 {
6550 CASE_CFN_COPYSIGN:
6551 CASE_CFN_COPYSIGN_FN:
6552 arg0 = gimple_call_arg (old_call, 0);
6553 arg1 = gimple_call_arg (old_call, 1);
6554 /* The first argument of copysign must be a constant,
6555 otherwise there's nothing to do. */
6556 if (TREE_CODE (arg0) == REAL_CST)
6557 {
6558 tree type = TREE_TYPE (arg0);
6559 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6560 /* If we couldn't fold to a single constant, skip it.
6561 That happens e.g. for inexact multiplication when
6562 -frounding-math. */
6563 if (mul == NULL_TREE)
6564 break;
6565 /* Instead of adjusting OLD_CALL, let's build a new
6566 call to not leak the LHS and prevent keeping bogus
6567 debug statements. DCE will clean up the old call. */
6568 gcall *new_call;
6569 if (gimple_call_internal_p (old_call))
6570 new_call = gimple_build_call_internal
6571 (IFN_COPYSIGN, 2, mul, arg1);
6572 else
6573 new_call = gimple_build_call
6574 (gimple_call_fndecl (old_call), 2, mul, arg1);
6575 tree lhs = make_ssa_name (type);
6576 gimple_call_set_lhs (new_call, lhs);
6577 gimple_set_location (new_call,
6578 gimple_location (old_call));
6579 insert_stmt_after (new_call, old_call);
6580 /* We've used the constant, get rid of it. */
6581 ops->pop ();
6582 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6583 /* Handle the CST1 < 0 case by negating the result. */
6584 if (cst1_neg)
6585 {
6586 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6587 gimple *negate_stmt
6588 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6589 insert_stmt_after (negate_stmt, new_call);
6590 oe->op = negrhs;
6591 }
6592 else
6593 oe->op = lhs;
6594 if (dump_file && (dump_flags & TDF_DETAILS))
6595 {
6596 fprintf (dump_file, "Optimizing copysign: ");
6597 print_generic_expr (dump_file, cst);
6598 fprintf (dump_file, " * COPYSIGN (");
6599 print_generic_expr (dump_file, arg0);
6600 fprintf (dump_file, ", ");
6601 print_generic_expr (dump_file, arg1);
6602 fprintf (dump_file, ") into %sCOPYSIGN (",
6603 cst1_neg ? "-" : "");
6604 print_generic_expr (dump_file, mul);
6605 fprintf (dump_file, ", ");
6606 print_generic_expr (dump_file, arg1);
6607 fprintf (dump_file, ")\n");
6608 }
6609 return;
6610 }
6611 break;
6612 default:
6613 break;
6614 }
6615 }
6616 }
6617 }
6618 }
6619
6620 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6621
6622 static void
6623 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6624 {
6625 tree rhs1;
6626
6627 if (dump_file && (dump_flags & TDF_DETAILS))
6628 {
6629 fprintf (dump_file, "Transforming ");
6630 print_gimple_stmt (dump_file, stmt, 0);
6631 }
6632
6633 rhs1 = gimple_assign_rhs1 (stmt);
6634 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6635 update_stmt (stmt);
6636 remove_visited_stmt_chain (rhs1);
6637
6638 if (dump_file && (dump_flags & TDF_DETAILS))
6639 {
6640 fprintf (dump_file, " into ");
6641 print_gimple_stmt (dump_file, stmt, 0);
6642 }
6643 }
6644
6645 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6646
6647 static void
6648 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6649 tree rhs1, tree rhs2)
6650 {
6651 if (dump_file && (dump_flags & TDF_DETAILS))
6652 {
6653 fprintf (dump_file, "Transforming ");
6654 print_gimple_stmt (dump_file, stmt, 0);
6655 }
6656
6657 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6658 update_stmt (gsi_stmt (*gsi));
6659 remove_visited_stmt_chain (rhs1);
6660
6661 if (dump_file && (dump_flags & TDF_DETAILS))
6662 {
6663 fprintf (dump_file, " into ");
6664 print_gimple_stmt (dump_file, stmt, 0);
6665 }
6666 }
6667
6668 /* Reassociate expressions in basic block BB and its children in the
6669 post-dominator tree.
6670
6671 Bubble up return status from maybe_optimize_range_tests. */
6672
6673 static bool
6674 reassociate_bb (basic_block bb)
6675 {
6676 gimple_stmt_iterator gsi;
6677 basic_block son;
6678 gimple *stmt = last_stmt (bb);
6679 bool cfg_cleanup_needed = false;
6680
6681 if (stmt && !gimple_visited_p (stmt))
6682 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6683
6684 bool do_prev = false;
6685 for (gsi = gsi_last_bb (bb);
6686 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6687 {
6688 do_prev = true;
6689 stmt = gsi_stmt (gsi);
6690
6691 if (is_gimple_assign (stmt)
6692 && !stmt_could_throw_p (cfun, stmt))
6693 {
6694 tree lhs, rhs1, rhs2;
6695 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6696
6697 /* If this was part of an already processed statement,
6698 we don't need to touch it again. */
6699 if (gimple_visited_p (stmt))
6700 {
6701 /* This statement might have become dead because of previous
6702 reassociations. */
6703 if (has_zero_uses (gimple_get_lhs (stmt)))
6704 {
6705 reassoc_remove_stmt (&gsi);
6706 release_defs (stmt);
6707 /* We might end up removing the last stmt above which
6708 places the iterator to the end of the sequence.
6709 Reset it to the last stmt in this case and make sure
6710 we don't do gsi_prev in that case. */
6711 if (gsi_end_p (gsi))
6712 {
6713 gsi = gsi_last_bb (bb);
6714 do_prev = false;
6715 }
6716 }
6717 continue;
6718 }
6719
6720 /* If this is not a gimple binary expression, there is
6721 nothing for us to do with it. */
6722 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6723 continue;
6724
6725 lhs = gimple_assign_lhs (stmt);
6726 rhs1 = gimple_assign_rhs1 (stmt);
6727 rhs2 = gimple_assign_rhs2 (stmt);
6728
6729 /* For non-bit or min/max operations we can't associate
6730 all types. Verify that here. */
6731 if ((rhs_code != BIT_IOR_EXPR
6732 && rhs_code != BIT_AND_EXPR
6733 && rhs_code != BIT_XOR_EXPR
6734 && rhs_code != MIN_EXPR
6735 && rhs_code != MAX_EXPR
6736 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6737 || !can_reassociate_op_p (rhs1)
6738 || !can_reassociate_op_p (rhs2))
6739 continue;
6740
6741 if (associative_tree_code (rhs_code))
6742 {
6743 auto_vec<operand_entry *> ops;
6744 tree powi_result = NULL_TREE;
6745 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6746
6747 /* There may be no immediate uses left by the time we
6748 get here because we may have eliminated them all. */
6749 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6750 continue;
6751
6752 gimple_set_visited (stmt, true);
6753 linearize_expr_tree (&ops, stmt, true, true);
6754 ops.qsort (sort_by_operand_rank);
6755 int orig_len = ops.length ();
6756 optimize_ops_list (rhs_code, &ops);
6757 if (undistribute_ops_list (rhs_code, &ops,
6758 loop_containing_stmt (stmt)))
6759 {
6760 ops.qsort (sort_by_operand_rank);
6761 optimize_ops_list (rhs_code, &ops);
6762 }
6763 if (undistribute_bitref_for_vector (rhs_code, &ops,
6764 loop_containing_stmt (stmt)))
6765 {
6766 ops.qsort (sort_by_operand_rank);
6767 optimize_ops_list (rhs_code, &ops);
6768 }
6769 if (rhs_code == PLUS_EXPR
6770 && transform_add_to_multiply (&ops))
6771 ops.qsort (sort_by_operand_rank);
6772
6773 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6774 {
6775 if (is_vector)
6776 optimize_vec_cond_expr (rhs_code, &ops);
6777 else
6778 optimize_range_tests (rhs_code, &ops, NULL);
6779 }
6780
6781 if (rhs_code == MULT_EXPR && !is_vector)
6782 {
6783 attempt_builtin_copysign (&ops);
6784
6785 if (reassoc_insert_powi_p
6786 && (flag_unsafe_math_optimizations
6787 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6788 powi_result = attempt_builtin_powi (stmt, &ops);
6789 }
6790
6791 operand_entry *last;
6792 bool negate_result = false;
6793 if (ops.length () > 1
6794 && rhs_code == MULT_EXPR)
6795 {
6796 last = ops.last ();
6797 if ((integer_minus_onep (last->op)
6798 || real_minus_onep (last->op))
6799 && !HONOR_SNANS (TREE_TYPE (lhs))
6800 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6801 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6802 {
6803 ops.pop ();
6804 negate_result = true;
6805 }
6806 }
6807
6808 tree new_lhs = lhs;
6809 /* If the operand vector is now empty, all operands were
6810 consumed by the __builtin_powi optimization. */
6811 if (ops.length () == 0)
6812 transform_stmt_to_copy (&gsi, stmt, powi_result);
6813 else if (ops.length () == 1)
6814 {
6815 tree last_op = ops.last ()->op;
6816
6817 /* If the stmt that defines operand has to be inserted, insert it
6818 before the use. */
6819 if (ops.last ()->stmt_to_insert)
6820 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6821 if (powi_result)
6822 transform_stmt_to_multiply (&gsi, stmt, last_op,
6823 powi_result);
6824 else
6825 transform_stmt_to_copy (&gsi, stmt, last_op);
6826 }
6827 else
6828 {
6829 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6830 int ops_num = ops.length ();
6831 int width;
6832
6833 /* For binary bit operations, if there are at least 3
6834 operands and the last operand in OPS is a constant,
6835 move it to the front. This helps ensure that we generate
6836 (X & Y) & C rather than (X & C) & Y. The former will
6837 often match a canonical bit test when we get to RTL. */
6838 if (ops.length () > 2
6839 && (rhs_code == BIT_AND_EXPR
6840 || rhs_code == BIT_IOR_EXPR
6841 || rhs_code == BIT_XOR_EXPR)
6842 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6843 std::swap (*ops[0], *ops[ops_num - 1]);
6844
6845 /* Only rewrite the expression tree to parallel in the
6846 last reassoc pass to avoid useless work back-and-forth
6847 with initial linearization. */
6848 if (!reassoc_insert_powi_p
6849 && ops.length () > 3
6850 && (width = get_reassociation_width (ops_num, rhs_code,
6851 mode)) > 1)
6852 {
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6854 fprintf (dump_file,
6855 "Width = %d was chosen for reassociation\n",
6856 width);
6857 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6858 width, ops);
6859 }
6860 else
6861 {
6862 /* When there are three operands left, we want
6863 to make sure the ones that get the double
6864 binary op are chosen wisely. */
6865 int len = ops.length ();
6866 if (len >= 3)
6867 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6868
6869 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6870 powi_result != NULL
6871 || negate_result,
6872 len != orig_len);
6873 }
6874
6875 /* If we combined some repeated factors into a
6876 __builtin_powi call, multiply that result by the
6877 reassociated operands. */
6878 if (powi_result)
6879 {
6880 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6881 tree type = TREE_TYPE (lhs);
6882 tree target_ssa = make_temp_ssa_name (type, NULL,
6883 "reassocpow");
6884 gimple_set_lhs (lhs_stmt, target_ssa);
6885 update_stmt (lhs_stmt);
6886 if (lhs != new_lhs)
6887 {
6888 target_ssa = new_lhs;
6889 new_lhs = lhs;
6890 }
6891 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6892 powi_result, target_ssa);
6893 gimple_set_location (mul_stmt, gimple_location (stmt));
6894 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6895 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6896 }
6897 }
6898
6899 if (negate_result)
6900 {
6901 stmt = SSA_NAME_DEF_STMT (lhs);
6902 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6903 gimple_set_lhs (stmt, tmp);
6904 if (lhs != new_lhs)
6905 tmp = new_lhs;
6906 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6907 tmp);
6908 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6909 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6910 update_stmt (stmt);
6911 }
6912 }
6913 }
6914 }
6915 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6916 son;
6917 son = next_dom_son (CDI_POST_DOMINATORS, son))
6918 cfg_cleanup_needed |= reassociate_bb (son);
6919
6920 return cfg_cleanup_needed;
6921 }
6922
6923 /* Add jumps around shifts for range tests turned into bit tests.
6924 For each SSA_NAME VAR we have code like:
6925 VAR = ...; // final stmt of range comparison
6926 // bit test here...;
6927 OTHERVAR = ...; // final stmt of the bit test sequence
6928 RES = VAR | OTHERVAR;
6929 Turn the above into:
6930 VAR = ...;
6931 if (VAR != 0)
6932 goto <l3>;
6933 else
6934 goto <l2>;
6935 <l2>:
6936 // bit test here...;
6937 OTHERVAR = ...;
6938 <l3>:
6939 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6940
6941 static void
6942 branch_fixup (void)
6943 {
6944 tree var;
6945 unsigned int i;
6946
6947 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6948 {
6949 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6950 gimple *use_stmt;
6951 use_operand_p use;
6952 bool ok = single_imm_use (var, &use, &use_stmt);
6953 gcc_assert (ok
6954 && is_gimple_assign (use_stmt)
6955 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6956 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6957
6958 basic_block cond_bb = gimple_bb (def_stmt);
6959 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6960 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6961
6962 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6963 gimple *g = gimple_build_cond (NE_EXPR, var,
6964 build_zero_cst (TREE_TYPE (var)),
6965 NULL_TREE, NULL_TREE);
6966 location_t loc = gimple_location (use_stmt);
6967 gimple_set_location (g, loc);
6968 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6969
6970 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6971 etrue->probability = profile_probability::even ();
6972 edge efalse = find_edge (cond_bb, then_bb);
6973 efalse->flags = EDGE_FALSE_VALUE;
6974 efalse->probability -= etrue->probability;
6975 then_bb->count -= etrue->count ();
6976
6977 tree othervar = NULL_TREE;
6978 if (gimple_assign_rhs1 (use_stmt) == var)
6979 othervar = gimple_assign_rhs2 (use_stmt);
6980 else if (gimple_assign_rhs2 (use_stmt) == var)
6981 othervar = gimple_assign_rhs1 (use_stmt);
6982 else
6983 gcc_unreachable ();
6984 tree lhs = gimple_assign_lhs (use_stmt);
6985 gphi *phi = create_phi_node (lhs, merge_bb);
6986 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6987 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6988 gsi = gsi_for_stmt (use_stmt);
6989 gsi_remove (&gsi, true);
6990
6991 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6992 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6993 }
6994 reassoc_branch_fixups.release ();
6995 }
6996
6997 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6998 void debug_ops_vector (vec<operand_entry *> ops);
6999
7000 /* Dump the operand entry vector OPS to FILE. */
7001
7002 void
7003 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7004 {
7005 operand_entry *oe;
7006 unsigned int i;
7007
7008 FOR_EACH_VEC_ELT (ops, i, oe)
7009 {
7010 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7011 print_generic_expr (file, oe->op);
7012 fprintf (file, "\n");
7013 }
7014 }
7015
7016 /* Dump the operand entry vector OPS to STDERR. */
7017
7018 DEBUG_FUNCTION void
7019 debug_ops_vector (vec<operand_entry *> ops)
7020 {
7021 dump_ops_vector (stderr, ops);
7022 }
7023
7024 /* Bubble up return status from reassociate_bb. */
7025
7026 static bool
7027 do_reassoc (void)
7028 {
7029 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7030 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7031 }
7032
7033 /* Initialize the reassociation pass. */
7034
7035 static void
7036 init_reassoc (void)
7037 {
7038 int i;
7039 int64_t rank = 2;
7040 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7041
7042 /* Find the loops, so that we can prevent moving calculations in
7043 them. */
7044 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7045
7046 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7047
7048 next_operand_entry_id = 0;
7049
7050 /* Reverse RPO (Reverse Post Order) will give us something where
7051 deeper loops come later. */
7052 pre_and_rev_post_order_compute (NULL, bbs, false);
7053 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7054 operand_rank = new hash_map<tree, int64_t>;
7055
7056 /* Give each default definition a distinct rank. This includes
7057 parameters and the static chain. Walk backwards over all
7058 SSA names so that we get proper rank ordering according
7059 to tree_swap_operands_p. */
7060 for (i = num_ssa_names - 1; i > 0; --i)
7061 {
7062 tree name = ssa_name (i);
7063 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7064 insert_operand_rank (name, ++rank);
7065 }
7066
7067 /* Set up rank for each BB */
7068 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7069 bb_rank[bbs[i]] = ++rank << 16;
7070
7071 free (bbs);
7072 calculate_dominance_info (CDI_POST_DOMINATORS);
7073 plus_negates = vNULL;
7074 }
7075
7076 /* Cleanup after the reassociation pass, and print stats if
7077 requested. */
7078
7079 static void
7080 fini_reassoc (void)
7081 {
7082 statistics_counter_event (cfun, "Linearized",
7083 reassociate_stats.linearized);
7084 statistics_counter_event (cfun, "Constants eliminated",
7085 reassociate_stats.constants_eliminated);
7086 statistics_counter_event (cfun, "Ops eliminated",
7087 reassociate_stats.ops_eliminated);
7088 statistics_counter_event (cfun, "Statements rewritten",
7089 reassociate_stats.rewritten);
7090 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7091 reassociate_stats.pows_encountered);
7092 statistics_counter_event (cfun, "Built-in powi calls created",
7093 reassociate_stats.pows_created);
7094
7095 delete operand_rank;
7096 bitmap_clear (biased_names);
7097 operand_entry_pool.release ();
7098 free (bb_rank);
7099 plus_negates.release ();
7100 free_dominance_info (CDI_POST_DOMINATORS);
7101 loop_optimizer_finalize ();
7102 }
7103
7104 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7105 insertion of __builtin_powi calls.
7106
7107 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7108 optimization of a gimple conditional. Otherwise returns zero. */
7109
7110 static unsigned int
7111 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7112 {
7113 reassoc_insert_powi_p = insert_powi_p;
7114 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7115
7116 init_reassoc ();
7117
7118 bool cfg_cleanup_needed;
7119 cfg_cleanup_needed = do_reassoc ();
7120 repropagate_negates ();
7121 branch_fixup ();
7122
7123 fini_reassoc ();
7124 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7125 }
7126
7127 namespace {
7128
7129 const pass_data pass_data_reassoc =
7130 {
7131 GIMPLE_PASS, /* type */
7132 "reassoc", /* name */
7133 OPTGROUP_NONE, /* optinfo_flags */
7134 TV_TREE_REASSOC, /* tv_id */
7135 ( PROP_cfg | PROP_ssa ), /* properties_required */
7136 0, /* properties_provided */
7137 0, /* properties_destroyed */
7138 0, /* todo_flags_start */
7139 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7140 };
7141
7142 class pass_reassoc : public gimple_opt_pass
7143 {
7144 public:
7145 pass_reassoc (gcc::context *ctxt)
7146 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7147 {}
7148
7149 /* opt_pass methods: */
7150 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
7151 void set_pass_param (unsigned int n, bool param)
7152 {
7153 gcc_assert (n == 0);
7154 insert_powi_p = param;
7155 bias_loop_carried_phi_ranks_p = !param;
7156 }
7157 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
7158 virtual unsigned int execute (function *)
7159 {
7160 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7161 }
7162
7163 private:
7164 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7165 point 3a in the pass header comment. */
7166 bool insert_powi_p;
7167 bool bias_loop_carried_phi_ranks_p;
7168 }; // class pass_reassoc
7169
7170 } // anon namespace
7171
7172 gimple_opt_pass *
7173 make_pass_reassoc (gcc::context *ctxt)
7174 {
7175 return new pass_reassoc (ctxt);
7176 }
7177