1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
58
59 It consists of five steps:
60
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
63
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
68
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
71
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
75
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
78
79 5. Repropagate negates, as nothing else will clean it up ATM.
80
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
83
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
86
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
91
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
96
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
99
100 a (1), b (1), c (1), d (2), e (2)
101
102
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
105
106 You want to first merge all leaves of the same rank, as much as
107 possible.
108
109 So first build a binary op of
110
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
115
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
120
121 Then build a binary op of d + e
122 mergetmp2 = d + e
123
124 and put mergetmp2 on the merge worklist.
125
126 so merge worklist = {mergetmp, c, mergetmp2}
127
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
130
131 So we have
132
133 build binary op
134 mergetmp3 = mergetmp + c
135
136 worklist = {mergetmp2, mergetmp3}
137
138 mergetmp4 = mergetmp2 + mergetmp3
139
140 worklist = {mergetmp4}
141
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
144
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
147
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
150
151 So why don't we do this?
152
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
158
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
161
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
165
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
169
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
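
/* As a rough end-to-end illustration (a sketch, not taken verbatim from any
   dump): given GIMPLE like

     t_1 = a_5 - b_6;
     r_2 = t_1 + b_6;

   step 1 breaks the subtraction into an addition of a negate, step 2
   linearizes the PLUS chain into the operand list { a_5, -b_6, b_6 },
   step 3 cancels the negate/positive pair, and the rewrite leaves
   essentially r_2 = a_5.  */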
175
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
179
180 /* Statistics */
181 static struct
182 {
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
190
191 /* Operator, rank pair. */
192 struct operand_entry
193 {
194 unsigned int rank;
195 unsigned int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
199 };
200
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
203
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id;
207
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static int64_t *bb_rank;
212
213 /* Operand->rank hashtable. */
214 static hash_map<tree, int64_t> *operand_rank;
215
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted - the basic block is
218 split right after that SSA_NAME's definition statement and before
219 its only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
221
222 /* Forward decls. */
223 static int64_t get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
228
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 {
232 gimple *stmt = gsi_stmt (*gsi);
233
234 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
236
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 {
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
252 }
253 return ret;
254 }
255
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
260
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static int64_t
269 phi_rank (gimple *stmt)
270 {
271 basic_block bb = gimple_bb (stmt);
272 class loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
277
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
281
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
286
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
291
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
297
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 {
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 {
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
308 }
309 }
310
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
313 }
314
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
320 {
321 gimple *phi_stmt;
322 int64_t block_rank;
323
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
327
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
329
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
332
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
340
341 return false;
342 }
343
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static int64_t
349 propagate_rank (int64_t rank, tree op)
350 {
351 int64_t op_rank;
352
353 if (loop_carried_phi (op))
354 return rank;
355
356 op_rank = get_rank (op);
357
358 return MAX (rank, op_rank);
359 }
360
361 /* Look up the operand rank structure for expression E. */
362
363 static inline int64_t
364 find_operand_rank (tree e)
365 {
366 int64_t *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
368 }
369
370 /* Insert {E,RANK} into the operand rank hashtable. */
371
372 static inline void
373 insert_operand_rank (tree e, int64_t rank)
374 {
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
377 }
378
379 /* Given an expression E, return the rank of the expression. */
380
381 static int64_t
382 get_rank (tree e)
383 {
384 /* SSA_NAME's have the rank of the expression they are the result
385 of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
394
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
398
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
403
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
407
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
412
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
415
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
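
/* A small illustration of the base rule (a sketch, with made-up ranks):
   if b_1 and c_2 are operands whose ranks are 1 and 2, then for
   a_3 = b_1 + c_2 the loop below computes MAX (1, 2) and the function
   returns 3, i.e. one more than the highest-ranked operand.  */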
420
421 if (TREE_CODE (e) == SSA_NAME)
422 {
423 ssa_op_iter iter;
424 gimple *stmt;
425 int64_t rank;
426 tree op;
427
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
430
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
434
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
437
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
442
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
452
453 if (dump_file && (dump_flags & TDF_DETAILS))
454 {
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e);
457 fprintf (dump_file, " is %" PRId64 "\n", (rank + 1));
458 }
459
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
463 }
464
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
467 }
468
469
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
476
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
481 {
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 {
486 /* Sort -1.0 and 1.0 constants last: while in some cases
487 const_binop can't optimize some inexact operations, multiplication
488 by -1.0 or 1.0 can always be merged with others. */
489 if (real_onep (t) || real_minus_onep (t))
490 return FLOAT_ONE_CONST_TYPE;
491 return FLOAT_CONST_TYPE;
492 }
493 else
494 return OTHER_CONST_TYPE;
495 }
496
497 /* qsort comparison function to sort operand entries PA and PB by rank
498 so that the sorted array is ordered by rank in decreasing order. */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
501 {
502 const operand_entry *oea = *(const operand_entry *const *)pa;
503 const operand_entry *oeb = *(const operand_entry *const *)pb;
504
505 if (oeb->rank != oea->rank)
506 return oeb->rank > oea->rank ? 1 : -1;
507
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
511 if (oea->rank == 0)
512 {
513 if (constant_type (oeb->op) != constant_type (oea->op))
514 return constant_type (oea->op) - constant_type (oeb->op);
515 else
516 /* To make sorting result stable, we use unique IDs to determine
517 order. */
518 return oeb->id > oea->id ? 1 : -1;
519 }
520
521 if (TREE_CODE (oea->op) != SSA_NAME)
522 {
523 if (TREE_CODE (oeb->op) != SSA_NAME)
524 return oeb->id > oea->id ? 1 : -1;
525 else
526 return 1;
527 }
528 else if (TREE_CODE (oeb->op) != SSA_NAME)
529 return -1;
530
531 /* Lastly, make sure the versions that are the same go next to each
532 other. */
533 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
534 {
535 /* SSA_NAME_VERSION is assigned pretty randomly, as we reuse
536 versions of removed SSA_NAMEs, so if possible, prefer to sort
537 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 See PR60418. */
539 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541 basic_block bba = gimple_bb (stmta);
542 basic_block bbb = gimple_bb (stmtb);
543 if (bbb != bba)
544 {
545 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 but the other might not. */
547 if (!bba)
548 return 1;
549 if (!bbb)
550 return -1;
551 /* If neither is, compare bb_rank. */
552 if (bb_rank[bbb->index] != bb_rank[bba->index])
553 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
554 }
555
556 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558 if (da != db)
559 return da ? 1 : -1;
560
561 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
562 }
563
564 return oeb->id > oea->id ? 1 : -1;
565 }
566
567 /* Add an operand entry to *OPS for the tree operand OP. */
568
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
571 {
572 operand_entry *oe = operand_entry_pool.allocate ();
573
574 oe->op = op;
575 oe->rank = get_rank (op);
576 oe->id = next_operand_entry_id++;
577 oe->count = 1;
578 oe->stmt_to_insert = stmt_to_insert;
579 ops->safe_push (oe);
580 }
581
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583 count REPEAT. */
584
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
587 HOST_WIDE_INT repeat)
588 {
589 operand_entry *oe = operand_entry_pool.allocate ();
590
591 oe->op = op;
592 oe->rank = get_rank (op);
593 oe->id = next_operand_entry_id++;
594 oe->count = repeat;
595 oe->stmt_to_insert = NULL;
596 ops->safe_push (oe);
597
598 reassociate_stats.pows_encountered++;
599 }
600
601 /* Return true if STMT is a reassociable operation containing a binary
602 operation with tree code CODE, and is inside LOOP. */
603
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
606 {
607 basic_block bb = gimple_bb (stmt);
608
609 if (gimple_bb (stmt) == NULL)
610 return false;
611
612 if (!flow_bb_inside_loop_p (loop, bb))
613 return false;
614
615 if (is_gimple_assign (stmt)
616 && gimple_assign_rhs_code (stmt) == code
617 && has_single_use (gimple_assign_lhs (stmt)))
618 {
619 tree rhs1 = gimple_assign_rhs1 (stmt);
620 tree rhs2 = gimple_assign_rhs2 (stmt);
621 if (TREE_CODE (rhs1) == SSA_NAME
622 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 return false;
624 if (rhs2
625 && TREE_CODE (rhs2) == SSA_NAME
626 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 return false;
628 return true;
629 }
630
631 return false;
632 }
633
634
635 /* Return true if STMT is a nop-conversion. */
636
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
639 {
640 if (gassign *ass = dyn_cast <gassign *> (stmt))
641 {
642 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 TREE_TYPE (gimple_assign_rhs1 (ass))))
645 return true;
646 }
647 return false;
648 }
649
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651 operand of that operation. Otherwise, return NULL_TREE. */
652
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
655 {
656 gimple *stmt = SSA_NAME_DEF_STMT (name);
657
658 /* Look through nop conversions (sign changes). */
659 if (gimple_nop_conversion_p (stmt)
660 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
662
663 if (!is_gimple_assign (stmt))
664 return NULL_TREE;
665
666 if (gimple_assign_rhs_code (stmt) == opcode)
667 return gimple_assign_rhs1 (stmt);
668 return NULL_TREE;
669 }
670
671 /* Return true if OP1 and OP2 have the same value if casted to either type. */
672
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
675 {
676 if (op1 == op2)
677 return true;
678
679 tree orig_op1 = op1;
680 if (TREE_CODE (op1) == SSA_NAME)
681 {
682 gimple *stmt = SSA_NAME_DEF_STMT (op1);
683 if (gimple_nop_conversion_p (stmt))
684 {
685 op1 = gimple_assign_rhs1 (stmt);
686 if (op1 == op2)
687 return true;
688 }
689 }
690
691 if (TREE_CODE (op2) == SSA_NAME)
692 {
693 gimple *stmt = SSA_NAME_DEF_STMT (op2);
694 if (gimple_nop_conversion_p (stmt))
695 {
696 op2 = gimple_assign_rhs1 (stmt);
697 if (op1 == op2
698 || orig_op1 == op2)
699 return true;
700 }
701 }
702
703 return false;
704 }
705
706
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708 eliminate through equivalences, do so, remove them from OPS, and
709 return true. Otherwise, return false. */
710
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 vec<operand_entry *> *ops,
714 bool *all_done,
715 unsigned int i,
716 operand_entry *curr,
717 operand_entry *last)
718 {
719
720 /* If we have two of the same op, and the opcode is & |, min, or max,
721 we can eliminate one of them.
722 If we have two of the same op, and the opcode is ^, we can
723 eliminate both of them. */
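
/* For illustration (a sketch): with OPS for a BIT_AND_EXPR chain such as
   { b, a, a }, the duplicate pair collapses to { b, a }; for a
   BIT_XOR_EXPR chain { b, a, a } both copies go away, leaving { b }
   (or a single zero constant if the two copies were the only entries).  */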
724
725 if (last && last->op == curr->op)
726 {
727 switch (opcode)
728 {
729 case MAX_EXPR:
730 case MIN_EXPR:
731 case BIT_IOR_EXPR:
732 case BIT_AND_EXPR:
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 {
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, curr->op);
737 fprintf (dump_file, " [&|minmax] ");
738 print_generic_expr (dump_file, last->op);
739 fprintf (dump_file, " -> ");
740 print_generic_stmt (dump_file, last->op);
741 }
742
743 ops->ordered_remove (i);
744 reassociate_stats.ops_eliminated ++;
745
746 return true;
747
748 case BIT_XOR_EXPR:
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 {
751 fprintf (dump_file, "Equivalence: ");
752 print_generic_expr (dump_file, curr->op);
753 fprintf (dump_file, " ^ ");
754 print_generic_expr (dump_file, last->op);
755 fprintf (dump_file, " -> nothing\n");
756 }
757
758 reassociate_stats.ops_eliminated += 2;
759
760 if (ops->length () == 2)
761 {
762 ops->truncate (0);
763 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 *all_done = true;
765 }
766 else
767 {
768 ops->ordered_remove (i-1);
769 ops->ordered_remove (i-1);
770 }
771
772 return true;
773
774 default:
775 break;
776 }
777 }
778 return false;
779 }
780
781 static vec<tree> plus_negates;
782
783 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
784 expression, look in OPS for a corresponding positive operation to cancel
785 it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
787 return false. */
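
/* For illustration (a sketch): if the PLUS_EXPR operand list contains
   { a, t, b } where t is defined as t = -b, the pair t/b is replaced by
   a zero constant, giving { a, 0 }; if instead t = ~b, the pair becomes
   an all-ones constant, giving { a, -1 }, since b + ~b == -1.  */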
788
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 vec<operand_entry *> *ops,
792 unsigned int currindex,
793 operand_entry *curr)
794 {
795 tree negateop;
796 tree notop;
797 unsigned int i;
798 operand_entry *oe;
799
800 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
802
803 negateop = get_unary_op (curr->op, NEGATE_EXPR);
804 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805 if (negateop == NULL_TREE && notop == NULL_TREE)
806 return false;
807
808 /* Any non-negated version will have a rank that is one less than
809 the current rank. So once we hit those ranks, if we don't find
810 one, we can stop. */
811
812 for (i = currindex + 1;
813 ops->iterate (i, &oe)
814 && oe->rank >= curr->rank - 1 ;
815 i++)
816 {
817 if (negateop
818 && ops_equal_values_p (oe->op, negateop))
819 {
820 if (dump_file && (dump_flags & TDF_DETAILS))
821 {
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, negateop);
824 fprintf (dump_file, " + -");
825 print_generic_expr (dump_file, oe->op);
826 fprintf (dump_file, " -> 0\n");
827 }
828
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
833
834 return true;
835 }
836 else if (notop
837 && ops_equal_values_p (oe->op, notop))
838 {
839 tree op_type = TREE_TYPE (oe->op);
840
841 if (dump_file && (dump_flags & TDF_DETAILS))
842 {
843 fprintf (dump_file, "Equivalence: ");
844 print_generic_expr (dump_file, notop);
845 fprintf (dump_file, " + ~");
846 print_generic_expr (dump_file, oe->op);
847 fprintf (dump_file, " -> -1\n");
848 }
849
850 ops->ordered_remove (i);
851 add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 ops->ordered_remove (currindex);
853 reassociate_stats.ops_eliminated ++;
854
855 return true;
856 }
857 }
858
859 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860 save it for later inspection in repropagate_negates(). */
861 if (negateop != NULL_TREE
862 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863 plus_negates.safe_push (curr->op);
864
865 return false;
866 }
867
868 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
869 bitwise not expression, look in OPS for a corresponding operand to
870 cancel it out. If we find one, remove the other from OPS, replace
871 OPS[CURRINDEX] with 0, and return true. Otherwise, return
872 false. */
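
/* For illustration (a sketch): in a BIT_AND_EXPR chain { a, b, t } with
   t = ~b, the whole chain folds to 0 (x & ~x == 0), so OPS is truncated
   to that single constant; in a BIT_IOR_EXPR chain it folds to -1
   instead (x | ~x == all ones).  */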
873
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 vec<operand_entry *> *ops,
877 unsigned int currindex,
878 operand_entry *curr)
879 {
880 tree notop;
881 unsigned int i;
882 operand_entry *oe;
883
884 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885 || TREE_CODE (curr->op) != SSA_NAME)
886 return false;
887
888 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889 if (notop == NULL_TREE)
890 return false;
891
892 /* Any non-not version will have a rank that is one less than
893 the current rank. So once we hit those ranks, if we don't find
894 one, we can stop. */
895
896 for (i = currindex + 1;
897 ops->iterate (i, &oe)
898 && oe->rank >= curr->rank - 1;
899 i++)
900 {
901 if (oe->op == notop)
902 {
903 if (dump_file && (dump_flags & TDF_DETAILS))
904 {
905 fprintf (dump_file, "Equivalence: ");
906 print_generic_expr (dump_file, notop);
907 if (opcode == BIT_AND_EXPR)
908 fprintf (dump_file, " & ~");
909 else if (opcode == BIT_IOR_EXPR)
910 fprintf (dump_file, " | ~");
911 print_generic_expr (dump_file, oe->op);
912 if (opcode == BIT_AND_EXPR)
913 fprintf (dump_file, " -> 0\n");
914 else if (opcode == BIT_IOR_EXPR)
915 fprintf (dump_file, " -> -1\n");
916 }
917
918 if (opcode == BIT_AND_EXPR)
919 oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 else if (opcode == BIT_IOR_EXPR)
921 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
922
923 reassociate_stats.ops_eliminated += ops->length () - 1;
924 ops->truncate (0);
925 ops->quick_push (oe);
926 return true;
927 }
928 }
929
930 return false;
931 }
932
933 /* Use constant value that may be present in OPS to try to eliminate
934 operands. Note that this function is only really used when we've
935 eliminated ops for other reasons, or merged constants. Across
936 single statements, fold already does all of this, plus more. There
937 is little point in duplicating logic, so I've only included the
938 identities that I could ever construct testcases to trigger. */
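
/* For illustration (a sketch): an operand list { x, y, 0 } for
   BIT_AND_EXPR collapses to { 0 }, while { x, y, -1 } merely drops the
   -1; for MULT_EXPR, { x, y, 0 } collapses to { 0 } and { x, y, 1 }
   drops the 1 (for floating point, subject to the HONOR_NANS /
   HONOR_SIGNED_ZEROS / HONOR_SNANS checks below).  */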
939
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 vec<operand_entry *> *ops)
943 {
944 operand_entry *oelast = ops->last ();
945 tree type = TREE_TYPE (oelast->op);
946
947 if (oelast->rank == 0
948 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
949 {
950 switch (opcode)
951 {
952 case BIT_AND_EXPR:
953 if (integer_zerop (oelast->op))
954 {
955 if (ops->length () != 1)
956 {
957 if (dump_file && (dump_flags & TDF_DETAILS))
958 fprintf (dump_file, "Found & 0, removing all other ops\n");
959
960 reassociate_stats.ops_eliminated += ops->length () - 1;
961
962 ops->truncate (0);
963 ops->quick_push (oelast);
964 return;
965 }
966 }
967 else if (integer_all_onesp (oelast->op))
968 {
969 if (ops->length () != 1)
970 {
971 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Found & -1, removing\n");
973 ops->pop ();
974 reassociate_stats.ops_eliminated++;
975 }
976 }
977 break;
978 case BIT_IOR_EXPR:
979 if (integer_all_onesp (oelast->op))
980 {
981 if (ops->length () != 1)
982 {
983 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Found | -1, removing all other ops\n");
985
986 reassociate_stats.ops_eliminated += ops->length () - 1;
987
988 ops->truncate (0);
989 ops->quick_push (oelast);
990 return;
991 }
992 }
993 else if (integer_zerop (oelast->op))
994 {
995 if (ops->length () != 1)
996 {
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Found | 0, removing\n");
999 ops->pop ();
1000 reassociate_stats.ops_eliminated++;
1001 }
1002 }
1003 break;
1004 case MULT_EXPR:
1005 if (integer_zerop (oelast->op)
1006 || (FLOAT_TYPE_P (type)
1007 && !HONOR_NANS (type)
1008 && !HONOR_SIGNED_ZEROS (type)
1009 && real_zerop (oelast->op)))
1010 {
1011 if (ops->length () != 1)
1012 {
1013 if (dump_file && (dump_flags & TDF_DETAILS))
1014 fprintf (dump_file, "Found * 0, removing all other ops\n");
1015
1016 reassociate_stats.ops_eliminated += ops->length () - 1;
1017 ops->truncate (0);
1018 ops->quick_push (oelast);
1019 return;
1020 }
1021 }
1022 else if (integer_onep (oelast->op)
1023 || (FLOAT_TYPE_P (type)
1024 && !HONOR_SNANS (type)
1025 && real_onep (oelast->op)))
1026 {
1027 if (ops->length () != 1)
1028 {
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found * 1, removing\n");
1031 ops->pop ();
1032 reassociate_stats.ops_eliminated++;
1033 return;
1034 }
1035 }
1036 break;
1037 case BIT_XOR_EXPR:
1038 case PLUS_EXPR:
1039 case MINUS_EXPR:
1040 if (integer_zerop (oelast->op)
1041 || (FLOAT_TYPE_P (type)
1042 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 && fold_real_zero_addition_p (type, oelast->op,
1044 opcode == MINUS_EXPR)))
1045 {
1046 if (ops->length () != 1)
1047 {
1048 if (dump_file && (dump_flags & TDF_DETAILS))
1049 fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 ops->pop ();
1051 reassociate_stats.ops_eliminated++;
1052 return;
1053 }
1054 }
1055 break;
1056 default:
1057 break;
1058 }
1059 }
1060 }
1061
1062
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 bool, bool);
1065
1066 /* Structure for tracking and counting operands. */
1067 struct oecount {
1068 unsigned int cnt;
1069 unsigned int id;
1070 enum tree_code oecode;
1071 tree op;
1072 };
1073
1074
1075 /* The heap for the oecount hashtable and the sorted list of operands. */
1076 static vec<oecount> cvec;
1077
1078
1079 /* Oecount hashtable helpers. */
1080
1081 struct oecount_hasher : int_hash <int, 0, 1>
1082 {
1083 static inline hashval_t hash (int);
1084 static inline bool equal (int, int);
1085 };
1086
1087 /* Hash function for oecount. */
1088
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1091 {
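/* Table entries are indices into CVEC offset by 42 so that the values
   0 and 1 stay free for int_hash's empty/deleted markers; the matching
   "cvec.length () + 41" is in undistribute_ops_list.  */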
1092 const oecount *c = &cvec[p - 42];
1093 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1094 }
1095
1096 /* Comparison function for oecount. */
1097
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1100 {
1101 const oecount *c1 = &cvec[p1 - 42];
1102 const oecount *c2 = &cvec[p2 - 42];
1103 return c1->oecode == c2->oecode && c1->op == c2->op;
1104 }
1105
1106 /* Comparison function for qsort sorting oecount elements by count. */
1107
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1110 {
1111 const oecount *c1 = (const oecount *)p1;
1112 const oecount *c2 = (const oecount *)p2;
1113 if (c1->cnt != c2->cnt)
1114 return c1->cnt > c2->cnt ? 1 : -1;
1115 else
1116 /* If counts are identical, use unique IDs to stabilize qsort. */
1117 return c1->id > c2->id ? 1 : -1;
1118 }
1119
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121 to some exponent. */
1122
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1125 {
1126 if (!is_gimple_call (stmt))
1127 return false;
1128
1129 switch (gimple_call_combined_fn (stmt))
1130 {
1131 CASE_CFN_POW:
1132 CASE_CFN_POWI:
1133 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1134
1135 default:
1136 return false;
1137 }
1138 }
1139
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141 in place and return the result. Assumes that stmt_is_power_of_op
1142 was previously called for STMT and returned TRUE. */
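
/* For example (a sketch): for a call __builtin_pow (x, 5.0) the second
   argument is rewritten to 4.0 and 4 is returned; for
   __builtin_powi (x, 3) it becomes __builtin_powi (x, 2) and 2 is
   returned.  */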
1143
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1146 {
1147 REAL_VALUE_TYPE c, cint;
1148 HOST_WIDE_INT power;
1149 tree arg1;
1150
1151 switch (gimple_call_combined_fn (stmt))
1152 {
1153 CASE_CFN_POW:
1154 arg1 = gimple_call_arg (stmt, 1);
1155 c = TREE_REAL_CST (arg1);
1156 power = real_to_integer (&c) - 1;
1157 real_from_integer (&cint, VOIDmode, power, SIGNED);
1158 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159 return power;
1160
1161 CASE_CFN_POWI:
1162 arg1 = gimple_call_arg (stmt, 1);
1163 power = TREE_INT_CST_LOW (arg1) - 1;
1164 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165 return power;
1166
1167 default:
1168 gcc_unreachable ();
1169 }
1170 }
1171
1172 /* Replace the SSA name defined by STMT with a new SSA name and replace
1173 all uses of the old name with the new one. Also return the new SSA. */
1174
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1177 {
1178 gimple *use_stmt;
1179 use_operand_p use;
1180 imm_use_iterator iter;
1181 tree new_lhs, new_debug_lhs = NULL_TREE;
1182 tree lhs = gimple_get_lhs (stmt);
1183
1184 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185 gimple_set_lhs (stmt, new_lhs);
1186
1187 /* Also need to update GIMPLE_DEBUGs. */
1188 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1189 {
1190 tree repl = new_lhs;
1191 if (is_gimple_debug (use_stmt))
1192 {
1193 if (new_debug_lhs == NULL_TREE)
1194 {
1195 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 gdebug *def_temp
1197 = gimple_build_debug_bind (new_debug_lhs,
1198 build2 (opcode, TREE_TYPE (lhs),
1199 new_lhs, op),
1200 stmt);
1201 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1204 gimple_set_uid (def_temp, gimple_uid (stmt));
1205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1207 }
1208 repl = new_debug_lhs;
1209 }
1210 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 SET_USE (use, repl);
1212 update_stmt (use_stmt);
1213 }
1214 return new_lhs;
1215 }
1216
1217 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1218 uses with new SSAs. Also do this for the stmt that defines *DEF
1219 if *DEF is not OP. */
1220
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 vec<gimple *> &stmts_to_fix)
1224 {
1225 unsigned i;
1226 gimple *stmt;
1227
1228 if (*def != op
1229 && TREE_CODE (*def) == SSA_NAME
1230 && (stmt = SSA_NAME_DEF_STMT (*def))
1231 && gimple_code (stmt) != GIMPLE_NOP)
1232 *def = make_new_ssa_for_def (stmt, opcode, op);
1233
1234 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235 make_new_ssa_for_def (stmt, opcode, op);
1236 }
1237
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1240 replace *DEF with OP as well. */
1241
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1244 {
1245 tree lhs;
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 gimple_stmt_iterator gsi;
1249
1250 if (is_gimple_call (stmt))
1251 lhs = gimple_call_lhs (stmt);
1252 else
1253 lhs = gimple_assign_lhs (stmt);
1254
1255 gcc_assert (has_single_use (lhs));
1256 single_imm_use (lhs, &use, &use_stmt);
1257 if (lhs == *def)
1258 *def = op;
1259 SET_USE (use, op);
1260 if (TREE_CODE (op) != SSA_NAME)
1261 update_stmt (use_stmt);
1262 gsi = gsi_for_stmt (stmt);
1263 unlink_stmt_vdef (stmt);
1264 reassoc_remove_stmt (&gsi);
1265 release_defs (stmt);
1266 }
1267
1268 /* Walk the linear chain with result *DEF searching for an operation
1269 with operand OP and code OPCODE, and remove it from the chain. *DEF
1270 is updated if there is only one operand but no operation left. */
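
/* For illustration (a sketch): with *DEF = t_3 where
     t_2 = a_5 * b_6;
     t_3 = t_2 * c_7;
   removing OP = b_6 with OPCODE MULT_EXPR rewires the chain so that the
   remaining product computes a_5 * c_7, and new SSA names are created
   for the rewritten statements (see PR72835 below).  */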
1271
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1274 {
1275 tree orig_def = *def;
1276 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277 /* PR72835 - Record the stmt chain that has to be updated such that
1278 we don't use the same LHS when the values computed are different. */
1279 auto_vec<gimple *, 64> stmts_to_fix;
1280
1281 do
1282 {
1283 tree name;
1284
1285 if (opcode == MULT_EXPR)
1286 {
1287 if (stmt_is_power_of_op (stmt, op))
1288 {
1289 if (decrement_power (stmt) == 1)
1290 {
1291 if (stmts_to_fix.length () > 0)
1292 stmts_to_fix.pop ();
1293 propagate_op_to_single_use (op, stmt, def);
1294 }
1295 break;
1296 }
1297 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1298 {
1299 if (gimple_assign_rhs1 (stmt) == op)
1300 {
1301 tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 if (stmts_to_fix.length () > 0)
1303 stmts_to_fix.pop ();
1304 propagate_op_to_single_use (cst, stmt, def);
1305 break;
1306 }
1307 else if (integer_minus_onep (op)
1308 || real_minus_onep (op))
1309 {
1310 gimple_assign_set_rhs_code
1311 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 break;
1313 }
1314 }
1315 }
1316
1317 name = gimple_assign_rhs1 (stmt);
1318
1319 /* If this is the operation we look for and one of the operands
1320 is ours, simply propagate the other operand into the stmt's
1321 single use. */
1322 if (gimple_assign_rhs_code (stmt) == opcode
1323 && (name == op
1324 || gimple_assign_rhs2 (stmt) == op))
1325 {
1326 if (name == op)
1327 name = gimple_assign_rhs2 (stmt);
1328 if (stmts_to_fix.length () > 0)
1329 stmts_to_fix.pop ();
1330 propagate_op_to_single_use (name, stmt, def);
1331 break;
1332 }
1333
1334 /* We might have a multiply of two __builtin_pow* calls, and
1335 the operand might be hiding in the rightmost one. Likewise
1336 this can happen for a negate. */
1337 if (opcode == MULT_EXPR
1338 && gimple_assign_rhs_code (stmt) == opcode
1339 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 && has_single_use (gimple_assign_rhs2 (stmt)))
1341 {
1342 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 if (stmt_is_power_of_op (stmt2, op))
1344 {
1345 if (decrement_power (stmt2) == 1)
1346 propagate_op_to_single_use (op, stmt2, def);
1347 else
1348 stmts_to_fix.safe_push (stmt2);
1349 break;
1350 }
1351 else if (is_gimple_assign (stmt2)
1352 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1353 {
1354 if (gimple_assign_rhs1 (stmt2) == op)
1355 {
1356 tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 propagate_op_to_single_use (cst, stmt2, def);
1358 break;
1359 }
1360 else if (integer_minus_onep (op)
1361 || real_minus_onep (op))
1362 {
1363 stmts_to_fix.safe_push (stmt2);
1364 gimple_assign_set_rhs_code
1365 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 break;
1367 }
1368 }
1369 }
1370
1371 /* Continue walking the chain. */
1372 gcc_assert (name != op
1373 && TREE_CODE (name) == SSA_NAME);
1374 stmt = SSA_NAME_DEF_STMT (name);
1375 stmts_to_fix.safe_push (stmt);
1376 }
1377 while (1);
1378
1379 if (stmts_to_fix.length () > 0 || *def == orig_def)
1380 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1381 }
1382
1383 /* Returns true if statement S1 dominates statement S2. Like
1384 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1385
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1388 {
1389 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1390
1391 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1392 SSA_NAME. Assume it lives at the beginning of function and
1393 thus dominates everything. */
1394 if (!bb1 || s1 == s2)
1395 return true;
1396
1397 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1398 if (!bb2)
1399 return false;
1400
1401 if (bb1 == bb2)
1402 {
1403 /* PHIs in the same basic block are assumed to be
1404 executed all in parallel; if only one stmt is a PHI,
1405 it dominates the other stmt in the same basic block. */
1406 if (gimple_code (s1) == GIMPLE_PHI)
1407 return true;
1408
1409 if (gimple_code (s2) == GIMPLE_PHI)
1410 return false;
1411
1412 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1413
1414 if (gimple_uid (s1) < gimple_uid (s2))
1415 return true;
1416
1417 if (gimple_uid (s1) > gimple_uid (s2))
1418 return false;
1419
1420 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421 unsigned int uid = gimple_uid (s1);
1422 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1423 {
1424 gimple *s = gsi_stmt (gsi);
1425 if (gimple_uid (s) != uid)
1426 break;
1427 if (s == s2)
1428 return true;
1429 }
1430
1431 return false;
1432 }
1433
1434 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1435 }
1436
1437 /* Insert STMT after INSERT_POINT. */
1438
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1441 {
1442 gimple_stmt_iterator gsi;
1443 basic_block bb;
1444
1445 if (gimple_code (insert_point) == GIMPLE_PHI)
1446 bb = gimple_bb (insert_point);
1447 else if (!stmt_ends_bb_p (insert_point))
1448 {
1449 gsi = gsi_for_stmt (insert_point);
1450 gimple_set_uid (stmt, gimple_uid (insert_point));
1451 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452 return;
1453 }
1454 else
1455 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456 thus if it must end a basic block, it should be a call that can
1457 throw, or some assignment that can throw. If it throws, the LHS
1458 of it will not be initialized though, so only valid places using
1459 the SSA_NAME should be dominated by the fallthru edge. */
1460 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1461 gsi = gsi_after_labels (bb);
1462 if (gsi_end_p (gsi))
1463 {
1464 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465 gimple_set_uid (stmt,
1466 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1467 }
1468 else
1469 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1471 }
1472
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474 the result. Places the statement after the definition of either
1475 OP1 or OP2. Returns the new statement. */
1476
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1479 {
1480 gimple *op1def = NULL, *op2def = NULL;
1481 gimple_stmt_iterator gsi;
1482 tree op;
1483 gassign *sum;
1484
1485 /* Create the addition statement. */
1486 op = make_ssa_name (type);
1487 sum = gimple_build_assign (op, opcode, op1, op2);
1488
1489 /* Find an insertion place and insert. */
1490 if (TREE_CODE (op1) == SSA_NAME)
1491 op1def = SSA_NAME_DEF_STMT (op1);
1492 if (TREE_CODE (op2) == SSA_NAME)
1493 op2def = SSA_NAME_DEF_STMT (op2);
1494 if ((!op1def || gimple_nop_p (op1def))
1495 && (!op2def || gimple_nop_p (op2def)))
1496 {
1497 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498 if (!gsi_end_p (gsi)
1499 && is_gimple_call (gsi_stmt (gsi))
1500 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1501 {
1502 /* Don't add statements before a returns_twice call at the start
1503 of a function. */
1504 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1505 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1506 }
1507 if (gsi_end_p (gsi))
1508 {
1509 gimple_stmt_iterator gsi2
1510 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1511 gimple_set_uid (sum,
1512 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1513 }
1514 else
1515 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1516 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1517 }
1518 else
1519 {
1520 gimple *insert_point;
1521 if ((!op1def || gimple_nop_p (op1def))
1522 || (op2def && !gimple_nop_p (op2def)
1523 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1524 insert_point = op2def;
1525 else
1526 insert_point = op1def;
1527 insert_stmt_after (sum, insert_point);
1528 }
1529 update_stmt (sum);
1530
1531 return sum;
1532 }
1533
1534 /* Perform un-distribution of divisions and multiplications.
1535 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1536 to (A + B) / X for real X.
1537
1538 The algorithm is organized as follows.
1539
1540 - First we walk the addition chain *OPS looking for summands that
1541 are defined by a multiplication or a real division. This results
1542 in the candidates bitmap with relevant indices into *OPS.
1543
1544 - Second we build the chains of multiplications or divisions for
1545 these candidates, counting the number of occurrences of (operand, code)
1546 pairs in all of the candidates chains.
1547
1548 - Third we sort the (operand, code) pairs by number of occurrence and
1549 process them starting with the pair with the most uses.
1550
1551 * For each such pair we walk the candidates again to build a
1552 second candidate bitmap noting all multiplication/division chains
1553 that have at least one occurrence of (operand, code).
1554
1555 * We build an alternate addition chain only covering these
1556 candidates with one (operand, code) operation removed from their
1557 multiplication/division chain.
1558
1559 * The first candidate gets replaced by the alternate addition chain
1560 multiplied/divided by the operand.
1561
1562 * All candidate chains get disabled for further processing and
1563 processing of (operand, code) pairs continues.
1564
1565 The alternate addition chains built are re-processed by the main
1566 reassociation algorithm which allows optimizing a * x * y + b * y * x
1567 to (a + b) * x * y in one invocation of the reassociation pass. */
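
/* For illustration (a sketch): given the PLUS_EXPR operand list
   { a*x, b*x, c }, the (x, MULT_EXPR) pair occurs twice, so both
   candidates have one multiplication by x stripped (leaving a and b),
   an alternate chain s = a + b is built, the first candidate becomes
   s * x, the second becomes the constant 0, and the resulting list
   { s*x, 0, c } is then processed by the normal reassociation code.  */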
1568
1569 static bool
1570 undistribute_ops_list (enum tree_code opcode,
1571 vec<operand_entry *> *ops, class loop *loop)
1572 {
1573 unsigned int length = ops->length ();
1574 operand_entry *oe1;
1575 unsigned i, j;
1576 unsigned nr_candidates, nr_candidates2;
1577 sbitmap_iterator sbi0;
1578 vec<operand_entry *> *subops;
1579 bool changed = false;
1580 unsigned int next_oecount_id = 0;
1581
1582 if (length <= 1
1583 || opcode != PLUS_EXPR)
1584 return false;
1585
1586 /* Build a list of candidates to process. */
1587 auto_sbitmap candidates (length);
1588 bitmap_clear (candidates);
1589 nr_candidates = 0;
1590 FOR_EACH_VEC_ELT (*ops, i, oe1)
1591 {
1592 enum tree_code dcode;
1593 gimple *oe1def;
1594
1595 if (TREE_CODE (oe1->op) != SSA_NAME)
1596 continue;
1597 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1598 if (!is_gimple_assign (oe1def))
1599 continue;
1600 dcode = gimple_assign_rhs_code (oe1def);
1601 if ((dcode != MULT_EXPR
1602 && dcode != RDIV_EXPR)
1603 || !is_reassociable_op (oe1def, dcode, loop))
1604 continue;
1605
1606 bitmap_set_bit (candidates, i);
1607 nr_candidates++;
1608 }
1609
1610 if (nr_candidates < 2)
1611 return false;
1612
1613 if (dump_file && (dump_flags & TDF_DETAILS))
1614 {
1615 fprintf (dump_file, "searching for un-distribute opportunities ");
1616 print_generic_expr (dump_file,
1617 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1618 fprintf (dump_file, " %d\n", nr_candidates);
1619 }
1620
1621 /* Build linearized sub-operand lists and the counting table. */
1622 cvec.create (0);
1623
1624 hash_table<oecount_hasher> ctable (15);
1625
1626 /* ??? Macro arguments cannot have multi-argument template types in
1627 them. This typedef is needed to work around that limitation. */
1628 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1629 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1630 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1631 {
1632 gimple *oedef;
1633 enum tree_code oecode;
1634 unsigned j;
1635
1636 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1637 oecode = gimple_assign_rhs_code (oedef);
1638 linearize_expr_tree (&subops[i], oedef,
1639 associative_tree_code (oecode), false);
1640
1641 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1642 {
1643 oecount c;
1644 int *slot;
1645 int idx;
1646 c.oecode = oecode;
1647 c.cnt = 1;
1648 c.id = next_oecount_id++;
1649 c.op = oe1->op;
1650 cvec.safe_push (c);
1651 idx = cvec.length () + 41;
1652 slot = ctable.find_slot (idx, INSERT);
1653 if (!*slot)
1654 {
1655 *slot = idx;
1656 }
1657 else
1658 {
1659 cvec.pop ();
1660 cvec[*slot - 42].cnt++;
1661 }
1662 }
1663 }
1664
1665 /* Sort the counting table. */
1666 cvec.qsort (oecount_cmp);
1667
1668 if (dump_file && (dump_flags & TDF_DETAILS))
1669 {
1670 oecount *c;
1671 fprintf (dump_file, "Candidates:\n");
1672 FOR_EACH_VEC_ELT (cvec, j, c)
1673 {
1674 fprintf (dump_file, " %u %s: ", c->cnt,
1675 c->oecode == MULT_EXPR
1676 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1677 print_generic_expr (dump_file, c->op);
1678 fprintf (dump_file, "\n");
1679 }
1680 }
1681
1682 /* Process the (operand, code) pairs in order of most occurrence. */
1683 auto_sbitmap candidates2 (length);
1684 while (!cvec.is_empty ())
1685 {
1686 oecount *c = &cvec.last ();
1687 if (c->cnt < 2)
1688 break;
1689
1690 /* Now collect the operands in the outer chain that contain
1691 the common operand in their inner chain. */
1692 bitmap_clear (candidates2);
1693 nr_candidates2 = 0;
1694 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1695 {
1696 gimple *oedef;
1697 enum tree_code oecode;
1698 unsigned j;
1699 tree op = (*ops)[i]->op;
1700
1701 /* If we undistributed in this chain already this may be
1702 a constant. */
1703 if (TREE_CODE (op) != SSA_NAME)
1704 continue;
1705
1706 oedef = SSA_NAME_DEF_STMT (op);
1707 oecode = gimple_assign_rhs_code (oedef);
1708 if (oecode != c->oecode)
1709 continue;
1710
1711 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1712 {
1713 if (oe1->op == c->op)
1714 {
1715 bitmap_set_bit (candidates2, i);
1716 ++nr_candidates2;
1717 break;
1718 }
1719 }
1720 }
1721
1722 if (nr_candidates2 >= 2)
1723 {
1724 operand_entry *oe1, *oe2;
1725 gimple *prod;
1726 int first = bitmap_first_set_bit (candidates2);
1727
1728 /* Build the new addition chain. */
1729 oe1 = (*ops)[first];
1730 if (dump_file && (dump_flags & TDF_DETAILS))
1731 {
1732 fprintf (dump_file, "Building (");
1733 print_generic_expr (dump_file, oe1->op);
1734 }
1735 zero_one_operation (&oe1->op, c->oecode, c->op);
1736 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1737 {
1738 gimple *sum;
1739 oe2 = (*ops)[i];
1740 if (dump_file && (dump_flags & TDF_DETAILS))
1741 {
1742 fprintf (dump_file, " + ");
1743 print_generic_expr (dump_file, oe2->op);
1744 }
1745 zero_one_operation (&oe2->op, c->oecode, c->op);
1746 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, oe2->op, opcode);
1748 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1749 oe2->rank = 0;
1750 oe1->op = gimple_get_lhs (sum);
1751 }
1752
1753 /* Apply the multiplication/division. */
1754 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1755 oe1->op, c->op, c->oecode);
1756 if (dump_file && (dump_flags & TDF_DETAILS))
1757 {
1758 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1759 print_generic_expr (dump_file, c->op);
1760 fprintf (dump_file, "\n");
1761 }
1762
1763 /* Record it in the addition chain and disable further
1764 undistribution with this op. */
1765 oe1->op = gimple_assign_lhs (prod);
1766 oe1->rank = get_rank (oe1->op);
1767 subops[first].release ();
1768
1769 changed = true;
1770 }
1771
1772 cvec.pop ();
1773 }
1774
1775 for (i = 0; i < ops->length (); ++i)
1776 subops[i].release ();
1777 free (subops);
1778 cvec.release ();
1779
1780 return changed;
1781 }
1782
1783 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1784 first: element index for each relevant BIT_FIELD_REF.
1785 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1786 typedef std::pair<unsigned, unsigned> v_info_elem;
1787 struct v_info {
1788 tree vec_type;
1789 auto_vec<v_info_elem, 32> vec;
1790 };
1791 typedef v_info *v_info_ptr;
1792
1793 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1794 static int
1795 sort_by_mach_mode (const void *p_i, const void *p_j)
1796 {
1797 const tree tr1 = *((const tree *) p_i);
1798 const tree tr2 = *((const tree *) p_j);
1799 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1800 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1801 if (mode1 > mode2)
1802 return 1;
1803 else if (mode1 < mode2)
1804 return -1;
1805 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1806 return -1;
1807 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1808 return 1;
1809 return 0;
1810 }
1811
1812 /* Cleanup hash map for VECTOR information. */
1813 static void
1814 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1815 {
1816 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1817 it != info_map.end (); ++it)
1818 {
1819 v_info_ptr info = (*it).second;
1820 delete info;
1821 (*it).second = NULL;
1822 }
1823 }
1824
1825 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1826 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1827 is transformed to
1828 Vs = (V1 + V2 + ... + Vn)
1829 Vs[0] + Vs[1] + ... + Vs[k]
1830
1831 The basic steps are listed below:
1832
1833 1) Check the addition chain *OPS by looking for those summands coming from
1834 VECTOR BIT_FIELD_REFs on VECTOR type. Put the information into
1835 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1836
1837 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1838 contiguous and cover the whole VECTOR without any holes.
1839 Obtain one VECTOR list which contains the candidates to be transformed.
1840
1841 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1842 candidates with same mode, build the addition statements for them and
1843 generate BIT_FIELD_REFs accordingly.
1844
1845 TODO:
1846 The current implementation requires the whole VECTORs should be fully
1847 covered, but it can be extended to support partial, checking adjacent
1848 but not fill the whole, it may need some cost model to define the
1849 boundary to do or not.
1850 */
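/* An illustrative sketch of the GIMPLE shape this targets (hypothetical
   SSA names, two V4SF vectors v1 and v2 assumed):

     _1 = BIT_FIELD_REF <v1_3(D), 32, 0>;
     _2 = BIT_FIELD_REF <v1_3(D), 32, 32>;
     ... all eight lanes of v1 and v2 extracted and summed ...

   is rewritten so that a single vector addition

     vs_9 = v1_3(D) + v2_4(D);

   feeds four BIT_FIELD_REFs on vs_9, whose scalar sum replaces the
   original eight-element chain.  */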
1851 static bool
1852 undistribute_bitref_for_vector (enum tree_code opcode,
1853 vec<operand_entry *> *ops, struct loop *loop)
1854 {
1855 if (ops->length () <= 1)
1856 return false;
1857
1858 if (opcode != PLUS_EXPR
1859 && opcode != MULT_EXPR
1860 && opcode != BIT_XOR_EXPR
1861 && opcode != BIT_IOR_EXPR
1862 && opcode != BIT_AND_EXPR)
1863 return false;
1864
1865 hash_map<tree, v_info_ptr> v_info_map;
1866 operand_entry *oe1;
1867 unsigned i;
1868
1869 /* Find the summands in the addition chain that come from a VECTOR
1870 BIT_FIELD_REF and record them in the map. */
1871 FOR_EACH_VEC_ELT (*ops, i, oe1)
1872 {
1873 enum tree_code dcode;
1874 gimple *oe1def;
1875
1876 if (TREE_CODE (oe1->op) != SSA_NAME)
1877 continue;
1878 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1879 if (!is_gimple_assign (oe1def))
1880 continue;
1881 dcode = gimple_assign_rhs_code (oe1def);
1882 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1883 continue;
1884
1885 tree rhs = gimple_assign_rhs1 (oe1def);
1886 tree vec = TREE_OPERAND (rhs, 0);
1887 tree vec_type = TREE_TYPE (vec);
1888
1889 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1890 continue;
1891
1892 /* Ignore it if target machine can't support this VECTOR type. */
1893 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1894 continue;
1895
1896 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1897 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1898 continue;
1899
1900 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1901 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1902 continue;
1903
1904 /* The type of BIT_FIELD_REF might not be equal to the element type of
1905 the vector. We want to use a vector type with element type the
1906 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1907 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1908 {
1909 machine_mode simd_mode;
1910 unsigned HOST_WIDE_INT size, nunits;
1911 unsigned HOST_WIDE_INT elem_size
1912 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1913 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1914 continue;
1915 if (size <= elem_size || (size % elem_size) != 0)
1916 continue;
1917 nunits = size / elem_size;
1918 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1919 nunits).exists (&simd_mode))
1920 continue;
1921 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1922
1923 /* Ignore it if target machine can't support this VECTOR type. */
1924 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1925 continue;
1926
1927 /* Check const vector type, constrain BIT_FIELD_REF offset and
1928 size. */
1929 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1930 continue;
1931
1932 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1933 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1934 continue;
1935 }
1936
1937 tree elem_type = TREE_TYPE (vec_type);
1938 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1939 if (maybe_ne (bit_field_size (rhs), elem_size))
1940 continue;
1941
1942 unsigned idx;
1943 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1944 continue;
1945
1946 /* Ignore it if target machine can't support this type of VECTOR
1947 operation. */
1948 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1949 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1950 continue;
1951
1952 bool existed;
1953 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1954 if (!existed)
1955 {
1956 info = new v_info;
1957 info->vec_type = vec_type;
1958 }
1959 else if (!types_compatible_p (vec_type, info->vec_type))
1960 continue;
1961 info->vec.safe_push (std::make_pair (idx, i));
1962 }
1963
1964 /* At least two VECTORs are needed to combine. */
1965 if (v_info_map.elements () <= 1)
1966 {
1967 cleanup_vinfo_map (v_info_map);
1968 return false;
1969 }
1970
1971 /* Verify all VECTOR candidates by checking two conditions:
1972 1) sorted offsets are adjacent, no holes.
1973 2) can fill the whole VECTOR perfectly.
1974 And add the valid candidates to a vector for further handling. */
1975 auto_vec<tree> valid_vecs (v_info_map.elements ());
1976 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1977 it != v_info_map.end (); ++it)
1978 {
1979 tree cand_vec = (*it).first;
1980 v_info_ptr cand_info = (*it).second;
1981 unsigned int num_elems
1982 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1983 if (cand_info->vec.length () != num_elems)
1984 continue;
1985 sbitmap holes = sbitmap_alloc (num_elems);
1986 bitmap_ones (holes);
1987 bool valid = true;
1988 v_info_elem *curr;
1989 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1990 {
1991 if (!bitmap_bit_p (holes, curr->first))
1992 {
1993 valid = false;
1994 break;
1995 }
1996 else
1997 bitmap_clear_bit (holes, curr->first);
1998 }
1999 if (valid && bitmap_empty_p (holes))
2000 valid_vecs.quick_push (cand_vec);
2001 sbitmap_free (holes);
2002 }
2003
2004 /* At least two VECTORs are needed to combine. */
2005 if (valid_vecs.length () <= 1)
2006 {
2007 cleanup_vinfo_map (v_info_map);
2008 return false;
2009 }
2010
2011 valid_vecs.qsort (sort_by_mach_mode);
2012 /* Go through all candidates in machine mode order; a mode that has
2013 only a single candidate is skipped. */
2014 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2015 {
2016 tree tvec = valid_vecs[i];
2017 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2018
2019 /* Skip modes with only a single candidate. */
2020 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2021 continue;
2022
2023 unsigned int idx, j;
2024 gimple *sum = NULL;
2025 tree sum_vec = tvec;
2026 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2027 v_info_elem *elem;
2028 tree vec_type = info_ptr->vec_type;
2029
2030 /* Build the sum for all candidates with same mode. */
2031 do
2032 {
2033 sum = build_and_add_sum (vec_type, sum_vec,
2034 valid_vecs[i + 1], opcode);
2035 if (!useless_type_conversion_p (vec_type,
2036 TREE_TYPE (valid_vecs[i + 1])))
2037 {
2038 /* Update the operands only after build_and_add_sum,
2039 so that we don't have to repeat the placement algorithm
2040 of build_and_add_sum. */
2041 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2042 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2043 valid_vecs[i + 1]);
2044 tree lhs = make_ssa_name (vec_type);
2045 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2046 gimple_set_uid (g, gimple_uid (sum));
2047 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2048 gimple_assign_set_rhs2 (sum, lhs);
2049 if (sum_vec == tvec)
2050 {
2051 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2052 lhs = make_ssa_name (vec_type);
2053 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2054 gimple_set_uid (g, gimple_uid (sum));
2055 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2056 gimple_assign_set_rhs1 (sum, lhs);
2057 }
2058 update_stmt (sum);
2059 }
2060 sum_vec = gimple_get_lhs (sum);
2061 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2062 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2063 /* Update those related ops of current candidate VECTOR. */
2064 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2065 {
2066 idx = elem->second;
2067 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2068 /* Set this so that the op definition will get DCEd later. */
2069 gimple_set_visited (def, true);
2070 if (opcode == PLUS_EXPR
2071 || opcode == BIT_XOR_EXPR
2072 || opcode == BIT_IOR_EXPR)
2073 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2074 else if (opcode == MULT_EXPR)
2075 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2076 else
2077 {
2078 gcc_assert (opcode == BIT_AND_EXPR);
2079 (*ops)[idx]->op
2080 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2081 }
2082 (*ops)[idx]->rank = 0;
2083 }
2084 if (dump_file && (dump_flags & TDF_DETAILS))
2085 {
2086 fprintf (dump_file, "Generating addition -> ");
2087 print_gimple_stmt (dump_file, sum, 0);
2088 }
2089 i++;
2090 }
2091 while ((i < valid_vecs.length () - 1)
2092 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2093
2094 /* Referring to first valid VECTOR with this mode, generate the
2095 BIT_FIELD_REF statements accordingly. */
2096 info_ptr = *(v_info_map.get (tvec));
2097 gcc_assert (sum);
2098 tree elem_type = TREE_TYPE (vec_type);
2099 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2100 {
2101 idx = elem->second;
2102 tree dst = make_ssa_name (elem_type);
2103 tree pos = bitsize_int (elem->first
2104 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2105 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2106 TYPE_SIZE (elem_type), pos);
2107 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2108 insert_stmt_after (gs, sum);
2109 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2110 /* Set this so that the op definition will get DCEd later. */
2111 gimple_set_visited (def, true);
2112 (*ops)[idx]->op = gimple_assign_lhs (gs);
2113 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2114 if (dump_file && (dump_flags & TDF_DETAILS))
2115 {
2116 fprintf (dump_file, "Generating bit_field_ref -> ");
2117 print_gimple_stmt (dump_file, gs, 0);
2118 }
2119 }
2120 }
2121
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2123 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2124
2125 cleanup_vinfo_map (v_info_map);
2126
2127 return true;
2128 }
2129
2130 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2131 expression, examine the other OPS to see if any of them are comparisons
2132 of the same values, which we may be able to combine or eliminate.
2133 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
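/* The BIT_AND_EXPR case is analogous; e.g. (a <= b) & (a >= b) can be
   combined into (a == b) when maybe_fold_and_comparisons handles it.  */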
2134
2135 static bool
2136 eliminate_redundant_comparison (enum tree_code opcode,
2137 vec<operand_entry *> *ops,
2138 unsigned int currindex,
2139 operand_entry *curr)
2140 {
2141 tree op1, op2;
2142 enum tree_code lcode, rcode;
2143 gimple *def1, *def2;
2144 int i;
2145 operand_entry *oe;
2146
2147 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2148 return false;
2149
2150 /* Check that CURR is a comparison. */
2151 if (TREE_CODE (curr->op) != SSA_NAME)
2152 return false;
2153 def1 = SSA_NAME_DEF_STMT (curr->op);
2154 if (!is_gimple_assign (def1))
2155 return false;
2156 lcode = gimple_assign_rhs_code (def1);
2157 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2158 return false;
2159 op1 = gimple_assign_rhs1 (def1);
2160 op2 = gimple_assign_rhs2 (def1);
2161
2162 /* Now look for a similar comparison in the remaining OPS. */
2163 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2164 {
2165 tree t;
2166
2167 if (TREE_CODE (oe->op) != SSA_NAME)
2168 continue;
2169 def2 = SSA_NAME_DEF_STMT (oe->op);
2170 if (!is_gimple_assign (def2))
2171 continue;
2172 rcode = gimple_assign_rhs_code (def2);
2173 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2174 continue;
2175
2176 /* If we got here, we have a match. See if we can combine the
2177 two comparisons. */
2178 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2179 if (opcode == BIT_IOR_EXPR)
2180 t = maybe_fold_or_comparisons (type,
2181 lcode, op1, op2,
2182 rcode, gimple_assign_rhs1 (def2),
2183 gimple_assign_rhs2 (def2));
2184 else
2185 t = maybe_fold_and_comparisons (type,
2186 lcode, op1, op2,
2187 rcode, gimple_assign_rhs1 (def2),
2188 gimple_assign_rhs2 (def2));
2189 if (!t)
2190 continue;
2191
2192 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2193 always give us a boolean_type_node value back. If the original
2194 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2195 we need to convert. */
2196 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2197 t = fold_convert (TREE_TYPE (curr->op), t);
2198
2199 if (TREE_CODE (t) != INTEGER_CST
2200 && !operand_equal_p (t, curr->op, 0))
2201 {
2202 enum tree_code subcode;
2203 tree newop1, newop2;
2204 if (!COMPARISON_CLASS_P (t))
2205 continue;
2206 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2207 STRIP_USELESS_TYPE_CONVERSION (newop1);
2208 STRIP_USELESS_TYPE_CONVERSION (newop2);
2209 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2210 continue;
2211 }
2212
2213 if (dump_file && (dump_flags & TDF_DETAILS))
2214 {
2215 fprintf (dump_file, "Equivalence: ");
2216 print_generic_expr (dump_file, curr->op);
2217 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2218 print_generic_expr (dump_file, oe->op);
2219 fprintf (dump_file, " -> ");
2220 print_generic_expr (dump_file, t);
2221 fprintf (dump_file, "\n");
2222 }
2223
2224 /* Now we can delete oe, as it has been subsumed by the new combined
2225 expression t. */
2226 ops->ordered_remove (i);
2227 reassociate_stats.ops_eliminated ++;
2228
2229 /* If t is the same as curr->op, we're done. Otherwise we must
2230 replace curr->op with t. Special case is if we got a constant
2231 back, in which case we add it to the end instead of in place of
2232 the current entry. */
2233 if (TREE_CODE (t) == INTEGER_CST)
2234 {
2235 ops->ordered_remove (currindex);
2236 add_to_ops_vec (ops, t);
2237 }
2238 else if (!operand_equal_p (t, curr->op, 0))
2239 {
2240 gimple *sum;
2241 enum tree_code subcode;
2242 tree newop1;
2243 tree newop2;
2244 gcc_assert (COMPARISON_CLASS_P (t));
2245 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2246 STRIP_USELESS_TYPE_CONVERSION (newop1);
2247 STRIP_USELESS_TYPE_CONVERSION (newop2);
2248 gcc_checking_assert (is_gimple_val (newop1)
2249 && is_gimple_val (newop2));
2250 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2251 curr->op = gimple_get_lhs (sum);
2252 }
2253 return true;
2254 }
2255
2256 return false;
2257 }
2258
2259
2260 /* Transform repeated addition of the same value into a multiplication
2261 by a constant. */
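/* For example (illustrative operands), the repeated operands in
   x + x + x + y are replaced by a single operand defined as x * 3,
   leaving the operand list as { x * 3, y }.  For floating-point types
   this is only done with -funsafe-math-optimizations, as checked below.  */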
2262 static bool
2263 transform_add_to_multiply (vec<operand_entry *> *ops)
2264 {
2265 operand_entry *oe;
2266 tree op = NULL_TREE;
2267 int j;
2268 int i, start = -1, end = 0, count = 0;
2269 auto_vec<std::pair <int, int> > indxs;
2270 bool changed = false;
2271
2272 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2273 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2274 || !flag_unsafe_math_optimizations))
2275 return false;
2276
2277 /* Look for repeated operands. */
2278 FOR_EACH_VEC_ELT (*ops, i, oe)
2279 {
2280 if (start == -1)
2281 {
2282 count = 1;
2283 op = oe->op;
2284 start = i;
2285 }
2286 else if (operand_equal_p (oe->op, op, 0))
2287 {
2288 count++;
2289 end = i;
2290 }
2291 else
2292 {
2293 if (count > 1)
2294 indxs.safe_push (std::make_pair (start, end));
2295 count = 1;
2296 op = oe->op;
2297 start = i;
2298 }
2299 }
2300
2301 if (count > 1)
2302 indxs.safe_push (std::make_pair (start, end));
2303
2304 for (j = indxs.length () - 1; j >= 0; --j)
2305 {
2306 /* Convert repeated operand addition to multiplication. */
2307 start = indxs[j].first;
2308 end = indxs[j].second;
2309 op = (*ops)[start]->op;
2310 count = end - start + 1;
2311 for (i = end; i >= start; --i)
2312 ops->unordered_remove (i);
2313 tree tmp = make_ssa_name (TREE_TYPE (op));
2314 tree cst = build_int_cst (integer_type_node, count);
2315 gassign *mul_stmt
2316 = gimple_build_assign (tmp, MULT_EXPR,
2317 op, fold_convert (TREE_TYPE (op), cst));
2318 gimple_set_visited (mul_stmt, true);
2319 add_to_ops_vec (ops, tmp, mul_stmt);
2320 changed = true;
2321 }
2322
2323 return changed;
2324 }
2325
2326
2327 /* Perform various identities and other optimizations on the list of
2328 operand entries, stored in OPS. The tree code for the binary
2329 operation between all the operands is OPCODE. */
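/* For instance, if the operand list of a PLUS_EXPR chain ends in two
   constants such as { a, b, 5, 7 } (illustrative values), the constants
   are folded into a single 12 and the shorter list is re-optimized.  */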
2330
2331 static void
2332 optimize_ops_list (enum tree_code opcode,
2333 vec<operand_entry *> *ops)
2334 {
2335 unsigned int length = ops->length ();
2336 unsigned int i;
2337 operand_entry *oe;
2338 operand_entry *oelast = NULL;
2339 bool iterate = false;
2340
2341 if (length == 1)
2342 return;
2343
2344 oelast = ops->last ();
2345
2346 /* If the last two are constants, pop the constants off, merge them
2347 and try the next two. */
2348 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2349 {
2350 operand_entry *oelm1 = (*ops)[length - 2];
2351
2352 if (oelm1->rank == 0
2353 && is_gimple_min_invariant (oelm1->op)
2354 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2355 TREE_TYPE (oelast->op)))
2356 {
2357 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2358 oelm1->op, oelast->op);
2359
2360 if (folded && is_gimple_min_invariant (folded))
2361 {
2362 if (dump_file && (dump_flags & TDF_DETAILS))
2363 fprintf (dump_file, "Merging constants\n");
2364
2365 ops->pop ();
2366 ops->pop ();
2367
2368 add_to_ops_vec (ops, folded);
2369 reassociate_stats.constants_eliminated++;
2370
2371 optimize_ops_list (opcode, ops);
2372 return;
2373 }
2374 }
2375 }
2376
2377 eliminate_using_constants (opcode, ops);
2378 oelast = NULL;
2379
2380 for (i = 0; ops->iterate (i, &oe);)
2381 {
2382 bool done = false;
2383
2384 if (eliminate_not_pairs (opcode, ops, i, oe))
2385 return;
2386 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2387 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2388 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2389 {
2390 if (done)
2391 return;
2392 iterate = true;
2393 oelast = NULL;
2394 continue;
2395 }
2396 oelast = oe;
2397 i++;
2398 }
2399
2400 if (iterate)
2401 optimize_ops_list (opcode, ops);
2402 }
2403
2404 /* The following functions are subroutines to optimize_range_tests and allow
2405 it to try to change a logical combination of comparisons into a range
2406 test.
2407
2408 For example, both
2409 X == 2 || X == 5 || X == 3 || X == 4
2410 and
2411 X >= 2 && X <= 5
2412 are converted to
2413 (unsigned) (X - 2) <= 3
2414
2415 For more information see comments above fold_range_test in fold-const.c,
2416 this implementation is for GIMPLE. */
2417
2418 struct range_entry
2419 {
2420 tree exp;
2421 tree low;
2422 tree high;
2423 bool in_p;
2424 bool strict_overflow_p;
2425 unsigned int idx, next;
2426 };
2427
2428 /* This is similar to make_range in fold-const.c, but on top of
2429 GIMPLE instead of trees. If EXP is non-NULL, it should be
2430 an SSA_NAME and STMT argument is ignored, otherwise STMT
2431 argument should be a GIMPLE_COND. */
2432
2433 static void
2434 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2435 {
2436 int in_p;
2437 tree low, high;
2438 bool is_bool, strict_overflow_p;
2439
2440 r->exp = NULL_TREE;
2441 r->in_p = false;
2442 r->strict_overflow_p = false;
2443 r->low = NULL_TREE;
2444 r->high = NULL_TREE;
2445 if (exp != NULL_TREE
2446 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2447 return;
2448
2449 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2450 and see if we can refine the range. Some of the cases below may not
2451 happen, but it doesn't seem worth worrying about this. We "continue"
2452 the outer loop when we've changed something; otherwise we "break"
2453 the switch, which will "break" the while. */
2454 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2455 high = low;
2456 in_p = 0;
2457 strict_overflow_p = false;
2458 is_bool = false;
2459 if (exp == NULL_TREE)
2460 is_bool = true;
2461 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2462 {
2463 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2464 is_bool = true;
2465 else
2466 return;
2467 }
2468 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2469 is_bool = true;
2470
2471 while (1)
2472 {
2473 enum tree_code code;
2474 tree arg0, arg1, exp_type;
2475 tree nexp;
2476 location_t loc;
2477
2478 if (exp != NULL_TREE)
2479 {
2480 if (TREE_CODE (exp) != SSA_NAME
2481 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2482 break;
2483
2484 stmt = SSA_NAME_DEF_STMT (exp);
2485 if (!is_gimple_assign (stmt))
2486 break;
2487
2488 code = gimple_assign_rhs_code (stmt);
2489 arg0 = gimple_assign_rhs1 (stmt);
2490 arg1 = gimple_assign_rhs2 (stmt);
2491 exp_type = TREE_TYPE (exp);
2492 }
2493 else
2494 {
2495 code = gimple_cond_code (stmt);
2496 arg0 = gimple_cond_lhs (stmt);
2497 arg1 = gimple_cond_rhs (stmt);
2498 exp_type = boolean_type_node;
2499 }
2500
2501 if (TREE_CODE (arg0) != SSA_NAME
2502 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2503 break;
2504 loc = gimple_location (stmt);
2505 switch (code)
2506 {
2507 case BIT_NOT_EXPR:
2508 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2509 /* Ensure the range is either +[-,0], +[0,0],
2510 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2511 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2512 or similar expression of unconditional true or
2513 false, it should not be negated. */
2514 && ((high && integer_zerop (high))
2515 || (low && integer_onep (low))))
2516 {
2517 in_p = !in_p;
2518 exp = arg0;
2519 continue;
2520 }
2521 break;
2522 case SSA_NAME:
2523 exp = arg0;
2524 continue;
2525 CASE_CONVERT:
2526 if (is_bool)
2527 {
2528 if ((TYPE_PRECISION (exp_type) == 1
2529 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2530 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2531 return;
2532 }
2533 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2534 {
2535 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2536 is_bool = true;
2537 else
2538 return;
2539 }
2540 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2541 is_bool = true;
2542 goto do_default;
2543 case EQ_EXPR:
2544 case NE_EXPR:
2545 case LT_EXPR:
2546 case LE_EXPR:
2547 case GE_EXPR:
2548 case GT_EXPR:
2549 is_bool = true;
2550 /* FALLTHRU */
2551 default:
2552 if (!is_bool)
2553 return;
2554 do_default:
2555 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2556 &low, &high, &in_p,
2557 &strict_overflow_p);
2558 if (nexp != NULL_TREE)
2559 {
2560 exp = nexp;
2561 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2562 continue;
2563 }
2564 break;
2565 }
2566 break;
2567 }
2568 if (is_bool)
2569 {
2570 r->exp = exp;
2571 r->in_p = in_p;
2572 r->low = low;
2573 r->high = high;
2574 r->strict_overflow_p = strict_overflow_p;
2575 }
2576 }
2577
2578 /* Comparison function for qsort. Sort entries
2579 without SSA_NAME exp first, then with SSA_NAMEs sorted
2580 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2581 by increasing ->low and if ->low is the same, by increasing
2582 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2583 maximum. */
2584
2585 static int
2586 range_entry_cmp (const void *a, const void *b)
2587 {
2588 const struct range_entry *p = (const struct range_entry *) a;
2589 const struct range_entry *q = (const struct range_entry *) b;
2590
2591 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2592 {
2593 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2594 {
2595 /* Group range_entries for the same SSA_NAME together. */
2596 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2597 return -1;
2598 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2599 return 1;
2600 /* If ->low is different, NULL low goes first, then by
2601 ascending low. */
2602 if (p->low != NULL_TREE)
2603 {
2604 if (q->low != NULL_TREE)
2605 {
2606 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2607 p->low, q->low);
2608 if (tem && integer_onep (tem))
2609 return -1;
2610 tem = fold_binary (GT_EXPR, boolean_type_node,
2611 p->low, q->low);
2612 if (tem && integer_onep (tem))
2613 return 1;
2614 }
2615 else
2616 return 1;
2617 }
2618 else if (q->low != NULL_TREE)
2619 return -1;
2620 /* If ->high is different, NULL high goes last, before that by
2621 ascending high. */
2622 if (p->high != NULL_TREE)
2623 {
2624 if (q->high != NULL_TREE)
2625 {
2626 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2627 p->high, q->high);
2628 if (tem && integer_onep (tem))
2629 return -1;
2630 tem = fold_binary (GT_EXPR, boolean_type_node,
2631 p->high, q->high);
2632 if (tem && integer_onep (tem))
2633 return 1;
2634 }
2635 else
2636 return -1;
2637 }
2638 else if (q->high != NULL_TREE)
2639 return 1;
2640 /* If both ranges are the same, sort below by ascending idx. */
2641 }
2642 else
2643 return 1;
2644 }
2645 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2646 return -1;
2647
2648 if (p->idx < q->idx)
2649 return -1;
2650 else
2651 {
2652 gcc_checking_assert (p->idx > q->idx);
2653 return 1;
2654 }
2655 }
2656
2657 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2658 insert any needed statements before or after GSI, per BEFORE. */
2659
2660 static tree
2661 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2662 {
2663 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2664 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2665 if (TREE_CODE (ret) != SSA_NAME)
2666 {
2667 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2668 if (before)
2669 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2670 else
2671 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2672 ret = gimple_assign_lhs (g);
2673 }
2674 return ret;
2675 }
2676
2677 /* Helper routine of optimize_range_tests.
2678 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2679 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2680 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2681 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2682 an array of COUNT pointers to other ranges. Return
2683 true if the range merge has been successful.
2684 If OPCODE is ERROR_MARK, this is called from within
2685 maybe_optimize_range_tests and is performing inter-bb range optimization.
2686 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2687 oe->rank. */
2688
2689 static bool
2690 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2691 struct range_entry **otherrangep,
2692 unsigned int count, enum tree_code opcode,
2693 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2694 bool in_p, tree low, tree high, bool strict_overflow_p)
2695 {
2696 operand_entry *oe = (*ops)[range->idx];
2697 tree op = oe->op;
2698 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2699 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2700 location_t loc = gimple_location (stmt);
2701 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2702 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2703 in_p, low, high);
2704 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2705 gimple_stmt_iterator gsi;
2706 unsigned int i, uid;
2707
2708 if (tem == NULL_TREE)
2709 return false;
2710
2711 /* If op is default def SSA_NAME, there is no place to insert the
2712 new comparison. Give up, unless we can use OP itself as the
2713 range test. */
2714 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2715 {
2716 if (op == range->exp
2717 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2718 || TREE_CODE (optype) == BOOLEAN_TYPE)
2719 && (op == tem
2720 || (TREE_CODE (tem) == EQ_EXPR
2721 && TREE_OPERAND (tem, 0) == op
2722 && integer_onep (TREE_OPERAND (tem, 1))))
2723 && opcode != BIT_IOR_EXPR
2724 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2725 {
2726 stmt = NULL;
2727 tem = op;
2728 }
2729 else
2730 return false;
2731 }
2732
2733 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2734 warning_at (loc, OPT_Wstrict_overflow,
2735 "assuming signed overflow does not occur "
2736 "when simplifying range test");
2737
2738 if (dump_file && (dump_flags & TDF_DETAILS))
2739 {
2740 struct range_entry *r;
2741 fprintf (dump_file, "Optimizing range tests ");
2742 print_generic_expr (dump_file, range->exp);
2743 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2744 print_generic_expr (dump_file, range->low);
2745 fprintf (dump_file, ", ");
2746 print_generic_expr (dump_file, range->high);
2747 fprintf (dump_file, "]");
2748 for (i = 0; i < count; i++)
2749 {
2750 if (otherrange)
2751 r = otherrange + i;
2752 else
2753 r = otherrangep[i];
2754 if (r->exp
2755 && r->exp != range->exp
2756 && TREE_CODE (r->exp) == SSA_NAME)
2757 {
2758 fprintf (dump_file, " and ");
2759 print_generic_expr (dump_file, r->exp);
2760 }
2761 else
2762 fprintf (dump_file, " and");
2763 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2764 print_generic_expr (dump_file, r->low);
2765 fprintf (dump_file, ", ");
2766 print_generic_expr (dump_file, r->high);
2767 fprintf (dump_file, "]");
2768 }
2769 fprintf (dump_file, "\n into ");
2770 print_generic_expr (dump_file, tem);
2771 fprintf (dump_file, "\n");
2772 }
2773
2774 if (opcode == BIT_IOR_EXPR
2775 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2776 tem = invert_truthvalue_loc (loc, tem);
2777
2778 tem = fold_convert_loc (loc, optype, tem);
2779 if (stmt)
2780 {
2781 gsi = gsi_for_stmt (stmt);
2782 uid = gimple_uid (stmt);
2783 }
2784 else
2785 {
2786 gsi = gsi_none ();
2787 uid = 0;
2788 }
2789 if (stmt == NULL)
2790 gcc_checking_assert (tem == op);
2791 /* In rare cases range->exp can be equal to lhs of stmt.
2792 In that case we have to insert after the stmt rather than before
2793 it. If stmt is a PHI, insert it at the start of the basic block. */
2794 else if (op != range->exp)
2795 {
2796 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2797 tem = force_into_ssa_name (&gsi, tem, true);
2798 gsi_prev (&gsi);
2799 }
2800 else if (gimple_code (stmt) != GIMPLE_PHI)
2801 {
2802 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2803 tem = force_into_ssa_name (&gsi, tem, false);
2804 }
2805 else
2806 {
2807 gsi = gsi_after_labels (gimple_bb (stmt));
2808 if (!gsi_end_p (gsi))
2809 uid = gimple_uid (gsi_stmt (gsi));
2810 else
2811 {
2812 gsi = gsi_start_bb (gimple_bb (stmt));
2813 uid = 1;
2814 while (!gsi_end_p (gsi))
2815 {
2816 uid = gimple_uid (gsi_stmt (gsi));
2817 gsi_next (&gsi);
2818 }
2819 }
2820 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2821 tem = force_into_ssa_name (&gsi, tem, true);
2822 if (gsi_end_p (gsi))
2823 gsi = gsi_last_bb (gimple_bb (stmt));
2824 else
2825 gsi_prev (&gsi);
2826 }
2827 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2828 if (gimple_uid (gsi_stmt (gsi)))
2829 break;
2830 else
2831 gimple_set_uid (gsi_stmt (gsi), uid);
2832
2833 oe->op = tem;
2834 range->exp = exp;
2835 range->low = low;
2836 range->high = high;
2837 range->in_p = in_p;
2838 range->strict_overflow_p = false;
2839
2840 for (i = 0; i < count; i++)
2841 {
2842 if (otherrange)
2843 range = otherrange + i;
2844 else
2845 range = otherrangep[i];
2846 oe = (*ops)[range->idx];
2847 /* Now change all the other range test immediate uses, so that
2848 those tests will be optimized away. */
2849 if (opcode == ERROR_MARK)
2850 {
2851 if (oe->op)
2852 oe->op = build_int_cst (TREE_TYPE (oe->op),
2853 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2854 else
2855 oe->op = (oe->rank == BIT_IOR_EXPR
2856 ? boolean_false_node : boolean_true_node);
2857 }
2858 else
2859 oe->op = error_mark_node;
2860 range->exp = NULL_TREE;
2861 range->low = NULL_TREE;
2862 range->high = NULL_TREE;
2863 }
2864 return true;
2865 }
2866
2867 /* Optimize X == CST1 || X == CST2
2868 if popcount (CST1 ^ CST2) == 1 into
2869 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2870 Similarly for ranges. E.g.
2871 X != 2 && X != 3 && X != 10 && X != 11
2872 will be transformed by the previous optimization into
2873 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2874 and this loop can transform that into
2875 !(((X & ~8) - 2U) <= 1U). */
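/* A worked instance (illustrative constants): for X == 43 || X == 47
   we have 43 ^ 47 == 4, a power of two, so the pair becomes
   (X & ~4) == 43, since 43 & ~4 == 43.  */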
2876
2877 static bool
2878 optimize_range_tests_xor (enum tree_code opcode, tree type,
2879 tree lowi, tree lowj, tree highi, tree highj,
2880 vec<operand_entry *> *ops,
2881 struct range_entry *rangei,
2882 struct range_entry *rangej)
2883 {
2884 tree lowxor, highxor, tem, exp;
2885 /* Check lowi ^ lowj == highi ^ highj and
2886 popcount (lowi ^ lowj) == 1. */
2887 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2888 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2889 return false;
2890 if (!integer_pow2p (lowxor))
2891 return false;
2892 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2893 if (!tree_int_cst_equal (lowxor, highxor))
2894 return false;
2895
2896 exp = rangei->exp;
2897 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2898 int prec = GET_MODE_PRECISION (mode);
2899 if (TYPE_PRECISION (type) < prec
2900 || (wi::to_wide (TYPE_MIN_VALUE (type))
2901 != wi::min_value (prec, TYPE_SIGN (type)))
2902 || (wi::to_wide (TYPE_MAX_VALUE (type))
2903 != wi::max_value (prec, TYPE_SIGN (type))))
2904 {
2905 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2906 exp = fold_convert (type, exp);
2907 lowxor = fold_convert (type, lowxor);
2908 lowi = fold_convert (type, lowi);
2909 highi = fold_convert (type, highi);
2910 }
2911 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2912 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2913 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2914 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2915 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2916 NULL, rangei->in_p, lowj, highj,
2917 rangei->strict_overflow_p
2918 || rangej->strict_overflow_p))
2919 return true;
2920 return false;
2921 }
2922
2923 /* Optimize X == CST1 || X == CST2
2924 if popcount (CST2 - CST1) == 1 into
2925 ((X - CST1) & ~(CST2 - CST1)) == 0.
2926 Similarly for ranges. E.g.
2927 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2928 || X == 75 || X == 45
2929 will be transformed by the previous optimization into
2930 (X - 43U) <= 3U || (X - 75U) <= 3U
2931 and this loop can transform that into
2932 ((X - 43U) & ~(75U - 43U)) <= 3U. */
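/* A worked instance (illustrative constants): for X == 43 || X == 75
   we have 75 - 43 == 32, a power of two, so the pair becomes
   ((X - 43U) & ~32U) == 0.  */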
2933 static bool
2934 optimize_range_tests_diff (enum tree_code opcode, tree type,
2935 tree lowi, tree lowj, tree highi, tree highj,
2936 vec<operand_entry *> *ops,
2937 struct range_entry *rangei,
2938 struct range_entry *rangej)
2939 {
2940 tree tem1, tem2, mask;
2941 /* Check highi - lowi == highj - lowj. */
2942 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2943 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2944 return false;
2945 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2946 if (!tree_int_cst_equal (tem1, tem2))
2947 return false;
2948 /* Check popcount (lowj - lowi) == 1. */
2949 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2950 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2951 return false;
2952 if (!integer_pow2p (tem1))
2953 return false;
2954
2955 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2956 int prec = GET_MODE_PRECISION (mode);
2957 if (TYPE_PRECISION (type) < prec
2958 || (wi::to_wide (TYPE_MIN_VALUE (type))
2959 != wi::min_value (prec, TYPE_SIGN (type)))
2960 || (wi::to_wide (TYPE_MAX_VALUE (type))
2961 != wi::max_value (prec, TYPE_SIGN (type))))
2962 type = build_nonstandard_integer_type (prec, 1);
2963 else
2964 type = unsigned_type_for (type);
2965 tem1 = fold_convert (type, tem1);
2966 tem2 = fold_convert (type, tem2);
2967 lowi = fold_convert (type, lowi);
2968 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2969 tem1 = fold_build2 (MINUS_EXPR, type,
2970 fold_convert (type, rangei->exp), lowi);
2971 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2972 lowj = build_int_cst (type, 0);
2973 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2974 NULL, rangei->in_p, lowj, tem2,
2975 rangei->strict_overflow_p
2976 || rangej->strict_overflow_p))
2977 return true;
2978 return false;
2979 }
2980
2981 /* Perform the checks common to optimize_range_tests_xor and
2982 optimize_range_tests_diff.
2983 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2984 otherwise call optimize_range_tests_diff. */
2985
2986 static bool
2987 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2988 bool optimize_xor, vec<operand_entry *> *ops,
2989 struct range_entry *ranges)
2990 {
2991 int i, j;
2992 bool any_changes = false;
2993 for (i = first; i < length; i++)
2994 {
2995 tree lowi, highi, lowj, highj, type, tem;
2996
2997 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2998 continue;
2999 type = TREE_TYPE (ranges[i].exp);
3000 if (!INTEGRAL_TYPE_P (type))
3001 continue;
3002 lowi = ranges[i].low;
3003 if (lowi == NULL_TREE)
3004 lowi = TYPE_MIN_VALUE (type);
3005 highi = ranges[i].high;
3006 if (highi == NULL_TREE)
3007 continue;
3008 for (j = i + 1; j < length && j < i + 64; j++)
3009 {
3010 bool changes;
3011 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3012 continue;
3013 lowj = ranges[j].low;
3014 if (lowj == NULL_TREE)
3015 continue;
3016 highj = ranges[j].high;
3017 if (highj == NULL_TREE)
3018 highj = TYPE_MAX_VALUE (type);
3019 /* Check lowj > highi. */
3020 tem = fold_binary (GT_EXPR, boolean_type_node,
3021 lowj, highi);
3022 if (tem == NULL_TREE || !integer_onep (tem))
3023 continue;
3024 if (optimize_xor)
3025 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3026 highi, highj, ops,
3027 ranges + i, ranges + j);
3028 else
3029 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3030 highi, highj, ops,
3031 ranges + i, ranges + j);
3032 if (changes)
3033 {
3034 any_changes = true;
3035 break;
3036 }
3037 }
3038 }
3039 return any_changes;
3040 }
3041
3042 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3043 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3044 EXP on success, NULL otherwise. */
3045
3046 static tree
3047 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3048 wide_int *mask, tree *totallowp)
3049 {
3050 tree tem = int_const_binop (MINUS_EXPR, high, low);
3051 if (tem == NULL_TREE
3052 || TREE_CODE (tem) != INTEGER_CST
3053 || TREE_OVERFLOW (tem)
3054 || tree_int_cst_sgn (tem) == -1
3055 || compare_tree_int (tem, prec) != -1)
3056 return NULL_TREE;
3057
3058 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3059 *mask = wi::shifted_mask (0, max, false, prec);
3060 if (TREE_CODE (exp) == BIT_AND_EXPR
3061 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3062 {
3063 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3064 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3065 if (wi::popcount (msk) == 1
3066 && wi::ltu_p (msk, prec - max))
3067 {
3068 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3069 max += msk.to_uhwi ();
3070 exp = TREE_OPERAND (exp, 0);
3071 if (integer_zerop (low)
3072 && TREE_CODE (exp) == PLUS_EXPR
3073 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3074 {
3075 tree ret = TREE_OPERAND (exp, 0);
3076 STRIP_NOPS (ret);
3077 widest_int bias
3078 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3079 TYPE_PRECISION (TREE_TYPE (low))));
3080 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3081 if (totallowp)
3082 {
3083 *totallowp = tbias;
3084 return ret;
3085 }
3086 else if (!tree_int_cst_lt (totallow, tbias))
3087 return NULL_TREE;
3088 bias = wi::to_widest (tbias);
3089 bias -= wi::to_widest (totallow);
3090 if (bias >= 0 && bias < prec - max)
3091 {
3092 *mask = wi::lshift (*mask, bias);
3093 return ret;
3094 }
3095 }
3096 }
3097 }
3098 if (totallowp)
3099 return exp;
3100 if (!tree_int_cst_lt (totallow, low))
3101 return exp;
3102 tem = int_const_binop (MINUS_EXPR, low, totallow);
3103 if (tem == NULL_TREE
3104 || TREE_CODE (tem) != INTEGER_CST
3105 || TREE_OVERFLOW (tem)
3106 || compare_tree_int (tem, prec - max) == 1)
3107 return NULL_TREE;
3108
3109 *mask = wi::lshift (*mask, wi::to_widest (tem));
3110 return exp;
3111 }
3112
3113 /* Attempt to optimize small range tests using bit test.
3114 E.g.
3115 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3116 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3117 has been optimized by the earlier range test optimizations into:
3118 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3119 As all the 43 through 82 range is less than 64 numbers,
3120 for 64-bit word targets optimize that into:
3121 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
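/* In that example the constant 0x8F0000004FULL simply collects the bit
   positions (value - 43) of the excluded values: 43..46 give bits 0-3
   (0xF), 49 gives bit 6 (0x40), 75..78 give bits 32-35 and 82 gives
   bit 39 (together 0x8F00000000).  */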
3122
3123 static bool
3124 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3125 vec<operand_entry *> *ops,
3126 struct range_entry *ranges)
3127 {
3128 int i, j;
3129 bool any_changes = false;
3130 int prec = GET_MODE_BITSIZE (word_mode);
3131 auto_vec<struct range_entry *, 64> candidates;
3132
3133 for (i = first; i < length - 2; i++)
3134 {
3135 tree lowi, highi, lowj, highj, type;
3136
3137 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3138 continue;
3139 type = TREE_TYPE (ranges[i].exp);
3140 if (!INTEGRAL_TYPE_P (type))
3141 continue;
3142 lowi = ranges[i].low;
3143 if (lowi == NULL_TREE)
3144 lowi = TYPE_MIN_VALUE (type);
3145 highi = ranges[i].high;
3146 if (highi == NULL_TREE)
3147 continue;
3148 wide_int mask;
3149 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3150 highi, &mask, &lowi);
3151 if (exp == NULL_TREE)
3152 continue;
3153 bool strict_overflow_p = ranges[i].strict_overflow_p;
3154 candidates.truncate (0);
3155 int end = MIN (i + 64, length);
3156 for (j = i + 1; j < end; j++)
3157 {
3158 tree exp2;
3159 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3160 continue;
3161 if (ranges[j].exp == exp)
3162 ;
3163 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3164 {
3165 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3166 if (exp2 == exp)
3167 ;
3168 else if (TREE_CODE (exp2) == PLUS_EXPR)
3169 {
3170 exp2 = TREE_OPERAND (exp2, 0);
3171 STRIP_NOPS (exp2);
3172 if (exp2 != exp)
3173 continue;
3174 }
3175 else
3176 continue;
3177 }
3178 else
3179 continue;
3180 lowj = ranges[j].low;
3181 if (lowj == NULL_TREE)
3182 continue;
3183 highj = ranges[j].high;
3184 if (highj == NULL_TREE)
3185 highj = TYPE_MAX_VALUE (type);
3186 wide_int mask2;
3187 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3188 highj, &mask2, NULL);
3189 if (exp2 != exp)
3190 continue;
3191 mask |= mask2;
3192 strict_overflow_p |= ranges[j].strict_overflow_p;
3193 candidates.safe_push (&ranges[j]);
3194 }
3195
3196 /* If we need otherwise 3 or more comparisons, use a bit test. */
3197 if (candidates.length () >= 2)
3198 {
3199 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3200 wi::to_widest (lowi)
3201 + prec - 1 - wi::clz (mask));
3202 operand_entry *oe = (*ops)[ranges[i].idx];
3203 tree op = oe->op;
3204 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3205 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3206 location_t loc = gimple_location (stmt);
3207 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3208
3209 /* See if it isn't cheaper to pretend the minimum value of the
3210 range is 0, if the maximum value is small enough.
3211 We can then avoid the subtraction of the minimum value, but the
3212 mask constant might be more expensive. */
3213 if (compare_tree_int (lowi, 0) > 0
3214 && compare_tree_int (high, prec) < 0)
3215 {
3216 int cost_diff;
3217 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3218 rtx reg = gen_raw_REG (word_mode, 10000);
3219 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3220 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3221 GEN_INT (-m)), speed_p);
3222 rtx r = immed_wide_int_const (mask, word_mode);
3223 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3224 word_mode, speed_p);
3225 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3226 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3227 word_mode, speed_p);
3228 if (cost_diff > 0)
3229 {
3230 mask = wi::lshift (mask, m);
3231 lowi = build_zero_cst (TREE_TYPE (lowi));
3232 }
3233 }
3234
3235 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3236 false, lowi, high);
3237 if (tem == NULL_TREE || is_gimple_val (tem))
3238 continue;
3239 tree etype = unsigned_type_for (TREE_TYPE (exp));
3240 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3241 fold_convert_loc (loc, etype, exp),
3242 fold_convert_loc (loc, etype, lowi));
3243 exp = fold_convert_loc (loc, integer_type_node, exp);
3244 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3245 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3246 build_int_cst (word_type, 1), exp);
3247 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3248 wide_int_to_tree (word_type, mask));
3249 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3250 build_zero_cst (word_type));
3251 if (is_gimple_val (exp))
3252 continue;
3253
3254 /* The shift might have undefined behavior if TEM is true,
3255 but reassociate_bb isn't prepared to have basic blocks
3256 split when it is running. So, temporarily emit code
3257 with BIT_IOR_EXPR instead of &&, and fix it up in
3258 branch_fixup. */
3259 gimple_seq seq;
3260 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3261 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3262 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3263 gimple_seq seq2;
3264 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3265 gimple_seq_add_seq_without_update (&seq, seq2);
3266 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3267 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3268 gimple *g = gimple_build_assign (make_ssa_name (optype),
3269 BIT_IOR_EXPR, tem, exp);
3270 gimple_set_location (g, loc);
3271 gimple_seq_add_stmt_without_update (&seq, g);
3272 exp = gimple_assign_lhs (g);
3273 tree val = build_zero_cst (optype);
3274 if (update_range_test (&ranges[i], NULL, candidates.address (),
3275 candidates.length (), opcode, ops, exp,
3276 seq, false, val, val, strict_overflow_p))
3277 {
3278 any_changes = true;
3279 reassoc_branch_fixups.safe_push (tem);
3280 }
3281 else
3282 gimple_seq_discard (seq);
3283 }
3284 }
3285 return any_changes;
3286 }
3287
3288 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3289 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. */
3290
3291 static bool
3292 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3293 vec<operand_entry *> *ops,
3294 struct range_entry *ranges)
3295 {
3296 int i;
3297 unsigned int b;
3298 bool any_changes = false;
3299 auto_vec<int, 128> buckets;
3300 auto_vec<int, 32> chains;
3301 auto_vec<struct range_entry *, 32> candidates;
3302
3303 for (i = first; i < length; i++)
3304 {
3305 if (ranges[i].exp == NULL_TREE
3306 || TREE_CODE (ranges[i].exp) != SSA_NAME
3307 || !ranges[i].in_p
3308 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3309 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3310 || ranges[i].low == NULL_TREE
3311 || ranges[i].low != ranges[i].high)
3312 continue;
3313
3314 bool zero_p = integer_zerop (ranges[i].low);
3315 if (!zero_p && !integer_all_onesp (ranges[i].low))
3316 continue;
3317
3318 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3319 if (buckets.length () <= b)
3320 buckets.safe_grow_cleared (b + 1);
3321 if (chains.length () <= (unsigned) i)
3322 chains.safe_grow (i + 1);
3323 chains[i] = buckets[b];
3324 buckets[b] = i + 1;
3325 }
3326
3327 FOR_EACH_VEC_ELT (buckets, b, i)
3328 if (i && chains[i - 1])
3329 {
3330 int j, k = i;
3331 for (j = chains[i - 1]; j; j = chains[j - 1])
3332 {
3333 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3334 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3335 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3336 k = j;
3337 }
3338 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3339 tree type2 = NULL_TREE;
3340 bool strict_overflow_p = false;
3341 candidates.truncate (0);
3342 for (j = i; j; j = chains[j - 1])
3343 {
3344 tree type = TREE_TYPE (ranges[j - 1].exp);
3345 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3346 if (j == k
3347 || useless_type_conversion_p (type1, type))
3348 ;
3349 else if (type2 == NULL_TREE
3350 || useless_type_conversion_p (type2, type))
3351 {
3352 if (type2 == NULL_TREE)
3353 type2 = type;
3354 candidates.safe_push (&ranges[j - 1]);
3355 }
3356 }
3357 unsigned l = candidates.length ();
3358 for (j = i; j; j = chains[j - 1])
3359 {
3360 tree type = TREE_TYPE (ranges[j - 1].exp);
3361 if (j == k)
3362 continue;
3363 if (useless_type_conversion_p (type1, type))
3364 ;
3365 else if (type2 == NULL_TREE
3366 || useless_type_conversion_p (type2, type))
3367 continue;
3368 candidates.safe_push (&ranges[j - 1]);
3369 }
3370 gimple_seq seq = NULL;
3371 tree op = NULL_TREE;
3372 unsigned int id;
3373 struct range_entry *r;
3374 candidates.safe_push (&ranges[k - 1]);
3375 FOR_EACH_VEC_ELT (candidates, id, r)
3376 {
3377 gimple *g;
3378 if (id == 0)
3379 {
3380 op = r->exp;
3381 continue;
3382 }
3383 if (id == l)
3384 {
3385 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3386 gimple_seq_add_stmt_without_update (&seq, g);
3387 op = gimple_assign_lhs (g);
3388 }
3389 tree type = TREE_TYPE (r->exp);
3390 tree exp = r->exp;
3391 if (id >= l && !useless_type_conversion_p (type1, type))
3392 {
3393 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3394 gimple_seq_add_stmt_without_update (&seq, g);
3395 exp = gimple_assign_lhs (g);
3396 }
3397 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3398 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3399 op, exp);
3400 gimple_seq_add_stmt_without_update (&seq, g);
3401 op = gimple_assign_lhs (g);
3402 }
3403 candidates.pop ();
3404 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3405 candidates.length (), opcode, ops, op,
3406 seq, true, ranges[k - 1].low,
3407 ranges[k - 1].low, strict_overflow_p))
3408 any_changes = true;
3409 else
3410 gimple_seq_discard (seq);
3411 }
3412
3413 return any_changes;
3414 }
3415
3416 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3417 a >= 0 && a < b into (unsigned) a < (unsigned) b
3418 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
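/* This is valid because when a is negative, (unsigned) a is at least
   2^(prec-1), which is larger than any value (unsigned) b can take for
   a nonnegative b, so the single unsigned comparison is false exactly
   when the original conjunction is.  */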
3419
3420 static bool
3421 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3422 vec<operand_entry *> *ops,
3423 struct range_entry *ranges,
3424 basic_block first_bb)
3425 {
3426 int i;
3427 bool any_changes = false;
3428 hash_map<tree, int> *map = NULL;
3429
3430 for (i = first; i < length; i++)
3431 {
3432 if (ranges[i].exp == NULL_TREE
3433 || TREE_CODE (ranges[i].exp) != SSA_NAME
3434 || !ranges[i].in_p)
3435 continue;
3436
3437 tree type = TREE_TYPE (ranges[i].exp);
3438 if (!INTEGRAL_TYPE_P (type)
3439 || TYPE_UNSIGNED (type)
3440 || ranges[i].low == NULL_TREE
3441 || !integer_zerop (ranges[i].low)
3442 || ranges[i].high != NULL_TREE)
3443 continue;
3444 /* EXP >= 0 here. */
3445 if (map == NULL)
3446 map = new hash_map <tree, int>;
3447 map->put (ranges[i].exp, i);
3448 }
3449
3450 if (map == NULL)
3451 return false;
3452
3453 for (i = 0; i < length; i++)
3454 {
3455 bool in_p = ranges[i].in_p;
3456 if (ranges[i].low == NULL_TREE
3457 || ranges[i].high == NULL_TREE)
3458 continue;
3459 if (!integer_zerop (ranges[i].low)
3460 || !integer_zerop (ranges[i].high))
3461 {
3462 if (ranges[i].exp
3463 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3464 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3465 && integer_onep (ranges[i].low)
3466 && integer_onep (ranges[i].high))
3467 in_p = !in_p;
3468 else
3469 continue;
3470 }
3471
3472 gimple *stmt;
3473 tree_code ccode;
3474 tree rhs1, rhs2;
3475 if (ranges[i].exp)
3476 {
3477 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3478 continue;
3479 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3480 if (!is_gimple_assign (stmt))
3481 continue;
3482 ccode = gimple_assign_rhs_code (stmt);
3483 rhs1 = gimple_assign_rhs1 (stmt);
3484 rhs2 = gimple_assign_rhs2 (stmt);
3485 }
3486 else
3487 {
3488 operand_entry *oe = (*ops)[ranges[i].idx];
3489 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3490 if (gimple_code (stmt) != GIMPLE_COND)
3491 continue;
3492 ccode = gimple_cond_code (stmt);
3493 rhs1 = gimple_cond_lhs (stmt);
3494 rhs2 = gimple_cond_rhs (stmt);
3495 }
3496
3497 if (TREE_CODE (rhs1) != SSA_NAME
3498 || rhs2 == NULL_TREE
3499 || TREE_CODE (rhs2) != SSA_NAME)
3500 continue;
3501
3502 switch (ccode)
3503 {
3504 case GT_EXPR:
3505 case GE_EXPR:
3506 case LT_EXPR:
3507 case LE_EXPR:
3508 break;
3509 default:
3510 continue;
3511 }
3512 if (in_p)
3513 ccode = invert_tree_comparison (ccode, false);
3514 switch (ccode)
3515 {
3516 case GT_EXPR:
3517 case GE_EXPR:
3518 std::swap (rhs1, rhs2);
3519 ccode = swap_tree_comparison (ccode);
3520 break;
3521 case LT_EXPR:
3522 case LE_EXPR:
3523 break;
3524 default:
3525 gcc_unreachable ();
3526 }
3527
3528 int *idx = map->get (rhs1);
3529 if (idx == NULL)
3530 continue;
3531
3532 /* maybe_optimize_range_tests allows statements without side-effects
3533 in the basic blocks as long as they are consumed in the same bb.
3534 Make sure rhs2's def stmt is not among them, otherwise we can't
3535 safely use get_nonzero_bits on it. E.g. in:
3536 # RANGE [-83, 1] NONZERO 173
3537 # k_32 = PHI <k_47(13), k_12(9)>
3538 ...
3539 if (k_32 >= 0)
3540 goto <bb 5>; [26.46%]
3541 else
3542 goto <bb 9>; [73.54%]
3543
3544 <bb 5> [local count: 140323371]:
3545 # RANGE [0, 1] NONZERO 1
3546 _5 = (int) k_32;
3547 # RANGE [0, 4] NONZERO 4
3548 _21 = _5 << 2;
3549 # RANGE [0, 4] NONZERO 4
3550 iftmp.0_44 = (char) _21;
3551 if (k_32 < iftmp.0_44)
3552 goto <bb 6>; [84.48%]
3553 else
3554 goto <bb 9>; [15.52%]
3555 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3556 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3557 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3558 those stmts even for negative k_32 and the value ranges would be no
3559 longer guaranteed and so the optimization would be invalid. */
3560 while (opcode == ERROR_MARK)
3561 {
3562 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3563 basic_block bb2 = gimple_bb (g);
3564 if (bb2
3565 && bb2 != first_bb
3566 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3567 {
3568 /* As an exception, handle a few common cases. */
3569 if (gimple_assign_cast_p (g)
3570 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3571 {
3572 tree op0 = gimple_assign_rhs1 (g);
3573 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3574 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3575 > TYPE_PRECISION (TREE_TYPE (op0))))
3576 /* Zero-extension is always ok. */
3577 break;
3578 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3579 == TYPE_PRECISION (TREE_TYPE (op0))
3580 && TREE_CODE (op0) == SSA_NAME)
3581 {
3582 /* Cast from signed to unsigned or vice versa. Retry
3583 with the op0 as new rhs2. */
3584 rhs2 = op0;
3585 continue;
3586 }
3587 }
3588 else if (is_gimple_assign (g)
3589 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3590 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3591 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3592 /* Masking with INTEGER_CST with MSB clear is always ok
3593 too. */
3594 break;
3595 rhs2 = NULL_TREE;
3596 }
3597 break;
3598 }
3599 if (rhs2 == NULL_TREE)
3600 continue;
3601
3602 wide_int nz = get_nonzero_bits (rhs2);
3603 if (wi::neg_p (nz))
3604 continue;
3605
3606 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3607 and RHS2 is known to be RHS2 >= 0. */
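/* For illustration only (a sketch, SSA names hypothetical): if EXP is
k_32, known to be >= 0 from an earlier range test, and RHS2 is n_7
with a clear sign bit, then
k_32 < n_7
can be rewritten as
(unsigned int) k_32 < (unsigned int) n_7
which lets the separate k_32 >= 0 test be dropped below. */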
3608 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3609
3610 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3611 if ((ranges[*idx].strict_overflow_p
3612 || ranges[i].strict_overflow_p)
3613 && issue_strict_overflow_warning (wc))
3614 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3615 "assuming signed overflow does not occur "
3616 "when simplifying range test");
3617
3618 if (dump_file && (dump_flags & TDF_DETAILS))
3619 {
3620 struct range_entry *r = &ranges[*idx];
3621 fprintf (dump_file, "Optimizing range test ");
3622 print_generic_expr (dump_file, r->exp);
3623 fprintf (dump_file, " +[");
3624 print_generic_expr (dump_file, r->low);
3625 fprintf (dump_file, ", ");
3626 print_generic_expr (dump_file, r->high);
3627 fprintf (dump_file, "] and comparison ");
3628 print_generic_expr (dump_file, rhs1);
3629 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3630 print_generic_expr (dump_file, rhs2);
3631 fprintf (dump_file, "\n into (");
3632 print_generic_expr (dump_file, utype);
3633 fprintf (dump_file, ") ");
3634 print_generic_expr (dump_file, rhs1);
3635 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3636 print_generic_expr (dump_file, utype);
3637 fprintf (dump_file, ") ");
3638 print_generic_expr (dump_file, rhs2);
3639 fprintf (dump_file, "\n");
3640 }
3641
3642 operand_entry *oe = (*ops)[ranges[i].idx];
3643 ranges[i].in_p = 0;
3644 if (opcode == BIT_IOR_EXPR
3645 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3646 {
3647 ranges[i].in_p = 1;
3648 ccode = invert_tree_comparison (ccode, false);
3649 }
3650
3651 unsigned int uid = gimple_uid (stmt);
3652 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3653 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3654 gimple_set_uid (g, uid);
3655 rhs1 = gimple_assign_lhs (g);
3656 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3657 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3658 {
3659 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3660 gimple_set_uid (g, uid);
3661 rhs2 = gimple_assign_lhs (g);
3662 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3663 }
3664 if (tree_swap_operands_p (rhs1, rhs2))
3665 {
3666 std::swap (rhs1, rhs2);
3667 ccode = swap_tree_comparison (ccode);
3668 }
3669 if (gimple_code (stmt) == GIMPLE_COND)
3670 {
3671 gcond *c = as_a <gcond *> (stmt);
3672 gimple_cond_set_code (c, ccode);
3673 gimple_cond_set_lhs (c, rhs1);
3674 gimple_cond_set_rhs (c, rhs2);
3675 update_stmt (stmt);
3676 }
3677 else
3678 {
3679 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3680 if (!INTEGRAL_TYPE_P (ctype)
3681 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3682 && TYPE_PRECISION (ctype) != 1))
3683 ctype = boolean_type_node;
3684 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3685 gimple_set_uid (g, uid);
3686 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3687 if (oe->op && ctype != TREE_TYPE (oe->op))
3688 {
3689 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3690 NOP_EXPR, gimple_assign_lhs (g));
3691 gimple_set_uid (g, uid);
3692 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3693 }
3694 ranges[i].exp = gimple_assign_lhs (g);
3695 oe->op = ranges[i].exp;
3696 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3697 ranges[i].high = ranges[i].low;
3698 }
3699 ranges[i].strict_overflow_p = false;
3700 oe = (*ops)[ranges[*idx].idx];
3701 /* Now change all the other range test immediate uses, so that
3702 those tests will be optimized away. */
3703 if (opcode == ERROR_MARK)
3704 {
3705 if (oe->op)
3706 oe->op = build_int_cst (TREE_TYPE (oe->op),
3707 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3708 else
3709 oe->op = (oe->rank == BIT_IOR_EXPR
3710 ? boolean_false_node : boolean_true_node);
3711 }
3712 else
3713 oe->op = error_mark_node;
3714 ranges[*idx].exp = NULL_TREE;
3715 ranges[*idx].low = NULL_TREE;
3716 ranges[*idx].high = NULL_TREE;
3717 any_changes = true;
3718 }
3719
3720 delete map;
3721 return any_changes;
3722 }
3723
3724 /* Optimize range tests, similarly to how fold_range_test optimizes
3725 them on trees. The tree code for the binary
3726 operation between all the operands is OPCODE.
3727 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3728 maybe_optimize_range_tests for inter-bb range optimization.
3729 In that case if oe->op is NULL, oe->id is bb->index whose
3730 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3731 the actual opcode.
3732 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
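/* A hedged illustration (not lifted from a testcase): for
a_1 == 1 || a_1 == 3 || a_1 == 2
the operands become range entries, get sorted and merged into the
single range a_1 in [1, 3], and update_range_test emits one check,
typically of the form (unsigned) (a_1 - 1) <= 2. */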
3733
3734 static bool
3735 optimize_range_tests (enum tree_code opcode,
3736 vec<operand_entry *> *ops, basic_block first_bb)
3737 {
3738 unsigned int length = ops->length (), i, j, first;
3739 operand_entry *oe;
3740 struct range_entry *ranges;
3741 bool any_changes = false;
3742
3743 if (length == 1)
3744 return false;
3745
3746 ranges = XNEWVEC (struct range_entry, length);
3747 for (i = 0; i < length; i++)
3748 {
3749 oe = (*ops)[i];
3750 ranges[i].idx = i;
3751 init_range_entry (ranges + i, oe->op,
3752 oe->op
3753 ? NULL
3754 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3755 /* For | invert it now; we will invert it again before emitting
3756 the optimized expression. */
3757 if (opcode == BIT_IOR_EXPR
3758 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3759 ranges[i].in_p = !ranges[i].in_p;
3760 }
3761
3762 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3763 for (i = 0; i < length; i++)
3764 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3765 break;
3766
3767 /* Try to merge ranges. */
3768 for (first = i; i < length; i++)
3769 {
3770 tree low = ranges[i].low;
3771 tree high = ranges[i].high;
3772 int in_p = ranges[i].in_p;
3773 bool strict_overflow_p = ranges[i].strict_overflow_p;
3774 int update_fail_count = 0;
3775
3776 for (j = i + 1; j < length; j++)
3777 {
3778 if (ranges[i].exp != ranges[j].exp)
3779 break;
3780 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3781 ranges[j].in_p, ranges[j].low, ranges[j].high))
3782 break;
3783 strict_overflow_p |= ranges[j].strict_overflow_p;
3784 }
3785
3786 if (j == i + 1)
3787 continue;
3788
3789 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3790 opcode, ops, ranges[i].exp, NULL, in_p,
3791 low, high, strict_overflow_p))
3792 {
3793 i = j - 1;
3794 any_changes = true;
3795 }
3796 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3797 while update_range_test would fail. */
3798 else if (update_fail_count == 64)
3799 i = j - 1;
3800 else
3801 ++update_fail_count;
3802 }
3803
3804 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3805 ops, ranges);
3806
3807 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3808 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3809 ops, ranges);
3810 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3811 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3812 ops, ranges);
3813 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3814 ops, ranges);
3815 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3816 ranges, first_bb);
3817
3818 if (any_changes && opcode != ERROR_MARK)
3819 {
3820 j = 0;
3821 FOR_EACH_VEC_ELT (*ops, i, oe)
3822 {
3823 if (oe->op == error_mark_node)
3824 continue;
3825 else if (i != j)
3826 (*ops)[j] = oe;
3827 j++;
3828 }
3829 ops->truncate (j);
3830 }
3831
3832 XDELETEVEC (ranges);
3833 return any_changes;
3834 }
3835
3836 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3837 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3838 otherwise the comparison code. TYPE is a return value that is set
3839 to the type of the comparison. */
3840
3841 static tree_code
3842 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3843 {
3844 if (TREE_CODE (var) != SSA_NAME)
3845 return ERROR_MARK;
3846
3847 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3848 if (stmt == NULL)
3849 return ERROR_MARK;
3850
3851 /* ??? If we start creating more COND_EXPR, we could perform
3852 this same optimization with them. For now, simplify. */
3853 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3854 return ERROR_MARK;
3855
3856 tree cond = gimple_assign_rhs1 (stmt);
3857 tree_code cmp = TREE_CODE (cond);
3858 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3859 return ERROR_MARK;
3860
3861 /* ??? For now, allow only canonical true and false result vectors.
3862 We could expand this to other constants should the need arise,
3863 but at the moment we don't create them. */
3864 tree t = gimple_assign_rhs2 (stmt);
3865 tree f = gimple_assign_rhs3 (stmt);
3866 bool inv;
3867 if (integer_all_onesp (t))
3868 inv = false;
3869 else if (integer_all_onesp (f))
3870 {
3871 cmp = invert_tree_comparison (cmp, false);
3872 inv = true;
3873 }
3874 else
3875 return ERROR_MARK;
3876 if (!integer_zerop (f))
3877 return ERROR_MARK;
3878
3879 /* Success! */
3880 if (rets)
3881 *rets = stmt;
3882 if (reti)
3883 *reti = inv;
3884 if (type)
3885 *type = TREE_TYPE (cond);
3886 return cmp;
3887 }
3888
3889 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3890 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
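/* A rough sketch of the shape this handles (hypothetical GIMPLE):
c1_1 = VEC_COND_EXPR <x_2 < {3,...}, {-1,...}, {0,...}>;
c2_3 = VEC_COND_EXPR <x_2 < {5,...}, {-1,...}, {0,...}>;
r_4 = c1_1 & c2_3;
If maybe_fold_and_comparisons can fold the two comparisons into one,
the first VEC_COND_EXPR receives the combined condition and the
second operand of the '&' is dropped from the ops vector. */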
3891
3892 static bool
3893 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3894 {
3895 unsigned int length = ops->length (), i, j;
3896 bool any_changes = false;
3897
3898 if (length == 1)
3899 return false;
3900
3901 for (i = 0; i < length; ++i)
3902 {
3903 tree elt0 = (*ops)[i]->op;
3904
3905 gassign *stmt0;
3906 bool invert;
3907 tree type;
3908 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3909 if (cmp0 == ERROR_MARK)
3910 continue;
3911
3912 for (j = i + 1; j < length; ++j)
3913 {
3914 tree &elt1 = (*ops)[j]->op;
3915
3916 gassign *stmt1;
3917 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3918 if (cmp1 == ERROR_MARK)
3919 continue;
3920
3921 tree cond0 = gimple_assign_rhs1 (stmt0);
3922 tree x0 = TREE_OPERAND (cond0, 0);
3923 tree y0 = TREE_OPERAND (cond0, 1);
3924
3925 tree cond1 = gimple_assign_rhs1 (stmt1);
3926 tree x1 = TREE_OPERAND (cond1, 0);
3927 tree y1 = TREE_OPERAND (cond1, 1);
3928
3929 tree comb;
3930 if (opcode == BIT_AND_EXPR)
3931 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3932 y1);
3933 else if (opcode == BIT_IOR_EXPR)
3934 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3935 y1);
3936 else
3937 gcc_unreachable ();
3938 if (comb == NULL)
3939 continue;
3940
3941 /* Success! */
3942 if (dump_file && (dump_flags & TDF_DETAILS))
3943 {
3944 fprintf (dump_file, "Transforming ");
3945 print_generic_expr (dump_file, cond0);
3946 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3947 print_generic_expr (dump_file, cond1);
3948 fprintf (dump_file, " into ");
3949 print_generic_expr (dump_file, comb);
3950 fputc ('\n', dump_file);
3951 }
3952
3953 gimple_assign_set_rhs1 (stmt0, comb);
3954 if (invert)
3955 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3956 *gimple_assign_rhs3_ptr (stmt0));
3957 update_stmt (stmt0);
3958
3959 elt1 = error_mark_node;
3960 any_changes = true;
3961 }
3962 }
3963
3964 if (any_changes)
3965 {
3966 operand_entry *oe;
3967 j = 0;
3968 FOR_EACH_VEC_ELT (*ops, i, oe)
3969 {
3970 if (oe->op == error_mark_node)
3971 continue;
3972 else if (i != j)
3973 (*ops)[j] = oe;
3974 j++;
3975 }
3976 ops->truncate (j);
3977 }
3978
3979 return any_changes;
3980 }
3981
3982 /* Return true if STMT is a cast like:
3983 <bb N>:
3984 ...
3985 _123 = (int) _234;
3986
3987 <bb M>:
3988 # _345 = PHI <_123(N), 1(...), 1(...)>
3989 where _234 has bool type, _123 has a single use, and
3990 bb N has a single successor M. This is commonly used in
3991 the last block of a range test.
3992
3993 Also return true if STMT is a tcc_comparison like:
3994 <bb N>:
3995 ...
3996 _234 = a_2(D) == 2;
3997
3998 <bb M>:
3999 # _345 = PHI <_234(N), 1(...), 1(...)>
4000 _346 = (int) _345;
4001 where _234 has boolean type, a single use, and
4002 bb N has a single successor M. This is commonly used in
4003 the last block of a range test. */
4004
4005 static bool
4006 final_range_test_p (gimple *stmt)
4007 {
4008 basic_block bb, rhs_bb, lhs_bb;
4009 edge e;
4010 tree lhs, rhs;
4011 use_operand_p use_p;
4012 gimple *use_stmt;
4013
4014 if (!gimple_assign_cast_p (stmt)
4015 && (!is_gimple_assign (stmt)
4016 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4017 != tcc_comparison)))
4018 return false;
4019 bb = gimple_bb (stmt);
4020 if (!single_succ_p (bb))
4021 return false;
4022 e = single_succ_edge (bb);
4023 if (e->flags & EDGE_COMPLEX)
4024 return false;
4025
4026 lhs = gimple_assign_lhs (stmt);
4027 rhs = gimple_assign_rhs1 (stmt);
4028 if (gimple_assign_cast_p (stmt)
4029 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4030 || TREE_CODE (rhs) != SSA_NAME
4031 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4032 return false;
4033
4034 if (!gimple_assign_cast_p (stmt)
4035 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4036 return false;
4037
4038 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4039 if (!single_imm_use (lhs, &use_p, &use_stmt))
4040 return false;
4041
4042 if (gimple_code (use_stmt) != GIMPLE_PHI
4043 || gimple_bb (use_stmt) != e->dest)
4044 return false;
4045
4046 /* And that the rhs is defined in the same loop. */
4047 if (gimple_assign_cast_p (stmt))
4048 {
4049 if (TREE_CODE (rhs) != SSA_NAME
4050 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4051 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4052 return false;
4053 }
4054 else
4055 {
4056 if (TREE_CODE (lhs) != SSA_NAME
4057 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4058 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4059 return false;
4060 }
4061
4062 return true;
4063 }
4064
4065 /* Return true if BB is a suitable basic block for inter-bb range test
4066 optimization. If BACKWARD is true, BB should be the only predecessor
4067 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4068 or compared against to find the common basic block to which all the
4069 conditions branch when true resp. false. If BACKWARD is false, TEST_BB
4070 should be the only predecessor of BB. */
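/* A sketch of the CFG shape this checks for (hypothetical, simplified):
<bb 2>: if (a_1 < 10) goto <bb 3>; else goto <bb 5>;
<bb 3>: if (b_2 != 0) goto <bb 4>; else goto <bb 5>;
Here *OTHER_BB would be bb 5; both conditions have exactly one edge
into it and the PHI arguments in bb 5 coming from bb 2 and bb 3 have
to agree for the blocks to be considered suitable. */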
4071
4072 static bool
4073 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4074 bool backward)
4075 {
4076 edge_iterator ei, ei2;
4077 edge e, e2;
4078 gimple *stmt;
4079 gphi_iterator gsi;
4080 bool other_edge_seen = false;
4081 bool is_cond;
4082
4083 if (test_bb == bb)
4084 return false;
4085 /* Check last stmt first. */
4086 stmt = last_stmt (bb);
4087 if (stmt == NULL
4088 || (gimple_code (stmt) != GIMPLE_COND
4089 && (backward || !final_range_test_p (stmt)))
4090 || gimple_visited_p (stmt)
4091 || stmt_could_throw_p (cfun, stmt)
4092 || *other_bb == bb)
4093 return false;
4094 is_cond = gimple_code (stmt) == GIMPLE_COND;
4095 if (is_cond)
4096 {
4097 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4098 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4099 to *OTHER_BB (if not set yet, try to find it out). */
4100 if (EDGE_COUNT (bb->succs) != 2)
4101 return false;
4102 FOR_EACH_EDGE (e, ei, bb->succs)
4103 {
4104 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4105 return false;
4106 if (e->dest == test_bb)
4107 {
4108 if (backward)
4109 continue;
4110 else
4111 return false;
4112 }
4113 if (e->dest == bb)
4114 return false;
4115 if (*other_bb == NULL)
4116 {
4117 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4118 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4119 return false;
4120 else if (e->dest == e2->dest)
4121 *other_bb = e->dest;
4122 if (*other_bb == NULL)
4123 return false;
4124 }
4125 if (e->dest == *other_bb)
4126 other_edge_seen = true;
4127 else if (backward)
4128 return false;
4129 }
4130 if (*other_bb == NULL || !other_edge_seen)
4131 return false;
4132 }
4133 else if (single_succ (bb) != *other_bb)
4134 return false;
4135
4136 /* Now check all PHIs of *OTHER_BB. */
4137 e = find_edge (bb, *other_bb);
4138 e2 = find_edge (test_bb, *other_bb);
4139 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4140 {
4141 gphi *phi = gsi.phi ();
4142 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4143 corresponding to BB and TEST_BB predecessor must be the same. */
4144 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4145 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4146 {
4147 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4148 one of the PHIs should have the lhs of the last stmt in
4149 that block as PHI arg and that PHI should have 0 or 1
4150 corresponding to it in all other range test basic blocks
4151 considered. */
4152 if (!is_cond)
4153 {
4154 if (gimple_phi_arg_def (phi, e->dest_idx)
4155 == gimple_assign_lhs (stmt)
4156 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4157 || integer_onep (gimple_phi_arg_def (phi,
4158 e2->dest_idx))))
4159 continue;
4160 }
4161 else
4162 {
4163 gimple *test_last = last_stmt (test_bb);
4164 if (gimple_code (test_last) != GIMPLE_COND
4165 && gimple_phi_arg_def (phi, e2->dest_idx)
4166 == gimple_assign_lhs (test_last)
4167 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4168 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4169 continue;
4170 }
4171
4172 return false;
4173 }
4174 }
4175 return true;
4176 }
4177
4178 /* Return true if BB doesn't have side-effects that would disallow
4179 range test optimization, all SSA_NAMEs set in the bb are consumed
4180 in the bb and there are no PHIs. */
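/* E.g. (illustrative only) a block containing just
_5 = x_1 + 1;
if (_5 < y_2) goto ...; else goto ...;
qualifies, while a block with a PHI, a call, a possibly trapping
computation, or an SSA_NAME used outside the block does not. */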
4181
4182 static bool
4183 no_side_effect_bb (basic_block bb)
4184 {
4185 gimple_stmt_iterator gsi;
4186 gimple *last;
4187
4188 if (!gimple_seq_empty_p (phi_nodes (bb)))
4189 return false;
4190 last = last_stmt (bb);
4191 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4192 {
4193 gimple *stmt = gsi_stmt (gsi);
4194 tree lhs;
4195 imm_use_iterator imm_iter;
4196 use_operand_p use_p;
4197
4198 if (is_gimple_debug (stmt))
4199 continue;
4200 if (gimple_has_side_effects (stmt))
4201 return false;
4202 if (stmt == last)
4203 return true;
4204 if (!is_gimple_assign (stmt))
4205 return false;
4206 lhs = gimple_assign_lhs (stmt);
4207 if (TREE_CODE (lhs) != SSA_NAME)
4208 return false;
4209 if (gimple_assign_rhs_could_trap_p (stmt))
4210 return false;
4211 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4212 {
4213 gimple *use_stmt = USE_STMT (use_p);
4214 if (is_gimple_debug (use_stmt))
4215 continue;
4216 if (gimple_bb (use_stmt) != bb)
4217 return false;
4218 }
4219 }
4220 return false;
4221 }
4222
4223 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4224 return true and fill in *OPS recursively. */
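/* For instance (hypothetical names), with CODE == BIT_IOR_EXPR and
t_5 = a_1 | b_2;
VAR = t_5 | c_3;
the single-use leaves a_1, b_2 and c_3 end up in *OPS and the
intermediate statements are marked visited. */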
4225
4226 static bool
4227 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4228 class loop *loop)
4229 {
4230 gimple *stmt = SSA_NAME_DEF_STMT (var);
4231 tree rhs[2];
4232 int i;
4233
4234 if (!is_reassociable_op (stmt, code, loop))
4235 return false;
4236
4237 rhs[0] = gimple_assign_rhs1 (stmt);
4238 rhs[1] = gimple_assign_rhs2 (stmt);
4239 gimple_set_visited (stmt, true);
4240 for (i = 0; i < 2; i++)
4241 if (TREE_CODE (rhs[i]) == SSA_NAME
4242 && !get_ops (rhs[i], code, ops, loop)
4243 && has_single_use (rhs[i]))
4244 {
4245 operand_entry *oe = operand_entry_pool.allocate ();
4246
4247 oe->op = rhs[i];
4248 oe->rank = code;
4249 oe->id = 0;
4250 oe->count = 1;
4251 oe->stmt_to_insert = NULL;
4252 ops->safe_push (oe);
4253 }
4254 return true;
4255 }
4256
4257 /* Find the ops that were added by get_ops starting from VAR, see if
4258 they were changed during update_range_test and if yes, create new
4259 stmts. */
4260
4261 static tree
4262 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4263 unsigned int *pidx, class loop *loop)
4264 {
4265 gimple *stmt = SSA_NAME_DEF_STMT (var);
4266 tree rhs[4];
4267 int i;
4268
4269 if (!is_reassociable_op (stmt, code, loop))
4270 return NULL;
4271
4272 rhs[0] = gimple_assign_rhs1 (stmt);
4273 rhs[1] = gimple_assign_rhs2 (stmt);
4274 rhs[2] = rhs[0];
4275 rhs[3] = rhs[1];
4276 for (i = 0; i < 2; i++)
4277 if (TREE_CODE (rhs[i]) == SSA_NAME)
4278 {
4279 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4280 if (rhs[2 + i] == NULL_TREE)
4281 {
4282 if (has_single_use (rhs[i]))
4283 rhs[2 + i] = ops[(*pidx)++]->op;
4284 else
4285 rhs[2 + i] = rhs[i];
4286 }
4287 }
4288 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4289 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4290 {
4291 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4292 var = make_ssa_name (TREE_TYPE (var));
4293 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4294 rhs[2], rhs[3]);
4295 gimple_set_uid (g, gimple_uid (stmt));
4296 gimple_set_visited (g, true);
4297 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4298 }
4299 return var;
4300 }
4301
4302 /* Structure to track the initial value passed to get_ops and
4303 the range in the ops vector for each basic block. */
4304
4305 struct inter_bb_range_test_entry
4306 {
4307 tree op;
4308 unsigned int first_idx, last_idx;
4309 };
4310
4311 /* Inter-bb range test optimization.
4312
4313 Returns TRUE if a gimple conditional is optimized to a true/false,
4314 otherwise return FALSE.
4315
4316 This indicates to the caller that it should run a CFG cleanup pass
4317 once reassociation is completed. */
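/* A simplified sketch of the input (hypothetical block numbers):
<bb 2>: if (a_1 == 3) goto <bb 9>; else goto <bb 3>;
<bb 3>: if (a_1 == 5) goto <bb 9>; else goto <bb 4>;
<bb 4>: _7 = a_1 == 7; (int) _7 feeds a PHI in <bb 9>
The conditions and the final comparison are collected into one ops
vector and passed to optimize_range_tests with ERROR_MARK, so the
whole chain can be rewritten, e.g. as a single bit test. */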
4318
4319 static bool
4320 maybe_optimize_range_tests (gimple *stmt)
4321 {
4322 basic_block first_bb = gimple_bb (stmt);
4323 basic_block last_bb = first_bb;
4324 basic_block other_bb = NULL;
4325 basic_block bb;
4326 edge_iterator ei;
4327 edge e;
4328 auto_vec<operand_entry *> ops;
4329 auto_vec<inter_bb_range_test_entry> bbinfo;
4330 bool any_changes = false;
4331 bool cfg_cleanup_needed = false;
4332
4333 /* Consider only basic blocks that end with GIMPLE_COND or
4334 a cast statement satisfying final_range_test_p. All
4335 but the last bb in the first_bb .. last_bb range
4336 should end with GIMPLE_COND. */
4337 if (gimple_code (stmt) == GIMPLE_COND)
4338 {
4339 if (EDGE_COUNT (first_bb->succs) != 2)
4340 return cfg_cleanup_needed;
4341 }
4342 else if (final_range_test_p (stmt))
4343 other_bb = single_succ (first_bb);
4344 else
4345 return cfg_cleanup_needed;
4346
4347 if (stmt_could_throw_p (cfun, stmt))
4348 return cfg_cleanup_needed;
4349
4350 /* As relative ordering of post-dominator sons isn't fixed,
4351 maybe_optimize_range_tests can be called first on any
4352 bb in the range we want to optimize. So, start searching
4353 backwards, if first_bb can be set to a predecessor. */
4354 while (single_pred_p (first_bb))
4355 {
4356 basic_block pred_bb = single_pred (first_bb);
4357 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4358 break;
4359 if (!no_side_effect_bb (first_bb))
4360 break;
4361 first_bb = pred_bb;
4362 }
4363 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4364 Before starting forward search in last_bb successors, find
4365 out the other_bb. */
4366 if (first_bb == last_bb)
4367 {
4368 other_bb = NULL;
4369 /* As non-GIMPLE_COND last stmt always terminates the range,
4370 if forward search didn't discover anything, just give up. */
4371 if (gimple_code (stmt) != GIMPLE_COND)
4372 return cfg_cleanup_needed;
4373 /* Look at both successors. Either it ends with a GIMPLE_COND
4374 and satisfies suitable_cond_bb, or ends with a cast and
4375 other_bb is that cast's successor. */
4376 FOR_EACH_EDGE (e, ei, first_bb->succs)
4377 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4378 || e->dest == first_bb)
4379 return cfg_cleanup_needed;
4380 else if (single_pred_p (e->dest))
4381 {
4382 stmt = last_stmt (e->dest);
4383 if (stmt
4384 && gimple_code (stmt) == GIMPLE_COND
4385 && EDGE_COUNT (e->dest->succs) == 2)
4386 {
4387 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4388 break;
4389 else
4390 other_bb = NULL;
4391 }
4392 else if (stmt
4393 && final_range_test_p (stmt)
4394 && find_edge (first_bb, single_succ (e->dest)))
4395 {
4396 other_bb = single_succ (e->dest);
4397 if (other_bb == first_bb)
4398 other_bb = NULL;
4399 }
4400 }
4401 if (other_bb == NULL)
4402 return cfg_cleanup_needed;
4403 }
4404 /* Now do the forward search, moving last_bb to successor bbs
4405 that aren't other_bb. */
4406 while (EDGE_COUNT (last_bb->succs) == 2)
4407 {
4408 FOR_EACH_EDGE (e, ei, last_bb->succs)
4409 if (e->dest != other_bb)
4410 break;
4411 if (e == NULL)
4412 break;
4413 if (!single_pred_p (e->dest))
4414 break;
4415 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4416 break;
4417 if (!no_side_effect_bb (e->dest))
4418 break;
4419 last_bb = e->dest;
4420 }
4421 if (first_bb == last_bb)
4422 return cfg_cleanup_needed;
4423 /* Here basic blocks first_bb through last_bb's predecessor
4424 end with GIMPLE_COND, all of them have one of the edges to
4425 other_bb and another to another block in the range,
4426 all blocks except first_bb don't have side-effects and
4427 last_bb ends with either GIMPLE_COND, or cast satisfying
4428 final_range_test_p. */
4429 for (bb = last_bb; ; bb = single_pred (bb))
4430 {
4431 enum tree_code code;
4432 tree lhs, rhs;
4433 inter_bb_range_test_entry bb_ent;
4434
4435 bb_ent.op = NULL_TREE;
4436 bb_ent.first_idx = ops.length ();
4437 bb_ent.last_idx = bb_ent.first_idx;
4438 e = find_edge (bb, other_bb);
4439 stmt = last_stmt (bb);
4440 gimple_set_visited (stmt, true);
4441 if (gimple_code (stmt) != GIMPLE_COND)
4442 {
4443 use_operand_p use_p;
4444 gimple *phi;
4445 edge e2;
4446 unsigned int d;
4447
4448 lhs = gimple_assign_lhs (stmt);
4449 rhs = gimple_assign_rhs1 (stmt);
4450 gcc_assert (bb == last_bb);
4451
4452 /* stmt is
4453 _123 = (int) _234;
4454 OR
4455 _234 = a_2(D) == 2;
4456
4457 followed by:
4458 <bb M>:
4459 # _345 = PHI <_123(N), 1(...), 1(...)>
4460
4461 or 0 instead of 1. If it is 0, the _234
4462 range test is anded together with all the
4463 other range tests; if it is 1, it is ored with
4464 them. */
4465 single_imm_use (lhs, &use_p, &phi);
4466 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4467 e2 = find_edge (first_bb, other_bb);
4468 d = e2->dest_idx;
4469 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4470 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4471 code = BIT_AND_EXPR;
4472 else
4473 {
4474 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4475 code = BIT_IOR_EXPR;
4476 }
4477
4478 /* If _234 SSA_NAME_DEF_STMT is
4479 _234 = _567 | _789;
4480 (or &, corresponding to 1/0 in the phi arguments),
4481 push into ops the individual range test arguments
4482 of the bitwise or resp. and, recursively. */
4483 if (TREE_CODE (rhs) == SSA_NAME
4484 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4485 != tcc_comparison)
4486 && !get_ops (rhs, code, &ops,
4487 loop_containing_stmt (stmt))
4488 && has_single_use (rhs))
4489 {
4490 /* Otherwise, push the _234 range test itself. */
4491 operand_entry *oe = operand_entry_pool.allocate ();
4492
4493 oe->op = rhs;
4494 oe->rank = code;
4495 oe->id = 0;
4496 oe->count = 1;
4497 oe->stmt_to_insert = NULL;
4498 ops.safe_push (oe);
4499 bb_ent.last_idx++;
4500 bb_ent.op = rhs;
4501 }
4502 else if (is_gimple_assign (stmt)
4503 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4504 == tcc_comparison)
4505 && !get_ops (lhs, code, &ops,
4506 loop_containing_stmt (stmt))
4507 && has_single_use (lhs))
4508 {
4509 operand_entry *oe = operand_entry_pool.allocate ();
4510 oe->op = lhs;
4511 oe->rank = code;
4512 oe->id = 0;
4513 oe->count = 1;
4514 ops.safe_push (oe);
4515 bb_ent.last_idx++;
4516 bb_ent.op = lhs;
4517 }
4518 else
4519 {
4520 bb_ent.last_idx = ops.length ();
4521 bb_ent.op = rhs;
4522 }
4523 bbinfo.safe_push (bb_ent);
4524 continue;
4525 }
4526 /* Otherwise stmt is GIMPLE_COND. */
4527 code = gimple_cond_code (stmt);
4528 lhs = gimple_cond_lhs (stmt);
4529 rhs = gimple_cond_rhs (stmt);
4530 if (TREE_CODE (lhs) == SSA_NAME
4531 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4532 && ((code != EQ_EXPR && code != NE_EXPR)
4533 || rhs != boolean_false_node
4534 /* Either push into ops the individual bitwise
4535 or resp. and operands, depending on which
4536 edge is other_bb. */
4537 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4538 ^ (code == EQ_EXPR))
4539 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4540 loop_containing_stmt (stmt))))
4541 {
4542 /* Or push the GIMPLE_COND stmt itself. */
4543 operand_entry *oe = operand_entry_pool.allocate ();
4544
4545 oe->op = NULL;
4546 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4547 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4548 /* oe->op = NULL signals that there is no SSA_NAME
4549 for the range test, and oe->id instead is the
4550 number of the basic block at whose end the
4551 GIMPLE_COND is. */
4552 oe->id = bb->index;
4553 oe->count = 1;
4554 oe->stmt_to_insert = NULL;
4555 ops.safe_push (oe);
4556 bb_ent.op = NULL;
4557 bb_ent.last_idx++;
4558 }
4559 else if (ops.length () > bb_ent.first_idx)
4560 {
4561 bb_ent.op = lhs;
4562 bb_ent.last_idx = ops.length ();
4563 }
4564 bbinfo.safe_push (bb_ent);
4565 if (bb == first_bb)
4566 break;
4567 }
4568 if (ops.length () > 1)
4569 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4570 if (any_changes)
4571 {
4572 unsigned int idx, max_idx = 0;
4573 /* update_ops relies on has_single_use predicates returning the
4574 same values as they did during get_ops earlier. Additionally it
4575 never removes statements, only adds new ones, and it should walk
4576 from the single imm use and check the predicate already before
4577 making those changes.
4578 On the other side, the handling of GIMPLE_COND directly can turn
4579 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4580 it needs to be done in a separate loop afterwards. */
4581 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4582 {
4583 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4584 && bbinfo[idx].op != NULL_TREE)
4585 {
4586 tree new_op;
4587
4588 max_idx = idx;
4589 stmt = last_stmt (bb);
4590 new_op = update_ops (bbinfo[idx].op,
4591 (enum tree_code)
4592 ops[bbinfo[idx].first_idx]->rank,
4593 ops, &bbinfo[idx].first_idx,
4594 loop_containing_stmt (stmt));
4595 if (new_op == NULL_TREE)
4596 {
4597 gcc_assert (bb == last_bb);
4598 new_op = ops[bbinfo[idx].first_idx++]->op;
4599 }
4600 if (bbinfo[idx].op != new_op)
4601 {
4602 imm_use_iterator iter;
4603 use_operand_p use_p;
4604 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4605
4606 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4607 if (is_gimple_debug (use_stmt))
4608 continue;
4609 else if (gimple_code (use_stmt) == GIMPLE_COND
4610 || gimple_code (use_stmt) == GIMPLE_PHI)
4611 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4612 SET_USE (use_p, new_op);
4613 else if ((is_gimple_assign (use_stmt)
4614 && (TREE_CODE_CLASS
4615 (gimple_assign_rhs_code (use_stmt))
4616 == tcc_comparison)))
4617 cast_or_tcc_cmp_stmt = use_stmt;
4618 else if (gimple_assign_cast_p (use_stmt))
4619 cast_or_tcc_cmp_stmt = use_stmt;
4620 else
4621 gcc_unreachable ();
4622
4623 if (cast_or_tcc_cmp_stmt)
4624 {
4625 gcc_assert (bb == last_bb);
4626 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4627 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4628 enum tree_code rhs_code
4629 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4630 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4631 : CONVERT_EXPR;
4632 gassign *g;
4633 if (is_gimple_min_invariant (new_op))
4634 {
4635 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4636 g = gimple_build_assign (new_lhs, new_op);
4637 }
4638 else
4639 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4640 gimple_stmt_iterator gsi
4641 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4642 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4643 gimple_set_visited (g, true);
4644 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4645 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4646 if (is_gimple_debug (use_stmt))
4647 continue;
4648 else if (gimple_code (use_stmt) == GIMPLE_COND
4649 || gimple_code (use_stmt) == GIMPLE_PHI)
4650 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4651 SET_USE (use_p, new_lhs);
4652 else
4653 gcc_unreachable ();
4654 }
4655 }
4656 }
4657 if (bb == first_bb)
4658 break;
4659 }
4660 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4661 {
4662 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4663 && bbinfo[idx].op == NULL_TREE
4664 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4665 {
4666 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4667
4668 if (idx > max_idx)
4669 max_idx = idx;
4670
4671 /* If we collapse the conditional to a true/false
4672 condition, then bubble that knowledge up to our caller. */
4673 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4674 {
4675 gimple_cond_make_false (cond_stmt);
4676 cfg_cleanup_needed = true;
4677 }
4678 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4679 {
4680 gimple_cond_make_true (cond_stmt);
4681 cfg_cleanup_needed = true;
4682 }
4683 else
4684 {
4685 gimple_cond_set_code (cond_stmt, NE_EXPR);
4686 gimple_cond_set_lhs (cond_stmt,
4687 ops[bbinfo[idx].first_idx]->op);
4688 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4689 }
4690 update_stmt (cond_stmt);
4691 }
4692 if (bb == first_bb)
4693 break;
4694 }
4695
4696 /* The above changes could result in basic blocks after the first
4697 modified one, up to and including last_bb, to be executed even if
4698 they would not be in the original program. If the value ranges of
4699 assignment lhs' in those bbs were dependent on the conditions
4700 guarding those basic blocks which now can change, the VRs might
4701 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4702 are only used within the same bb, it should be not a big deal if
4703 we just reset all the VRs in those bbs. See PR68671. */
4704 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4705 reset_flow_sensitive_info_in_bb (bb);
4706 }
4707 return cfg_cleanup_needed;
4708 }
4709
4710 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4711 of STMT in it's operands. This is also known as a "destructive
4712 update" operation. */
4713
4714 static bool
4715 is_phi_for_stmt (gimple *stmt, tree operand)
4716 {
4717 gimple *def_stmt;
4718 gphi *def_phi;
4719 tree lhs;
4720 use_operand_p arg_p;
4721 ssa_op_iter i;
4722
4723 if (TREE_CODE (operand) != SSA_NAME)
4724 return false;
4725
4726 lhs = gimple_assign_lhs (stmt);
4727
4728 def_stmt = SSA_NAME_DEF_STMT (operand);
4729 def_phi = dyn_cast <gphi *> (def_stmt);
4730 if (!def_phi)
4731 return false;
4732
4733 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4734 if (lhs == USE_FROM_PTR (arg_p))
4735 return true;
4736 return false;
4737 }
4738
4739 /* Remove def stmt of VAR if VAR has zero uses and recurse
4740 on rhs1 operand if so. */
4741
4742 static void
4743 remove_visited_stmt_chain (tree var)
4744 {
4745 gimple *stmt;
4746 gimple_stmt_iterator gsi;
4747
4748 while (1)
4749 {
4750 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4751 return;
4752 stmt = SSA_NAME_DEF_STMT (var);
4753 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4754 {
4755 var = gimple_assign_rhs1 (stmt);
4756 gsi = gsi_for_stmt (stmt);
4757 reassoc_remove_stmt (&gsi);
4758 release_defs (stmt);
4759 }
4760 else
4761 return;
4762 }
4763 }
4764
4765 /* This function checks three consecutive operands in
4766 the passed operand vector OPS starting from OPINDEX and
4767 swaps two operands if it is profitable for the binary operation
4768 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4769
4770 We pair ops with the same rank if possible.
4771
4772 The alternative we try is to see if STMT is a destructive
4773 update style statement, which is like:
4774 b = phi (a, ...)
4775 a = c + b;
4776 In that case, we want to use the destructive update form to
4777 expose the possible vectorizer sum reduction opportunity.
4778 In that case, the third operand will be the phi node. This
4779 check is not performed if STMT is null.
4780
4781 We could, of course, try to be better as noted above, and do a
4782 lot of work to try to find these opportunities in >3 operand
4783 cases, but it is unlikely to be worth it. */
4784
4785 static void
4786 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4787 unsigned int opindex, gimple *stmt)
4788 {
4789 operand_entry *oe1, *oe2, *oe3;
4790
4791 oe1 = ops[opindex];
4792 oe2 = ops[opindex + 1];
4793 oe3 = ops[opindex + 2];
4794
4795 if ((oe1->rank == oe2->rank
4796 && oe2->rank != oe3->rank)
4797 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4798 && !is_phi_for_stmt (stmt, oe1->op)
4799 && !is_phi_for_stmt (stmt, oe2->op)))
4800 std::swap (*oe1, *oe3);
4801 else if ((oe1->rank == oe3->rank
4802 && oe2->rank != oe3->rank)
4803 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4804 && !is_phi_for_stmt (stmt, oe1->op)
4805 && !is_phi_for_stmt (stmt, oe3->op)))
4806 std::swap (*oe1, *oe2);
4807 }
4808
4809 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4810 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
4811 whether RHS1 op RHS2 can be inserted before or needs to be inserted
4812 after the returned stmt. */
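/* E.g. (a sketch): if RHS1 is defined by a statement that STMT
dominates in the reassociation UID order, the combined RHS1 op RHS2
statement has to go after that definition, so it is returned and
INSERT_BEFORE is set to false. */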
4813
4814 static inline gimple *
4815 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
4816 {
4817 insert_before = true;
4818 if (TREE_CODE (rhs1) == SSA_NAME
4819 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4820 {
4821 stmt = SSA_NAME_DEF_STMT (rhs1);
4822 insert_before = false;
4823 }
4824 if (TREE_CODE (rhs2) == SSA_NAME
4825 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4826 {
4827 stmt = SSA_NAME_DEF_STMT (rhs2);
4828 insert_before = false;
4829 }
4830 return stmt;
4831 }
4832
4833 /* If the stmt that defines operand has to be inserted, insert it
4834 before the use. */
4835 static void
4836 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4837 {
4838 gcc_assert (is_gimple_assign (stmt_to_insert));
4839 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4840 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4841 bool insert_before;
4842 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
4843 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4844 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4845
4846 /* If the insert point is not stmt, then insert_point would be
4847 the point where operand rhs1 or rhs2 is defined. In this case,
4848 stmt_to_insert has to be inserted afterwards. This would
4849 only happen when the stmt insertion point is flexible. */
4850 if (insert_before)
4851 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4852 else
4853 insert_stmt_after (stmt_to_insert, insert_point);
4854 }
4855
4856
4857 /* Recursively rewrite our linearized statements so that the operators
4858 match those in OPS[OPINDEX], putting the computation in rank
4859 order. Return new lhs.
4860 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4861 the current stmt and during recursive invocations.
4862 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4863 recursive invocations. */
4864
4865 static tree
4866 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
4867 vec<operand_entry *> ops, bool changed, bool next_changed)
4868 {
4869 tree rhs1 = gimple_assign_rhs1 (stmt);
4870 tree rhs2 = gimple_assign_rhs2 (stmt);
4871 tree lhs = gimple_assign_lhs (stmt);
4872 operand_entry *oe;
4873
4874 /* The final recursion case for this function is that you have
4875 exactly two operations left.
4876 If we had exactly one op in the entire list to start with, we
4877 would have never called this function, and the tail recursion
4878 rewrites them one at a time. */
4879 if (opindex + 2 == ops.length ())
4880 {
4881 operand_entry *oe1, *oe2;
4882
4883 oe1 = ops[opindex];
4884 oe2 = ops[opindex + 1];
4885
4886 if (rhs1 != oe1->op || rhs2 != oe2->op)
4887 {
4888 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4889 unsigned int uid = gimple_uid (stmt);
4890
4891 if (dump_file && (dump_flags & TDF_DETAILS))
4892 {
4893 fprintf (dump_file, "Transforming ");
4894 print_gimple_stmt (dump_file, stmt, 0);
4895 }
4896
4897 /* If the stmt that defines operand has to be inserted, insert it
4898 before the use. */
4899 if (oe1->stmt_to_insert)
4900 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4901 if (oe2->stmt_to_insert)
4902 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4903 /* Even when changed is false, reassociation could have e.g. removed
4904 some redundant operations, so unless we are just swapping the
4905 arguments or unless there is no change at all (then we just
4906 return lhs), force creation of a new SSA_NAME. */
4907 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4908 {
4909 bool insert_before;
4910 gimple *insert_point
4911 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
4912 lhs = make_ssa_name (TREE_TYPE (lhs));
4913 stmt
4914 = gimple_build_assign (lhs, rhs_code,
4915 oe1->op, oe2->op);
4916 gimple_set_uid (stmt, uid);
4917 gimple_set_visited (stmt, true);
4918 if (insert_before)
4919 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4920 else
4921 insert_stmt_after (stmt, insert_point);
4922 }
4923 else
4924 {
4925 bool insert_before;
4926 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
4927 insert_before)
4928 == stmt);
4929 gimple_assign_set_rhs1 (stmt, oe1->op);
4930 gimple_assign_set_rhs2 (stmt, oe2->op);
4931 update_stmt (stmt);
4932 }
4933
4934 if (rhs1 != oe1->op && rhs1 != oe2->op)
4935 remove_visited_stmt_chain (rhs1);
4936
4937 if (dump_file && (dump_flags & TDF_DETAILS))
4938 {
4939 fprintf (dump_file, " into ");
4940 print_gimple_stmt (dump_file, stmt, 0);
4941 }
4942 }
4943 return lhs;
4944 }
4945
4946 /* If we hit here, we should have 3 or more ops left. */
4947 gcc_assert (opindex + 2 < ops.length ());
4948
4949 /* Rewrite the next operator. */
4950 oe = ops[opindex];
4951
4952 /* If the stmt that defines operand has to be inserted, insert it
4953 before the use. */
4954 if (oe->stmt_to_insert)
4955 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4956
4957 /* Recurse on the LHS of the binary operator, which is guaranteed to
4958 be the non-leaf side. */
4959 tree new_rhs1
4960 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
4961 changed || oe->op != rhs2 || next_changed,
4962 false);
4963
4964 if (oe->op != rhs2 || new_rhs1 != rhs1)
4965 {
4966 if (dump_file && (dump_flags & TDF_DETAILS))
4967 {
4968 fprintf (dump_file, "Transforming ");
4969 print_gimple_stmt (dump_file, stmt, 0);
4970 }
4971
4972 /* If changed is false, this is either opindex == 0
4973 or all outer rhs2's were equal to corresponding oe->op,
4974 and powi_result is NULL.
4975 That means lhs is equivalent before and after reassociation.
4976 Otherwise ensure the old lhs SSA_NAME is not reused and
4977 create a new stmt as well, so that any debug stmts will be
4978 properly adjusted. */
4979 if (changed)
4980 {
4981 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4982 unsigned int uid = gimple_uid (stmt);
4983 bool insert_before;
4984 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
4985 insert_before);
4986
4987 lhs = make_ssa_name (TREE_TYPE (lhs));
4988 stmt = gimple_build_assign (lhs, rhs_code,
4989 new_rhs1, oe->op);
4990 gimple_set_uid (stmt, uid);
4991 gimple_set_visited (stmt, true);
4992 if (insert_before)
4993 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4994 else
4995 insert_stmt_after (stmt, insert_point);
4996 }
4997 else
4998 {
4999 bool insert_before;
5000 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5001 insert_before)
5002 == stmt);
5003 gimple_assign_set_rhs1 (stmt, new_rhs1);
5004 gimple_assign_set_rhs2 (stmt, oe->op);
5005 update_stmt (stmt);
5006 }
5007
5008 if (dump_file && (dump_flags & TDF_DETAILS))
5009 {
5010 fprintf (dump_file, " into ");
5011 print_gimple_stmt (dump_file, stmt, 0);
5012 }
5013 }
5014 return lhs;
5015 }
5016
5017 /* Find out how many cycles we need to compute the statement chain.
5018 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5019 maximum number of independent statements we may execute per cycle. */
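/* A worked example (pure arithmetic, no target assumptions): for
ops_num == 10 and cpu_width == 2 the first phase takes
10 / (2 * 2) = 2 cycles, leaving rest = 10 - 2 * 2 = 6 operands,
which need floor_log2 (6) + 1 = 3 further cycles, 5 in total. */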
5020
5021 static int
5022 get_required_cycles (int ops_num, int cpu_width)
5023 {
5024 int res;
5025 int elog;
5026 unsigned int rest;
5027
5028 /* While we have more than 2 * cpu_width operands
5029 we may reduce the number of operands by cpu_width
5030 per cycle. */
5031 res = ops_num / (2 * cpu_width);
5032
5033 /* The remaining operand count may be reduced twice per cycle
5034 until we have only one operand. */
5035 rest = (unsigned)(ops_num - res * cpu_width);
5036 elog = exact_log2 (rest);
5037 if (elog >= 0)
5038 res += elog;
5039 else
5040 res += floor_log2 (rest) + 1;
5041
5042 return res;
5043 }
5044
5045 /* Returns an optimal number of registers to use for computation of
5046 given statements. */
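/* E.g. (an illustrative scenario, not a specific target): if the
target reports a reassociation width of 4 but get_required_cycles
shows that width 2 already reaches the same cycle count for this
ops_num, the narrower width 2 is returned to reduce register
pressure. */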
5047
5048 static int
5049 get_reassociation_width (int ops_num, enum tree_code opc,
5050 machine_mode mode)
5051 {
5052 int param_width = param_tree_reassoc_width;
5053 int width;
5054 int width_min;
5055 int cycles_best;
5056
5057 if (param_width > 0)
5058 width = param_width;
5059 else
5060 width = targetm.sched.reassociation_width (opc, mode);
5061
5062 if (width == 1)
5063 return width;
5064
5065 /* Get the minimal time required for sequence computation. */
5066 cycles_best = get_required_cycles (ops_num, width);
5067
5068 /* Check if we may use less width and still compute sequence for
5069 the same time. It will allow us to reduce registers usage.
5070 get_required_cycles is monotonically increasing with lower width
5071 so we can perform a binary search for the minimal width that still
5072 results in the optimal cycle count. */
5073 width_min = 1;
5074 while (width > width_min)
5075 {
5076 int width_mid = (width + width_min) / 2;
5077
5078 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5079 width = width_mid;
5080 else if (width_min < width_mid)
5081 width_min = width_mid;
5082 else
5083 break;
5084 }
5085
5086 return width;
5087 }
5088
5089 /* Recursively rewrite our linearized statements so that the operators
5090 match those in OPS[OPINDEX], putting the computation in rank
5091 order and trying to allow operations to be executed in
5092 parallel. */
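/* E.g. (a sketch): with width 2 and ops {a, b, c, d} this emits
t1 = a + b; t2 = c + d; t3 = t1 + t2;
instead of the serial ((a + b) + c) + d, so t1 and t2 can execute
in the same cycle. */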
5093
5094 static void
5095 rewrite_expr_tree_parallel (gassign *stmt, int width,
5096 vec<operand_entry *> ops)
5097 {
5098 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5099 int op_num = ops.length ();
5100 gcc_assert (op_num > 0);
5101 int stmt_num = op_num - 1;
5102 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5103 int op_index = op_num - 1;
5104 int stmt_index = 0;
5105 int ready_stmts_end = 0;
5106 int i = 0;
5107 gimple *stmt1 = NULL, *stmt2 = NULL;
5108 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5109
5110 /* We start expression rewriting from the top statements.
5111 So, in this loop we create a full list of statements
5112 we will work with. */
5113 stmts[stmt_num - 1] = stmt;
5114 for (i = stmt_num - 2; i >= 0; i--)
5115 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5116
5117 for (i = 0; i < stmt_num; i++)
5118 {
5119 tree op1, op2;
5120
5121 /* Determine whether we should use results of
5122 already handled statements or not. */
5123 if (ready_stmts_end == 0
5124 && (i - stmt_index >= width || op_index < 1))
5125 ready_stmts_end = i;
5126
5127 /* Now we choose operands for the next statement. A nonzero
5128 value in ready_stmts_end here means that we should use
5129 the result of already generated statements as the new operand. */
5130 if (ready_stmts_end > 0)
5131 {
5132 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5133 if (ready_stmts_end > stmt_index)
5134 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5135 else if (op_index >= 0)
5136 {
5137 operand_entry *oe = ops[op_index--];
5138 stmt2 = oe->stmt_to_insert;
5139 op2 = oe->op;
5140 }
5141 else
5142 {
5143 gcc_assert (stmt_index < i);
5144 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5145 }
5146
5147 if (stmt_index >= ready_stmts_end)
5148 ready_stmts_end = 0;
5149 }
5150 else
5151 {
5152 if (op_index > 1)
5153 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5154 operand_entry *oe2 = ops[op_index--];
5155 operand_entry *oe1 = ops[op_index--];
5156 op2 = oe2->op;
5157 stmt2 = oe2->stmt_to_insert;
5158 op1 = oe1->op;
5159 stmt1 = oe1->stmt_to_insert;
5160 }
5161
5162 /* If we emit the last statement then we should put
5163 operands into the last statement. It will also
5164 break the loop. */
5165 if (op_index < 0 && stmt_index == i)
5166 i = stmt_num - 1;
5167
5168 if (dump_file && (dump_flags & TDF_DETAILS))
5169 {
5170 fprintf (dump_file, "Transforming ");
5171 print_gimple_stmt (dump_file, stmts[i], 0);
5172 }
5173
5174 /* If the stmt that defines operand has to be inserted, insert it
5175 before the use. */
5176 if (stmt1)
5177 insert_stmt_before_use (stmts[i], stmt1);
5178 if (stmt2)
5179 insert_stmt_before_use (stmts[i], stmt2);
5180 stmt1 = stmt2 = NULL;
5181
5182 /* We keep original statement only for the last one. All
5183 others are recreated. */
5184 if (i == stmt_num - 1)
5185 {
5186 gimple_assign_set_rhs1 (stmts[i], op1);
5187 gimple_assign_set_rhs2 (stmts[i], op2);
5188 update_stmt (stmts[i]);
5189 }
5190 else
5191 {
5192 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5193 gimple_set_visited (stmts[i], true);
5194 }
5195 if (dump_file && (dump_flags & TDF_DETAILS))
5196 {
5197 fprintf (dump_file, " into ");
5198 print_gimple_stmt (dump_file, stmts[i], 0);
5199 }
5200 }
5201
5202 remove_visited_stmt_chain (last_rhs1);
5203 }
5204
5205 /* Transform STMT, which is really (A + B) + (C + D), into the left
5206 linear form, ((A+B)+C)+D.
5207 Recurse on D if necessary. */
5208
5209 static void
5210 linearize_expr (gimple *stmt)
5211 {
5212 gimple_stmt_iterator gsi;
5213 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5214 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5215 gimple *oldbinrhs = binrhs;
5216 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5217 gimple *newbinrhs = NULL;
5218 class loop *loop = loop_containing_stmt (stmt);
5219 tree lhs = gimple_assign_lhs (stmt);
5220
5221 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5222 && is_reassociable_op (binrhs, rhscode, loop));
5223
5224 gsi = gsi_for_stmt (stmt);
5225
5226 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5227 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5228 gimple_assign_rhs_code (binrhs),
5229 gimple_assign_lhs (binlhs),
5230 gimple_assign_rhs2 (binrhs));
5231 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5232 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5233 gimple_set_uid (binrhs, gimple_uid (stmt));
5234
5235 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5236 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5237
5238 if (dump_file && (dump_flags & TDF_DETAILS))
5239 {
5240 fprintf (dump_file, "Linearized: ");
5241 print_gimple_stmt (dump_file, stmt, 0);
5242 }
5243
5244 reassociate_stats.linearized++;
5245 update_stmt (stmt);
5246
5247 gsi = gsi_for_stmt (oldbinrhs);
5248 reassoc_remove_stmt (&gsi);
5249 release_defs (oldbinrhs);
5250
5251 gimple_set_visited (stmt, true);
5252 gimple_set_visited (binlhs, true);
5253 gimple_set_visited (binrhs, true);
5254
5255 /* Tail recurse on the new rhs if it still needs reassociation. */
5256 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5257 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5258 want to change the algorithm while converting to tuples. */
5259 linearize_expr (stmt);
5260 }
5261
5262 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5263 it. Otherwise, return NULL. */
5264
5265 static gimple *
5266 get_single_immediate_use (tree lhs)
5267 {
5268 use_operand_p immuse;
5269 gimple *immusestmt;
5270
5271 if (TREE_CODE (lhs) == SSA_NAME
5272 && single_imm_use (lhs, &immuse, &immusestmt)
5273 && is_gimple_assign (immusestmt))
5274 return immusestmt;
5275
5276 return NULL;
5277 }
5278
5279 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5280 representing the negated value. Insertions of any necessary
5281 instructions go before GSI.
5282 This function is recursive in that, if you hand it "a_5" as the
5283 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5284 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5285
5286 static tree
5287 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5288 {
5289 gimple *negatedefstmt = NULL;
5290 tree resultofnegate;
5291 gimple_stmt_iterator gsi;
5292 unsigned int uid;
5293
5294 /* If we are trying to negate a name, defined by an add, negate the
5295 add operands instead. */
5296 if (TREE_CODE (tonegate) == SSA_NAME)
5297 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5298 if (TREE_CODE (tonegate) == SSA_NAME
5299 && is_gimple_assign (negatedefstmt)
5300 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5301 && has_single_use (gimple_assign_lhs (negatedefstmt))
5302 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5303 {
5304 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5305 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5306 tree lhs = gimple_assign_lhs (negatedefstmt);
5307 gimple *g;
5308
5309 gsi = gsi_for_stmt (negatedefstmt);
5310 rhs1 = negate_value (rhs1, &gsi);
5311
5312 gsi = gsi_for_stmt (negatedefstmt);
5313 rhs2 = negate_value (rhs2, &gsi);
5314
5315 gsi = gsi_for_stmt (negatedefstmt);
5316 lhs = make_ssa_name (TREE_TYPE (lhs));
5317 gimple_set_visited (negatedefstmt, true);
5318 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5319 gimple_set_uid (g, gimple_uid (negatedefstmt));
5320 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5321 return lhs;
5322 }
5323
5324 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5325 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5326 NULL_TREE, true, GSI_SAME_STMT);
5327 gsi = *gsip;
5328 uid = gimple_uid (gsi_stmt (gsi));
5329 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5330 {
5331 gimple *stmt = gsi_stmt (gsi);
5332 if (gimple_uid (stmt) != 0)
5333 break;
5334 gimple_set_uid (stmt, uid);
5335 }
5336 return resultofnegate;
5337 }
5338
5339 /* Return true if we should break up the subtract in STMT into an add
5340 with negate. This is true when the subtract operands are really
5341 adds, or the subtract itself is used in an add expression. In
5342 either case, breaking up the subtract into an add with negate
5343 exposes the adds to reassociation. */
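/* For instance (hypothetical SSA names):
t_3 = a_1 - b_2;
r_5 = t_3 + c_4;
Because t_3 feeds an add, the subtract is broken up into
t_3 = a_1 + (-b_2), exposing the chain a_1 + (-b_2) + c_4. */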
5344
5345 static bool
5346 should_break_up_subtract (gimple *stmt)
5347 {
5348 tree lhs = gimple_assign_lhs (stmt);
5349 tree binlhs = gimple_assign_rhs1 (stmt);
5350 tree binrhs = gimple_assign_rhs2 (stmt);
5351 gimple *immusestmt;
5352 class loop *loop = loop_containing_stmt (stmt);
5353
5354 if (TREE_CODE (binlhs) == SSA_NAME
5355 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5356 return true;
5357
5358 if (TREE_CODE (binrhs) == SSA_NAME
5359 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5360 return true;
5361
5362 if (TREE_CODE (lhs) == SSA_NAME
5363 && (immusestmt = get_single_immediate_use (lhs))
5364 && is_gimple_assign (immusestmt)
5365 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5366 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5367 && gimple_assign_rhs1 (immusestmt) == lhs)
5368 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5369 return true;
5370 return false;
5371 }
5372
5373 /* Transform STMT from A - B into A + -B. */
5374
5375 static void
5376 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5377 {
5378 tree rhs1 = gimple_assign_rhs1 (stmt);
5379 tree rhs2 = gimple_assign_rhs2 (stmt);
5380
5381 if (dump_file && (dump_flags & TDF_DETAILS))
5382 {
5383 fprintf (dump_file, "Breaking up subtract ");
5384 print_gimple_stmt (dump_file, stmt, 0);
5385 }
5386
5387 rhs2 = negate_value (rhs2, gsip);
5388 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5389 update_stmt (stmt);
5390 }
5391
5392 /* Determine whether STMT is a builtin call that raises an SSA name
5393 to an integer power and has only one use. If so, and this is early
5394 reassociation and unsafe math optimizations are permitted, place
5395 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5396 If any of these conditions does not hold, return FALSE. */
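/* For instance (purely illustrative), with -fno-math-errno the call
     t_2 = __builtin_pow (x_1, 3.0);
   yields *BASE = x_1 and *EXPONENT = 3, while an exponent such as 2.5,
   an exponent below 2, or a non-SSA base is rejected.  */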
5397
5398 static bool
5399 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5400 {
5401 tree arg1;
5402 REAL_VALUE_TYPE c, cint;
5403
5404 switch (gimple_call_combined_fn (stmt))
5405 {
5406 CASE_CFN_POW:
5407 if (flag_errno_math)
5408 return false;
5409
5410 *base = gimple_call_arg (stmt, 0);
5411 arg1 = gimple_call_arg (stmt, 1);
5412
5413 if (TREE_CODE (arg1) != REAL_CST)
5414 return false;
5415
5416 c = TREE_REAL_CST (arg1);
5417
5418 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5419 return false;
5420
5421 *exponent = real_to_integer (&c);
5422 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5423 if (!real_identical (&c, &cint))
5424 return false;
5425
5426 break;
5427
5428 CASE_CFN_POWI:
5429 *base = gimple_call_arg (stmt, 0);
5430 arg1 = gimple_call_arg (stmt, 1);
5431
5432 if (!tree_fits_shwi_p (arg1))
5433 return false;
5434
5435 *exponent = tree_to_shwi (arg1);
5436 break;
5437
5438 default:
5439 return false;
5440 }
5441
5442 /* Expanding negative exponents is generally unproductive, so we don't
5443 complicate matters with those. Exponents of zero and one should
5444 have been handled by expression folding. */
5445 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5446 return false;
5447
5448 return true;
5449 }
5450
5451 /* Try to derive and add operand entry for OP to *OPS. Return false if
5452 unsuccessful. */
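/* Two illustrative cases (SSA names made up), both for operands of a
   MULT_EXPR chain, the first assuming the powi-related flags checked
   below are in effect: an operand defined by
     t_4 = __builtin_powi (x_1, 3);
   is recorded as the repeated factor x_1 with count 3, while an operand
   defined by
     t_6 = -y_2;
   is split into the two multiplicands y_2 and -1.  */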
5453
5454 static bool
5455 try_special_add_to_ops (vec<operand_entry *> *ops,
5456 enum tree_code code,
5457 tree op, gimple* def_stmt)
5458 {
5459 tree base = NULL_TREE;
5460 HOST_WIDE_INT exponent = 0;
5461
5462 if (TREE_CODE (op) != SSA_NAME
5463 || ! has_single_use (op))
5464 return false;
5465
5466 if (code == MULT_EXPR
5467 && reassoc_insert_powi_p
5468 && flag_unsafe_math_optimizations
5469 && is_gimple_call (def_stmt)
5470 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5471 {
5472 add_repeat_to_ops_vec (ops, base, exponent);
5473 gimple_set_visited (def_stmt, true);
5474 return true;
5475 }
5476 else if (code == MULT_EXPR
5477 && is_gimple_assign (def_stmt)
5478 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5479 && !HONOR_SNANS (TREE_TYPE (op))
5480 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5481 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5482 {
5483 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5484 tree cst = build_minus_one_cst (TREE_TYPE (op));
5485 add_to_ops_vec (ops, rhs1);
5486 add_to_ops_vec (ops, cst);
5487 gimple_set_visited (def_stmt, true);
5488 return true;
5489 }
5490
5491 return false;
5492 }
5493
5494 /* Recursively linearize a binary expression that is the RHS of STMT.
5495 Place the operands of the expression tree in the vector named OPS. */
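/* As an illustration (made-up names), starting from the definition of
   t_5 in
     t_3 = a_1 * b_2;
     t_5 = t_3 * c_4;
   linearization leaves OPS holding the operands a_1, b_2 and c_4, and
   marks the visited multiply statements so they are not processed
   again.  */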
5496
5497 static void
5498 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5499 bool is_associative, bool set_visited)
5500 {
5501 tree binlhs = gimple_assign_rhs1 (stmt);
5502 tree binrhs = gimple_assign_rhs2 (stmt);
5503 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5504 bool binlhsisreassoc = false;
5505 bool binrhsisreassoc = false;
5506 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5507 class loop *loop = loop_containing_stmt (stmt);
5508
5509 if (set_visited)
5510 gimple_set_visited (stmt, true);
5511
5512 if (TREE_CODE (binlhs) == SSA_NAME)
5513 {
5514 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5515 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5516 && !stmt_could_throw_p (cfun, binlhsdef));
5517 }
5518
5519 if (TREE_CODE (binrhs) == SSA_NAME)
5520 {
5521 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5522 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5523 && !stmt_could_throw_p (cfun, binrhsdef));
5524 }
5525
5526 /* If the LHS is not reassociable, but the RHS is, we need to swap
5527 them. If neither is reassociable, there is nothing we can do, so
5528 just put them in the ops vector. If the LHS is reassociable,
5529 linearize it. If both are reassociable, then linearize the RHS
5530 and the LHS. */
5531
5532 if (!binlhsisreassoc)
5533 {
5534 /* If this is not an associative operation like division, give up. */
5535 if (!is_associative)
5536 {
5537 add_to_ops_vec (ops, binrhs);
5538 return;
5539 }
5540
5541 if (!binrhsisreassoc)
5542 {
5543 bool swap = false;
5544 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5545 /* If we add ops for the rhs we expect to be able to recurse
5546 to it via the lhs during expression rewrite so swap
5547 operands. */
5548 swap = true;
5549 else
5550 add_to_ops_vec (ops, binrhs);
5551
5552 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5553 add_to_ops_vec (ops, binlhs);
5554
5555 if (!swap)
5556 return;
5557 }
5558
5559 if (dump_file && (dump_flags & TDF_DETAILS))
5560 {
5561 fprintf (dump_file, "swapping operands of ");
5562 print_gimple_stmt (dump_file, stmt, 0);
5563 }
5564
5565 swap_ssa_operands (stmt,
5566 gimple_assign_rhs1_ptr (stmt),
5567 gimple_assign_rhs2_ptr (stmt));
5568 update_stmt (stmt);
5569
5570 if (dump_file && (dump_flags & TDF_DETAILS))
5571 {
5572 fprintf (dump_file, " is now ");
5573 print_gimple_stmt (dump_file, stmt, 0);
5574 }
5575 if (!binrhsisreassoc)
5576 return;
5577
5578 /* We want to make it so the lhs is always the reassociative op,
5579 so swap. */
5580 std::swap (binlhs, binrhs);
5581 }
5582 else if (binrhsisreassoc)
5583 {
5584 linearize_expr (stmt);
5585 binlhs = gimple_assign_rhs1 (stmt);
5586 binrhs = gimple_assign_rhs2 (stmt);
5587 }
5588
5589 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5590 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5591 rhscode, loop));
5592 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5593 is_associative, set_visited);
5594
5595 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5596 add_to_ops_vec (ops, binrhs);
5597 }
5598
5599 /* Repropagate the negates back into subtracts, since no other pass
5600 currently does it. */
5601
5602 static void
5603 repropagate_negates (void)
5604 {
5605 unsigned int i = 0;
5606 tree negate;
5607
5608 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5609 {
5610 gimple *user = get_single_immediate_use (negate);
5611 if (!user || !is_gimple_assign (user))
5612 continue;
5613
5614 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5615 if (TREE_CODE (negateop) == SSA_NAME
5616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
5617 continue;
5618
5619 /* The negate operand can be either operand of a PLUS_EXPR
5620 (it can be the LHS if the RHS is a constant for example).
5621
5622 Force the negate operand to the RHS of the PLUS_EXPR, then
5623 transform the PLUS_EXPR into a MINUS_EXPR. */
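/* For example (illustrative):
     x_1 = -a_2;
     y_3 = x_1 + 5;
   becomes, after the operand swap below,
     y_3 = 5 - a_2;
   leaving the original negate as dead code.  */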
5624 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5625 {
5626 /* If the negated operand appears on the LHS of the
5627 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5628 to force the negated operand to the RHS of the PLUS_EXPR. */
5629 if (gimple_assign_rhs1 (user) == negate)
5630 {
5631 swap_ssa_operands (user,
5632 gimple_assign_rhs1_ptr (user),
5633 gimple_assign_rhs2_ptr (user));
5634 }
5635
5636 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5637 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5638 if (gimple_assign_rhs2 (user) == negate)
5639 {
5640 tree rhs1 = gimple_assign_rhs1 (user);
5641 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5642 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
5643 negateop);
5644 update_stmt (user);
5645 }
5646 }
5647 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5648 {
5649 if (gimple_assign_rhs1 (user) == negate)
5650 {
5651 /* We have
5652 x = -negateop
5653 y = x - b
5654 which we transform into
5655 x = negateop + b
5656 y = -x .
5657 This pushes down the negate which we possibly can merge
5658 into some other operation, hence insert it into the
5659 plus_negates vector. */
5660 gimple *feed = SSA_NAME_DEF_STMT (negate);
5661 tree b = gimple_assign_rhs2 (user);
5662 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5663 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5664 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5665 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
5666 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5667 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5668 user = gsi_stmt (gsi2);
5669 update_stmt (user);
5670 reassoc_remove_stmt (&gsi);
5671 release_defs (feed);
5672 plus_negates.safe_push (gimple_assign_lhs (user));
5673 }
5674 else
5675 {
5676 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
5677 getting rid of one operation. */
5678 tree rhs1 = gimple_assign_rhs1 (user);
5679 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5680 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
5681 update_stmt (gsi_stmt (gsi));
5682 }
5683 }
5684 }
5685 }
5686
5687 /* Returns true if OP is of a type for which we can do reassociation.
5688 That is for integral or non-saturating fixed-point types, and for
5689 floating point types when associative-math is enabled. */
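/* For example, an unsigned int operand qualifies because its overflow
   wraps, a plain signed int without -fwrapv does not, and a double
   operand qualifies only under -fassociative-math.  */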
5690
5691 static bool
5692 can_reassociate_p (tree op)
5693 {
5694 tree type = TREE_TYPE (op);
5695 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5696 return false;
5697 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5698 || NON_SAT_FIXED_POINT_TYPE_P (type)
5699 || (flag_associative_math && FLOAT_TYPE_P (type)))
5700 return true;
5701 return false;
5702 }
5703
5704 /* Break up subtract operations in block BB.
5705
5706 We do this top down because we don't know whether the subtract is
5707 part of a possible chain of reassociation except at the top.
5708
5709 IE given
5710 d = f + g
5711 c = a + e
5712 b = c - d
5713 q = b - r
5714 k = t - q
5715
5716 we want to break up k = t - q, but we won't until we've transformed q
5717 = b - r, which won't be broken up until we transform b = c - d.
5718
5719 En passant, clear the GIMPLE visited flag on every statement
5720 and set UIDs within each basic block. */
5721
5722 static void
5723 break_up_subtract_bb (basic_block bb)
5724 {
5725 gimple_stmt_iterator gsi;
5726 basic_block son;
5727 unsigned int uid = 1;
5728
5729 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5730 {
5731 gimple *stmt = gsi_stmt (gsi);
5732 gimple_set_visited (stmt, false);
5733 gimple_set_uid (stmt, uid++);
5734
5735 if (!is_gimple_assign (stmt)
5736 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5737 continue;
5738
5739 /* Look for simple gimple subtract operations. */
5740 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5741 {
5742 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5743 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5744 continue;
5745
5746 /* Check for a subtract used only in an addition. If this
5747 is the case, transform it into add of a negate for better
5748 reassociation. IE transform C = A-B into C = A + -B if C
5749 is only used in an addition. */
5750 if (should_break_up_subtract (stmt))
5751 break_up_subtract (stmt, &gsi);
5752 }
5753 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5754 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5755 plus_negates.safe_push (gimple_assign_lhs (stmt));
5756 }
5757 for (son = first_dom_son (CDI_DOMINATORS, bb);
5758 son;
5759 son = next_dom_son (CDI_DOMINATORS, son))
5760 break_up_subtract_bb (son);
5761 }
5762
5763 /* Used for repeated factor analysis. */
5764 struct repeat_factor
5765 {
5766 /* An SSA name that occurs in a multiply chain. */
5767 tree factor;
5768
5769 /* Cached rank of the factor. */
5770 unsigned rank;
5771
5772 /* Number of occurrences of the factor in the chain. */
5773 HOST_WIDE_INT count;
5774
5775 /* An SSA name representing the product of this factor and
5776 all factors appearing later in the repeated factor vector. */
5777 tree repr;
5778 };
5779
5780
5781 static vec<repeat_factor> repeat_factor_vec;
5782
5783 /* Used for sorting the repeat factor vector. Sort primarily by
5784 ascending occurrence count, secondarily by descending rank. */
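/* For example (illustrative counts and ranks), the factors
     x (count 2, rank 7), y (count 3, rank 1), z (count 2, rank 4)
   sort as x, z, y: the count-2 entries come first, ordered by
   descending rank, followed by the count-3 entry.  */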
5785
5786 static int
5787 compare_repeat_factors (const void *x1, const void *x2)
5788 {
5789 const repeat_factor *rf1 = (const repeat_factor *) x1;
5790 const repeat_factor *rf2 = (const repeat_factor *) x2;
5791
5792 if (rf1->count != rf2->count)
5793 return rf1->count - rf2->count;
5794
5795 return rf2->rank - rf1->rank;
5796 }
5797
5798 /* Look for repeated operands in OPS in the multiply tree rooted at
5799 STMT. Replace them with an optimal sequence of multiplies and powi
5800 builtin calls, and remove the used operands from OPS. Return an
5801 SSA name representing the value of the replacement sequence. */
5802
5803 static tree
5804 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5805 {
5806 unsigned i, j, vec_len;
5807 int ii;
5808 operand_entry *oe;
5809 repeat_factor *rf1, *rf2;
5810 repeat_factor rfnew;
5811 tree result = NULL_TREE;
5812 tree target_ssa, iter_result;
5813 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5814 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5815 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5816 gimple *mul_stmt, *pow_stmt;
5817
5818 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5819 target. */
5820 if (!powi_fndecl)
5821 return NULL_TREE;
5822
5823 /* Allocate the repeated factor vector. */
5824 repeat_factor_vec.create (10);
5825
5826 /* Scan the OPS vector for all SSA names in the product and build
5827 up a vector of occurrence counts for each factor. */
5828 FOR_EACH_VEC_ELT (*ops, i, oe)
5829 {
5830 if (TREE_CODE (oe->op) == SSA_NAME)
5831 {
5832 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5833 {
5834 if (rf1->factor == oe->op)
5835 {
5836 rf1->count += oe->count;
5837 break;
5838 }
5839 }
5840
5841 if (j >= repeat_factor_vec.length ())
5842 {
5843 rfnew.factor = oe->op;
5844 rfnew.rank = oe->rank;
5845 rfnew.count = oe->count;
5846 rfnew.repr = NULL_TREE;
5847 repeat_factor_vec.safe_push (rfnew);
5848 }
5849 }
5850 }
5851
5852 /* Sort the repeated factor vector by (a) increasing occurrence count,
5853 and (b) decreasing rank. */
5854 repeat_factor_vec.qsort (compare_repeat_factors);
5855
5856 /* It is generally best to combine as many base factors as possible
5857 into a product before applying __builtin_powi to the result.
5858 However, the sort order chosen for the repeated factor vector
5859 allows us to cache partial results for the product of the base
5860 factors for subsequent use. When we already have a cached partial
5861 result from a previous iteration, it is best to make use of it
5862 before looking for another __builtin_pow opportunity.
5863
5864 As an example, consider x * x * y * y * y * z * z * z * z.
5865 We want to first compose the product x * y * z, raise it to the
5866 second power, then multiply this by y * z, and finally multiply
5867 by z. This can be done in 5 multiplies provided we cache y * z
5868 for use in both expressions:
5869
5870 t1 = y * z
5871 t2 = t1 * x
5872 t3 = t2 * t2
5873 t4 = t1 * t3
5874 result = t4 * z
5875
5876 If we instead ignored the cached y * z and first multiplied by
5877 the __builtin_pow opportunity z * z, we would get the inferior:
5878
5879 t1 = y * z
5880 t2 = t1 * x
5881 t3 = t2 * t2
5882 t4 = z * z
5883 t5 = t3 * t4
5884 result = t5 * y */
5885
5886 vec_len = repeat_factor_vec.length ();
5887
5888 /* Repeatedly look for opportunities to create a builtin_powi call. */
5889 while (true)
5890 {
5891 HOST_WIDE_INT power;
5892
5893 /* First look for the largest cached product of factors from
5894 preceding iterations. If found, create a builtin_powi for
5895 it if the minimum occurrence count for its factors is at
5896 least 2, or just use this cached product as our next
5897 multiplicand if the minimum occurrence count is 1. */
5898 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5899 {
5900 if (rf1->repr && rf1->count > 0)
5901 break;
5902 }
5903
5904 if (j < vec_len)
5905 {
5906 power = rf1->count;
5907
5908 if (power == 1)
5909 {
5910 iter_result = rf1->repr;
5911
5912 if (dump_file && (dump_flags & TDF_DETAILS))
5913 {
5914 unsigned elt;
5915 repeat_factor *rf;
5916 fputs ("Multiplying by cached product ", dump_file);
5917 for (elt = j; elt < vec_len; elt++)
5918 {
5919 rf = &repeat_factor_vec[elt];
5920 print_generic_expr (dump_file, rf->factor);
5921 if (elt < vec_len - 1)
5922 fputs (" * ", dump_file);
5923 }
5924 fputs ("\n", dump_file);
5925 }
5926 }
5927 else
5928 {
5929 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5930 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5931 build_int_cst (integer_type_node,
5932 power));
5933 gimple_call_set_lhs (pow_stmt, iter_result);
5934 gimple_set_location (pow_stmt, gimple_location (stmt));
5935 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5936 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5937
5938 if (dump_file && (dump_flags & TDF_DETAILS))
5939 {
5940 unsigned elt;
5941 repeat_factor *rf;
5942 fputs ("Building __builtin_pow call for cached product (",
5943 dump_file);
5944 for (elt = j; elt < vec_len; elt++)
5945 {
5946 rf = &repeat_factor_vec[elt];
5947 print_generic_expr (dump_file, rf->factor);
5948 if (elt < vec_len - 1)
5949 fputs (" * ", dump_file);
5950 }
5951 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5952 power);
5953 }
5954 }
5955 }
5956 else
5957 {
5958 /* Otherwise, find the first factor in the repeated factor
5959 vector whose occurrence count is at least 2. If no such
5960 factor exists, there are no builtin_powi opportunities
5961 remaining. */
5962 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5963 {
5964 if (rf1->count >= 2)
5965 break;
5966 }
5967
5968 if (j >= vec_len)
5969 break;
5970
5971 power = rf1->count;
5972
5973 if (dump_file && (dump_flags & TDF_DETAILS))
5974 {
5975 unsigned elt;
5976 repeat_factor *rf;
5977 fputs ("Building __builtin_pow call for (", dump_file);
5978 for (elt = j; elt < vec_len; elt++)
5979 {
5980 rf = &repeat_factor_vec[elt];
5981 print_generic_expr (dump_file, rf->factor);
5982 if (elt < vec_len - 1)
5983 fputs (" * ", dump_file);
5984 }
5985 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5986 }
5987
5988 reassociate_stats.pows_created++;
5989
5990 /* Visit each element of the vector in reverse order (so that
5991 high-occurrence elements are visited first, and within the
5992 same occurrence count, lower-ranked elements are visited
5993 first). Form a linear product of all elements in this order
5994 whose occurrence count is at least that of element J.
5995 Record the SSA name representing the product of each element
5996 with all subsequent elements in the vector. */
5997 if (j == vec_len - 1)
5998 rf1->repr = rf1->factor;
5999 else
6000 {
6001 for (ii = vec_len - 2; ii >= (int)j; ii--)
6002 {
6003 tree op1, op2;
6004
6005 rf1 = &repeat_factor_vec[ii];
6006 rf2 = &repeat_factor_vec[ii + 1];
6007
6008 /* Init the last factor's representative to be itself. */
6009 if (!rf2->repr)
6010 rf2->repr = rf2->factor;
6011
6012 op1 = rf1->factor;
6013 op2 = rf2->repr;
6014
6015 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6016 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6017 op1, op2);
6018 gimple_set_location (mul_stmt, gimple_location (stmt));
6019 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6020 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6021 rf1->repr = target_ssa;
6022
6023 /* Don't reprocess the multiply we just introduced. */
6024 gimple_set_visited (mul_stmt, true);
6025 }
6026 }
6027
6028 /* Form a call to __builtin_powi for the maximum product
6029 just formed, raised to the power obtained earlier. */
6030 rf1 = &repeat_factor_vec[j];
6031 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6032 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6033 build_int_cst (integer_type_node,
6034 power));
6035 gimple_call_set_lhs (pow_stmt, iter_result);
6036 gimple_set_location (pow_stmt, gimple_location (stmt));
6037 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6038 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6039 }
6040
6041 /* If we previously formed at least one other builtin_powi call,
6042 form the product of this one and those others. */
6043 if (result)
6044 {
6045 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6046 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6047 result, iter_result);
6048 gimple_set_location (mul_stmt, gimple_location (stmt));
6049 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6050 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6051 gimple_set_visited (mul_stmt, true);
6052 result = new_result;
6053 }
6054 else
6055 result = iter_result;
6056
6057 /* Decrement the occurrence count of each element in the product
6058 by the count found above, and remove this many copies of each
6059 factor from OPS. */
6060 for (i = j; i < vec_len; i++)
6061 {
6062 unsigned k = power;
6063 unsigned n;
6064
6065 rf1 = &repeat_factor_vec[i];
6066 rf1->count -= power;
6067
6068 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6069 {
6070 if (oe->op == rf1->factor)
6071 {
6072 if (oe->count <= k)
6073 {
6074 ops->ordered_remove (n);
6075 k -= oe->count;
6076
6077 if (k == 0)
6078 break;
6079 }
6080 else
6081 {
6082 oe->count -= k;
6083 break;
6084 }
6085 }
6086 }
6087 }
6088 }
6089
6090 /* At this point all elements in the repeated factor vector have a
6091 remaining occurrence count of 0 or 1, and those with a count of 1
6092 don't have cached representatives. Re-sort the ops vector and
6093 clean up. */
6094 ops->qsort (sort_by_operand_rank);
6095 repeat_factor_vec.release ();
6096
6097 /* Return the final product computed herein. Note that there may
6098 still be some elements with single occurrence count left in OPS;
6099 those will be handled by the normal reassociation logic. */
6100 return result;
6101 }
6102
6103 /* Attempt to optimize
6104 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6105 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
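/* For instance (illustrative), with CST1 = 2.0 the product
     2.0 * copysign (3.0, y_1)
   is rewritten into a single call copysign (6.0, y_1), provided the
   constant multiply folds exactly; a negative CST1 additionally
   negates the result of the new call.  */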
6106
6107 static void
6108 attempt_builtin_copysign (vec<operand_entry *> *ops)
6109 {
6110 operand_entry *oe;
6111 unsigned int i;
6112 unsigned int length = ops->length ();
6113 tree cst = ops->last ()->op;
6114
6115 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6116 return;
6117
6118 FOR_EACH_VEC_ELT (*ops, i, oe)
6119 {
6120 if (TREE_CODE (oe->op) == SSA_NAME
6121 && has_single_use (oe->op))
6122 {
6123 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6124 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6125 {
6126 tree arg0, arg1;
6127 switch (gimple_call_combined_fn (old_call))
6128 {
6129 CASE_CFN_COPYSIGN:
6130 CASE_CFN_COPYSIGN_FN:
6131 arg0 = gimple_call_arg (old_call, 0);
6132 arg1 = gimple_call_arg (old_call, 1);
6133 /* The first argument of copysign must be a constant,
6134 otherwise there's nothing to do. */
6135 if (TREE_CODE (arg0) == REAL_CST)
6136 {
6137 tree type = TREE_TYPE (arg0);
6138 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6139 /* If we couldn't fold to a single constant, skip it.
6140 That happens e.g. for inexact multiplication when
6141 -frounding-math. */
6142 if (mul == NULL_TREE)
6143 break;
6144 /* Instead of adjusting OLD_CALL, let's build a new
6145 call to not leak the LHS and prevent keeping bogus
6146 debug statements. DCE will clean up the old call. */
6147 gcall *new_call;
6148 if (gimple_call_internal_p (old_call))
6149 new_call = gimple_build_call_internal
6150 (IFN_COPYSIGN, 2, mul, arg1);
6151 else
6152 new_call = gimple_build_call
6153 (gimple_call_fndecl (old_call), 2, mul, arg1);
6154 tree lhs = make_ssa_name (type);
6155 gimple_call_set_lhs (new_call, lhs);
6156 gimple_set_location (new_call,
6157 gimple_location (old_call));
6158 insert_stmt_after (new_call, old_call);
6159 /* We've used the constant, get rid of it. */
6160 ops->pop ();
6161 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6162 /* Handle the CST1 < 0 case by negating the result. */
6163 if (cst1_neg)
6164 {
6165 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6166 gimple *negate_stmt
6167 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6168 insert_stmt_after (negate_stmt, new_call);
6169 oe->op = negrhs;
6170 }
6171 else
6172 oe->op = lhs;
6173 if (dump_file && (dump_flags & TDF_DETAILS))
6174 {
6175 fprintf (dump_file, "Optimizing copysign: ");
6176 print_generic_expr (dump_file, cst);
6177 fprintf (dump_file, " * COPYSIGN (");
6178 print_generic_expr (dump_file, arg0);
6179 fprintf (dump_file, ", ");
6180 print_generic_expr (dump_file, arg1);
6181 fprintf (dump_file, ") into %sCOPYSIGN (",
6182 cst1_neg ? "-" : "");
6183 print_generic_expr (dump_file, mul);
6184 fprintf (dump_file, ", ");
6185 print_generic_expr (dump_file, arg1);
6186 fprintf (dump_file, ")\n");
6187 }
6188 return;
6189 }
6190 break;
6191 default:
6192 break;
6193 }
6194 }
6195 }
6196 }
6197 }
6198
6199 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6200
6201 static void
6202 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6203 {
6204 tree rhs1;
6205
6206 if (dump_file && (dump_flags & TDF_DETAILS))
6207 {
6208 fprintf (dump_file, "Transforming ");
6209 print_gimple_stmt (dump_file, stmt, 0);
6210 }
6211
6212 rhs1 = gimple_assign_rhs1 (stmt);
6213 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6214 update_stmt (stmt);
6215 remove_visited_stmt_chain (rhs1);
6216
6217 if (dump_file && (dump_flags & TDF_DETAILS))
6218 {
6219 fprintf (dump_file, " into ");
6220 print_gimple_stmt (dump_file, stmt, 0);
6221 }
6222 }
6223
6224 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6225
6226 static void
6227 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6228 tree rhs1, tree rhs2)
6229 {
6230 if (dump_file && (dump_flags & TDF_DETAILS))
6231 {
6232 fprintf (dump_file, "Transforming ");
6233 print_gimple_stmt (dump_file, stmt, 0);
6234 }
6235
6236 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6237 update_stmt (gsi_stmt (*gsi));
6238 remove_visited_stmt_chain (rhs1);
6239
6240 if (dump_file && (dump_flags & TDF_DETAILS))
6241 {
6242 fprintf (dump_file, " into ");
6243 print_gimple_stmt (dump_file, stmt, 0);
6244 }
6245 }
6246
6247 /* Reassociate expressions in basic block BB and in the blocks that are
6248 BB's children in the post-dominator tree.
6249
6250 Bubble up return status from maybe_optimize_range_tests. */
6251
6252 static bool
6253 reassociate_bb (basic_block bb)
6254 {
6255 gimple_stmt_iterator gsi;
6256 basic_block son;
6257 gimple *stmt = last_stmt (bb);
6258 bool cfg_cleanup_needed = false;
6259
6260 if (stmt && !gimple_visited_p (stmt))
6261 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6262
6263 bool do_prev = false;
6264 for (gsi = gsi_last_bb (bb);
6265 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6266 {
6267 do_prev = true;
6268 stmt = gsi_stmt (gsi);
6269
6270 if (is_gimple_assign (stmt)
6271 && !stmt_could_throw_p (cfun, stmt))
6272 {
6273 tree lhs, rhs1, rhs2;
6274 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6275
6276 /* If this was part of an already processed statement,
6277 we don't need to touch it again. */
6278 if (gimple_visited_p (stmt))
6279 {
6280 /* This statement might have become dead because of previous
6281 reassociations. */
6282 if (has_zero_uses (gimple_get_lhs (stmt)))
6283 {
6284 reassoc_remove_stmt (&gsi);
6285 release_defs (stmt);
6286 /* We might end up removing the last stmt above which
6287 places the iterator to the end of the sequence.
6288 Reset it to the last stmt in this case and make sure
6289 we don't do gsi_prev in that case. */
6290 if (gsi_end_p (gsi))
6291 {
6292 gsi = gsi_last_bb (bb);
6293 do_prev = false;
6294 }
6295 }
6296 continue;
6297 }
6298
6299 /* If this is not a gimple binary expression, there is
6300 nothing for us to do with it. */
6301 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6302 continue;
6303
6304 lhs = gimple_assign_lhs (stmt);
6305 rhs1 = gimple_assign_rhs1 (stmt);
6306 rhs2 = gimple_assign_rhs2 (stmt);
6307
6308 /* For non-bit or min/max operations we can't associate
6309 all types. Verify that here. */
6310 if (rhs_code != BIT_IOR_EXPR
6311 && rhs_code != BIT_AND_EXPR
6312 && rhs_code != BIT_XOR_EXPR
6313 && rhs_code != MIN_EXPR
6314 && rhs_code != MAX_EXPR
6315 && (!can_reassociate_p (lhs)
6316 || !can_reassociate_p (rhs1)
6317 || !can_reassociate_p (rhs2)))
6318 continue;
6319
6320 if (associative_tree_code (rhs_code))
6321 {
6322 auto_vec<operand_entry *> ops;
6323 tree powi_result = NULL_TREE;
6324 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6325
6326 /* There may be no immediate uses left by the time we
6327 get here because we may have eliminated them all. */
6328 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6329 continue;
6330
6331 gimple_set_visited (stmt, true);
6332 linearize_expr_tree (&ops, stmt, true, true);
6333 ops.qsort (sort_by_operand_rank);
6334 int orig_len = ops.length ();
6335 optimize_ops_list (rhs_code, &ops);
6336 if (undistribute_ops_list (rhs_code, &ops,
6337 loop_containing_stmt (stmt)))
6338 {
6339 ops.qsort (sort_by_operand_rank);
6340 optimize_ops_list (rhs_code, &ops);
6341 }
6342 if (undistribute_bitref_for_vector (rhs_code, &ops,
6343 loop_containing_stmt (stmt)))
6344 {
6345 ops.qsort (sort_by_operand_rank);
6346 optimize_ops_list (rhs_code, &ops);
6347 }
6348 if (rhs_code == PLUS_EXPR
6349 && transform_add_to_multiply (&ops))
6350 ops.qsort (sort_by_operand_rank);
6351
6352 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6353 {
6354 if (is_vector)
6355 optimize_vec_cond_expr (rhs_code, &ops);
6356 else
6357 optimize_range_tests (rhs_code, &ops, NULL);
6358 }
6359
6360 if (rhs_code == MULT_EXPR && !is_vector)
6361 {
6362 attempt_builtin_copysign (&ops);
6363
6364 if (reassoc_insert_powi_p
6365 && flag_unsafe_math_optimizations)
6366 powi_result = attempt_builtin_powi (stmt, &ops);
6367 }
6368
6369 operand_entry *last;
6370 bool negate_result = false;
6371 if (ops.length () > 1
6372 && rhs_code == MULT_EXPR)
6373 {
6374 last = ops.last ();
6375 if ((integer_minus_onep (last->op)
6376 || real_minus_onep (last->op))
6377 && !HONOR_SNANS (TREE_TYPE (lhs))
6378 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6379 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6380 {
6381 ops.pop ();
6382 negate_result = true;
6383 }
6384 }
6385
6386 tree new_lhs = lhs;
6387 /* If the operand vector is now empty, all operands were
6388 consumed by the __builtin_powi optimization. */
6389 if (ops.length () == 0)
6390 transform_stmt_to_copy (&gsi, stmt, powi_result);
6391 else if (ops.length () == 1)
6392 {
6393 tree last_op = ops.last ()->op;
6394
6395 /* If the stmt that defines the operand has to be inserted, insert it
6396 before the use. */
6397 if (ops.last ()->stmt_to_insert)
6398 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6399 if (powi_result)
6400 transform_stmt_to_multiply (&gsi, stmt, last_op,
6401 powi_result);
6402 else
6403 transform_stmt_to_copy (&gsi, stmt, last_op);
6404 }
6405 else
6406 {
6407 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6408 int ops_num = ops.length ();
6409 int width;
6410
6411 /* For binary bit operations, if there are at least 3
6412 operands and the last operand in OPS is a constant,
6413 move it to the front. This helps ensure that we generate
6414 (X & Y) & C rather than (X & C) & Y. The former will
6415 often match a canonical bit test when we get to RTL. */
6416 if (ops.length () > 2
6417 && (rhs_code == BIT_AND_EXPR
6418 || rhs_code == BIT_IOR_EXPR
6419 || rhs_code == BIT_XOR_EXPR)
6420 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6421 std::swap (*ops[0], *ops[ops_num - 1]);
6422
6423 /* Only rewrite the expression tree into parallel form in the
6424 last reassoc pass to avoid useless work back-and-forth
6425 with initial linearization. */
6426 if (!reassoc_insert_powi_p
6427 && ops.length () > 3
6428 && (width = get_reassociation_width (ops_num, rhs_code,
6429 mode)) > 1)
6430 {
6431 if (dump_file && (dump_flags & TDF_DETAILS))
6432 fprintf (dump_file,
6433 "Width = %d was chosen for reassociation\n",
6434 width);
6435 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6436 width, ops);
6437 }
6438 else
6439 {
6440 /* When there are three operands left, we want
6441 to make sure the ones that get the double
6442 binary op are chosen wisely. */
6443 int len = ops.length ();
6444 if (len >= 3)
6445 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6446
6447 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6448 powi_result != NULL
6449 || negate_result,
6450 len != orig_len);
6451 }
6452
6453 /* If we combined some repeated factors into a
6454 __builtin_powi call, multiply that result by the
6455 reassociated operands. */
6456 if (powi_result)
6457 {
6458 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6459 tree type = TREE_TYPE (lhs);
6460 tree target_ssa = make_temp_ssa_name (type, NULL,
6461 "reassocpow");
6462 gimple_set_lhs (lhs_stmt, target_ssa);
6463 update_stmt (lhs_stmt);
6464 if (lhs != new_lhs)
6465 {
6466 target_ssa = new_lhs;
6467 new_lhs = lhs;
6468 }
6469 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6470 powi_result, target_ssa);
6471 gimple_set_location (mul_stmt, gimple_location (stmt));
6472 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6473 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6474 }
6475 }
6476
6477 if (negate_result)
6478 {
6479 stmt = SSA_NAME_DEF_STMT (lhs);
6480 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6481 gimple_set_lhs (stmt, tmp);
6482 if (lhs != new_lhs)
6483 tmp = new_lhs;
6484 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6485 tmp);
6486 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6487 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6488 update_stmt (stmt);
6489 }
6490 }
6491 }
6492 }
6493 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6494 son;
6495 son = next_dom_son (CDI_POST_DOMINATORS, son))
6496 cfg_cleanup_needed |= reassociate_bb (son);
6497
6498 return cfg_cleanup_needed;
6499 }
6500
6501 /* Add jumps around shifts for range tests turned into bit tests.
6502 For each SSA_NAME VAR we have code like:
6503 VAR = ...; // final stmt of range comparison
6504 // bit test here...;
6505 OTHERVAR = ...; // final stmt of the bit test sequence
6506 RES = VAR | OTHERVAR;
6507 Turn the above into:
6508 VAR = ...;
6509 if (VAR != 0)
6510 goto <l3>;
6511 else
6512 goto <l2>;
6513 <l2>:
6514 // bit test here...;
6515 OTHERVAR = ...;
6516 <l3>:
6517 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6518
6519 static void
6520 branch_fixup (void)
6521 {
6522 tree var;
6523 unsigned int i;
6524
6525 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6526 {
6527 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6528 gimple *use_stmt;
6529 use_operand_p use;
6530 bool ok = single_imm_use (var, &use, &use_stmt);
6531 gcc_assert (ok
6532 && is_gimple_assign (use_stmt)
6533 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6534 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6535
6536 basic_block cond_bb = gimple_bb (def_stmt);
6537 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6538 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6539
6540 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6541 gimple *g = gimple_build_cond (NE_EXPR, var,
6542 build_zero_cst (TREE_TYPE (var)),
6543 NULL_TREE, NULL_TREE);
6544 location_t loc = gimple_location (use_stmt);
6545 gimple_set_location (g, loc);
6546 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6547
6548 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6549 etrue->probability = profile_probability::even ();
6550 edge efalse = find_edge (cond_bb, then_bb);
6551 efalse->flags = EDGE_FALSE_VALUE;
6552 efalse->probability -= etrue->probability;
6553 then_bb->count -= etrue->count ();
6554
6555 tree othervar = NULL_TREE;
6556 if (gimple_assign_rhs1 (use_stmt) == var)
6557 othervar = gimple_assign_rhs2 (use_stmt);
6558 else if (gimple_assign_rhs2 (use_stmt) == var)
6559 othervar = gimple_assign_rhs1 (use_stmt);
6560 else
6561 gcc_unreachable ();
6562 tree lhs = gimple_assign_lhs (use_stmt);
6563 gphi *phi = create_phi_node (lhs, merge_bb);
6564 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6565 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6566 gsi = gsi_for_stmt (use_stmt);
6567 gsi_remove (&gsi, true);
6568
6569 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6570 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6571 }
6572 reassoc_branch_fixups.release ();
6573 }
6574
6575 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6576 void debug_ops_vector (vec<operand_entry *> ops);
6577
6578 /* Dump the operand entry vector OPS to FILE. */
6579
6580 void
6581 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6582 {
6583 operand_entry *oe;
6584 unsigned int i;
6585
6586 FOR_EACH_VEC_ELT (ops, i, oe)
6587 {
6588 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6589 print_generic_expr (file, oe->op);
6590 fprintf (file, "\n");
6591 }
6592 }
6593
6594 /* Dump the operand entry vector OPS to STDERR. */
6595
6596 DEBUG_FUNCTION void
6597 debug_ops_vector (vec<operand_entry *> ops)
6598 {
6599 dump_ops_vector (stderr, ops);
6600 }
6601
6602 /* Bubble up return status from reassociate_bb. */
6603
6604 static bool
6605 do_reassoc (void)
6606 {
6607 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6608 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6609 }
6610
6611 /* Initialize the reassociation pass. */
6612
6613 static void
6614 init_reassoc (void)
6615 {
6616 int i;
6617 int64_t rank = 2;
6618 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6619
6620 /* Find the loops, so that we can prevent moving calculations in
6621 them. */
6622 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6623
6624 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6625
6626 next_operand_entry_id = 0;
6627
6628 /* Reverse RPO (Reverse Post Order) will give us something where
6629 deeper loops come later. */
6630 pre_and_rev_post_order_compute (NULL, bbs, false);
6631 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6632 operand_rank = new hash_map<tree, int64_t>;
6633
6634 /* Give each default definition a distinct rank. This includes
6635 parameters and the static chain. Walk backwards over all
6636 SSA names so that we get proper rank ordering according
6637 to tree_swap_operands_p. */
6638 for (i = num_ssa_names - 1; i > 0; --i)
6639 {
6640 tree name = ssa_name (i);
6641 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6642 insert_operand_rank (name, ++rank);
6643 }
6644
6645 /* Set up rank for each BB */
6646 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6647 bb_rank[bbs[i]] = ++rank << 16;
6648
6649 free (bbs);
6650 calculate_dominance_info (CDI_POST_DOMINATORS);
6651 plus_negates = vNULL;
6652 }
6653
6654 /* Cleanup after the reassociation pass, and print stats if
6655 requested. */
6656
6657 static void
6658 fini_reassoc (void)
6659 {
6660 statistics_counter_event (cfun, "Linearized",
6661 reassociate_stats.linearized);
6662 statistics_counter_event (cfun, "Constants eliminated",
6663 reassociate_stats.constants_eliminated);
6664 statistics_counter_event (cfun, "Ops eliminated",
6665 reassociate_stats.ops_eliminated);
6666 statistics_counter_event (cfun, "Statements rewritten",
6667 reassociate_stats.rewritten);
6668 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6669 reassociate_stats.pows_encountered);
6670 statistics_counter_event (cfun, "Built-in powi calls created",
6671 reassociate_stats.pows_created);
6672
6673 delete operand_rank;
6674 operand_entry_pool.release ();
6675 free (bb_rank);
6676 plus_negates.release ();
6677 free_dominance_info (CDI_POST_DOMINATORS);
6678 loop_optimizer_finalize ();
6679 }
6680
6681 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6682 insertion of __builtin_powi calls.
6683
6684 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6685 optimization of a gimple conditional. Otherwise returns zero. */
6686
6687 static unsigned int
6688 execute_reassoc (bool insert_powi_p)
6689 {
6690 reassoc_insert_powi_p = insert_powi_p;
6691
6692 init_reassoc ();
6693
6694 bool cfg_cleanup_needed;
6695 cfg_cleanup_needed = do_reassoc ();
6696 repropagate_negates ();
6697 branch_fixup ();
6698
6699 fini_reassoc ();
6700 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6701 }
6702
6703 namespace {
6704
6705 const pass_data pass_data_reassoc =
6706 {
6707 GIMPLE_PASS, /* type */
6708 "reassoc", /* name */
6709 OPTGROUP_NONE, /* optinfo_flags */
6710 TV_TREE_REASSOC, /* tv_id */
6711 ( PROP_cfg | PROP_ssa ), /* properties_required */
6712 0, /* properties_provided */
6713 0, /* properties_destroyed */
6714 0, /* todo_flags_start */
6715 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6716 };
6717
6718 class pass_reassoc : public gimple_opt_pass
6719 {
6720 public:
6721 pass_reassoc (gcc::context *ctxt)
6722 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6723 {}
6724
6725 /* opt_pass methods: */
6726 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6727 void set_pass_param (unsigned int n, bool param)
6728 {
6729 gcc_assert (n == 0);
6730 insert_powi_p = param;
6731 }
6732 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6733 virtual unsigned int execute (function *)
6734 { return execute_reassoc (insert_powi_p); }
6735
6736 private:
6737 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6738 point 3a in the pass header comment. */
6739 bool insert_powi_p;
6740 }; // class pass_reassoc
6741
6742 } // anon namespace
6743
6744 gimple_opt_pass *
6745 make_pass_reassoc (gcc::context *ctxt)
6746 {
6747 return new pass_reassoc (ctxt);
6748 }
6749