1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, namely in blocks that are common dominators
55 of those that contain the divisions.)
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however, this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
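
/* As an illustrative sketch (not an excerpt from any testcase), assume the
   target's threshold N is 2 and -fno-trapping-math is in effect.  In

	t1 = a / d;
	if (cond)
	  foo ();
	t2 = b / d;

   the block containing the second division post-dominates the block
   containing the first one, so that block counts two divisions (its own
   and the later one) and a single reciprocal 1.0 / d is inserted there,
   turning both divisions into multiplications.  If instead the first
   division sits inside the conditional branch, no block has two divisions
   post-dominating it and nothing is inserted.  */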
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
126
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
130
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
134
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
138
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
142
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
146
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
151
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
156
157 /* Construct a struct occurrence for basic block BB whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
161 {
162 bb->aux = this;
163 }
164
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
167 {
168 bb->aux = nullptr;
169 }
170
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
173
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
176 };
177
178 static struct
179 {
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
182
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
186
187 static struct
188 {
189 /* Number of cexpi calls inserted. */
190 int inserted;
191
192 /* Number of conversions removed. */
193 int conv_removed;
194
195 } sincos_stats;
196
197 static struct
198 {
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
201
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
204
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
207
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
210
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
214
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
218
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
221
222 void* occurrence::operator new (size_t n)
223 {
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
226 }
227
228 void occurrence::operator delete (void *occ, size_t n)
229 {
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
232 }
233
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
237
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
241
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
245 {
246 struct occurrence *occ, **p_occ;
247
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
249 {
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
253 {
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
259
260 /* Try the next block (it may as well be dominated by BB). */
261 }
262
263 else if (dom == occ_bb)
264 {
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
268 }
269
270 else if (dom != idom)
271 {
272 gcc_assert (!dom->aux);
273
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
280
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
285 }
286
287 else
288 {
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
291 }
292 }
293
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
297 }
298
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weight to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
304
305 static inline void
306 register_division_in (basic_block bb, int importance)
307 {
308 struct occurrence *occ;
309
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
312 {
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
315 }
316
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
319 }
320
321
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
324
325 static void
326 compute_merit (struct occurrence *occ)
327 {
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
330
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
332 {
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
336
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
341
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
344 }
345 }
346
347
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
351 {
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses of x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
360 }
361
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
365 {
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
368 {
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
371
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
374 }
375 return false;
376 }
377
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
381 {
382 return is_mult_by (use_stmt, def, def);
383 }
384
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
389 {
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
394 {
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
398 }
399 return false;
400 }
401
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
406
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
410
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
414
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
419 {
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
424
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
430 {
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
436
437 if (should_insert_square_recip)
438 {
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
442 }
443
444 if (occ->bb_has_division)
445 {
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
452
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
456 }
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
458 {
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
466 }
467 else
468 {
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
474 }
475
476 reciprocal_stats.rdivs_inserted++;
477
478 occ->recip_def_stmt = new_stmt;
479 }
480
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
487 }
488
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
493 {
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
497
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
500 {
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
507 }
508 }
509
510
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
513
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
516 {
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
520
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
523 {
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
529 }
530 }
531
532
533 /* Free OCC and return one more "struct occurrence" to be freed. */
534
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
537 {
538 struct occurrence *child, *next;
539
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
544
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
549 {
550 while (next)
551 next = free_bb (next);
552
553 return child;
554 }
555 }
556
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
571
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
574 {
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
581
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
586
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
589
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
592
593 switch (gimple_call_combined_fn (sqrt_stmt))
594 {
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
598
599 default:
600 return;
601 }
602 tree a = gimple_call_arg (sqrt_stmt, 0);
603
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
605
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
612
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
614 {
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
618 {
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
622 }
623 else if (is_mult_by (use_stmt, x, a))
624 {
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
628 }
629 else
630 has_other_use = true;
631 }
632
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
639
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
642
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
647
648 if (dump_file)
649 {
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
654 }
655
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
659 {
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
665
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
668
669 if (dump_file)
670 {
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
674 }
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
683
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
686
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
691 {
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
695 }
696 }
697 if (!mult_stmts.is_empty ())
698 {
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
704 {
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
706
707 if (dump_file)
708 {
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
712 }
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
718 }
719 }
720
721 if (has_other_use)
722 {
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
728
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
734 }
735 else if (delete_div)
736 {
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
741 }
742 else
743 release_ssa_name (x);
744 }
745
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
749
750 DEF must be a GIMPLE register of a floating-point type. */
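
/* For example, with a sufficiently low target threshold, the overall
   effect on DEF's uses is roughly (a sketch in source form; "reciptmp"
   and "powmult_reciptmp" are the temporary names created below):

	a = u / (x * x);
	b = v / x;

   becomes

	reciptmp = 1.0 / x;
	powmult_reciptmp = reciptmp * reciptmp;
	a = u * powmult_reciptmp;
	b = v * reciptmp;  */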
751
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
754 {
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
763
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
766
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
776
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
781 {
782 tree op0 = gimple_assign_rhs1 (def_stmt);
783
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
785 {
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
789 }
790 }
791
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
793 {
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
796 {
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
799 }
800
801 if (is_square_of (use_stmt, def))
802 {
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
805 {
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
808 {
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
812 }
813 }
814 }
815 }
816
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
819
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
823
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
826 {
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
829 {
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
833 }
834
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
836 {
837 if (is_division_by (use_stmt, def))
838 {
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
841 }
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
843 {
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
845 {
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
851
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
855 {
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
859 }
860 }
861 }
862 }
863 }
864
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
868
869 occ_head = NULL;
870 }
871
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
874
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
877 {
878 internal_fn ifn;
879
880 switch (gimple_call_combined_fn (call))
881 {
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
886
887 default:
888 return IFN_LAST;
889 }
890
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
894
895 return ifn;
896 }
897
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
901
902 const pass_data pass_data_cse_reciprocals =
903 {
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
913 };
914
915 class pass_cse_reciprocals : public gimple_opt_pass
916 {
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
920 {}
921
922 /* opt_pass methods: */
923 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924 virtual unsigned int execute (function *);
925
926 }; // class pass_cse_reciprocals
927
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
930 {
931 basic_block bb;
932 tree arg;
933
934 occ_pool = new object_allocator<occurrence> ("dominators for recip");
935
936 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937 calculate_dominance_info (CDI_DOMINATORS);
938 calculate_dominance_info (CDI_POST_DOMINATORS);
939
940 if (flag_checking)
941 FOR_EACH_BB_FN (bb, fun)
942 gcc_assert (!bb->aux);
943
944 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 && is_gimple_reg (arg))
947 {
948 tree name = ssa_default_def (fun, arg);
949 if (name)
950 execute_cse_reciprocals_1 (NULL, name);
951 }
952
953 FOR_EACH_BB_FN (bb, fun)
954 {
955 tree def;
956
957 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 gsi_next (&gsi))
959 {
960 gphi *phi = gsi.phi ();
961 def = PHI_RESULT (phi);
962 if (! virtual_operand_p (def)
963 && FLOAT_TYPE_P (TREE_TYPE (def)))
964 execute_cse_reciprocals_1 (NULL, def);
965 }
966
967 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 gsi_next (&gsi))
969 {
970 gimple *stmt = gsi_stmt (gsi);
971
972 if (gimple_has_lhs (stmt)
973 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def))
975 && TREE_CODE (def) == SSA_NAME)
976 {
977 execute_cse_reciprocals_1 (&gsi, def);
978 stmt = gsi_stmt (gsi);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt)
981 && gimple_assign_lhs (stmt) == def
982 && !stmt_can_throw_internal (cfun, stmt)
983 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 optimize_recip_sqrt (&gsi, def);
985 }
986 }
987
988 if (optimize_bb_for_size_p (bb))
989 continue;
990
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
992 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 gsi_next (&gsi))
994 {
995 gimple *stmt = gsi_stmt (gsi);
996
997 if (is_gimple_assign (stmt)
998 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
999 {
1000 tree arg1 = gimple_assign_rhs2 (stmt);
1001 gimple *stmt1;
1002
1003 if (TREE_CODE (arg1) != SSA_NAME)
1004 continue;
1005
1006 stmt1 = SSA_NAME_DEF_STMT (arg1);
1007
1008 if (is_gimple_call (stmt1)
1009 && gimple_call_lhs (stmt1))
1010 {
1011 bool fail;
1012 imm_use_iterator ui;
1013 use_operand_p use_p;
1014 tree fndecl = NULL_TREE;
1015
1016 gcall *call = as_a <gcall *> (stmt1);
1017 internal_fn ifn = internal_fn_reciprocal (call);
1018 if (ifn == IFN_LAST)
1019 {
1020 fndecl = gimple_call_fndecl (call);
1021 if (!fndecl
1022 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 continue;
1024 fndecl = targetm.builtin_reciprocal (fndecl);
1025 if (!fndecl)
1026 continue;
1027 }
1028
1029 /* Check that all uses of the SSA name are divisions,
1030 otherwise replacing the defining statement will do
1031 the wrong thing. */
1032 fail = false;
1033 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1034 {
1035 gimple *stmt2 = USE_STMT (use_p);
1036 if (is_gimple_debug (stmt2))
1037 continue;
1038 if (!is_gimple_assign (stmt2)
1039 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2) == arg1
1041 || gimple_assign_rhs2 (stmt2) != arg1)
1042 {
1043 fail = true;
1044 break;
1045 }
1046 }
1047 if (fail)
1048 continue;
1049
1050 gimple_replace_ssa_lhs (call, arg1);
1051 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1052 {
1053 auto_vec<tree, 4> args;
1054 for (unsigned int i = 0;
1055 i < gimple_call_num_args (call); i++)
1056 args.safe_push (gimple_call_arg (call, i));
1057 gcall *stmt2;
1058 if (ifn == IFN_LAST)
1059 stmt2 = gimple_build_call_vec (fndecl, args);
1060 else
1061 stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 gimple_call_set_lhs (stmt2, arg1);
1063 gimple_move_vops (stmt2, call);
1064 gimple_call_set_nothrow (stmt2,
1065 gimple_call_nothrow_p (call));
1066 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 gsi_replace (&gsi2, stmt2, true);
1068 }
1069 else
1070 {
1071 if (ifn == IFN_LAST)
1072 gimple_call_set_fndecl (call, fndecl);
1073 else
1074 gimple_call_set_internal_fn (call, ifn);
1075 update_stmt (call);
1076 }
1077 reciprocal_stats.rfuncs_inserted++;
1078
1079 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1080 {
1081 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 fold_stmt_inplace (&gsi);
1084 update_stmt (stmt);
1085 }
1086 }
1087 }
1088 }
1089 }
1090
1091 statistics_counter_event (fun, "reciprocal divs inserted",
1092 reciprocal_stats.rdivs_inserted);
1093 statistics_counter_event (fun, "reciprocal functions inserted",
1094 reciprocal_stats.rfuncs_inserted);
1095
1096 free_dominance_info (CDI_DOMINATORS);
1097 free_dominance_info (CDI_POST_DOMINATORS);
1098 delete occ_pool;
1099 return 0;
1100 }
1101
1102 } // anon namespace
1103
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1106 {
1107 return new pass_cse_reciprocals (ctxt);
1108 }
1109
1110 /* If NAME is the result of a type conversion, look for other
1111 equivalent dominating or dominated conversions, and replace all
1112 uses with the earliest dominating name, removing the redundant
1113 conversions. Return the prevailing name. */
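
/* For example (a GIMPLE-like sketch; the SSA names are invented for
   illustration), given two identical casts of the same source

	_1 = (double) x_2;
	...
	_3 = (double) x_2;

   where the second cast is dominated by the first, all uses of _3 are
   replaced by _1 and the second conversion is removed.  */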
1114
1115 static tree
1116 execute_cse_conv_1 (tree name, bool *cfg_changed)
1117 {
1118 if (SSA_NAME_IS_DEFAULT_DEF (name)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120 return name;
1121
1122 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1123
1124 if (!gimple_assign_cast_p (def_stmt))
1125 return name;
1126
1127 tree src = gimple_assign_rhs1 (def_stmt);
1128
1129 if (TREE_CODE (src) != SSA_NAME)
1130 return name;
1131
1132 imm_use_iterator use_iter;
1133 gimple *use_stmt;
1134
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1137 {
1138 if (use_stmt == def_stmt
1139 || !gimple_assign_cast_p (use_stmt))
1140 continue;
1141
1142 tree lhs = gimple_assign_lhs (use_stmt);
1143
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 || (gimple_assign_rhs1 (use_stmt)
1146 != gimple_assign_rhs1 (def_stmt))
1147 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 continue;
1149
1150 bool use_dominates;
1151 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1152 {
1153 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 gsi_next (&gsi);
1156 use_dominates = !gsi_end_p (gsi);
1157 }
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 gimple_bb (def_stmt)))
1160 use_dominates = false;
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 gimple_bb (use_stmt)))
1163 use_dominates = true;
1164 else
1165 continue;
1166
1167 if (use_dominates)
1168 {
1169 std::swap (name, lhs);
1170 std::swap (def_stmt, use_stmt);
1171 }
1172 }
1173
1174 /* Now go through all uses of SRC again, replacing the equivalent
1175 dominated conversions. We may replace defs that were not
1176 dominated by the then-prevailing defs when we first visited
1177 them. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1179 {
1180 if (use_stmt == def_stmt
1181 || !gimple_assign_cast_p (use_stmt))
1182 continue;
1183
1184 tree lhs = gimple_assign_lhs (use_stmt);
1185
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 || (gimple_assign_rhs1 (use_stmt)
1188 != gimple_assign_rhs1 (def_stmt))
1189 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 continue;
1191
1192 basic_block use_bb = gimple_bb (use_stmt);
1193 if (gimple_bb (def_stmt) == use_bb
1194 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1195 {
1196 sincos_stats.conv_removed++;
1197
1198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 replace_uses_by (lhs, name);
1200 if (gsi_remove (&gsi, true)
1201 && gimple_purge_dead_eh_edges (use_bb))
1202 *cfg_changed = true;
1203 release_defs (use_stmt);
1204 }
1205 }
1206
1207 return name;
1208 }
1209
1210 /* Records an occurrence at statement USE_STMT in the vector of statements
1211 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1212 yet initialized. Returns true if the occurrence was pushed on
1213 the vector. Adjusts *TOP_BB to be the basic block dominating all
1214 statements in the vector. */
1215
1216 static bool
1217 maybe_record_sincos (vec<gimple *> *stmts,
1218 basic_block *top_bb, gimple *use_stmt)
1219 {
1220 basic_block use_bb = gimple_bb (use_stmt);
1221 if (*top_bb
1222 && (*top_bb == use_bb
1223 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1224 stmts->safe_push (use_stmt);
1225 else if (!*top_bb
1226 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1227 {
1228 stmts->safe_push (use_stmt);
1229 *top_bb = use_bb;
1230 }
1231 else
1232 return false;
1233
1234 return true;
1235 }
1236
1237 /* Look for sin, cos and cexpi calls with the same argument NAME and
1238 create a single call to cexpi CSEing the result in this case.
1239 We first walk over all immediate uses of the argument collecting
1240 statements that we can CSE in a vector and in a second pass replace
1241 the statement rhs with a REALPART or IMAGPART expression on the
1242 result of the cexpi call we insert before the use statement that
1243 dominates all other candidates. */
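
/* As a sketch (GIMPLE-like pseudocode, SSA names invented):

	s_1 = sin (x_2);
	c_3 = cos (x_2);

   becomes

	sincostmp_4 = cexpi (x_2);
	s_1 = IMAGPART_EXPR <sincostmp_4>;
	c_3 = REALPART_EXPR <sincostmp_4>;

   with the single cexpi call inserted in the dominating block.  */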
1244
1245 static bool
1246 execute_cse_sincos_1 (tree name)
1247 {
1248 gimple_stmt_iterator gsi;
1249 imm_use_iterator use_iter;
1250 tree fndecl, res, type = NULL_TREE;
1251 gimple *def_stmt, *use_stmt, *stmt;
1252 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1253 auto_vec<gimple *> stmts;
1254 basic_block top_bb = NULL;
1255 int i;
1256 bool cfg_changed = false;
1257
1258 name = execute_cse_conv_1 (name, &cfg_changed);
1259
1260 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1261 {
1262 if (gimple_code (use_stmt) != GIMPLE_CALL
1263 || !gimple_call_lhs (use_stmt))
1264 continue;
1265
1266 switch (gimple_call_combined_fn (use_stmt))
1267 {
1268 CASE_CFN_COS:
1269 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1270 break;
1271
1272 CASE_CFN_SIN:
1273 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 break;
1275
1276 CASE_CFN_CEXPI:
1277 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 break;
1279
1280 default:;
1281 continue;
1282 }
1283
1284 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1285 if (!type)
1286 {
1287 type = t;
1288 t = TREE_TYPE (name);
1289 }
1290 /* This checks that NAME has the right type in the first round,
1291 and, in subsequent rounds, that the built_in type is the same
1292 type, or a compatible type. */
1293 if (type != t && !types_compatible_p (type, t))
1294 return false;
1295 }
1296 if (seen_cos + seen_sin + seen_cexpi <= 1)
1297 return false;
1298
1299 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300 the name def statement. */
1301 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1302 if (!fndecl)
1303 return false;
1304 stmt = gimple_build_call (fndecl, 1, name);
1305 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1306 gimple_call_set_lhs (stmt, res);
1307
1308 def_stmt = SSA_NAME_DEF_STMT (name);
1309 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1310 && gimple_code (def_stmt) != GIMPLE_PHI
1311 && gimple_bb (def_stmt) == top_bb)
1312 {
1313 gsi = gsi_for_stmt (def_stmt);
1314 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1315 }
1316 else
1317 {
1318 gsi = gsi_after_labels (top_bb);
1319 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1320 }
1321 sincos_stats.inserted++;
1322
1323 /* And adjust the recorded old call sites. */
1324 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1325 {
1326 tree rhs = NULL;
1327
1328 switch (gimple_call_combined_fn (use_stmt))
1329 {
1330 CASE_CFN_COS:
1331 rhs = fold_build1 (REALPART_EXPR, type, res);
1332 break;
1333
1334 CASE_CFN_SIN:
1335 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1336 break;
1337
1338 CASE_CFN_CEXPI:
1339 rhs = res;
1340 break;
1341
1342 default:;
1343 gcc_unreachable ();
1344 }
1345
1346 /* Replace call with a copy. */
1347 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1348
1349 gsi = gsi_for_stmt (use_stmt);
1350 gsi_replace (&gsi, stmt, true);
1351 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1352 cfg_changed = true;
1353 }
1354
1355 return cfg_changed;
1356 }
1357
1358 /* To evaluate powi(x,n), the floating point value x raised to the
1359 constant integer exponent n, we use a hybrid algorithm that
1360 combines the "window method" with look-up tables. For an
1361 introduction to exponentiation algorithms and "addition chains",
1362 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1363 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1364 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1365 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1366
1367 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1368 multiplications to inline before calling the system library's pow
1369 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1370 so this default never requires calling pow, powf or powl. */
1371
1372 #ifndef POWI_MAX_MULTS
1373 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1374 #endif
1375
1376 /* The size of the "optimal power tree" lookup table. All
1377 exponents less than this value are simply looked up in the
1378 powi_table below. This threshold is also used to size the
1379 cache of pseudo registers that hold intermediate results. */
1380 #define POWI_TABLE_SIZE 256
1381
1382 /* The size, in bits of the window, used in the "window method"
1383 exponentiation algorithm. This is equivalent to a radix of
1384 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1385 #define POWI_WINDOW_SIZE 3
1386
1387 /* The following table is an efficient representation of an
1388 "optimal power tree". For each value, i, the corresponding
1389 value, j, in the table states that an optimal evaluation
1390 sequence for calculating pow(x,i) can be found by evaluating
1391 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1392 100 integers is given in Knuth's "Seminumerical algorithms". */
1393
1394 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1395 {
1396 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1397 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1398 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1399 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1400 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1401 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1402 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1403 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1404 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1405 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1406 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1407 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1408 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1409 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1410 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1411 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1412 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1413 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1414 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1415 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1416 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1417 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1418 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1419 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1420 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1421 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1422 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1423 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1424 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1425 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1426 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1427 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1428 };
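
/* A worked example of how the table is used (an illustration, not extra
   data): powi_table[13] == 10, so x**13 is evaluated as x**10 * x**3;
   recursively, x**10 == x**5 * x**5, x**5 == x**3 * x**2,
   x**3 == x**2 * x and x**2 == x * x, five multiplications in total.  */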
1429
1430
1431 /* Return the number of multiplications required to calculate
1432 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1433 subroutine of powi_cost. CACHE is an array indicating
1434 which exponents have already been calculated. */
1435
1436 static int
1437 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1438 {
1439 /* If we've already calculated this exponent, then this evaluation
1440 doesn't require any additional multiplications. */
1441 if (cache[n])
1442 return 0;
1443
1444 cache[n] = true;
1445 return powi_lookup_cost (n - powi_table[n], cache)
1446 + powi_lookup_cost (powi_table[n], cache) + 1;
1447 }
1448
1449 /* Return the number of multiplications required to calculate
1450 powi(x,n) for an arbitrary x, given the exponent N. This
1451 function needs to be kept in sync with powi_as_mults below. */
1452
1453 static int
1454 powi_cost (HOST_WIDE_INT n)
1455 {
1456 bool cache[POWI_TABLE_SIZE];
1457 unsigned HOST_WIDE_INT digit;
1458 unsigned HOST_WIDE_INT val;
1459 int result;
1460
1461 if (n == 0)
1462 return 0;
1463
1464 /* Ignore the reciprocal when calculating the cost. */
1465 val = absu_hwi (n);
1466
1467 /* Initialize the exponent cache. */
1468 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1469 cache[1] = true;
1470
1471 result = 0;
1472
1473 while (val >= POWI_TABLE_SIZE)
1474 {
1475 if (val & 1)
1476 {
1477 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1478 result += powi_lookup_cost (digit, cache)
1479 + POWI_WINDOW_SIZE + 1;
1480 val >>= POWI_WINDOW_SIZE;
1481 }
1482 else
1483 {
1484 val >>= 1;
1485 result++;
1486 }
1487 }
1488
1489 return result + powi_lookup_cost (val, cache);
1490 }
1491
1492 /* Recursive subroutine of powi_as_mults. This function takes the
1493 array, CACHE, of already calculated exponents and an exponent N and
1494 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1495
1496 static tree
1497 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1498 unsigned HOST_WIDE_INT n, tree *cache)
1499 {
1500 tree op0, op1, ssa_target;
1501 unsigned HOST_WIDE_INT digit;
1502 gassign *mult_stmt;
1503
1504 if (n < POWI_TABLE_SIZE && cache[n])
1505 return cache[n];
1506
1507 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1508
1509 if (n < POWI_TABLE_SIZE)
1510 {
1511 cache[n] = ssa_target;
1512 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1513 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1514 }
1515 else if (n & 1)
1516 {
1517 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1518 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1519 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1520 }
1521 else
1522 {
1523 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1524 op1 = op0;
1525 }
1526
1527 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1528 gimple_set_location (mult_stmt, loc);
1529 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1530
1531 return ssa_target;
1532 }
1533
1534 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1535 This function needs to be kept in sync with powi_cost above. */
1536
1537 tree
1538 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1539 tree arg0, HOST_WIDE_INT n)
1540 {
1541 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1542 gassign *div_stmt;
1543 tree target;
1544
1545 if (n == 0)
1546 return build_one_cst (type);
1547
1548 memset (cache, 0, sizeof (cache));
1549 cache[1] = arg0;
1550
1551 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1552 if (n >= 0)
1553 return result;
1554
1555 /* If the original exponent was negative, reciprocate the result. */
1556 target = make_temp_ssa_name (type, NULL, "powmult");
1557 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1558 build_real (type, dconst1), result);
1559 gimple_set_location (div_stmt, loc);
1560 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1561
1562 return target;
1563 }
1564
1565 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1566 location info LOC. If the arguments are appropriate, create an
1567 equivalent sequence of statements prior to GSI using an optimal
1568 number of multiplications, and return an expression holding the
1569 result. */
1570
1571 static tree
1572 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1573 tree arg0, HOST_WIDE_INT n)
1574 {
1575 if ((n >= -1 && n <= 2)
1576 || (optimize_function_for_speed_p (cfun)
1577 && powi_cost (n) <= POWI_MAX_MULTS))
1578 return powi_as_mults (gsi, loc, arg0, n);
1579
1580 return NULL_TREE;
1581 }
1582
1583 /* Build a gimple call statement that calls FN with argument ARG.
1584 Set the lhs of the call statement to a fresh SSA name. Insert the
1585 statement prior to GSI's current position, and return the fresh
1586 SSA name. */
1587
1588 static tree
1589 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1590 tree fn, tree arg)
1591 {
1592 gcall *call_stmt;
1593 tree ssa_target;
1594
1595 call_stmt = gimple_build_call (fn, 1, arg);
1596 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1597 gimple_set_lhs (call_stmt, ssa_target);
1598 gimple_set_location (call_stmt, loc);
1599 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600
1601 return ssa_target;
1602 }
1603
1604 /* Build a gimple binary operation with the given CODE and arguments
1605 ARG0, ARG1, assigning the result to a new SSA name for variable
1606 TARGET. Insert the statement prior to GSI's current position, and
1607 return the fresh SSA name. */
1608
1609 static tree
1610 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1611 const char *name, enum tree_code code,
1612 tree arg0, tree arg1)
1613 {
1614 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1615 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1616 gimple_set_location (stmt, loc);
1617 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1618 return result;
1619 }
1620
1621 /* Build a gimple reference operation with the given CODE and argument
1622 ARG, assigning the result to a new SSA name of TYPE with NAME.
1623 Insert the statement prior to GSI's current position, and return
1624 the fresh SSA name. */
1625
1626 static inline tree
1627 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1628 const char *name, enum tree_code code, tree arg0)
1629 {
1630 tree result = make_temp_ssa_name (type, NULL, name);
1631 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1632 gimple_set_location (stmt, loc);
1633 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634 return result;
1635 }
1636
1637 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1638 prior to GSI's current position, and return the fresh SSA name. */
1639
1640 static tree
1641 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642 tree type, tree val)
1643 {
1644 tree result = make_ssa_name (type);
1645 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1646 gimple_set_location (stmt, loc);
1647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1648 return result;
1649 }
1650
1651 struct pow_synth_sqrt_info
1652 {
1653 bool *factors;
1654 unsigned int deepest;
1655 unsigned int num_mults;
1656 };
1657
1658 /* Return true iff the real value C can be represented as a
1659 sum of powers of 0.5 up to N. That is:
1660 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1661 Record in INFO the various parameters of the synthesis algorithm such
1662 as the factors a[i], the maximum 0.5 power and the number of
1663 multiplications that will be required. */
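
/* For instance (a worked example): 0.625 == 0.5 + 0.125
   == 0.5**1 + 0.5**3, so for N >= 3 the factors are a[1] = 1, a[2] = 0,
   a[3] = 1, the deepest 0.5 power used is 3, and one multiplication is
   needed to combine sqrt (x) with sqrt (sqrt (sqrt (x))).  */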
1664
1665 bool
1666 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1667 struct pow_synth_sqrt_info *info)
1668 {
1669 REAL_VALUE_TYPE factor = dconsthalf;
1670 REAL_VALUE_TYPE remainder = c;
1671
1672 info->deepest = 0;
1673 info->num_mults = 0;
1674 memset (info->factors, 0, n * sizeof (bool));
1675
1676 for (unsigned i = 0; i < n; i++)
1677 {
1678 REAL_VALUE_TYPE res;
1679
1680 /* If something inexact happened bail out now. */
1681 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1682 return false;
1683
1684 /* We have hit zero. The number is representable as a sum
1685 of powers of 0.5. */
1686 if (real_equal (&res, &dconst0))
1687 {
1688 info->factors[i] = true;
1689 info->deepest = i + 1;
1690 return true;
1691 }
1692 else if (!REAL_VALUE_NEGATIVE (res))
1693 {
1694 remainder = res;
1695 info->factors[i] = true;
1696 info->num_mults++;
1697 }
1698 else
1699 info->factors[i] = false;
1700
1701 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702 }
1703 return false;
1704 }
1705
1706 /* Return the tree corresponding to FN being applied
1707 to ARG N times at GSI and LOC.
1708 Look up previous results from CACHE if need be.
1709 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1710
1711 static tree
1712 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1713 tree fn, location_t loc, tree *cache)
1714 {
1715 tree res = cache[n];
1716 if (!res)
1717 {
1718 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1719 res = build_and_insert_call (gsi, loc, fn, prev);
1720 cache[n] = res;
1721 }
1722
1723 return res;
1724 }
1725
1726 /* Print to STREAM the repeated application of function FNAME to ARG
1727 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1728 "foo (foo (x))". */
1729
1730 static void
1731 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1732 unsigned int n)
1733 {
1734 if (n == 0)
1735 fprintf (stream, "%s", arg);
1736 else
1737 {
1738 fprintf (stream, "%s (", fname);
1739 print_nested_fn (stream, fname, arg, n - 1);
1740 fprintf (stream, ")");
1741 }
1742 }
1743
1744 /* Print to STREAM the fractional sequence of sqrt chains
1745 applied to ARG, described by INFO. Used for the dump file. */
1746
1747 static void
1748 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1749 struct pow_synth_sqrt_info *info)
1750 {
1751 for (unsigned int i = 0; i < info->deepest; i++)
1752 {
1753 bool is_set = info->factors[i];
1754 if (is_set)
1755 {
1756 print_nested_fn (stream, "sqrt", arg, i + 1);
1757 if (i != info->deepest - 1)
1758 fprintf (stream, " * ");
1759 }
1760 }
1761 }
1762
1763 /* Print to STREAM a representation of raising ARG to an integer
1764 power N. Used for the dump file. */
1765
1766 static void
1767 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768 {
1769 if (n > 1)
1770 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1771 else if (n == 1)
1772 fprintf (stream, "%s", arg);
1773 }
1774
1775 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1776 square roots. Place at GSI and LOC. Limit the maximum depth
1777 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1778 result of the expanded sequence or NULL_TREE if the expansion failed.
1779
1780 This routine assumes that ARG1 is a real number with a fractional part
1781 (the integer exponent case will have been handled earlier in
1782 gimple_expand_builtin_pow).
1783
1784 For ARG1 > 0.0:
1785 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1786 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1787 FRAC_PART == ARG1 - WHOLE_PART:
1788 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1789 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1790 if it can be expressed as such, that is if FRAC_PART satisfies:
1791 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1792 where integer a[i] is either 0 or 1.
1793
1794 Example:
1795 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1796 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797
1798 For ARG1 < 0.0 there are two approaches:
1799 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1800 is calculated as above.
1801
1802 Example:
1803 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1804 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805
1806 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1807 FRAC_PART := ARG1 - WHOLE_PART
1808 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1809 Example:
1810 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1811 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812
1813 For ARG1 < 0.0 we choose between (A) and (B) depending on
1814 how many multiplications we'd have to do.
1815 So, for the example in (B): POW (x, -5.875), if we were to
1816 follow algorithm (A) we would produce:
1817 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1818 which contains more multiplications than approach (B).
1819
1820 Hopefully, this approach will eliminate potentially expensive POW library
1821 calls when unsafe floating point math is enabled and allow the compiler to
1822 further optimise the multiplies, square roots and divides produced by this
1823 function. */
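/* A rough multiply tally for the POW (x, -5.875) example above (illustrative
   only; the code itself compares the deepest/num_mults fields): approach (A)
   needs powi (x, 5), two multiplies to combine the three sqrt chains of
   0.875 = 0.5 + 0.25 + 0.125, one multiply to join the two products and a
   division, whereas approach (B) needs powi (x, 6), no extra multiplies for
   the single sqrt chain of 0.125 and a division, so (B) wins here.  */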
1824
1825 static tree
1826 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1827 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828 {
1829 tree type = TREE_TYPE (arg0);
1830 machine_mode mode = TYPE_MODE (type);
1831 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1832 bool one_over = true;
1833
1834 if (!sqrtfn)
1835 return NULL_TREE;
1836
1837 if (TREE_CODE (arg1) != REAL_CST)
1838 return NULL_TREE;
1839
1840 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841
1842 gcc_assert (max_depth > 0);
1843 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844
1845 struct pow_synth_sqrt_info synth_info;
1846 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1847 synth_info.deepest = 0;
1848 synth_info.num_mults = 0;
1849
1850 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1851 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852
1853 /* The whole and fractional parts of exp. */
1854 REAL_VALUE_TYPE whole_part;
1855 REAL_VALUE_TYPE frac_part;
1856
1857 real_floor (&whole_part, mode, &exp);
1858 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1859
1860
1861 REAL_VALUE_TYPE ceil_whole = dconst0;
1862 REAL_VALUE_TYPE ceil_fract = dconst0;
1863
1864 if (neg_exp)
1865 {
1866 real_ceil (&ceil_whole, mode, &exp);
1867 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1868 }
1869
1870 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1871 return NULL_TREE;
1872
1873 /* Check whether it's more profitable to not use 1.0 / ... */
1874 if (neg_exp)
1875 {
1876 struct pow_synth_sqrt_info alt_synth_info;
1877 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1878 alt_synth_info.deepest = 0;
1879 alt_synth_info.num_mults = 0;
1880
1881 if (representable_as_half_series_p (ceil_fract, max_depth,
1882 &alt_synth_info)
1883 && alt_synth_info.deepest <= synth_info.deepest
1884 && alt_synth_info.num_mults < synth_info.num_mults)
1885 {
1886 whole_part = ceil_whole;
1887 frac_part = ceil_fract;
1888 synth_info.deepest = alt_synth_info.deepest;
1889 synth_info.num_mults = alt_synth_info.num_mults;
1890 memcpy (synth_info.factors, alt_synth_info.factors,
1891 (max_depth + 1) * sizeof (bool));
1892 one_over = false;
1893 }
1894 }
1895
1896 HOST_WIDE_INT n = real_to_integer (&whole_part);
1897 REAL_VALUE_TYPE cint;
1898 real_from_integer (&cint, VOIDmode, n, SIGNED);
1899
1900 if (!real_identical (&whole_part, &cint))
1901 return NULL_TREE;
1902
1903 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1904 return NULL_TREE;
1905
1906 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1907
1908 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909
1910 /* Calculate the integer part of the exponent. */
1911 if (n > 1)
1912 {
1913 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914 if (!integer_res)
1915 return NULL_TREE;
1916 }
1917
1918 if (dump_file)
1919 {
1920 char string[64];
1921
1922 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1923 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924
1925 if (neg_exp)
1926 {
1927 if (one_over)
1928 {
1929 fprintf (dump_file, "1.0 / (");
1930 dump_integer_part (dump_file, "x", n);
1931 if (n > 0)
1932 fprintf (dump_file, " * ");
1933 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 fprintf (dump_file, ")");
1935 }
1936 else
1937 {
1938 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1939 fprintf (dump_file, " / (");
1940 dump_integer_part (dump_file, "x", n);
1941 fprintf (dump_file, ")");
1942 }
1943 }
1944 else
1945 {
1946 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1947 if (n > 0)
1948 fprintf (dump_file, " * ");
1949 dump_integer_part (dump_file, "x", n);
1950 }
1951
1952 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1953 }
1954
1955
1956 tree fract_res = NULL_TREE;
1957 cache[0] = arg0;
1958
1959 /* Calculate the fractional part of the exponent. */
1960 for (unsigned i = 0; i < synth_info.deepest; i++)
1961 {
1962 if (synth_info.factors[i])
1963 {
1964 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965
1966 if (!fract_res)
1967 fract_res = sqrt_chain;
1968
1969 else
1970 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1971 fract_res, sqrt_chain);
1972 }
1973 }
1974
1975 tree res = NULL_TREE;
1976
1977 if (neg_exp)
1978 {
1979 if (one_over)
1980 {
1981 if (n > 0)
1982 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1983 fract_res, integer_res);
1984 else
1985 res = fract_res;
1986
1987 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1988 build_real (type, dconst1), res);
1989 }
1990 else
1991 {
1992 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1993 fract_res, integer_res);
1994 }
1995 }
1996 else
1997 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1998 fract_res, integer_res);
1999 return res;
2000 }
2001
2002 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2003 with location info LOC. If possible, create an equivalent and
2004 less expensive sequence of statements prior to GSI, and return an
2005 expression holding the result. */
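/* Typical expansions performed below (illustrative; each one is subject to
   the flag and target checks in the body): pow (x, 2.0) becomes x * x,
   pow (x, 0.5) becomes sqrt (x), pow (x, 1./3.) becomes cbrt (x) under
   -funsafe-math-optimizations, and fractional exponents such as 0.75 may
   be synthesized from sqrt chains via expand_pow_as_sqrts.  */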
2006
2007 static tree
2008 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2009 tree arg0, tree arg1)
2010 {
2011 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2012 REAL_VALUE_TYPE c2, dconst3;
2013 HOST_WIDE_INT n;
2014 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2015 machine_mode mode;
2016 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2017 bool hw_sqrt_exists, c_is_int, c2_is_int;
2018
2019 dconst1_4 = dconst1;
2020 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2021
2022 /* If the exponent isn't a constant, there's nothing of interest
2023 to be done. */
2024 if (TREE_CODE (arg1) != REAL_CST)
2025 return NULL_TREE;
2026
2027 /* Don't perform the operation if flag_signaling_nans is on
2028 and the operand is a signaling NaN. */
2029 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2030 && ((TREE_CODE (arg0) == REAL_CST
2031 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2032 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2033 return NULL_TREE;
2034
2035 /* If the exponent is equivalent to an integer, expand to an optimal
2036 multiplication sequence when profitable. */
2037 c = TREE_REAL_CST (arg1);
2038 n = real_to_integer (&c);
2039 real_from_integer (&cint, VOIDmode, n, SIGNED);
2040 c_is_int = real_identical (&c, &cint);
2041
2042 if (c_is_int
2043 && ((n >= -1 && n <= 2)
2044 || (flag_unsafe_math_optimizations
2045 && speed_p
2046 && powi_cost (n) <= POWI_MAX_MULTS)))
2047 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048
2049 /* Attempt various optimizations using sqrt and cbrt. */
2050 type = TREE_TYPE (arg0);
2051 mode = TYPE_MODE (type);
2052 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053
2054 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2055 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2056 sqrt(-0) = -0. */
2057 if (sqrtfn
2058 && real_equal (&c, &dconsthalf)
2059 && !HONOR_SIGNED_ZEROS (mode))
2060 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061
2062 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063
2064 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2065 optimizations since 1./3. is not exactly representable. If x
2066 is negative and finite, the correct value of pow(x,1./3.) is
2067 a NaN with the "invalid" exception raised, because the value
2068 of 1./3. actually has an even denominator. The correct value
2069 of cbrt(x) is a negative real value. */
2070 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071 dconst1_3 = real_value_truncate (mode, dconst_third ());
2072
2073 if (flag_unsafe_math_optimizations
2074 && cbrtfn
2075 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076 && real_equal (&c, &dconst1_3))
2077 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078
2079 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2080 if we don't have a hardware sqrt insn. */
2081 dconst1_6 = dconst1_3;
2082 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2083
2084 if (flag_unsafe_math_optimizations
2085 && sqrtfn
2086 && cbrtfn
2087 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088 && speed_p
2089 && hw_sqrt_exists
2090 && real_equal (&c, &dconst1_6))
2091 {
2092 /* sqrt(x) */
2093 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094
2095 /* cbrt(sqrt(x)) */
2096 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2097 }
2098
2099
2100 /* Attempt to expand the POW as a product of square root chains.
2101 Expand the 0.25 case even when optimising for size. */
2102 if (flag_unsafe_math_optimizations
2103 && sqrtfn
2104 && hw_sqrt_exists
2105 && (speed_p || real_equal (&c, &dconst1_4))
2106 && !HONOR_SIGNED_ZEROS (mode))
2107 {
2108 unsigned int max_depth = speed_p
2109 ? param_max_pow_sqrt_depth
2110 : 2;
2111
2112 tree expand_with_sqrts
2113 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114
2115 if (expand_with_sqrts)
2116 return expand_with_sqrts;
2117 }
2118
2119 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120 n = real_to_integer (&c2);
2121 real_from_integer (&cint, VOIDmode, n, SIGNED);
2122 c2_is_int = real_identical (&c2, &cint);
2123
2124 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125
2126 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2127 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2128
2129 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2130 different from pow(x, 1./3.) due to rounding and behavior with
2131 negative x, we need to constrain this transformation to unsafe
2132 math and positive x or finite math. */
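/* For example (illustrative, assuming the guards below are satisfied):
   pow (x, 5./3.) has n = 5 and becomes powi (x, 5/3) * powi (cbrt (x), 5%3),
   i.e. x * cbrt (x) * cbrt (x).  */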
2133 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135 real_round (&c2, mode, &c2);
2136 n = real_to_integer (&c2);
2137 real_from_integer (&cint, VOIDmode, n, SIGNED);
2138 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139 real_convert (&c2, mode, &c2);
2140
2141 if (flag_unsafe_math_optimizations
2142 && cbrtfn
2143 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144 && real_identical (&c2, &c)
2145 && !c2_is_int
2146 && optimize_function_for_speed_p (cfun)
2147 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148 {
2149 tree powi_x_ndiv3 = NULL_TREE;
2150
2151 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2152 possible or profitable, give up. Skip the degenerate case when
2153 abs(n) < 3, where the result is always 1. */
2154 if (absu_hwi (n) >= 3)
2155 {
2156 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157 abs_hwi (n / 3));
2158 if (!powi_x_ndiv3)
2159 return NULL_TREE;
2160 }
2161
2162 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2163 as that creates an unnecessary variable. Instead, just produce
2164 either cbrt(x) or cbrt(x) * cbrt(x). */
2165 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166
2167 if (absu_hwi (n) % 3 == 1)
2168 powi_cbrt_x = cbrt_x;
2169 else
2170 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171 cbrt_x, cbrt_x);
2172
2173 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2174 if (absu_hwi (n) < 3)
2175 result = powi_cbrt_x;
2176 else
2177 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178 powi_x_ndiv3, powi_cbrt_x);
2179
2180 /* If n is negative, reciprocate the result. */
2181 if (n < 0)
2182 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183 build_real (type, dconst1), result);
2184
2185 return result;
2186 }
2187
2188 /* No optimizations succeeded. */
2189 return NULL_TREE;
2190 }
2191
2192 /* ARG is the argument to a cabs builtin call in GSI with location info
2193 LOC. Create a sequence of statements prior to GSI that calculates
2194 sqrt(R*R + I*I), where R and I are the real and imaginary components
2195 of ARG, respectively. Return an expression holding the result. */
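/* In C terms the replacement is roughly (illustrative only):
   cabs (z) -> sqrt (__real__ z * __real__ z + __imag__ z * __imag__ z)
   which is why it is restricted to -funsafe-math-optimizations: the naive
   expansion can overflow where cabs itself would not.  */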
2196
2197 static tree
2198 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199 {
2200 tree real_part, imag_part, addend1, addend2, sum, result;
2201 tree type = TREE_TYPE (TREE_TYPE (arg));
2202 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2203 machine_mode mode = TYPE_MODE (type);
2204
2205 if (!flag_unsafe_math_optimizations
2206 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2207 || !sqrtfn
2208 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2209 return NULL_TREE;
2210
2211 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 REALPART_EXPR, arg);
2213 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 real_part, real_part);
2215 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216 IMAGPART_EXPR, arg);
2217 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218 imag_part, imag_part);
2219 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2220 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221
2222 return result;
2223 }
2224
2225 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2226 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2227 an optimal number of multiplies, when n is a constant. */
2228
2229 namespace {
2230
2231 const pass_data pass_data_cse_sincos =
2232 {
2233 GIMPLE_PASS, /* type */
2234 "sincos", /* name */
2235 OPTGROUP_NONE, /* optinfo_flags */
2236 TV_TREE_SINCOS, /* tv_id */
2237 PROP_ssa, /* properties_required */
2238 PROP_gimple_opt_math, /* properties_provided */
2239 0, /* properties_destroyed */
2240 0, /* todo_flags_start */
2241 TODO_update_ssa, /* todo_flags_finish */
2242 };
2243
2244 class pass_cse_sincos : public gimple_opt_pass
2245 {
2246 public:
2247 pass_cse_sincos (gcc::context *ctxt)
2248 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2249 {}
2250
2251 /* opt_pass methods: */
2252 virtual bool gate (function *)
2253 {
2254 /* We no longer require either sincos or cexp, since powi expansion
2255 piggybacks on this pass. */
2256 return optimize;
2257 }
2258
2259 virtual unsigned int execute (function *);
2260
2261 }; // class pass_cse_sincos
2262
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2265 {
2266 basic_block bb;
2267 bool cfg_changed = false;
2268
2269 calculate_dominance_info (CDI_DOMINATORS);
2270 memset (&sincos_stats, 0, sizeof (sincos_stats));
2271
2272 FOR_EACH_BB_FN (bb, fun)
2273 {
2274 gimple_stmt_iterator gsi;
2275 bool cleanup_eh = false;
2276
2277 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 {
2279 gimple *stmt = gsi_stmt (gsi);
2280
2281 /* Only the last stmt in a bb could throw, no need to call
2282 gimple_purge_dead_eh_edges if we change something in the middle
2283 of a basic block. */
2284 cleanup_eh = false;
2285
2286 if (is_gimple_call (stmt)
2287 && gimple_call_lhs (stmt))
2288 {
2289 tree arg, arg0, arg1, result;
2290 HOST_WIDE_INT n;
2291 location_t loc;
2292
2293 switch (gimple_call_combined_fn (stmt))
2294 {
2295 CASE_CFN_COS:
2296 CASE_CFN_SIN:
2297 CASE_CFN_CEXPI:
2298 arg = gimple_call_arg (stmt, 0);
2299 /* Make sure we have either sincos or cexp. */
2300 if (!targetm.libc_has_function (function_c99_math_complex,
2301 TREE_TYPE (arg))
2302 && !targetm.libc_has_function (function_sincos,
2303 TREE_TYPE (arg)))
2304 break;
2305
2306 if (TREE_CODE (arg) == SSA_NAME)
2307 cfg_changed |= execute_cse_sincos_1 (arg);
2308 break;
2309
2310 CASE_CFN_POW:
2311 arg0 = gimple_call_arg (stmt, 0);
2312 arg1 = gimple_call_arg (stmt, 1);
2313
2314 loc = gimple_location (stmt);
2315 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316
2317 if (result)
2318 {
2319 tree lhs = gimple_get_lhs (stmt);
2320 gassign *new_stmt = gimple_build_assign (lhs, result);
2321 gimple_set_location (new_stmt, loc);
2322 unlink_stmt_vdef (stmt);
2323 gsi_replace (&gsi, new_stmt, true);
2324 cleanup_eh = true;
2325 if (gimple_vdef (stmt))
2326 release_ssa_name (gimple_vdef (stmt));
2327 }
2328 break;
2329
2330 CASE_CFN_POWI:
2331 arg0 = gimple_call_arg (stmt, 0);
2332 arg1 = gimple_call_arg (stmt, 1);
2333 loc = gimple_location (stmt);
2334
2335 if (real_minus_onep (arg0))
2336 {
2337 tree t0, t1, cond, one, minus_one;
2338 gassign *stmt;
2339
2340 t0 = TREE_TYPE (arg0);
2341 t1 = TREE_TYPE (arg1);
2342 one = build_real (t0, dconst1);
2343 minus_one = build_real (t0, dconstm1);
2344
2345 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2346 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2347 arg1, build_int_cst (t1, 1));
2348 gimple_set_location (stmt, loc);
2349 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350
2351 result = make_temp_ssa_name (t0, NULL, "powi");
2352 stmt = gimple_build_assign (result, COND_EXPR, cond,
2353 minus_one, one);
2354 gimple_set_location (stmt, loc);
2355 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356 }
2357 else
2358 {
2359 if (!tree_fits_shwi_p (arg1))
2360 break;
2361
2362 n = tree_to_shwi (arg1);
2363 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2364 }
2365
2366 if (result)
2367 {
2368 tree lhs = gimple_get_lhs (stmt);
2369 gassign *new_stmt = gimple_build_assign (lhs, result);
2370 gimple_set_location (new_stmt, loc);
2371 unlink_stmt_vdef (stmt);
2372 gsi_replace (&gsi, new_stmt, true);
2373 cleanup_eh = true;
2374 if (gimple_vdef (stmt))
2375 release_ssa_name (gimple_vdef (stmt));
2376 }
2377 break;
2378
2379 CASE_CFN_CABS:
2380 arg0 = gimple_call_arg (stmt, 0);
2381 loc = gimple_location (stmt);
2382 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383
2384 if (result)
2385 {
2386 tree lhs = gimple_get_lhs (stmt);
2387 gassign *new_stmt = gimple_build_assign (lhs, result);
2388 gimple_set_location (new_stmt, loc);
2389 unlink_stmt_vdef (stmt);
2390 gsi_replace (&gsi, new_stmt, true);
2391 cleanup_eh = true;
2392 if (gimple_vdef (stmt))
2393 release_ssa_name (gimple_vdef (stmt));
2394 }
2395 break;
2396
2397 default:;
2398 }
2399 }
2400 }
2401 if (cleanup_eh)
2402 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2403 }
2404
2405 statistics_counter_event (fun, "sincos statements inserted",
2406 sincos_stats.inserted);
2407 statistics_counter_event (fun, "conv statements removed",
2408 sincos_stats.conv_removed);
2409
2410 return cfg_changed ? TODO_cleanup_cfg : 0;
2411 }
2412
2413 } // anon namespace
2414
2415 gimple_opt_pass *
2416 make_pass_cse_sincos (gcc::context *ctxt)
2417 {
2418 return new pass_cse_sincos (ctxt);
2419 }
2420
2421 /* Return true if stmt is a type conversion operation that can be stripped
2422 when used in a widening multiply operation. */
2423 static bool
2424 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425 {
2426 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427
2428 if (TREE_CODE (result_type) == INTEGER_TYPE)
2429 {
2430 tree op_type;
2431 tree inner_op_type;
2432
2433 if (!CONVERT_EXPR_CODE_P (rhs_code))
2434 return false;
2435
2436 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437
2438 /* If the type of OP has the same precision as the result, then
2439 we can strip this conversion. The multiply operation will be
2440 selected to create the correct extension as a by-product. */
2441 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2442 return true;
2443
2444 /* We can also strip a conversion if it preserves the signed-ness of
2445 the operation and doesn't narrow the range. */
2446 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447
2448 /* If the inner-most type is unsigned, then we can strip any
2449 intermediate widening operation. If it's signed, then the
2450 intermediate widening operation must also be signed. */
2451 if ((TYPE_UNSIGNED (inner_op_type)
2452 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2453 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2454 return true;
2455
2456 return false;
2457 }
2458
2459 return rhs_code == FIXED_CONVERT_EXPR;
2460 }
2461
2462 /* Return true if RHS is a suitable operand for a widening multiplication,
2463 assuming a target type of TYPE.
2464 There are two cases:
2465
2466 - RHS makes some value at least twice as wide. Store that value
2467 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468
2469 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2470 and store NULL in *TYPE_OUT. */
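/* For example (illustrative): with TYPE a 64-bit integer type and RHS defined
   by "rhs = (long long) a" where a has a 32-bit type, the conversion is
   stripped and *NEW_RHS_OUT is set to a with *TYPE_OUT set to a's 32-bit
   type, while an INTEGER_CST RHS is accepted as-is with *TYPE_OUT set to
   NULL for the caller to fill in.  */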
2471
2472 static bool
2473 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2474 tree *new_rhs_out)
2475 {
2476 gimple *stmt;
2477 tree type1, rhs1;
2478
2479 if (TREE_CODE (rhs) == SSA_NAME)
2480 {
2481 stmt = SSA_NAME_DEF_STMT (rhs);
2482 if (is_gimple_assign (stmt))
2483 {
2484 if (! widening_mult_conversion_strippable_p (type, stmt))
2485 rhs1 = rhs;
2486 else
2487 {
2488 rhs1 = gimple_assign_rhs1 (stmt);
2489
2490 if (TREE_CODE (rhs1) == INTEGER_CST)
2491 {
2492 *new_rhs_out = rhs1;
2493 *type_out = NULL;
2494 return true;
2495 }
2496 }
2497 }
2498 else
2499 rhs1 = rhs;
2500
2501 type1 = TREE_TYPE (rhs1);
2502
2503 if (TREE_CODE (type1) != TREE_CODE (type)
2504 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2505 return false;
2506
2507 *new_rhs_out = rhs1;
2508 *type_out = type1;
2509 return true;
2510 }
2511
2512 if (TREE_CODE (rhs) == INTEGER_CST)
2513 {
2514 *new_rhs_out = rhs;
2515 *type_out = NULL;
2516 return true;
2517 }
2518
2519 return false;
2520 }
2521
2522 /* Return true if STMT performs a widening multiplication, assuming the
2523 output type is TYPE. If so, store the unwidened types of the operands
2524 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2525 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2526 and *TYPE2_OUT would give the operands of the multiplication. */
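/* A typical match (illustrative): for
     int a, b;  long long c = (long long) a * (long long) b;
   the statement computing c is a widening multiplication with
   *TYPE1_OUT == *TYPE2_OUT == int and *RHS1_OUT == a, *RHS2_OUT == b.  */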
2527
2528 static bool
2529 is_widening_mult_p (gimple *stmt,
2530 tree *type1_out, tree *rhs1_out,
2531 tree *type2_out, tree *rhs2_out)
2532 {
2533 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534
2535 if (TREE_CODE (type) == INTEGER_TYPE)
2536 {
2537 if (TYPE_OVERFLOW_TRAPS (type))
2538 return false;
2539 }
2540 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2541 return false;
2542
2543 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2544 rhs1_out))
2545 return false;
2546
2547 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2548 rhs2_out))
2549 return false;
2550
2551 if (*type1_out == NULL)
2552 {
2553 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2554 return false;
2555 *type1_out = *type2_out;
2556 }
2557
2558 if (*type2_out == NULL)
2559 {
2560 if (!int_fits_type_p (*rhs2_out, *type1_out))
2561 return false;
2562 *type2_out = *type1_out;
2563 }
2564
2565 /* Ensure that the larger of the two operands comes first. */
2566 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567 {
2568 std::swap (*type1_out, *type2_out);
2569 std::swap (*rhs1_out, *rhs2_out);
2570 }
2571
2572 return true;
2573 }
2574
2575 /* Check to see if the CALL statement is an invocation of copysign
2576 with 1. being the first argument. */
2577 static bool
2578 is_copysign_call_with_1 (gimple *call)
2579 {
2580 gcall *c = dyn_cast <gcall *> (call);
2581 if (! c)
2582 return false;
2583
2584 enum combined_fn code = gimple_call_combined_fn (c);
2585
2586 if (code == CFN_LAST)
2587 return false;
2588
2589 if (builtin_fn_p (code))
2590 {
2591 switch (as_builtin_fn (code))
2592 {
2593 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2594 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2595 return real_onep (gimple_call_arg (c, 0));
2596 default:
2597 return false;
2598 }
2599 }
2600
2601 if (internal_fn_p (code))
2602 {
2603 switch (as_internal_fn (code))
2604 {
2605 case IFN_COPYSIGN:
2606 return real_onep (gimple_call_arg (c, 0));
2607 default:
2608 return false;
2609 }
2610 }
2611
2612 return false;
2613 }
2614
2615 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2616 This only happens when the xorsign optab is defined. If the
2617 pattern is not a xorsign pattern or if expansion fails, FALSE is
2618 returned; otherwise TRUE is returned. */
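/* For example (illustrative): given
     z = x * __builtin_copysign (1.0, y);
   the multiply is replaced by the internal call z = .XORSIGN (x, y) when the
   target provides the xorsign optab and signaling NaNs need not be honoured.  */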
2619 static bool
2620 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621 {
2622 tree treeop0, treeop1, lhs, type;
2623 location_t loc = gimple_location (stmt);
2624 lhs = gimple_assign_lhs (stmt);
2625 treeop0 = gimple_assign_rhs1 (stmt);
2626 treeop1 = gimple_assign_rhs2 (stmt);
2627 type = TREE_TYPE (lhs);
2628 machine_mode mode = TYPE_MODE (type);
2629
2630 if (HONOR_SNANS (type))
2631 return false;
2632
2633 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634 {
2635 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2636 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637 {
2638 call0 = SSA_NAME_DEF_STMT (treeop1);
2639 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2640 return false;
2641
2642 treeop1 = treeop0;
2643 }
2644 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2645 return false;
2646
2647 gcall *c = as_a<gcall*> (call0);
2648 treeop0 = gimple_call_arg (c, 1);
2649
2650 gcall *call_stmt
2651 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2652 gimple_set_lhs (call_stmt, lhs);
2653 gimple_set_location (call_stmt, loc);
2654 gsi_replace (gsi, call_stmt, true);
2655 return true;
2656 }
2657
2658 return false;
2659 }
2660
2661 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2662 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2663 value is true iff we converted the statement. */
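/* Roughly (illustrative): an assignment such as
     long long c = (long long) a * (long long) b;   with a, b of type int
   is rewritten in place to a WIDEN_MULT_EXPR on the narrow operands, so the
   target can emit a single widening multiply instead of two extensions and a
   full-width multiply.  */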
2664
2665 static bool
2666 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667 {
2668 tree lhs, rhs1, rhs2, type, type1, type2;
2669 enum insn_code handler;
2670 scalar_int_mode to_mode, from_mode, actual_mode;
2671 optab op;
2672 int actual_precision;
2673 location_t loc = gimple_location (stmt);
2674 bool from_unsigned1, from_unsigned2;
2675
2676 lhs = gimple_assign_lhs (stmt);
2677 type = TREE_TYPE (lhs);
2678 if (TREE_CODE (type) != INTEGER_TYPE)
2679 return false;
2680
2681 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2682 return false;
2683
2684 /* If either of rhs1 and rhs2 is subject to abnormal coalescing,
2685 avoid the transform. */
2686 if ((TREE_CODE (rhs1) == SSA_NAME
2687 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2688 || (TREE_CODE (rhs2) == SSA_NAME
2689 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2690 return false;
2691
2692 to_mode = SCALAR_INT_TYPE_MODE (type);
2693 from_mode = SCALAR_INT_TYPE_MODE (type1);
2694 if (to_mode == from_mode)
2695 return false;
2696
2697 from_unsigned1 = TYPE_UNSIGNED (type1);
2698 from_unsigned2 = TYPE_UNSIGNED (type2);
2699
2700 if (from_unsigned1 && from_unsigned2)
2701 op = umul_widen_optab;
2702 else if (!from_unsigned1 && !from_unsigned2)
2703 op = smul_widen_optab;
2704 else
2705 op = usmul_widen_optab;
2706
2707 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2708 &actual_mode);
2709
2710 if (handler == CODE_FOR_nothing)
2711 {
2712 if (op != smul_widen_optab)
2713 {
2714 /* We can use a signed multiply with unsigned types as long as
2715 there is a wider mode to use, or it is the smaller of the two
2716 types that is unsigned. Note that type1 >= type2, always. */
2717 if ((TYPE_UNSIGNED (type1)
2718 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2719 || (TYPE_UNSIGNED (type2)
2720 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2721 {
2722 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2723 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2724 return false;
2725 }
2726
2727 op = smul_widen_optab;
2728 handler = find_widening_optab_handler_and_mode (op, to_mode,
2729 from_mode,
2730 &actual_mode);
2731
2732 if (handler == CODE_FOR_nothing)
2733 return false;
2734
2735 from_unsigned1 = from_unsigned2 = false;
2736 }
2737 else
2738 {
2739 /* Expand can synthesize smul_widen_optab if the target
2740 supports umul_widen_optab. */
2741 op = umul_widen_optab;
2742 handler = find_widening_optab_handler_and_mode (op, to_mode,
2743 from_mode,
2744 &actual_mode);
2745 if (handler == CODE_FOR_nothing)
2746 return false;
2747 }
2748 }
2749
2750 /* Ensure that the inputs to the handler are in the correct precision
2751 for the opcode. This will be the full mode size. */
2752 actual_precision = GET_MODE_PRECISION (actual_mode);
2753 if (2 * actual_precision > TYPE_PRECISION (type))
2754 return false;
2755 if (actual_precision != TYPE_PRECISION (type1)
2756 || from_unsigned1 != TYPE_UNSIGNED (type1))
2757 rhs1 = build_and_insert_cast (gsi, loc,
2758 build_nonstandard_integer_type
2759 (actual_precision, from_unsigned1), rhs1);
2760 if (actual_precision != TYPE_PRECISION (type2)
2761 || from_unsigned2 != TYPE_UNSIGNED (type2))
2762 rhs2 = build_and_insert_cast (gsi, loc,
2763 build_nonstandard_integer_type
2764 (actual_precision, from_unsigned2), rhs2);
2765
2766 /* Handle constants. */
2767 if (TREE_CODE (rhs1) == INTEGER_CST)
2768 rhs1 = fold_convert (type1, rhs1);
2769 if (TREE_CODE (rhs2) == INTEGER_CST)
2770 rhs2 = fold_convert (type2, rhs2);
2771
2772 gimple_assign_set_rhs1 (stmt, rhs1);
2773 gimple_assign_set_rhs2 (stmt, rhs2);
2774 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2775 update_stmt (stmt);
2776 widen_mul_stats.widen_mults_inserted++;
2777 return true;
2778 }
2779
2780 /* Process a single gimple statement STMT, which is found at the
2781 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2782 rhs (given by CODE), and try to convert it into a
2783 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2784 is true iff we converted the statement. */
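/* For example (illustrative): with int a, b and long long acc,
     acc = acc + (long long) a * (long long) b;
   becomes a single WIDEN_MULT_PLUS_EXPR when the target advertises a widening
   multiply-accumulate pattern for that mode and signedness combination.  */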
2785
2786 static bool
2787 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2788 enum tree_code code)
2789 {
2790 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2791 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2792 tree type, type1, type2, optype;
2793 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2794 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2795 optab this_optab;
2796 enum tree_code wmult_code;
2797 enum insn_code handler;
2798 scalar_mode to_mode, from_mode, actual_mode;
2799 location_t loc = gimple_location (stmt);
2800 int actual_precision;
2801 bool from_unsigned1, from_unsigned2;
2802
2803 lhs = gimple_assign_lhs (stmt);
2804 type = TREE_TYPE (lhs);
2805 if ((TREE_CODE (type) != INTEGER_TYPE
2806 && TREE_CODE (type) != FIXED_POINT_TYPE)
2807 || !type_has_mode_precision_p (type))
2808 return false;
2809
2810 if (code == MINUS_EXPR)
2811 wmult_code = WIDEN_MULT_MINUS_EXPR;
2812 else
2813 wmult_code = WIDEN_MULT_PLUS_EXPR;
2814
2815 rhs1 = gimple_assign_rhs1 (stmt);
2816 rhs2 = gimple_assign_rhs2 (stmt);
2817
2818 if (TREE_CODE (rhs1) == SSA_NAME)
2819 {
2820 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2821 if (is_gimple_assign (rhs1_stmt))
2822 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2823 }
2824
2825 if (TREE_CODE (rhs2) == SSA_NAME)
2826 {
2827 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2828 if (is_gimple_assign (rhs2_stmt))
2829 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2830 }
2831
2832 /* Allow for one conversion statement between the multiply
2833 and addition/subtraction statement. If there is more than
2834 one conversion then we assume they would invalidate this
2835 transformation. If that's not the case then they should have
2836 been folded before now. */
2837 if (CONVERT_EXPR_CODE_P (rhs1_code))
2838 {
2839 conv1_stmt = rhs1_stmt;
2840 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2841 if (TREE_CODE (rhs1) == SSA_NAME)
2842 {
2843 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2844 if (is_gimple_assign (rhs1_stmt))
2845 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2846 }
2847 else
2848 return false;
2849 }
2850 if (CONVERT_EXPR_CODE_P (rhs2_code))
2851 {
2852 conv2_stmt = rhs2_stmt;
2853 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2854 if (TREE_CODE (rhs2) == SSA_NAME)
2855 {
2856 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2857 if (is_gimple_assign (rhs2_stmt))
2858 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2859 }
2860 else
2861 return false;
2862 }
2863
2864 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2865 is_widening_mult_p, but we still need the rhs values it returns.
2866
2867 It might also appear that it would be sufficient to use the existing
2868 operands of the widening multiply, but that would limit the choice of
2869 multiply-and-accumulate instructions.
2870
2871 If the widened-multiplication result has more than one use, it is
2872 probably wiser not to do the conversion. Also restrict this operation
2873 to a single basic block to avoid moving the multiply to a different block
2874 with a higher execution frequency. */
2875 if (code == PLUS_EXPR
2876 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2877 {
2878 if (!has_single_use (rhs1)
2879 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2880 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2881 &type2, &mult_rhs2))
2882 return false;
2883 add_rhs = rhs2;
2884 conv_stmt = conv1_stmt;
2885 }
2886 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2887 {
2888 if (!has_single_use (rhs2)
2889 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2890 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2891 &type2, &mult_rhs2))
2892 return false;
2893 add_rhs = rhs1;
2894 conv_stmt = conv2_stmt;
2895 }
2896 else
2897 return false;
2898
2899 to_mode = SCALAR_TYPE_MODE (type);
2900 from_mode = SCALAR_TYPE_MODE (type1);
2901 if (to_mode == from_mode)
2902 return false;
2903
2904 from_unsigned1 = TYPE_UNSIGNED (type1);
2905 from_unsigned2 = TYPE_UNSIGNED (type2);
2906 optype = type1;
2907
2908 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2909 if (from_unsigned1 != from_unsigned2)
2910 {
2911 if (!INTEGRAL_TYPE_P (type))
2912 return false;
2913 /* We can use a signed multiply with unsigned types as long as
2914 there is a wider mode to use, or it is the smaller of the two
2915 types that is unsigned. Note that type1 >= type2, always. */
2916 if ((from_unsigned1
2917 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2918 || (from_unsigned2
2919 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2920 {
2921 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2922 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2923 return false;
2924 }
2925
2926 from_unsigned1 = from_unsigned2 = false;
2927 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2928 false);
2929 }
2930
2931 /* If there was a conversion between the multiply and addition
2932 then we need to make sure it fits a multiply-and-accumulate.
2933 There should be a single mode change which does not change the
2934 value. */
2935 if (conv_stmt)
2936 {
2937 /* We use the original, unmodified data types for this. */
2938 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2939 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2940 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2941 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2942
2943 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2944 {
2945 /* Conversion is a truncate. */
2946 if (TYPE_PRECISION (to_type) < data_size)
2947 return false;
2948 }
2949 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2950 {
2951 /* Conversion is an extend. Check it's the right sort. */
2952 if (TYPE_UNSIGNED (from_type) != is_unsigned
2953 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2954 return false;
2955 }
2956 /* else convert is a no-op for our purposes. */
2957 }
2958
2959 /* Verify that the machine can perform a widening multiply
2960 accumulate in this mode/signedness combination, otherwise
2961 this transformation is likely to pessimize code. */
2962 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2963 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2964 from_mode, &actual_mode);
2965
2966 if (handler == CODE_FOR_nothing)
2967 return false;
2968
2969 /* Ensure that the inputs to the handler are in the correct precision
2970 for the opcode. This will be the full mode size. */
2971 actual_precision = GET_MODE_PRECISION (actual_mode);
2972 if (actual_precision != TYPE_PRECISION (type1)
2973 || from_unsigned1 != TYPE_UNSIGNED (type1))
2974 mult_rhs1 = build_and_insert_cast (gsi, loc,
2975 build_nonstandard_integer_type
2976 (actual_precision, from_unsigned1),
2977 mult_rhs1);
2978 if (actual_precision != TYPE_PRECISION (type2)
2979 || from_unsigned2 != TYPE_UNSIGNED (type2))
2980 mult_rhs2 = build_and_insert_cast (gsi, loc,
2981 build_nonstandard_integer_type
2982 (actual_precision, from_unsigned2),
2983 mult_rhs2);
2984
2985 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2986 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2987
2988 /* Handle constants. */
2989 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2990 mult_rhs1 = fold_convert (type1, mult_rhs1);
2991 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2992 mult_rhs2 = fold_convert (type2, mult_rhs2);
2993
2994 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2995 add_rhs);
2996 update_stmt (gsi_stmt (*gsi));
2997 widen_mul_stats.maccs_inserted++;
2998 return true;
2999 }
3000
3001 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3002 OP2 and which we know is used in statements that can be, together with the
3003 multiplication, converted to FMAs, perform the transformation. */
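/* Illustrative effect on each use (assuming the checks in convert_mult_to_fma
   have already passed):
     tmp = a * b;  d = tmp + c;   ->   d = .FMA (a, b, c);
     tmp = a * b;  d = tmp - c;   ->   d = .FMA (a, b, -c), later foldable
                                       to .FMS where the target supports it;
     tmp = a * b;  d = c - tmp;   ->   negate a and fold towards .FNMA.  */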
3004
3005 static void
3006 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3007 {
3008 tree type = TREE_TYPE (mul_result);
3009 gimple *use_stmt;
3010 imm_use_iterator imm_iter;
3011 gcall *fma_stmt;
3012
3013 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3014 {
3015 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3016 tree addop, mulop1 = op1, result = mul_result;
3017 bool negate_p = false;
3018 gimple_seq seq = NULL;
3019
3020 if (is_gimple_debug (use_stmt))
3021 continue;
3022
3023 if (is_gimple_assign (use_stmt)
3024 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3025 {
3026 result = gimple_assign_lhs (use_stmt);
3027 use_operand_p use_p;
3028 gimple *neguse_stmt;
3029 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3030 gsi_remove (&gsi, true);
3031 release_defs (use_stmt);
3032
3033 use_stmt = neguse_stmt;
3034 gsi = gsi_for_stmt (use_stmt);
3035 negate_p = true;
3036 }
3037
3038 tree cond, else_value, ops[3];
3039 tree_code code;
3040 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3041 ops, &else_value))
3042 gcc_unreachable ();
3043 addop = ops[0] == result ? ops[1] : ops[0];
3044
3045 if (code == MINUS_EXPR)
3046 {
3047 if (ops[0] == result)
3048 /* a * b - c -> a * b + (-c) */
3049 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3050 else
3051 /* a - b * c -> (-b) * c + a */
3052 negate_p = !negate_p;
3053 }
3054
3055 if (negate_p)
3056 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3057
3058 if (seq)
3059 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3060
3061 if (cond)
3062 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3063 op2, addop, else_value);
3064 else
3065 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3066 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3067 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3068 use_stmt));
3069 gsi_replace (&gsi, fma_stmt, true);
3070 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3071 regardless of where the negation occurs. */
3072 gimple *orig_stmt = gsi_stmt (gsi);
3073 if (fold_stmt (&gsi, follow_all_ssa_edges))
3074 {
3075 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3076 gcc_unreachable ();
3077 update_stmt (gsi_stmt (gsi));
3078 }
3079
3080 if (dump_file && (dump_flags & TDF_DETAILS))
3081 {
3082 fprintf (dump_file, "Generated FMA ");
3083 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3084 fprintf (dump_file, "\n");
3085 }
3086
3087 /* If the FMA result is negated in a single use, fold the negation
3088 too. */
3089 orig_stmt = gsi_stmt (gsi);
3090 use_operand_p use_p;
3091 gimple *neg_stmt;
3092 if (is_gimple_call (orig_stmt)
3093 && gimple_call_internal_p (orig_stmt)
3094 && gimple_call_lhs (orig_stmt)
3095 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3096 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3097 && is_gimple_assign (neg_stmt)
3098 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3099 && !stmt_could_throw_p (cfun, neg_stmt))
3100 {
3101 gsi = gsi_for_stmt (neg_stmt);
3102 if (fold_stmt (&gsi, follow_all_ssa_edges))
3103 {
3104 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3105 gcc_unreachable ();
3106 update_stmt (gsi_stmt (gsi));
3107 if (dump_file && (dump_flags & TDF_DETAILS))
3108 {
3109 fprintf (dump_file, "Folded FMA negation ");
3110 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3111 fprintf (dump_file, "\n");
3112 }
3113 }
3114 }
3115
3116 widen_mul_stats.fmas_inserted++;
3117 }
3118 }
3119
3120 /* Data necessary to perform the actual transformation from a multiplication
3121 and an addition to an FMA once the decision has been taken that it should
3122 be done, and to then delete the multiplication statement from the function IL. */
3123
3124 struct fma_transformation_info
3125 {
3126 gimple *mul_stmt;
3127 tree mul_result;
3128 tree op1;
3129 tree op2;
3130 };
3131
3132 /* Structure containing the current state of FMA deferring, i.e. whether we are
3133 deferring, whether to continue deferring, and all data necessary to come
3134 back and perform all deferred transformations. */
3135
3136 class fma_deferring_state
3137 {
3138 public:
3139 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3140 do any deferring. */
3141
3142 fma_deferring_state (bool perform_deferring)
3143 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3144 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3145
3146 /* List of FMA candidates for which the transformation has been determined
3147 possible but which, at this point in the BB analysis, we do not consider
3148 beneficial. */
3149 auto_vec<fma_transformation_info, 8> m_candidates;
3150
3151 /* Set of results of multiplication that are part of an already deferred FMA
3152 candidates. */
3153 hash_set<tree> m_mul_result_set;
3154
3155 /* The PHI that supposedly feeds back the result of one FMA to another over a
3156 loop boundary. */
3157 gphi *m_initial_phi;
3158
3159 /* Result of the last produced FMA candidate or NULL if there has not been
3160 one. */
3161 tree m_last_result;
3162
3163 /* If true, deferring might still be profitable. If false, transform all
3164 candidates and no longer defer. */
3165 bool m_deferring_p;
3166 };
3167
3168 /* Transform all deferred FMA candidates and mark STATE as no longer
3169 deferring. */
3170
3171 static void
3172 cancel_fma_deferring (fma_deferring_state *state)
3173 {
3174 if (!state->m_deferring_p)
3175 return;
3176
3177 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3178 {
3179 if (dump_file && (dump_flags & TDF_DETAILS))
3180 fprintf (dump_file, "Generating deferred FMA\n");
3181
3182 const fma_transformation_info &fti = state->m_candidates[i];
3183 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3184
3185 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3186 gsi_remove (&gsi, true);
3187 release_defs (fti.mul_stmt);
3188 }
3189 state->m_deferring_p = false;
3190 }
3191
3192 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3193 Otherwise return NULL. */
3194
3195 static gphi *
3196 result_of_phi (tree op)
3197 {
3198 if (TREE_CODE (op) != SSA_NAME)
3199 return NULL;
3200
3201 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3202 }
3203
3204 /* After processing statements of a BB and recording STATE, return true if the
3205 initial phi is fed by the last FMA candidate result or one such result from
3206 previously processed BBs marked in LAST_RESULT_SET. */
3207
3208 static bool
3209 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3210 hash_set<tree> *last_result_set)
3211 {
3212 ssa_op_iter iter;
3213 use_operand_p use;
3214 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3215 {
3216 tree t = USE_FROM_PTR (use);
3217 if (t == state->m_last_result
3218 || last_result_set->contains (t))
3219 return true;
3220 }
3221
3222 return false;
3223 }
3224
3225 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3226 with uses in additions and subtractions to form fused multiply-add
3227 operations. Returns true if successful and MUL_STMT should be removed.
3228 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3229 on MUL_COND, otherwise it is unconditional.
3230
3231 If STATE indicates that we are deferring FMA transformation, that means
3232 that we do not produce FMAs for basic blocks which look like:
3233
3234 <bb 6>
3235 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3236 _65 = _14 * _16;
3237 accumulator_66 = _65 + accumulator_111;
3238
3239 or its unrolled version, i.e. with several FMA candidates that feed the result
3240 of one into the addend of another. Instead, we add them to a list in STATE
3241 and if we later discover an FMA candidate that is not part of such a chain,
3242 we go back and perform all deferred past candidates. */
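/* Sketch of the deferring decision below (illustrative): the first candidate
   whose addend comes straight from a PHI starts a deferred chain; each
   following candidate stays deferred only if one of its addition operands is
   the result produced by the previous candidate.  A candidate that breaks the
   chain causes cancel_fma_deferring to materialize everything queued so far.  */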
3243
3244 static bool
3245 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3246 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3247 {
3248 tree mul_result = gimple_get_lhs (mul_stmt);
3249 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3250 if the statement was left just for the side-effects. */
3251 if (!mul_result)
3252 return false;
3253 tree type = TREE_TYPE (mul_result);
3254 gimple *use_stmt, *neguse_stmt;
3255 use_operand_p use_p;
3256 imm_use_iterator imm_iter;
3257
3258 if (FLOAT_TYPE_P (type)
3259 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3260 return false;
3261
3262 /* We don't want to do bitfield reduction ops. */
3263 if (INTEGRAL_TYPE_P (type)
3264 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3265 return false;
3266
3267 /* If the target doesn't support it, don't generate it. We assume that
3268 if fma isn't available then fms, fnma or fnms are not either. */
3269 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3270 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3271 return false;
3272
3273 /* If the multiplication has zero uses, it is kept around probably because
3274 of -fnon-call-exceptions. Don't optimize it away in that case,
3275 it is DCE's job. */
3276 if (has_zero_uses (mul_result))
3277 return false;
3278
3279 bool check_defer
3280 = (state->m_deferring_p
3281 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3282 param_avoid_fma_max_bits));
3283 bool defer = check_defer;
3284 bool seen_negate_p = false;
3285 /* Make sure that the multiplication statement becomes dead after
3286 the transformation, thus that all uses are transformed to FMAs.
3287 This means we assume that an FMA operation has the same cost
3288 as an addition. */
3289 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3290 {
3291 tree result = mul_result;
3292 bool negate_p = false;
3293
3294 use_stmt = USE_STMT (use_p);
3295
3296 if (is_gimple_debug (use_stmt))
3297 continue;
3298
3299 /* For now restrict this operation to single basic blocks. In theory
3300 we would want to support sinking the multiplication in
3301 m = a*b;
3302 if ()
3303 ma = m + c;
3304 else
3305 d = m;
3306 to form a fma in the then block and sink the multiplication to the
3307 else block. */
3308 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3309 return false;
3310
3311 /* A negate on the multiplication leads to FNMA. */
3312 if (is_gimple_assign (use_stmt)
3313 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3314 {
3315 ssa_op_iter iter;
3316 use_operand_p usep;
3317
3318 /* If (due to earlier missed optimizations) we have two
3319 negates of the same value, treat them as equivalent
3320 to a single negate with multiple uses. */
3321 if (seen_negate_p)
3322 return false;
3323
3324 result = gimple_assign_lhs (use_stmt);
3325
3326 /* Make sure the negate statement becomes dead with this
3327 single transformation. */
3328 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3329 &use_p, &neguse_stmt))
3330 return false;
3331
3332 /* Make sure the multiplication isn't also used on that stmt. */
3333 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3334 if (USE_FROM_PTR (usep) == mul_result)
3335 return false;
3336
3337 /* Re-validate. */
3338 use_stmt = neguse_stmt;
3339 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3340 return false;
3341
3342 negate_p = seen_negate_p = true;
3343 }
3344
3345 tree cond, else_value, ops[3];
3346 tree_code code;
3347 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3348 &else_value))
3349 return false;
3350
3351 switch (code)
3352 {
3353 case MINUS_EXPR:
3354 if (ops[1] == result)
3355 negate_p = !negate_p;
3356 break;
3357 case PLUS_EXPR:
3358 break;
3359 default:
3360 /* FMA can only be formed from PLUS and MINUS. */
3361 return false;
3362 }
3363
3364 if (mul_cond && cond != mul_cond)
3365 return false;
3366
3367 if (cond)
3368 {
3369 if (cond == result || else_value == result)
3370 return false;
3371 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3372 return false;
3373 }
3374
3375 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3376 we'll visit later, we might be able to get a more profitable
3377 match with fnma.
3378 OTOH, if we don't, a negate / fma pair has likely lower latency
3379 than a mult / subtract pair. */
3380 if (code == MINUS_EXPR
3381 && !negate_p
3382 && ops[0] == result
3383 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3384 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3385 && TREE_CODE (ops[1]) == SSA_NAME
3386 && has_single_use (ops[1]))
3387 {
3388 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3389 if (is_gimple_assign (stmt2)
3390 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3391 return false;
3392 }
3393
3394 /* We can't handle a * b + a * b. */
3395 if (ops[0] == ops[1])
3396 return false;
3397 /* If deferring, make sure we are not looking at an instruction that
3398 wouldn't have existed if we were not. */
3399 if (state->m_deferring_p
3400 && (state->m_mul_result_set.contains (ops[0])
3401 || state->m_mul_result_set.contains (ops[1])))
3402 return false;
3403
3404 if (check_defer)
3405 {
3406 tree use_lhs = gimple_get_lhs (use_stmt);
3407 if (state->m_last_result)
3408 {
3409 if (ops[1] == state->m_last_result
3410 || ops[0] == state->m_last_result)
3411 defer = true;
3412 else
3413 defer = false;
3414 }
3415 else
3416 {
3417 gcc_checking_assert (!state->m_initial_phi);
3418 gphi *phi;
3419 if (ops[0] == result)
3420 phi = result_of_phi (ops[1]);
3421 else
3422 {
3423 gcc_assert (ops[1] == result);
3424 phi = result_of_phi (ops[0]);
3425 }
3426
3427 if (phi)
3428 {
3429 state->m_initial_phi = phi;
3430 defer = true;
3431 }
3432 else
3433 defer = false;
3434 }
3435
3436 state->m_last_result = use_lhs;
3437 check_defer = false;
3438 }
3439 else
3440 defer = false;
3441
3442 /* While it is possible to validate whether or not the exact form that
3443 we've recognized is available in the backend, the assumption is that
3444 if the deferring logic above did not trigger, the transformation is
3445 never a loss. For instance, suppose the target only has the plain FMA
3446 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3447 MUL+SUB for FMA+NEG, which is still two operations. Consider
3448 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3449 form the two NEGs are independent and could be run in parallel. */
3450 }
3451
3452 if (defer)
3453 {
3454 fma_transformation_info fti;
3455 fti.mul_stmt = mul_stmt;
3456 fti.mul_result = mul_result;
3457 fti.op1 = op1;
3458 fti.op2 = op2;
3459 state->m_candidates.safe_push (fti);
3460 state->m_mul_result_set.add (mul_result);
3461
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 {
3464 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3465 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3466 fprintf (dump_file, "\n");
3467 }
3468
3469 return false;
3470 }
3471 else
3472 {
3473 if (state->m_deferring_p)
3474 cancel_fma_deferring (state);
3475 convert_mult_to_fma_1 (mul_result, op1, op2);
3476 return true;
3477 }
3478 }
3479
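/* Illustration only -- a hypothetical source-level view of the trade-off
   discussed in convert_mult_to_fma above (names are made up).  With only
   the plain fma pattern available,

     double f (double a, double b, double c) { return a * b - c; }

   becomes fma (a, b, -c): MUL+SUB is exchanged for FMA+NEG, still two
   operations, and for -(a*b)-c -> fma (-a, b, -c) the two negations are
   independent and can execute in parallel.  */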
3480
3481 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3482 a check for non-zero like:
3483 _1 = x_4(D) * y_5(D);
3484 *res_7(D) = _1;
3485 if (x_4(D) != 0)
3486 goto <bb 3>; [50.00%]
3487 else
3488 goto <bb 4>; [50.00%]
3489
3490 <bb 3> [local count: 536870913]:
3491 _2 = _1 / x_4(D);
3492 _9 = _2 != y_5(D);
3493 _10 = (int) _9;
3494
3495 <bb 4> [local count: 1073741824]:
3496 # iftmp.0_3 = PHI <_10(3), 0(2)>
3497 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3498 optimize the x_4(D) != 0 condition to 1. */
3499
3500 static void
3501 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3502 gimple *div_stmt, bool *cfg_changed)
3503 {
3504 basic_block bb = gimple_bb (cond_stmt);
3505 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3506 return;
3507 edge pred_edge = single_pred_edge (bb);
3508 basic_block pred_bb = pred_edge->src;
3509 if (EDGE_COUNT (pred_bb->succs) != 2)
3510 return;
3511 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3512 edge other_succ_edge = NULL;
3513 if (gimple_code (cond_stmt) == GIMPLE_COND)
3514 {
3515 if (EDGE_COUNT (bb->succs) != 2)
3516 return;
3517 other_succ_edge = EDGE_SUCC (bb, 0);
3518 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3519 {
3520 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3521 other_succ_edge = EDGE_SUCC (bb, 1);
3522 }
3523 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3524 other_succ_edge = EDGE_SUCC (bb, 0);
3525 if (other_edge->dest != other_succ_edge->dest)
3526 return;
3527 }
3528 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3529 return;
3530 gimple *zero_cond = last_stmt (pred_bb);
3531 if (zero_cond == NULL
3532 || gimple_code (zero_cond) != GIMPLE_COND
3533 || (gimple_cond_code (zero_cond)
3534 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3535 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3536 return;
3537 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3538 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3539 return;
3540 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3541 {
3542       /* Allow the divisor to be the result of a same-precision cast
3543 	 from zero_cond_lhs.  */
3544 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3545 if (TREE_CODE (rhs2) != SSA_NAME)
3546 return;
3547 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3548 if (!gimple_assign_cast_p (g)
3549 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3550 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3551 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3552 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3553 return;
3554 }
3555 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3556 mul_stmts.quick_push (div_stmt);
3557 if (is_gimple_debug (gsi_stmt (gsi)))
3558 gsi_next_nondebug (&gsi);
3559 unsigned cast_count = 0;
3560 while (gsi_stmt (gsi) != cond_stmt)
3561 {
3562       /* If the original mul_stmt has a single use, allow it in the same bb;
3563 	 we are then looking just at __builtin_mul_overflow_p.
3564 Though, in that case the original mul_stmt will be replaced
3565 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3566 gimple *mul_stmt;
3567 unsigned int i;
3568 bool ok = false;
3569 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3570 {
3571 if (gsi_stmt (gsi) == mul_stmt)
3572 {
3573 ok = true;
3574 break;
3575 }
3576 }
3577 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3578 ok = true;
3579 if (!ok)
3580 return;
3581 gsi_next_nondebug (&gsi);
3582 }
3583 if (gimple_code (cond_stmt) == GIMPLE_COND)
3584 {
3585 basic_block succ_bb = other_edge->dest;
3586 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3587 gsi_next (&gpi))
3588 {
3589 gphi *phi = gpi.phi ();
3590 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3591 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3592 if (!operand_equal_p (v1, v2, 0))
3593 return;
3594 }
3595 }
3596 else
3597 {
3598 tree lhs = gimple_assign_lhs (cond_stmt);
3599 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3600 return;
3601 gsi_next_nondebug (&gsi);
3602 if (!gsi_end_p (gsi))
3603 {
3604 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3605 return;
3606 gimple *cast_stmt = gsi_stmt (gsi);
3607 if (!gimple_assign_cast_p (cast_stmt))
3608 return;
3609 tree new_lhs = gimple_assign_lhs (cast_stmt);
3610 gsi_next_nondebug (&gsi);
3611 if (!gsi_end_p (gsi)
3612 || !new_lhs
3613 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3614 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3615 return;
3616 lhs = new_lhs;
3617 }
3618 edge succ_edge = single_succ_edge (bb);
3619 basic_block succ_bb = succ_edge->dest;
3620 gsi = gsi_start_phis (succ_bb);
3621 if (gsi_end_p (gsi))
3622 return;
3623 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3624 gsi_next (&gsi);
3625 if (!gsi_end_p (gsi))
3626 return;
3627 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3628 return;
3629 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3630 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3631 {
3632 tree cond = gimple_assign_rhs1 (cond_stmt);
3633 if (TREE_CODE (cond) == NE_EXPR)
3634 {
3635 if (!operand_equal_p (other_val,
3636 gimple_assign_rhs3 (cond_stmt), 0))
3637 return;
3638 }
3639 else if (!operand_equal_p (other_val,
3640 gimple_assign_rhs2 (cond_stmt), 0))
3641 return;
3642 }
3643 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3644 {
3645 if (!integer_zerop (other_val))
3646 return;
3647 }
3648 else if (!integer_onep (other_val))
3649 return;
3650 }
3651 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3652 if (pred_edge->flags & EDGE_TRUE_VALUE)
3653 gimple_cond_make_true (zero_gcond);
3654 else
3655 gimple_cond_make_false (zero_gcond);
3656 update_stmt (zero_cond);
3657 *cfg_changed = true;
3658 }
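/* Illustration only -- hypothetical C input exhibiting the guarded check
   that maybe_optimize_guarding_check above targets:

     int mul_check (unsigned x, unsigned y, unsigned *res)
     {
       unsigned r = x * y;
       *res = r;
       return x != 0 && r / x != y;
     }

   Once the division is re-expressed via .MUL_OVERFLOW, the x != 0 guard
   no longer serves a purpose, so the gcond in the predecessor block is
   folded to a constant as done above.  */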
3659
3660 /* Helper function for arith_overflow_check_p.  Return true
3661    if VAL1 is equal to VAL2 cast to the corresponding integral type
3662    with the other signedness, or vice versa.  */
3663
3664 static bool
3665 arith_cast_equal_p (tree val1, tree val2)
3666 {
3667 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3668 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3669 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3670 return false;
3671 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3672 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3673 return true;
3674 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3675 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3676 return true;
3677 return false;
3678 }
3679
3680 /* Helper function of match_arith_overflow.  Return 1
3681    if USE_STMT is an unsigned overflow check of the form ovf != 0 for
3682    STMT, -1 if USE_STMT is an unsigned overflow check of the form
3683    ovf == 0, and 0 otherwise.  */
3684
3685 static int
3686 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3687 tree maxval, tree *other)
3688 {
3689 enum tree_code ccode = ERROR_MARK;
3690 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3691 enum tree_code code = gimple_assign_rhs_code (stmt);
3692 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3693 tree rhs1 = gimple_assign_rhs1 (stmt);
3694 tree rhs2 = gimple_assign_rhs2 (stmt);
3695 tree multop = NULL_TREE, divlhs = NULL_TREE;
3696 gimple *cur_use_stmt = use_stmt;
3697
3698 if (code == MULT_EXPR)
3699 {
3700 if (!is_gimple_assign (use_stmt))
3701 return 0;
3702 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3703 return 0;
3704 if (gimple_assign_rhs1 (use_stmt) != lhs)
3705 return 0;
3706 if (cast_stmt)
3707 {
3708 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3709 multop = rhs2;
3710 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3711 multop = rhs1;
3712 else
3713 return 0;
3714 }
3715 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3716 multop = rhs2;
3717 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3718 multop = rhs1;
3719 else
3720 return 0;
3721 if (stmt_ends_bb_p (use_stmt))
3722 return 0;
3723 divlhs = gimple_assign_lhs (use_stmt);
3724 if (!divlhs)
3725 return 0;
3726 use_operand_p use;
3727 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3728 return 0;
3729 }
3730 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3731 {
3732 ccode = gimple_cond_code (cur_use_stmt);
3733 crhs1 = gimple_cond_lhs (cur_use_stmt);
3734 crhs2 = gimple_cond_rhs (cur_use_stmt);
3735 }
3736 else if (is_gimple_assign (cur_use_stmt))
3737 {
3738 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3739 {
3740 ccode = gimple_assign_rhs_code (cur_use_stmt);
3741 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3742 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3743 }
3744 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3745 {
3746 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3747 if (COMPARISON_CLASS_P (cond))
3748 {
3749 ccode = TREE_CODE (cond);
3750 crhs1 = TREE_OPERAND (cond, 0);
3751 crhs2 = TREE_OPERAND (cond, 1);
3752 }
3753 else
3754 return 0;
3755 }
3756 else
3757 return 0;
3758 }
3759 else
3760 return 0;
3761
3762 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3763 return 0;
3764
3765 switch (ccode)
3766 {
3767 case GT_EXPR:
3768 case LE_EXPR:
3769 if (maxval)
3770 {
3771 /* r = a + b; r > maxval or r <= maxval */
3772 if (crhs1 == lhs
3773 && TREE_CODE (crhs2) == INTEGER_CST
3774 && tree_int_cst_equal (crhs2, maxval))
3775 return ccode == GT_EXPR ? 1 : -1;
3776 break;
3777 }
3778 /* r = a - b; r > a or r <= a
3779 r = a + b; a > r or a <= r or b > r or b <= r. */
3780 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3781 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3782 && crhs2 == lhs))
3783 return ccode == GT_EXPR ? 1 : -1;
3784 /* r = ~a; b > r or b <= r. */
3785 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3786 {
3787 if (other)
3788 *other = crhs1;
3789 return ccode == GT_EXPR ? 1 : -1;
3790 }
3791 break;
3792 case LT_EXPR:
3793 case GE_EXPR:
3794 if (maxval)
3795 break;
3796 /* r = a - b; a < r or a >= r
3797 r = a + b; r < a or r >= a or r < b or r >= b. */
3798 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3799 || (code == PLUS_EXPR && crhs1 == lhs
3800 && (crhs2 == rhs1 || crhs2 == rhs2)))
3801 return ccode == LT_EXPR ? 1 : -1;
3802 /* r = ~a; r < b or r >= b. */
3803 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3804 {
3805 if (other)
3806 *other = crhs2;
3807 return ccode == LT_EXPR ? 1 : -1;
3808 }
3809 break;
3810 case EQ_EXPR:
3811 case NE_EXPR:
3812 /* r = a * b; _1 = r / a; _1 == b
3813 r = a * b; _1 = r / b; _1 == a
3814 r = a * b; _1 = r / a; _1 != b
3815 r = a * b; _1 = r / b; _1 != a. */
3816 if (code == MULT_EXPR)
3817 {
3818 if (cast_stmt)
3819 {
3820 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3821 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3822 {
3823 use_stmt = cur_use_stmt;
3824 return ccode == NE_EXPR ? 1 : -1;
3825 }
3826 }
3827 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3828 || (crhs2 == divlhs && crhs1 == multop))
3829 {
3830 use_stmt = cur_use_stmt;
3831 return ccode == NE_EXPR ? 1 : -1;
3832 }
3833 }
3834 break;
3835 default:
3836 break;
3837 }
3838 return 0;
3839 }
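/* Hypothetical examples (illustration only) of comparisons classified by
   arith_overflow_check_p above, for unsigned a, b:

     r = a + b;  if (r < a)                // 1: overflow check
     r = a - b;  if (r <= a)               // -1: check for no overflow
     r = ~a;     if (b > r)                // 1: overflow check, *OTHER = b
     r = a * b;  d = r / a;  if (d != b)   // 1 for the MULT_EXPR case
*/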
3840
3841 /* Recognize for unsigned x
3842 x = y - z;
3843 if (x > y)
3844 where there are other uses of x and replace it with
3845 _7 = .SUB_OVERFLOW (y, z);
3846 x = REALPART_EXPR <_7>;
3847 _8 = IMAGPART_EXPR <_7>;
3848 if (_8)
3849 and similarly for addition.
3850
3851 Also recognize:
3852 yc = (type) y;
3853 zc = (type) z;
3854 x = yc + zc;
3855 if (x > max)
3856 where y and z have unsigned types with maximum max
3857 and there are other uses of x and all of those cast x
3858 back to that unsigned type and again replace it with
3859 _7 = .ADD_OVERFLOW (y, z);
3860 _9 = REALPART_EXPR <_7>;
3861 _8 = IMAGPART_EXPR <_7>;
3862 if (_8)
3863 and replace (utype) x with _9.
3864
3865 Also recognize:
3866 x = ~z;
3867 if (y > x)
3868 and replace it with
3869 _7 = .ADD_OVERFLOW (y, z);
3870 _8 = IMAGPART_EXPR <_7>;
3871 if (_8)
3872
3873 And also recognize:
3874 z = x * y;
3875 if (x != 0)
3876 goto <bb 3>; [50.00%]
3877 else
3878 goto <bb 4>; [50.00%]
3879
3880 <bb 3> [local count: 536870913]:
3881 _2 = z / x;
3882 _9 = _2 != y;
3883 _10 = (int) _9;
3884
3885 <bb 4> [local count: 1073741824]:
3886 # iftmp.0_3 = PHI <_10(3), 0(2)>
3887 and replace it with
3888 _7 = .MUL_OVERFLOW (x, y);
3889 z = IMAGPART_EXPR <_7>;
3890 _8 = IMAGPART_EXPR <_7>;
3891 _9 = _8 != 0;
3892 iftmp.0_3 = (int) _9; */
3893
3894 static bool
3895 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3896 enum tree_code code, bool *cfg_changed)
3897 {
3898 tree lhs = gimple_assign_lhs (stmt);
3899 tree type = TREE_TYPE (lhs);
3900 use_operand_p use_p;
3901 imm_use_iterator iter;
3902 bool use_seen = false;
3903 bool ovf_use_seen = false;
3904 gimple *use_stmt;
3905 gimple *add_stmt = NULL;
3906 bool add_first = false;
3907 gimple *cond_stmt = NULL;
3908 gimple *cast_stmt = NULL;
3909 tree cast_lhs = NULL_TREE;
3910
3911 gcc_checking_assert (code == PLUS_EXPR
3912 || code == MINUS_EXPR
3913 || code == MULT_EXPR
3914 || code == BIT_NOT_EXPR);
3915 if (!INTEGRAL_TYPE_P (type)
3916 || !TYPE_UNSIGNED (type)
3917 || has_zero_uses (lhs)
3918 || (code != PLUS_EXPR
3919 && code != MULT_EXPR
3920 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3921 TYPE_MODE (type)) == CODE_FOR_nothing))
3922 return false;
3923
3924 tree rhs1 = gimple_assign_rhs1 (stmt);
3925 tree rhs2 = gimple_assign_rhs2 (stmt);
3926 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3927 {
3928 use_stmt = USE_STMT (use_p);
3929 if (is_gimple_debug (use_stmt))
3930 continue;
3931
3932 tree other = NULL_TREE;
3933 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3934 {
3935 if (code == BIT_NOT_EXPR)
3936 {
3937 gcc_assert (other);
3938 if (TREE_CODE (other) != SSA_NAME)
3939 return false;
3940 if (rhs2 == NULL)
3941 rhs2 = other;
3942 else
3943 return false;
3944 cond_stmt = use_stmt;
3945 }
3946 ovf_use_seen = true;
3947 }
3948 else
3949 {
3950 use_seen = true;
3951 if (code == MULT_EXPR
3952 && cast_stmt == NULL
3953 && gimple_assign_cast_p (use_stmt))
3954 {
3955 cast_lhs = gimple_assign_lhs (use_stmt);
3956 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3957 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3958 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3959 == TYPE_PRECISION (TREE_TYPE (lhs))))
3960 cast_stmt = use_stmt;
3961 else
3962 cast_lhs = NULL_TREE;
3963 }
3964 }
3965 if (ovf_use_seen && use_seen)
3966 break;
3967 }
3968
3969 if (!ovf_use_seen
3970 && code == MULT_EXPR
3971 && cast_stmt)
3972 {
3973 if (TREE_CODE (rhs1) != SSA_NAME
3974 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3975 return false;
3976 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3977 {
3978 use_stmt = USE_STMT (use_p);
3979 if (is_gimple_debug (use_stmt))
3980 continue;
3981
3982 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3983 NULL_TREE, NULL))
3984 ovf_use_seen = true;
3985 }
3986 }
3987 else
3988 {
3989 cast_stmt = NULL;
3990 cast_lhs = NULL_TREE;
3991 }
3992
3993 tree maxval = NULL_TREE;
3994 if (!ovf_use_seen
3995 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3996 || (code == PLUS_EXPR
3997 && optab_handler (uaddv4_optab,
3998 TYPE_MODE (type)) == CODE_FOR_nothing)
3999 || (code == MULT_EXPR
4000 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4001 TYPE_MODE (type)) == CODE_FOR_nothing))
4002 {
4003 if (code != PLUS_EXPR)
4004 return false;
4005 if (TREE_CODE (rhs1) != SSA_NAME
4006 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4007 return false;
4008 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4009 tree type1 = TREE_TYPE (rhs1);
4010 if (!INTEGRAL_TYPE_P (type1)
4011 || !TYPE_UNSIGNED (type1)
4012 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4013 || (TYPE_PRECISION (type1)
4014 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4015 return false;
4016 if (TREE_CODE (rhs2) == INTEGER_CST)
4017 {
4018 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4019 TYPE_PRECISION (type1),
4020 UNSIGNED), 0))
4021 return false;
4022 rhs2 = fold_convert (type1, rhs2);
4023 }
4024 else
4025 {
4026 if (TREE_CODE (rhs2) != SSA_NAME
4027 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4028 return false;
4029 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4030 tree type2 = TREE_TYPE (rhs2);
4031 if (!INTEGRAL_TYPE_P (type2)
4032 || !TYPE_UNSIGNED (type2)
4033 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4034 || (TYPE_PRECISION (type2)
4035 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4036 return false;
4037 }
4038 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4039 type = type1;
4040 else
4041 type = TREE_TYPE (rhs2);
4042
4043 if (TREE_CODE (type) != INTEGER_TYPE
4044 || optab_handler (uaddv4_optab,
4045 TYPE_MODE (type)) == CODE_FOR_nothing)
4046 return false;
4047
4048 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4049 UNSIGNED));
4050 ovf_use_seen = false;
4051 use_seen = false;
4052 basic_block use_bb = NULL;
4053 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4054 {
4055 use_stmt = USE_STMT (use_p);
4056 if (is_gimple_debug (use_stmt))
4057 continue;
4058
4059 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4060 {
4061 ovf_use_seen = true;
4062 use_bb = gimple_bb (use_stmt);
4063 }
4064 else
4065 {
4066 if (!gimple_assign_cast_p (use_stmt)
4067 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4068 return false;
4069 tree use_lhs = gimple_assign_lhs (use_stmt);
4070 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4071 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4072 > TYPE_PRECISION (type)))
4073 return false;
4074 use_seen = true;
4075 }
4076 }
4077 if (!ovf_use_seen)
4078 return false;
4079 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4080 {
4081 if (!use_seen)
4082 return false;
4083 tree new_rhs1 = make_ssa_name (type);
4084 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4085 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4086 rhs1 = new_rhs1;
4087 }
4088 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4089 {
4090 if (!use_seen)
4091 return false;
4092 tree new_rhs2 = make_ssa_name (type);
4093 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4094 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4095 rhs2 = new_rhs2;
4096 }
4097 else if (!use_seen)
4098 {
4099 	  /* If there are no uses of the wider addition, check whether
4100 	     forwprop has created a narrower addition.
4101 Require it to be in the same bb as the overflow check. */
4102 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4103 {
4104 use_stmt = USE_STMT (use_p);
4105 if (is_gimple_debug (use_stmt))
4106 continue;
4107
4108 if (use_stmt == stmt)
4109 continue;
4110
4111 if (!is_gimple_assign (use_stmt)
4112 || gimple_bb (use_stmt) != use_bb
4113 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4114 continue;
4115
4116 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4117 {
4118 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4119 rhs2, 0))
4120 continue;
4121 }
4122 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4123 {
4124 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4125 continue;
4126 }
4127 else
4128 continue;
4129
4130 add_stmt = use_stmt;
4131 break;
4132 }
4133 if (add_stmt == NULL)
4134 return false;
4135
4136 /* If stmt and add_stmt are in the same bb, we need to find out
4137 which one is earlier. If they are in different bbs, we've
4138 checked add_stmt is in the same bb as one of the uses of the
4139 stmt lhs, so stmt needs to dominate add_stmt too. */
4140 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4141 {
4142 gimple_stmt_iterator gsif = *gsi;
4143 gimple_stmt_iterator gsib = *gsi;
4144 int i;
4145 /* Search both forward and backward from stmt and have a small
4146 upper bound. */
4147 for (i = 0; i < 128; i++)
4148 {
4149 if (!gsi_end_p (gsib))
4150 {
4151 gsi_prev_nondebug (&gsib);
4152 if (gsi_stmt (gsib) == add_stmt)
4153 {
4154 add_first = true;
4155 break;
4156 }
4157 }
4158 else if (gsi_end_p (gsif))
4159 break;
4160 if (!gsi_end_p (gsif))
4161 {
4162 gsi_next_nondebug (&gsif);
4163 if (gsi_stmt (gsif) == add_stmt)
4164 break;
4165 }
4166 }
4167 if (i == 128)
4168 return false;
4169 if (add_first)
4170 *gsi = gsi_for_stmt (add_stmt);
4171 }
4172 }
4173 }
4174
4175 if (code == BIT_NOT_EXPR)
4176 *gsi = gsi_for_stmt (cond_stmt);
4177
4178 auto_vec<gimple *, 8> mul_stmts;
4179 if (code == MULT_EXPR && cast_stmt)
4180 {
4181 type = TREE_TYPE (cast_lhs);
4182 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4183 if (gimple_assign_cast_p (g)
4184 && useless_type_conversion_p (type,
4185 TREE_TYPE (gimple_assign_rhs1 (g)))
4186 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4187 rhs1 = gimple_assign_rhs1 (g);
4188 else
4189 {
4190 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4191 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4192 rhs1 = gimple_assign_lhs (g);
4193 mul_stmts.quick_push (g);
4194 }
4195 if (TREE_CODE (rhs2) == INTEGER_CST)
4196 rhs2 = fold_convert (type, rhs2);
4197 else
4198 {
4199 g = SSA_NAME_DEF_STMT (rhs2);
4200 if (gimple_assign_cast_p (g)
4201 && useless_type_conversion_p (type,
4202 TREE_TYPE (gimple_assign_rhs1 (g)))
4203 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4204 rhs2 = gimple_assign_rhs1 (g);
4205 else
4206 {
4207 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4208 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4209 rhs2 = gimple_assign_lhs (g);
4210 mul_stmts.quick_push (g);
4211 }
4212 }
4213 }
4214 tree ctype = build_complex_type (type);
4215 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4216 ? IFN_MUL_OVERFLOW
4217 : code != MINUS_EXPR
4218 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4219 2, rhs1, rhs2);
4220 tree ctmp = make_ssa_name (ctype);
4221 gimple_call_set_lhs (g, ctmp);
4222 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4223 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4224 gassign *g2;
4225 if (code != BIT_NOT_EXPR)
4226 {
4227 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4228 build1 (REALPART_EXPR, type, ctmp));
4229 if (maxval || cast_stmt)
4230 {
4231 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4232 if (add_first)
4233 *gsi = gsi_for_stmt (stmt);
4234 }
4235 else
4236 gsi_replace (gsi, g2, true);
4237 if (code == MULT_EXPR)
4238 {
4239 mul_stmts.quick_push (g);
4240 mul_stmts.quick_push (g2);
4241 if (cast_stmt)
4242 {
4243 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4244 gsi_replace (gsi, g2, true);
4245 mul_stmts.quick_push (g2);
4246 }
4247 }
4248 }
4249 tree ovf = make_ssa_name (type);
4250 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4251 build1 (IMAGPART_EXPR, type, ctmp));
4252 if (code != BIT_NOT_EXPR)
4253 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4254 else
4255 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4256 if (code == MULT_EXPR)
4257 mul_stmts.quick_push (g2);
4258
4259 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4260 {
4261 if (is_gimple_debug (use_stmt))
4262 continue;
4263
4264 gimple *orig_use_stmt = use_stmt;
4265 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4266 maxval, NULL);
4267 if (ovf_use == 0)
4268 {
4269 gcc_assert (code != BIT_NOT_EXPR);
4270 if (maxval)
4271 {
4272 tree use_lhs = gimple_assign_lhs (use_stmt);
4273 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4274 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4275 TREE_TYPE (new_lhs)))
4276 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4277 update_stmt (use_stmt);
4278 }
4279 continue;
4280 }
4281 if (gimple_code (use_stmt) == GIMPLE_COND)
4282 {
4283 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4284 gimple_cond_set_lhs (cond_stmt, ovf);
4285 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4286 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4287 }
4288 else
4289 {
4290 gcc_checking_assert (is_gimple_assign (use_stmt));
4291 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4292 {
4293 gimple_assign_set_rhs1 (use_stmt, ovf);
4294 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4295 gimple_assign_set_rhs_code (use_stmt,
4296 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4297 }
4298 else
4299 {
4300 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4301 == COND_EXPR);
4302 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4303 boolean_type_node, ovf,
4304 build_int_cst (type, 0));
4305 gimple_assign_set_rhs1 (use_stmt, cond);
4306 }
4307 }
4308 update_stmt (use_stmt);
4309 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4310 {
4311 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4312 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4313 cfg_changed);
4314 gsi_remove (&gsi2, true);
4315 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4316 }
4317 }
4318 if (maxval)
4319 {
4320 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4321 gsi_remove (&gsi2, true);
4322 if (add_stmt)
4323 {
4324 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4325 new_lhs);
4326 gsi2 = gsi_for_stmt (add_stmt);
4327 gsi_replace (&gsi2, g, true);
4328 }
4329 }
4330 else if (code == BIT_NOT_EXPR)
4331 {
4332 *gsi = gsi_for_stmt (stmt);
4333 gsi_remove (gsi, true);
4334 release_ssa_name (lhs);
4335 return true;
4336 }
4337 return false;
4338 }
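/* Hypothetical source-level view (illustration only): an open-coded
   overflow check such as

     unsigned sum = y + z;
     if (sum < y)
       handle_overflow ();

   is rewritten by match_arith_overflow into the equivalent of
   __builtin_add_overflow (y, z, &sum), i.e. an .ADD_OVERFLOW internal
   call whose IMAGPART_EXPR feeds the branch.  */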
4339
4340 /* Return true if target has support for divmod. */
4341
4342 static bool
4343 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4344 {
4345 /* If target supports hardware divmod insn, use it for divmod. */
4346 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4347 return true;
4348
4349 /* Check if libfunc for divmod is available. */
4350 rtx libfunc = optab_libfunc (divmod_optab, mode);
4351 if (libfunc != NULL_RTX)
4352 {
4353 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4354 	 we don't want to use the libfunc even if it exists for the given mode.  */
4355 machine_mode div_mode;
4356 FOR_EACH_MODE_FROM (div_mode, mode)
4357 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4358 return false;
4359
4360 return targetm.expand_divmod_libfunc != NULL;
4361 }
4362
4363 return false;
4364 }
4365
4366 /* Check if stmt is candidate for divmod transform. */
4367
4368 static bool
4369 divmod_candidate_p (gassign *stmt)
4370 {
4371 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4372 machine_mode mode = TYPE_MODE (type);
4373 optab divmod_optab, div_optab;
4374
4375 if (TYPE_UNSIGNED (type))
4376 {
4377 divmod_optab = udivmod_optab;
4378 div_optab = udiv_optab;
4379 }
4380 else
4381 {
4382 divmod_optab = sdivmod_optab;
4383 div_optab = sdiv_optab;
4384 }
4385
4386 tree op1 = gimple_assign_rhs1 (stmt);
4387 tree op2 = gimple_assign_rhs2 (stmt);
4388
4389 /* Disable the transform if either is a constant, since division-by-constant
4390 may have specialized expansion. */
4391 if (CONSTANT_CLASS_P (op1))
4392 return false;
4393
4394 if (CONSTANT_CLASS_P (op2))
4395 {
4396 if (integer_pow2p (op2))
4397 return false;
4398
4399 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4400 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4401 return false;
4402
4403       /* If the divisor is not a power of 2 and the precision is wider than
4404 	 a HWI, expand_divmod punts on that, so in that case it is better
4405 	 to use the divmod optab or libfunc.  Similarly if choose_multiplier
4406 might need pre/post shifts of BITS_PER_WORD or more. */
4407 }
4408
4409 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4410 expand using the [su]divv optabs. */
4411 if (TYPE_OVERFLOW_TRAPS (type))
4412 return false;
4413
4414 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4415 return false;
4416
4417 return true;
4418 }
4419
4420 /* This function looks for:
4421 t1 = a TRUNC_DIV_EXPR b;
4422 t2 = a TRUNC_MOD_EXPR b;
4423 and transforms it to the following sequence:
4424 complex_tmp = DIVMOD (a, b);
4425     t1 = REALPART_EXPR(complex_tmp);
4426     t2 = IMAGPART_EXPR(complex_tmp);
4427 For conditions enabling the transform see divmod_candidate_p().
4428
4429 The pass has three parts:
4430    1) Find top_stmt, which is a trunc_div or trunc_mod stmt and dominates all
4431       other trunc_div_expr and trunc_mod_expr stmts.
4432    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4433       to stmts vector.
4434    3) Insert DIVMOD call just before top_stmt and update entries in
4435       stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4436       IMAGPART_EXPR for mod).  */
4437
4438 static bool
4439 convert_to_divmod (gassign *stmt)
4440 {
4441 if (stmt_can_throw_internal (cfun, stmt)
4442 || !divmod_candidate_p (stmt))
4443 return false;
4444
4445 tree op1 = gimple_assign_rhs1 (stmt);
4446 tree op2 = gimple_assign_rhs2 (stmt);
4447
4448 imm_use_iterator use_iter;
4449 gimple *use_stmt;
4450 auto_vec<gimple *> stmts;
4451
4452 gimple *top_stmt = stmt;
4453 basic_block top_bb = gimple_bb (stmt);
4454
4455   /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4456      at least stmt and possibly other trunc_div/trunc_mod stmts
4457      having the same operands as stmt.  */
4458
4459 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4460 {
4461 if (is_gimple_assign (use_stmt)
4462 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4463 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4464 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4465 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4466 {
4467 if (stmt_can_throw_internal (cfun, use_stmt))
4468 continue;
4469
4470 basic_block bb = gimple_bb (use_stmt);
4471
4472 if (bb == top_bb)
4473 {
4474 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4475 top_stmt = use_stmt;
4476 }
4477 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4478 {
4479 top_bb = bb;
4480 top_stmt = use_stmt;
4481 }
4482 }
4483 }
4484
4485 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4486 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4487
4488 stmts.safe_push (top_stmt);
4489 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4490
4491   /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4492      to the stmts vector.  The 2nd loop will always add stmt to the stmts
4493      vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4494      2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
4495
4496 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4497 {
4498 if (is_gimple_assign (use_stmt)
4499 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4500 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4501 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4502 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4503 {
4504 if (use_stmt == top_stmt
4505 || stmt_can_throw_internal (cfun, use_stmt)
4506 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4507 continue;
4508
4509 stmts.safe_push (use_stmt);
4510 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4511 div_seen = true;
4512 }
4513 }
4514
4515 if (!div_seen)
4516 return false;
4517
4518 /* Part 3: Create libcall to internal fn DIVMOD:
4519 divmod_tmp = DIVMOD (op1, op2). */
4520
4521 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4522 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4523 call_stmt, "divmod_tmp");
4524 gimple_call_set_lhs (call_stmt, res);
4525 /* We rejected throwing statements above. */
4526 gimple_call_set_nothrow (call_stmt, true);
4527
4528 /* Insert the call before top_stmt. */
4529 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4530 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4531
4532 widen_mul_stats.divmod_calls_inserted++;
4533
4534 /* Update all statements in stmts vector:
4535 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4536 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4537
4538 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4539 {
4540 tree new_rhs;
4541
4542 switch (gimple_assign_rhs_code (use_stmt))
4543 {
4544 case TRUNC_DIV_EXPR:
4545 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4546 break;
4547
4548 case TRUNC_MOD_EXPR:
4549 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4550 break;
4551
4552 default:
4553 gcc_unreachable ();
4554 }
4555
4556 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4557 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4558 update_stmt (use_stmt);
4559 }
4560
4561 return true;
4562 }
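/* Illustration only (hypothetical source): with a run-time divisor, and
   assuming the target passes target_supports_divmod_p, both statements
   below qualify per divmod_candidate_p and end up sharing one DIVMOD
   call:

     void split (unsigned a, unsigned b, unsigned *q, unsigned *r)
     {
       *q = a / b;   // becomes REALPART_EXPR <divmod_tmp>
       *r = a % b;   // becomes IMAGPART_EXPR <divmod_tmp>
     }
*/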
4563
4564 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4565 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4566 value is true iff we converted the statement. */
4567
4568 static bool
4569 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4570 {
4571 tree lhs = gimple_assign_lhs (stmt);
4572 tree stype = TREE_TYPE (lhs);
4573 tree sarg0 = gimple_assign_rhs1 (stmt);
4574 tree sarg1 = gimple_assign_rhs2 (stmt);
4575
4576 if (TREE_CODE (stype) != INTEGER_TYPE
4577 || TREE_CODE (sarg1) != INTEGER_CST
4578 || TREE_CODE (sarg0) != SSA_NAME
4579 || !tree_fits_uhwi_p (sarg1)
4580 || !has_single_use (sarg0))
4581 return false;
4582
4583 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4584 if (!def)
4585 return false;
4586
4587 enum tree_code mcode = gimple_assign_rhs_code (def);
4588 if (mcode == NOP_EXPR)
4589 {
4590 tree tmp = gimple_assign_rhs1 (def);
4591 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4592 return false;
4593 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4594 if (!def)
4595 return false;
4596 mcode = gimple_assign_rhs_code (def);
4597 }
4598
4599 if (mcode != WIDEN_MULT_EXPR
4600 || gimple_bb (def) != gimple_bb (stmt))
4601 return false;
4602 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4603 if (TREE_CODE (mtype) != INTEGER_TYPE
4604 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4605 return false;
4606
4607 tree mop1 = gimple_assign_rhs1 (def);
4608 tree mop2 = gimple_assign_rhs2 (def);
4609 tree optype = TREE_TYPE (mop1);
4610 bool unsignedp = TYPE_UNSIGNED (optype);
4611 unsigned int prec = TYPE_PRECISION (optype);
4612
4613 if (unsignedp != TYPE_UNSIGNED (mtype)
4614 || TYPE_PRECISION (mtype) != 2 * prec)
4615 return false;
4616
4617 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4618 if (bits < prec || bits >= 2 * prec)
4619 return false;
4620
4621 /* For the time being, require operands to have the same sign. */
4622 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4623 return false;
4624
4625 machine_mode mode = TYPE_MODE (optype);
4626 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4627 if (optab_handler (tab, mode) == CODE_FOR_nothing)
4628 return false;
4629
4630 location_t loc = gimple_location (stmt);
4631 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4632 MULT_HIGHPART_EXPR, mop1, mop2);
4633 tree highpart2 = highpart1;
4634 tree ntype = optype;
4635
4636 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4637 {
4638 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4639 : signed_type_for (optype);
4640 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4641 }
4642 if (bits > prec)
4643 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4644 RSHIFT_EXPR, highpart2,
4645 build_int_cst (ntype, bits - prec));
4646
4647 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4648 gsi_replace (gsi, new_stmt, true);
4649
4650 widen_mul_stats.highpart_mults_inserted++;
4651 return true;
4652 }
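/* Hypothetical example (illustration only) of the shape handled by
   convert_mult_to_highpart above, assuming 32-bit unsigned int:

     unsigned mulhi (unsigned a, unsigned b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   The widening multiplication followed by a shift of at least the
   operand precision is rewritten to MULT_HIGHPART_EXPR when the target
   provides a umul_highpart/smul_highpart pattern.  */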
4653
4654 /* If the target has a spaceship<MODE>3 expander, pattern recognize
4655 <bb 2> [local count: 1073741824]:
4656 if (a_2(D) == b_3(D))
4657 goto <bb 6>; [34.00%]
4658 else
4659 goto <bb 3>; [66.00%]
4660
4661 <bb 3> [local count: 708669601]:
4662 if (a_2(D) < b_3(D))
4663 goto <bb 6>; [1.04%]
4664 else
4665 goto <bb 4>; [98.96%]
4666
4667 <bb 4> [local count: 701299439]:
4668 if (a_2(D) > b_3(D))
4669 goto <bb 5>; [48.89%]
4670 else
4671 goto <bb 6>; [51.11%]
4672
4673 <bb 5> [local count: 342865295]:
4674
4675 <bb 6> [local count: 1073741824]:
4676 and turn it into:
4677 <bb 2> [local count: 1073741824]:
4678 _1 = .SPACESHIP (a_2(D), b_3(D));
4679 if (_1 == 0)
4680 goto <bb 6>; [34.00%]
4681 else
4682 goto <bb 3>; [66.00%]
4683
4684 <bb 3> [local count: 708669601]:
4685 if (_1 == -1)
4686 goto <bb 6>; [1.04%]
4687 else
4688 goto <bb 4>; [98.96%]
4689
4690 <bb 4> [local count: 701299439]:
4691 if (_1 == 1)
4692 goto <bb 5>; [48.89%]
4693 else
4694 goto <bb 6>; [51.11%]
4695
4696 <bb 5> [local count: 342865295]:
4697
4698 <bb 6> [local count: 1073741824]:
4699 so that the backend can emit optimal comparison and
4700 conditional jump sequence. */
4701
4702 static void
4703 optimize_spaceship (gimple *stmt)
4704 {
4705 enum tree_code code = gimple_cond_code (stmt);
4706 if (code != EQ_EXPR && code != NE_EXPR)
4707 return;
4708 tree arg1 = gimple_cond_lhs (stmt);
4709 tree arg2 = gimple_cond_rhs (stmt);
4710 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4711 || optab_handler (spaceship_optab,
4712 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4713 || operand_equal_p (arg1, arg2, 0))
4714 return;
4715
4716 basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4717 edge em1 = NULL, e1 = NULL, e2 = NULL;
4718 bb1 = EDGE_SUCC (bb0, 1)->dest;
4719 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4720 bb1 = EDGE_SUCC (bb0, 0)->dest;
4721
4722 gimple *g = last_stmt (bb1);
4723 if (g == NULL
4724 || gimple_code (g) != GIMPLE_COND
4725 || !single_pred_p (bb1)
4726 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4727 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4728 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4729 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4730 || !cond_only_block_p (bb1))
4731 return;
4732
4733 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4734 ? LT_EXPR : GT_EXPR);
4735 switch (gimple_cond_code (g))
4736 {
4737 case LT_EXPR:
4738 case LE_EXPR:
4739 break;
4740 case GT_EXPR:
4741 case GE_EXPR:
4742 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4743 break;
4744 default:
4745 return;
4746 }
4747
4748 for (int i = 0; i < 2; ++i)
4749 {
4750 /* With NaNs, </<=/>/>= are false, so we need to look for the
4751 third comparison on the false edge from whatever non-equality
4752 comparison the second comparison is. */
4753 if (HONOR_NANS (TREE_TYPE (arg1))
4754 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4755 continue;
4756
4757 bb2 = EDGE_SUCC (bb1, i)->dest;
4758 g = last_stmt (bb2);
4759 if (g == NULL
4760 || gimple_code (g) != GIMPLE_COND
4761 || !single_pred_p (bb2)
4762 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4763 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4764 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4765 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4766 || !cond_only_block_p (bb2)
4767 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4768 continue;
4769
4770 enum tree_code ccode2
4771 = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4772 switch (gimple_cond_code (g))
4773 {
4774 case LT_EXPR:
4775 case LE_EXPR:
4776 break;
4777 case GT_EXPR:
4778 case GE_EXPR:
4779 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4780 break;
4781 default:
4782 continue;
4783 }
4784 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4785 continue;
4786
4787 if ((ccode == LT_EXPR)
4788 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4789 {
4790 em1 = EDGE_SUCC (bb1, 1 - i);
4791 e1 = EDGE_SUCC (bb2, 0);
4792 e2 = EDGE_SUCC (bb2, 1);
4793 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4794 std::swap (e1, e2);
4795 }
4796 else
4797 {
4798 e1 = EDGE_SUCC (bb1, 1 - i);
4799 em1 = EDGE_SUCC (bb2, 0);
4800 e2 = EDGE_SUCC (bb2, 1);
4801 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4802 std::swap (em1, e2);
4803 }
4804 break;
4805 }
4806
4807 if (em1 == NULL)
4808 {
4809 if ((ccode == LT_EXPR)
4810 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4811 {
4812 em1 = EDGE_SUCC (bb1, 1);
4813 e1 = EDGE_SUCC (bb1, 0);
4814 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4815 }
4816 else
4817 {
4818 em1 = EDGE_SUCC (bb1, 0);
4819 e1 = EDGE_SUCC (bb1, 1);
4820 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4821 }
4822 }
4823
4824 g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4825 tree lhs = make_ssa_name (integer_type_node);
4826 gimple_call_set_lhs (g, lhs);
4827 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4828 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4829
4830 gcond *cond = as_a <gcond *> (stmt);
4831 gimple_cond_set_lhs (cond, lhs);
4832 gimple_cond_set_rhs (cond, integer_zero_node);
4833 update_stmt (stmt);
4834
4835 g = last_stmt (bb1);
4836 cond = as_a <gcond *> (g);
4837 gimple_cond_set_lhs (cond, lhs);
4838 if (em1->src == bb1 && e2 != em1)
4839 {
4840 gimple_cond_set_rhs (cond, integer_minus_one_node);
4841 gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4842 ? EQ_EXPR : NE_EXPR);
4843 }
4844 else
4845 {
4846 gcc_assert (e1->src == bb1 && e2 != e1);
4847 gimple_cond_set_rhs (cond, integer_one_node);
4848 gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4849 ? EQ_EXPR : NE_EXPR);
4850 }
4851 update_stmt (g);
4852
4853 if (e2 != e1 && e2 != em1)
4854 {
4855 g = last_stmt (bb2);
4856 cond = as_a <gcond *> (g);
4857 gimple_cond_set_lhs (cond, lhs);
4858 if (em1->src == bb2)
4859 gimple_cond_set_rhs (cond, integer_minus_one_node);
4860 else
4861 {
4862 gcc_assert (e1->src == bb2);
4863 gimple_cond_set_rhs (cond, integer_one_node);
4864 }
4865 gimple_cond_set_code (cond,
4866 (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4867 update_stmt (g);
4868 }
4869
4870 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4871 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4872 set_range_info (lhs, VR_RANGE, wm1, w2);
4873 }
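/* Hypothetical C++ source (illustration only) that yields the ==/</>
   comparison chain recognized by optimize_spaceship above:

     #include <compare>
     std::partial_ordering cmp (double a, double b) { return a <=> b; }

   With a spaceship<MODE>3 expander the three conditional branches reuse
   a single .SPACESHIP result, constrained above to the range [-1, 2],
   compared against 0, -1 and 1.  */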
4874
4875
4876 /* Find integer multiplications where the operands are extended from
4877 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4878 or MULT_HIGHPART_EXPR where appropriate. */
4879
4880 namespace {
4881
4882 const pass_data pass_data_optimize_widening_mul =
4883 {
4884 GIMPLE_PASS, /* type */
4885 "widening_mul", /* name */
4886 OPTGROUP_NONE, /* optinfo_flags */
4887 TV_TREE_WIDEN_MUL, /* tv_id */
4888 PROP_ssa, /* properties_required */
4889 0, /* properties_provided */
4890 0, /* properties_destroyed */
4891 0, /* todo_flags_start */
4892 TODO_update_ssa, /* todo_flags_finish */
4893 };
4894
4895 class pass_optimize_widening_mul : public gimple_opt_pass
4896 {
4897 public:
4898   pass_optimize_widening_mul (gcc::context *ctxt)
4899 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4900 {}
4901
4902 /* opt_pass methods: */
4903   virtual bool gate (function *)
4904 {
4905 return flag_expensive_optimizations && optimize;
4906 }
4907
4908 virtual unsigned int execute (function *);
4909
4910 }; // class pass_optimize_widening_mul
4911
4912 /* Walker class to perform the transformation in reverse dominance order. */
4913
4914 class math_opts_dom_walker : public dom_walker
4915 {
4916 public:
4917   /* Constructor.  CFG_CHANGED_P is a pointer to a boolean flag that will
4918      be set if walking modifies the CFG.  */
4919
4920   math_opts_dom_walker (bool *cfg_changed_p)
4921 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4922 m_cfg_changed_p (cfg_changed_p) {}
4923
4924 /* The actual actions performed in the walk. */
4925
4926 virtual void after_dom_children (basic_block);
4927
4928 /* Set of results of chains of multiply and add statement combinations that
4929 were not transformed into FMAs because of active deferring. */
4930 hash_set<tree> m_last_result_set;
4931
4932 /* Pointer to a flag of the user that needs to be set if CFG has been
4933 modified. */
4934 bool *m_cfg_changed_p;
4935 };
4936
4937 void
4938 math_opts_dom_walker::after_dom_children (basic_block bb)
4939 {
4940 gimple_stmt_iterator gsi;
4941
4942 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4943
4944 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4945 {
4946 gimple *stmt = gsi_stmt (gsi);
4947 enum tree_code code;
4948
4949 if (is_gimple_assign (stmt))
4950 {
4951 code = gimple_assign_rhs_code (stmt);
4952 switch (code)
4953 {
4954 case MULT_EXPR:
4955 if (!convert_mult_to_widen (stmt, &gsi)
4956 && !convert_expand_mult_copysign (stmt, &gsi)
4957 && convert_mult_to_fma (stmt,
4958 gimple_assign_rhs1 (stmt),
4959 gimple_assign_rhs2 (stmt),
4960 &fma_state))
4961 {
4962 gsi_remove (&gsi, true);
4963 release_defs (stmt);
4964 continue;
4965 }
4966 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4967 break;
4968
4969 case PLUS_EXPR:
4970 case MINUS_EXPR:
4971 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4972 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4973 break;
4974
4975 case BIT_NOT_EXPR:
4976 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4977 continue;
4978 break;
4979
4980 case TRUNC_MOD_EXPR:
4981 convert_to_divmod (as_a<gassign *> (stmt));
4982 break;
4983
4984 case RSHIFT_EXPR:
4985 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4986 break;
4987
4988 default:;
4989 }
4990 }
4991 else if (is_gimple_call (stmt))
4992 {
4993 switch (gimple_call_combined_fn (stmt))
4994 {
4995 CASE_CFN_POW:
4996 if (gimple_call_lhs (stmt)
4997 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4998 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4999 &dconst2)
5000 && convert_mult_to_fma (stmt,
5001 gimple_call_arg (stmt, 0),
5002 gimple_call_arg (stmt, 0),
5003 &fma_state))
5004 {
5005 unlink_stmt_vdef (stmt);
5006 if (gsi_remove (&gsi, true)
5007 && gimple_purge_dead_eh_edges (bb))
5008 *m_cfg_changed_p = true;
5009 release_defs (stmt);
5010 continue;
5011 }
5012 break;
5013
5014 case CFN_COND_MUL:
5015 if (convert_mult_to_fma (stmt,
5016 gimple_call_arg (stmt, 1),
5017 gimple_call_arg (stmt, 2),
5018 &fma_state,
5019 gimple_call_arg (stmt, 0)))
5020
5021 {
5022 gsi_remove (&gsi, true);
5023 release_defs (stmt);
5024 continue;
5025 }
5026 break;
5027
5028 case CFN_LAST:
5029 cancel_fma_deferring (&fma_state);
5030 break;
5031
5032 default:
5033 break;
5034 }
5035 }
5036 else if (gimple_code (stmt) == GIMPLE_COND)
5037 optimize_spaceship (stmt);
5038 gsi_next (&gsi);
5039 }
5040 if (fma_state.m_deferring_p
5041 && fma_state.m_initial_phi)
5042 {
5043 gcc_checking_assert (fma_state.m_last_result);
5044 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5045 &m_last_result_set))
5046 cancel_fma_deferring (&fma_state);
5047 else
5048 m_last_result_set.add (fma_state.m_last_result);
5049 }
5050 }
5051
5052
5053 unsigned int
5054 pass_optimize_widening_mul::execute (function *fun)
5055 {
5056 bool cfg_changed = false;
5057
5058 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5059 calculate_dominance_info (CDI_DOMINATORS);
5060 renumber_gimple_stmt_uids (cfun);
5061
5062 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5063
5064 statistics_counter_event (fun, "widening multiplications inserted",
5065 widen_mul_stats.widen_mults_inserted);
5066 statistics_counter_event (fun, "widening maccs inserted",
5067 widen_mul_stats.maccs_inserted);
5068 statistics_counter_event (fun, "fused multiply-adds inserted",
5069 widen_mul_stats.fmas_inserted);
5070 statistics_counter_event (fun, "divmod calls inserted",
5071 widen_mul_stats.divmod_calls_inserted);
5072 statistics_counter_event (fun, "highpart multiplications inserted",
5073 widen_mul_stats.highpart_mults_inserted);
5074
5075 return cfg_changed ? TODO_cleanup_cfg : 0;
5076 }
5077
5078 } // anon namespace
5079
5080 gimple_opt_pass *
5081 make_pass_optimize_widening_mul (gcc::context *ctxt)
5082 {
5083 return new pass_optimize_widening_mul (ctxt);
5084 }
5085