1 /* Global, SSA-based optimizations using mathematical identities.
2    Copyright (C) 2005-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21    operations.  These are common in sequences such as this one:
22 
23 	modulus = sqrt(x*x + y*y + z*z);
24 	x = x / modulus;
25 	y = y / modulus;
26 	z = z / modulus;
27 
28    that can be optimized to
29 
30 	modulus = sqrt(x*x + y*y + z*z);
31         rmodulus = 1.0 / modulus;
32 	x = x * rmodulus;
33 	y = y * rmodulus;
34 	z = z * rmodulus;
35 
36    We do this for loop invariant divisors, and with this pass whenever
37    we notice that a division has the same divisor multiple times.
38 
39    Of course, like in PRE, we don't insert a division if a dominator
40    already has one.  However, this cannot be done as an extension of
41    PRE for several reasons.
42 
43    First of all, experiments showed that the transformation is not
44    always useful if there are only two divisions by the same divisor.
45    This is probably because modern processors can pipeline the
46    divisions; on older, in-order processors it should still be
47    effective to optimize two divisions by the same number.
48    We make this a param, and it shall be called N in the remainder of
49    this comment.
50 
51    Second, if trapping math is active, we have less freedom in where
52    to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions don't trap, we can instead insert
54    divisions elsewhere, in blocks that are common dominators of those
55    that already have the division.)
56 
57    We really don't want to compute the reciprocal unless a division will
58    be found.  To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
60 
61    The algorithm constructs a subset of the dominator tree, holding the
62    blocks containing the divisions and their common dominators,
63    and walks it twice.  The first walk is in post-order, and it annotates
64    each block with the number of divisions that post-dominate it: this
65    gives information on where divisions can be inserted profitably.
66    The second walk is in pre-order, and it inserts divisions as explained
67    above, and replaces divisions by multiplications.
68 
69    In the best case, the cost of the pass is O(n_statements).  In the
70    worst case, the cost is due to creating the dominator tree subset,
71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
72    for n_statements / n_basic_blocks statements.  So, the amortized cost
73    of creating the dominator tree subset is O(n_basic_blocks) and the
74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 
76    More practically, the cost will be small because there are few
77    divisions, and they tend to be in the same basic block, so insert_bb
78    is called very few times.
79 
80    If we did this using domwalk.cc, an efficient implementation would have
81    to work on all the variables in a single pass, because we could not
82    work on just a subset of the dominator tree, as we do now, and the
83    cost would also be something like O(n_statements * n_basic_blocks).
84    The data structures would be more complex in order to work on all the
85    variables in a single pass.  */
86 
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119 
120 /* This structure represents one basic block that either computes a
121    division, or is a common dominator for basic blocks that compute a
122    division.  */
123 struct occurrence {
124   /* The basic block represented by this structure.  */
125   basic_block bb = basic_block();
126 
127   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128      inserted in BB.  */
129   tree recip_def = tree();
130 
131   /* If non-NULL, the SSA_NAME holding the definition for a squared
132      reciprocal inserted in BB.  */
133   tree square_recip_def = tree();
134 
135   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136      was inserted in BB.  */
137   gimple *recip_def_stmt = nullptr;
138 
139   /* Pointer to a list of "struct occurrence"s for blocks dominated
140      by BB.  */
141   struct occurrence *children = nullptr;
142 
143   /* Pointer to the next "struct occurrence" in the list of blocks
144      sharing a common dominator.  */
145   struct occurrence *next = nullptr;
146 
147   /* The number of divisions that are in BB before compute_merit.  The
148      number of divisions that are in BB or post-dominate it after
149      compute_merit.  */
150   int num_divisions = 0;
151 
152   /* True if the basic block has a division, false if it is a common
153      dominator for basic blocks that do.  If it is false and trapping
154      math is active, BB is not a candidate for inserting a reciprocal.  */
155   bool bb_has_division = false;
156 
157   /* Construct a struct occurrence for basic block BB, whose
158      children list is headed by CHILDREN.  */
159   occurrence (basic_block bb, struct occurrence *children)
160   : bb (bb), children (children)
161   {
162     bb->aux = this;
163   }
164 
165   /* Destroy a struct occurrence and remove it from its basic block.  */
166   ~occurrence ()
167   {
168     bb->aux = nullptr;
169   }
170 
171   /* Allocate memory for a struct occurrence from OCC_POOL.  */
172   static void* operator new (size_t);
173 
174   /* Return memory for a struct occurrence to OCC_POOL.  */
175   static void operator delete (void*, size_t);
176 };
177 
178 static struct
179 {
180   /* Number of 1.0/X ops inserted.  */
181   int rdivs_inserted;
182 
183   /* Number of 1.0/FUNC ops inserted.  */
184   int rfuncs_inserted;
185 } reciprocal_stats;
186 
187 static struct
188 {
189   /* Number of cexpi calls inserted.  */
190   int inserted;
191 
192   /* Number of conversions removed.  */
193   int conv_removed;
194 
195 } sincos_stats;
196 
197 static struct
198 {
199   /* Number of widening multiplication ops inserted.  */
200   int widen_mults_inserted;
201 
202   /* Number of integer multiply-and-accumulate ops inserted.  */
203   int maccs_inserted;
204 
205   /* Number of fp fused multiply-add ops inserted.  */
206   int fmas_inserted;
207 
208   /* Number of divmod calls inserted.  */
209   int divmod_calls_inserted;
210 
211   /* Number of highpart multiplication ops inserted.  */
212   int highpart_mults_inserted;
213 } widen_mul_stats;
214 
215 /* The instance of "struct occurrence" representing the highest
216    interesting block in the dominator tree.  */
217 static struct occurrence *occ_head;
218 
219 /* Allocation pool for getting instances of "struct occurrence".  */
220 static object_allocator<occurrence> *occ_pool;
221 
222 void* occurrence::operator new (size_t n)
223 {
224   gcc_assert (n == sizeof(occurrence));
225   return occ_pool->allocate_raw ();
226 }
227 
228 void occurrence::operator delete (void *occ, size_t n)
229 {
230   gcc_assert (n == sizeof(occurrence));
231   occ_pool->remove_raw (occ);
232 }
233 
234 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
235    list of "struct occurrence"s, one per basic block, having IDOM as
236    their common dominator.
237 
238    We try to insert NEW_OCC as deep as possible in the tree, and we also
239    insert any other block that is a common dominator for BB and one
240    block already in the tree.  */
241 
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 	   struct occurrence **p_head)
245 {
246   struct occurrence *occ, **p_occ;
247 
248   for (p_occ = p_head; (occ = *p_occ) != NULL; )
249     {
250       basic_block bb = new_occ->bb, occ_bb = occ->bb;
251       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252       if (dom == bb)
253 	{
254 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
255 	     from its list.  */
256 	  *p_occ = occ->next;
257 	  occ->next = new_occ->children;
258 	  new_occ->children = occ;
259 
260 	  /* Try the next block (it may also be dominated by BB).  */
261 	}
262 
263       else if (dom == occ_bb)
264 	{
265 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
266 	  insert_bb (new_occ, dom, &occ->children);
267 	  return;
268 	}
269 
270       else if (dom != idom)
271 	{
272 	  gcc_assert (!dom->aux);
273 
274 	  /* There is a dominator between IDOM and BB, add it and make
275 	     two children out of NEW_OCC and OCC.  First, remove OCC from
276 	     its list.	*/
277 	  *p_occ = occ->next;
278 	  new_occ->next = occ;
279 	  occ->next = NULL;
280 
281 	  /* None of the previous blocks has DOM as a dominator: if we tail
282 	     recursed, we would reexamine them uselessly. Just switch BB with
283 	     DOM, and go on looking for blocks dominated by DOM.  */
284 	  new_occ = new occurrence (dom, new_occ);
285 	}
286 
287       else
288 	{
289 	  /* Nothing special, go on with the next element.  */
290 	  p_occ = &occ->next;
291 	}
292     }
293 
294   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
295   new_occ->next = *p_head;
296   *p_head = new_occ;
297 }
298 
299 /* Register that we found a division in BB.
300    IMPORTANCE is a measure of how much weighting to give
301    that division.  Use IMPORTANCE = 2 to register a single
302    division.  If the division is going to be found multiple
303    times use 1 (as it is with squares).  */
304 
305 static inline void
306 register_division_in (basic_block bb, int importance)
307 {
308   struct occurrence *occ;
309 
310   occ = (struct occurrence *) bb->aux;
311   if (!occ)
312     {
313       occ = new occurrence (bb, NULL);
314       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
315     }
316 
317   occ->bb_has_division = true;
318   occ->num_divisions += importance;
319 }
320 
321 
322 /* Compute the number of divisions that postdominate each block in OCC and
323    its children.  */
324 
325 static void
326 compute_merit (struct occurrence *occ)
327 {
328   struct occurrence *occ_child;
329   basic_block dom = occ->bb;
330 
331   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
332     {
333       basic_block bb;
334       if (occ_child->children)
335         compute_merit (occ_child);
336 
337       if (flag_exceptions)
338 	bb = single_noncomplex_succ (dom);
339       else
340 	bb = dom;
341 
342       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343         occ->num_divisions += occ_child->num_divisions;
344     }
345 }
346 
347 
348 /* Return whether USE_STMT is a floating-point division by DEF.  */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
351 {
352   return is_gimple_assign (use_stmt)
353 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 	 && gimple_assign_rhs2 (use_stmt) == def
355 	 /* Do not recognize x / x as valid division, as we are getting
356 	    confused later by replacing all immediate uses x in such
357 	    a stmt.  */
358 	 && gimple_assign_rhs1 (use_stmt) != def
359 	 && !stmt_can_throw_internal (cfun, use_stmt);
360 }
361 
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
365 {
366   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
368     {
369       tree op0 = gimple_assign_rhs1 (use_stmt);
370       tree op1 = gimple_assign_rhs2 (use_stmt);
371 
372       return (op0 == def && op1 == a)
373 	      || (op0 == a && op1 == def);
374     }
375   return false;
376 }
377 
378 /* Return whether USE_STMT is DEF * DEF.  */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
381 {
382   return is_mult_by (use_stmt, def, def);
383 }
384 
385 /* Return whether USE_STMT is a floating-point division by
386    DEF * DEF.  */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
389 {
390   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393       && !stmt_can_throw_internal (cfun, use_stmt))
394     {
395       tree denominator = gimple_assign_rhs2 (use_stmt);
396       if (TREE_CODE (denominator) == SSA_NAME)
397 	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
398     }
399   return false;
400 }
401 
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404    the given basic block.  The field may be left NULL, of course,
405    if it is not possible or profitable to do the optimization.
406 
407    DEF_BSI is an iterator pointing at the statement defining DEF.
408    If RECIP_DEF is set, a dominator already has a computation that can
409    be used.
410 
411    If should_insert_square_recip is set, then this also inserts
412    the square of the reciprocal immediately after the definition
413    of the reciprocal.  */
414 
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 		    tree def, tree recip_def, tree square_recip_def,
418 		    int should_insert_square_recip, int threshold)
419 {
420   tree type;
421   gassign *new_stmt, *new_square_stmt;
422   gimple_stmt_iterator gsi;
423   struct occurrence *occ_child;
424 
425   if (!recip_def
426       && (occ->bb_has_division || !flag_trapping_math)
427       /* Divide by two as all divisions are counted twice in
428 	 the costing loop.  */
429       && occ->num_divisions / 2 >= threshold)
430     {
431       /* Make a variable with the replacement and substitute it.  */
432       type = TREE_TYPE (def);
433       recip_def = create_tmp_reg (type, "reciptmp");
434       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 				      build_one_cst (type), def);
436 
437       if (should_insert_square_recip)
438 	{
439 	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 						 recip_def, recip_def);
442 	}
443 
444       if (occ->bb_has_division)
445 	{
446 	  /* Case 1: insert before an existing division.  */
447 	  gsi = gsi_after_labels (occ->bb);
448 	  while (!gsi_end_p (gsi)
449 		 && (!is_division_by (gsi_stmt (gsi), def))
450 		 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 	    gsi_next (&gsi);
452 
453 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 	  if (should_insert_square_recip)
455 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
456 	}
457       else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
458 	{
459 	  /* Case 2: insert right after the definition.  Note that this will
460 	     never happen if the definition statement can throw, because in
461 	     that case the sole successor of the statement's basic block will
462 	     dominate all the uses as well.  */
463 	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 	  if (should_insert_square_recip)
465 	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
466 	}
467       else
468 	{
469 	  /* Case 3: insert in a basic block not containing defs/uses.  */
470 	  gsi = gsi_after_labels (occ->bb);
471 	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 	  if (should_insert_square_recip)
473 	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
474 	}
475 
476       reciprocal_stats.rdivs_inserted++;
477 
478       occ->recip_def_stmt = new_stmt;
479     }
480 
481   occ->recip_def = recip_def;
482   occ->square_recip_def = square_recip_def;
483   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484     insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 			square_recip_def, should_insert_square_recip,
486 			threshold);
487 }
488 
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490    Take as argument the use for (x * x).  */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
493 {
494   gimple *use_stmt = USE_STMT (use_p);
495   basic_block bb = gimple_bb (use_stmt);
496   struct occurrence *occ = (struct occurrence *) bb->aux;
497 
498   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499       && occ->recip_def)
500     {
501       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504       SET_USE (use_p, occ->square_recip_def);
505       fold_stmt_inplace (&gsi);
506       update_stmt (use_stmt);
507     }
508 }
509 
510 
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512    possible.  */
513 
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
516 {
517   gimple *use_stmt = USE_STMT (use_p);
518   basic_block bb = gimple_bb (use_stmt);
519   struct occurrence *occ = (struct occurrence *) bb->aux;
520 
521   if (optimize_bb_for_speed_p (bb)
522       && occ->recip_def && use_stmt != occ->recip_def_stmt)
523     {
524       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526       SET_USE (use_p, occ->recip_def);
527       fold_stmt_inplace (&gsi);
528       update_stmt (use_stmt);
529     }
530 }
531 
532 
533 /* Free OCC and return one more "struct occurrence" to be freed.  */
534 
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
537 {
538   struct occurrence *child, *next;
539 
540   /* First get the two pointers hanging off OCC.  */
541   next = occ->next;
542   child = occ->children;
543   delete occ;
544 
545   /* Now ensure that we don't recurse unless it is necessary.  */
546   if (!child)
547     return next;
548   else
549     {
550       while (next)
551 	next = free_bb (next);
552 
553       return child;
554     }
555 }
556 
557 /* Transform sequences like
558    t = sqrt (a)
559    x = 1.0 / t;
560    r1 = x * x;
561    r2 = a * x;
562    into:
563    t = sqrt (a)
564    r1 = 1.0 / a;
565    r2 = t;
566    x = r1 * r2;
567    depending on the uses of x, r1, r2.  This removes one multiplication and
568    allows the sqrt and division operations to execute in parallel.
569    DEF_GSI is the gsi of the initial division by sqrt that defines
570    DEF (x in the example above).  */
571 
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
574 {
575   gimple *use_stmt;
576   imm_use_iterator use_iter;
577   gimple *stmt = gsi_stmt (*def_gsi);
578   tree x = def;
579   tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580   tree div_rhs1 = gimple_assign_rhs1 (stmt);
581 
582   if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583       || TREE_CODE (div_rhs1) != REAL_CST
584       || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585     return;
586 
587   gcall *sqrt_stmt
588     = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
589 
590   if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591     return;
592 
593   switch (gimple_call_combined_fn (sqrt_stmt))
594     {
595     CASE_CFN_SQRT:
596     CASE_CFN_SQRT_FN:
597       break;
598 
599     default:
600       return;
601     }
602   tree a = gimple_call_arg (sqrt_stmt, 0);
603 
604   /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
605 
606   /* Statements that use x in x * x.  */
607   auto_vec<gimple *> sqr_stmts;
608   /* Statements that use x in a * x.  */
609   auto_vec<gimple *> mult_stmts;
610   bool has_other_use = false;
611   bool mult_on_main_path = false;
612 
613   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
614     {
615       if (is_gimple_debug (use_stmt))
616 	continue;
617       if (is_square_of (use_stmt, x))
618 	{
619 	  sqr_stmts.safe_push (use_stmt);
620 	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 	    mult_on_main_path = true;
622 	}
623       else if (is_mult_by (use_stmt, x, a))
624 	{
625 	  mult_stmts.safe_push (use_stmt);
626 	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 	    mult_on_main_path = true;
628 	}
629       else
630 	has_other_use = true;
631     }
632 
633   /* In the x * x and a * x cases we just rewire stmt operands or
634      remove multiplications.  In the has_other_use case we introduce
635      a multiplication so make sure we don't introduce a multiplication
636      on a path where there was none.  */
637   if (has_other_use && !mult_on_main_path)
638     return;
639 
640   if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641     return;
642 
643   /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644      to be able to compose it from the sqr and mult cases.  */
645   if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646     return;
647 
648   if (dump_file)
649     {
650       fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651       print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652       print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653       fprintf (dump_file, "\n");
654     }
655 
656   bool delete_div = !has_other_use;
657   tree sqr_ssa_name = NULL_TREE;
658   if (!sqr_stmts.is_empty ())
659     {
660       /* r1 = x * x.  Transform the original
661 	 x = 1.0 / t
662 	 into
663 	 tmp1 = 1.0 / a
664 	 r1 = tmp1.  */
665 
666       sqr_ssa_name
667 	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
668 
669       if (dump_file)
670 	{
671 	  fprintf (dump_file, "Replacing original division\n");
672 	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 	  fprintf (dump_file, "with new division\n");
674 	}
675       stmt
676 	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 			       gimple_assign_rhs1 (stmt), a);
678       gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679       gsi_remove (def_gsi, true);
680       *def_gsi = gsi_for_stmt (stmt);
681       fold_stmt_inplace (def_gsi);
682       update_stmt (stmt);
683 
684       if (dump_file)
685 	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
686 
687       delete_div = false;
688       gimple *sqr_stmt;
689       unsigned int i;
690       FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
691 	{
692 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 	  update_stmt (sqr_stmt);
695 	}
696     }
697   if (!mult_stmts.is_empty ())
698     {
699       /* r2 = a * x.  Transform this into:
700 	 r2 = t (The original sqrt (a)).  */
701       unsigned int i;
702       gimple *mult_stmt = NULL;
703       FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
704 	{
705 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
706 
707 	  if (dump_file)
708 	    {
709 	      fprintf (dump_file, "Replacing squaring multiplication\n");
710 	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 	      fprintf (dump_file, "with assignment\n");
712 	    }
713 	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 	  fold_stmt_inplace (&gsi2);
715 	  update_stmt (mult_stmt);
716 	  if (dump_file)
717 	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
718       }
719     }
720 
721   if (has_other_use)
722     {
723       /* Using the two temporaries tmp1, tmp2 from above
724 	 the original x is now:
725 	 x = tmp1 * tmp2.  */
726       gcc_assert (orig_sqrt_ssa_name);
727       gcc_assert (sqr_ssa_name);
728 
729       gimple *new_stmt
730 	= gimple_build_assign (x, MULT_EXPR,
731 			       orig_sqrt_ssa_name, sqr_ssa_name);
732       gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733       update_stmt (stmt);
734     }
735   else if (delete_div)
736     {
737       /* Remove the original division.  */
738       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739       gsi_remove (&gsi2, true);
740       release_defs (stmt);
741     }
742   else
743     release_ssa_name (x);
744 }
745 
746 /* Look for floating-point divisions among DEF's uses, and try to
747    replace them by multiplications with the reciprocal.  Add
748    as many statements computing the reciprocal as needed.
749 
750    DEF must be a GIMPLE register of a floating-point type.  */
751 
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
754 {
755   use_operand_p use_p, square_use_p;
756   imm_use_iterator use_iter, square_use_iter;
757   tree square_def;
758   struct occurrence *occ;
759   int count = 0;
760   int threshold;
761   int square_recip_count = 0;
762   int sqrt_recip_count = 0;
763 
764   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
766 
767   /* If DEF is a square (x * x), count the number of divisions by x.
768      If there are more divisions by x than by (DEF * DEF), prefer to optimize
769      the reciprocal of x instead of DEF.  This improves cases like:
770        def = x * x
771        t0 = a / def
772        t1 = b / def
773        t2 = c / x
774      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
775   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
776 
777   if (is_gimple_assign (def_stmt)
778       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
781     {
782       tree op0 = gimple_assign_rhs1 (def_stmt);
783 
784       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
785 	{
786 	  gimple *use_stmt = USE_STMT (use_p);
787 	  if (is_division_by (use_stmt, op0))
788 	    sqrt_recip_count++;
789 	}
790     }
791 
792   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
793     {
794       gimple *use_stmt = USE_STMT (use_p);
795       if (is_division_by (use_stmt, def))
796 	{
797 	  register_division_in (gimple_bb (use_stmt), 2);
798 	  count++;
799 	}
800 
801       if (is_square_of (use_stmt, def))
802 	{
803 	  square_def = gimple_assign_lhs (use_stmt);
804 	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
805 	    {
806 	      gimple *square_use_stmt = USE_STMT (square_use_p);
807 	      if (is_division_by (square_use_stmt, square_def))
808 		{
809 		  /* This is executed twice for each division by a square.  */
810 		  register_division_in (gimple_bb (square_use_stmt), 1);
811 		  square_recip_count++;
812 		}
813 	    }
814 	}
815     }
816 
817   /* Square reciprocals were counted twice above.  */
818   square_recip_count /= 2;
819 
820   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
821   if (sqrt_recip_count > square_recip_count)
822     goto out;
823 
824   /* Do the expensive part only if we can hope to optimize something.  */
825   if (count + square_recip_count >= threshold && count >= 1)
826     {
827       gimple *use_stmt;
828       for (occ = occ_head; occ; occ = occ->next)
829 	{
830 	  compute_merit (occ);
831 	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 			      square_recip_count, threshold);
833 	}
834 
835       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
836 	{
837 	  if (is_division_by (use_stmt, def))
838 	    {
839 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 		replace_reciprocal (use_p);
841 	    }
842 	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
843 	    {
844 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
845 		{
846 		  /* Find all uses of the square that are divisions and
847 		   * replace them by multiplications with the inverse.  */
848 		  imm_use_iterator square_iterator;
849 		  gimple *powmult_use_stmt = USE_STMT (use_p);
850 		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
851 
852 		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 					 square_iterator, powmult_def_name)
854 		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
855 		      {
856 			gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 			if (is_division_by (powmult_use_stmt, powmult_def_name))
858 			  replace_reciprocal_squares (square_use_p);
859 		      }
860 		}
861 	    }
862 	}
863     }
864 
865 out:
866   for (occ = occ_head; occ; )
867     occ = free_bb (occ);
868 
869   occ_head = NULL;
870 }
871 
872 /* Return an internal function that implements the reciprocal of CALL,
873    or IFN_LAST if there is no such function that the target supports.  */
874 
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
877 {
878   internal_fn ifn;
879 
880   switch (gimple_call_combined_fn (call))
881     {
882     CASE_CFN_SQRT:
883     CASE_CFN_SQRT_FN:
884       ifn = IFN_RSQRT;
885       break;
886 
887     default:
888       return IFN_LAST;
889     }
890 
891   tree_pair types = direct_internal_fn_types (ifn, call);
892   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893     return IFN_LAST;
894 
895   return ifn;
896 }
897 
898 /* Go through all the floating-point SSA_NAMEs, and call
899    execute_cse_reciprocals_1 on each of them.  */
900 namespace {
901 
902 const pass_data pass_data_cse_reciprocals =
903 {
904   GIMPLE_PASS, /* type */
905   "recip", /* name */
906   OPTGROUP_NONE, /* optinfo_flags */
907   TV_TREE_RECIP, /* tv_id */
908   PROP_ssa, /* properties_required */
909   0, /* properties_provided */
910   0, /* properties_destroyed */
911   0, /* todo_flags_start */
912   TODO_update_ssa, /* todo_flags_finish */
913 };
914 
915 class pass_cse_reciprocals : public gimple_opt_pass
916 {
917 public:
918   pass_cse_reciprocals (gcc::context *ctxt)
919     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
920   {}
921 
922   /* opt_pass methods: */
923   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924   virtual unsigned int execute (function *);
925 
926 }; // class pass_cse_reciprocals
927 
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
930 {
931   basic_block bb;
932   tree arg;
933 
934   occ_pool = new object_allocator<occurrence> ("dominators for recip");
935 
936   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937   calculate_dominance_info (CDI_DOMINATORS);
938   calculate_dominance_info (CDI_POST_DOMINATORS);
939 
940   if (flag_checking)
941     FOR_EACH_BB_FN (bb, fun)
942       gcc_assert (!bb->aux);
943 
944   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945     if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 	&& is_gimple_reg (arg))
947       {
948 	tree name = ssa_default_def (fun, arg);
949 	if (name)
950 	  execute_cse_reciprocals_1 (NULL, name);
951       }
952 
953   FOR_EACH_BB_FN (bb, fun)
954     {
955       tree def;
956 
957       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 	   gsi_next (&gsi))
959 	{
960 	  gphi *phi = gsi.phi ();
961 	  def = PHI_RESULT (phi);
962 	  if (! virtual_operand_p (def)
963 	      && FLOAT_TYPE_P (TREE_TYPE (def)))
964 	    execute_cse_reciprocals_1 (NULL, def);
965 	}
966 
967       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 	   gsi_next (&gsi))
969         {
970 	  gimple *stmt = gsi_stmt (gsi);
971 
972 	  if (gimple_has_lhs (stmt)
973 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 	      && FLOAT_TYPE_P (TREE_TYPE (def))
975 	      && TREE_CODE (def) == SSA_NAME)
976 	    {
977 	      execute_cse_reciprocals_1 (&gsi, def);
978 	      stmt = gsi_stmt (gsi);
979 	      if (flag_unsafe_math_optimizations
980 		  && is_gimple_assign (stmt)
981 		  && gimple_assign_lhs (stmt) == def
982 		  && !stmt_can_throw_internal (cfun, stmt)
983 		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 		optimize_recip_sqrt (&gsi, def);
985 	    }
986 	}
987 
988       if (optimize_bb_for_size_p (bb))
989         continue;
990 
991       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
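      /* For instance (an illustrative sketch), on a target that provides
         IFN_RSQRT this turns  t = sqrt (b); x = a / t  into
         t = rsqrt (b); x = a * t.  */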
992       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 	   gsi_next (&gsi))
994         {
995 	  gimple *stmt = gsi_stmt (gsi);
996 
997 	  if (is_gimple_assign (stmt)
998 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
999 	    {
1000 	      tree arg1 = gimple_assign_rhs2 (stmt);
1001 	      gimple *stmt1;
1002 
1003 	      if (TREE_CODE (arg1) != SSA_NAME)
1004 		continue;
1005 
1006 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
1007 
1008 	      if (is_gimple_call (stmt1)
1009 		  && gimple_call_lhs (stmt1))
1010 		{
1011 		  bool fail;
1012 		  imm_use_iterator ui;
1013 		  use_operand_p use_p;
1014 		  tree fndecl = NULL_TREE;
1015 
1016 		  gcall *call = as_a <gcall *> (stmt1);
1017 		  internal_fn ifn = internal_fn_reciprocal (call);
1018 		  if (ifn == IFN_LAST)
1019 		    {
1020 		      fndecl = gimple_call_fndecl (call);
1021 		      if (!fndecl
1022 			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 			continue;
1024 		      fndecl = targetm.builtin_reciprocal (fndecl);
1025 		      if (!fndecl)
1026 			continue;
1027 		    }
1028 
1029 		  /* Check that all uses of the SSA name are divisions,
1030 		     otherwise replacing the defining statement will do
1031 		     the wrong thing.  */
1032 		  fail = false;
1033 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1034 		    {
1035 		      gimple *stmt2 = USE_STMT (use_p);
1036 		      if (is_gimple_debug (stmt2))
1037 			continue;
1038 		      if (!is_gimple_assign (stmt2)
1039 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 			  || gimple_assign_rhs1 (stmt2) == arg1
1041 			  || gimple_assign_rhs2 (stmt2) != arg1)
1042 			{
1043 			  fail = true;
1044 			  break;
1045 			}
1046 		    }
1047 		  if (fail)
1048 		    continue;
1049 
1050 		  gimple_replace_ssa_lhs (call, arg1);
1051 		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1052 		    {
1053 		      auto_vec<tree, 4> args;
1054 		      for (unsigned int i = 0;
1055 			   i < gimple_call_num_args (call); i++)
1056 			args.safe_push (gimple_call_arg (call, i));
1057 		      gcall *stmt2;
1058 		      if (ifn == IFN_LAST)
1059 			stmt2 = gimple_build_call_vec (fndecl, args);
1060 		      else
1061 			stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 		      gimple_call_set_lhs (stmt2, arg1);
1063 		      gimple_move_vops (stmt2, call);
1064 		      gimple_call_set_nothrow (stmt2,
1065 					       gimple_call_nothrow_p (call));
1066 		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 		      gsi_replace (&gsi2, stmt2, true);
1068 		    }
1069 		  else
1070 		    {
1071 		      if (ifn == IFN_LAST)
1072 			gimple_call_set_fndecl (call, fndecl);
1073 		      else
1074 			gimple_call_set_internal_fn (call, ifn);
1075 		      update_stmt (call);
1076 		    }
1077 		  reciprocal_stats.rfuncs_inserted++;
1078 
1079 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1080 		    {
1081 		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 		      fold_stmt_inplace (&gsi);
1084 		      update_stmt (stmt);
1085 		    }
1086 		}
1087 	    }
1088 	}
1089     }
1090 
1091   statistics_counter_event (fun, "reciprocal divs inserted",
1092 			    reciprocal_stats.rdivs_inserted);
1093   statistics_counter_event (fun, "reciprocal functions inserted",
1094 			    reciprocal_stats.rfuncs_inserted);
1095 
1096   free_dominance_info (CDI_DOMINATORS);
1097   free_dominance_info (CDI_POST_DOMINATORS);
1098   delete occ_pool;
1099   return 0;
1100 }
1101 
1102 } // anon namespace
1103 
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1106 {
1107   return new pass_cse_reciprocals (ctxt);
1108 }
1109 
1110 /* If NAME is the result of a type conversion, look for other
1111    equivalent dominating or dominated conversions, and replace all
1112    uses with the earliest dominating name, removing the redundant
1113    conversions.  Return the prevailing name.  */
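/* For instance (an illustrative sketch, SSA names invented), given two
   equivalent casts of the same source

	a_1 = (float) s_2;
	...
	b_3 = (float) s_2;

   where the definition of a_1 dominates that of b_3, every use of b_3 is
   replaced by a_1 and the dominated conversion is removed.  */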
1114 
1115 static tree
1116 execute_cse_conv_1 (tree name, bool *cfg_changed)
1117 {
1118   if (SSA_NAME_IS_DEFAULT_DEF (name)
1119       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120     return name;
1121 
1122   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1123 
1124   if (!gimple_assign_cast_p (def_stmt))
1125     return name;
1126 
1127   tree src = gimple_assign_rhs1 (def_stmt);
1128 
1129   if (TREE_CODE (src) != SSA_NAME)
1130     return name;
1131 
1132   imm_use_iterator use_iter;
1133   gimple *use_stmt;
1134 
1135   /* Find the earliest dominating def.    */
1136   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1137     {
1138       if (use_stmt == def_stmt
1139 	  || !gimple_assign_cast_p (use_stmt))
1140 	continue;
1141 
1142       tree lhs = gimple_assign_lhs (use_stmt);
1143 
1144       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 	  || (gimple_assign_rhs1 (use_stmt)
1146 	      != gimple_assign_rhs1 (def_stmt))
1147 	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 	continue;
1149 
1150       bool use_dominates;
1151       if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1152 	{
1153 	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 	    gsi_next (&gsi);
1156 	  use_dominates = !gsi_end_p (gsi);
1157 	}
1158       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 			       gimple_bb (def_stmt)))
1160 	use_dominates = false;
1161       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 			       gimple_bb (use_stmt)))
1163 	use_dominates = true;
1164       else
1165 	continue;
1166 
1167       if (use_dominates)
1168 	{
1169 	  std::swap (name, lhs);
1170 	  std::swap (def_stmt, use_stmt);
1171 	}
1172     }
1173 
1174   /* Now go through all uses of SRC again, replacing the equivalent
1175      dominated conversions.  We may replace defs that were not
1176      dominated by the then-prevailing defs when we first visited
1177      them.  */
1178   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1179     {
1180       if (use_stmt == def_stmt
1181 	  || !gimple_assign_cast_p (use_stmt))
1182 	continue;
1183 
1184       tree lhs = gimple_assign_lhs (use_stmt);
1185 
1186       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 	  || (gimple_assign_rhs1 (use_stmt)
1188 	      != gimple_assign_rhs1 (def_stmt))
1189 	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 	continue;
1191 
1192       basic_block use_bb = gimple_bb (use_stmt);
1193       if (gimple_bb (def_stmt) == use_bb
1194 	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1195 	{
1196 	  sincos_stats.conv_removed++;
1197 
1198 	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 	  replace_uses_by (lhs, name);
1200 	  if (gsi_remove (&gsi, true)
1201 	      && gimple_purge_dead_eh_edges (use_bb))
1202 	    *cfg_changed = true;
1203 	  release_defs (use_stmt);
1204 	}
1205     }
1206 
1207   return name;
1208 }
1209 
1210 /* Records an occurrence of statement USE_STMT in the vector STMTS if its
1211    basic block is dominated by *TOP_BB, dominates it, or *TOP_BB is not
1212    yet initialized.  Returns true if the occurrence was pushed on
1213    the vector.  Adjusts *TOP_BB to be the basic block dominating all
1214    statements in the vector.  */
1215 
1216 static bool
1217 maybe_record_sincos (vec<gimple *> *stmts,
1218 		     basic_block *top_bb, gimple *use_stmt)
1219 {
1220   basic_block use_bb = gimple_bb (use_stmt);
1221   if (*top_bb
1222       && (*top_bb == use_bb
1223 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1224     stmts->safe_push (use_stmt);
1225   else if (!*top_bb
1226 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1227     {
1228       stmts->safe_push (use_stmt);
1229       *top_bb = use_bb;
1230     }
1231   else
1232     return false;
1233 
1234   return true;
1235 }
1236 
1237 /* Look for sin, cos and cexpi calls with the same argument NAME and
1238    create a single call to cexpi CSEing the result in this case.
1239    We first walk over all immediate uses of the argument collecting
1240    statements that we can CSE in a vector and in a second pass replace
1241    the statement rhs with a REALPART or IMAGPART expression on the
1242    result of the cexpi call we insert before the use statement that
1243    dominates all other candidates.  */
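/* For instance (an illustrative sketch, SSA names invented), for

	c_1 = cos (x_2);
	s_3 = sin (x_2);

   a single cexpi call is inserted and the old calls become component
   accesses of its result:

	t_4 = cexpi (x_2);
	c_1 = REALPART_EXPR <t_4>;
	s_3 = IMAGPART_EXPR <t_4>;  */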
1244 
1245 static bool
1246 execute_cse_sincos_1 (tree name)
1247 {
1248   gimple_stmt_iterator gsi;
1249   imm_use_iterator use_iter;
1250   tree fndecl, res, type = NULL_TREE;
1251   gimple *def_stmt, *use_stmt, *stmt;
1252   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1253   auto_vec<gimple *> stmts;
1254   basic_block top_bb = NULL;
1255   int i;
1256   bool cfg_changed = false;
1257 
1258   name = execute_cse_conv_1 (name, &cfg_changed);
1259 
1260   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1261     {
1262       if (gimple_code (use_stmt) != GIMPLE_CALL
1263 	  || !gimple_call_lhs (use_stmt))
1264 	continue;
1265 
1266       switch (gimple_call_combined_fn (use_stmt))
1267 	{
1268 	CASE_CFN_COS:
1269 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1270 	  break;
1271 
1272 	CASE_CFN_SIN:
1273 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 	  break;
1275 
1276 	CASE_CFN_CEXPI:
1277 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 	  break;
1279 
1280 	default:;
1281 	  continue;
1282 	}
1283 
1284       tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1285       if (!type)
1286 	{
1287 	  type = t;
1288 	  t = TREE_TYPE (name);
1289 	}
1290       /* This checks that NAME has the right type in the first round,
1291 	 and, in subsequent rounds, that the built_in type is the same
1292 	 type, or a compatible type.  */
1293       if (type != t && !types_compatible_p (type, t))
1294 	return false;
1295     }
1296   if (seen_cos + seen_sin + seen_cexpi <= 1)
1297     return false;
1298 
1299   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300      the name def statement.  */
1301   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1302   if (!fndecl)
1303     return false;
1304   stmt = gimple_build_call (fndecl, 1, name);
1305   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1306   gimple_call_set_lhs (stmt, res);
1307 
1308   def_stmt = SSA_NAME_DEF_STMT (name);
1309   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1310       && gimple_code (def_stmt) != GIMPLE_PHI
1311       && gimple_bb (def_stmt) == top_bb)
1312     {
1313       gsi = gsi_for_stmt (def_stmt);
1314       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1315     }
1316   else
1317     {
1318       gsi = gsi_after_labels (top_bb);
1319       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1320     }
1321   sincos_stats.inserted++;
1322 
1323   /* And adjust the recorded old call sites.  */
1324   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1325     {
1326       tree rhs = NULL;
1327 
1328       switch (gimple_call_combined_fn (use_stmt))
1329 	{
1330 	CASE_CFN_COS:
1331 	  rhs = fold_build1 (REALPART_EXPR, type, res);
1332 	  break;
1333 
1334 	CASE_CFN_SIN:
1335 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
1336 	  break;
1337 
1338 	CASE_CFN_CEXPI:
1339 	  rhs = res;
1340 	  break;
1341 
1342 	default:;
1343 	  gcc_unreachable ();
1344 	}
1345 
1346 	/* Replace call with a copy.  */
1347 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1348 
1349 	gsi = gsi_for_stmt (use_stmt);
1350 	gsi_replace (&gsi, stmt, true);
1351 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1352 	  cfg_changed = true;
1353     }
1354 
1355   return cfg_changed;
1356 }
1357 
1358 /* To evaluate powi(x,n), the floating point value x raised to the
1359    constant integer exponent n, we use a hybrid algorithm that
1360    combines the "window method" with look-up tables.  For an
1361    introduction to exponentiation algorithms and "addition chains",
1362    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1363    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1364    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1365    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1366 
1367 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1368    multiplications to inline before calling the system library's pow
1369    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1370    so this default never requires calling pow, powf or powl.  */
1371 
1372 #ifndef POWI_MAX_MULTS
1373 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1374 #endif
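/* With the usual 64-bit HOST_WIDE_INT this default works out to
   2*64 - 2 = 126 multiplications.  */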
1375 
1376 /* The size of the "optimal power tree" lookup table.  All
1377    exponents less than this value are simply looked up in the
1378    powi_table below.  This threshold is also used to size the
1379    cache of pseudo registers that hold intermediate results.  */
1380 #define POWI_TABLE_SIZE 256
1381 
1382 /* The size, in bits of the window, used in the "window method"
1383 /* The size, in bits, of the window used in the "window method"
1384    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1385 #define POWI_WINDOW_SIZE 3
1386 
1387 /* The following table is an efficient representation of an
1388    "optimal power tree".  For each value, i, the corresponding
1389    value, j, in the table states that an optimal evaluation
1390    sequence for calculating pow(x,i) can be found by evaluating
1391    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1392    100 integers is given in Knuth's "Seminumerical algorithms".  */
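/* For example, powi_table[7] is 4, so pow (x, 7) is evaluated as
   pow (x, 4) * pow (x, 3); reusing the shared x*x this takes four
   multiplications in total (a sketch):

	t2 = x * x
	t3 = t2 * x
	t4 = t2 * t2
	t7 = t3 * t4  */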
1393 
1394 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1395   {
1396       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1397       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1398       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1399      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1400      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1401      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1402      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1403      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1404      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1405      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1406      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1407      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1408      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1409      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1410      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1411      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1412      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1413      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1414      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1415      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1416      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1417      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1418      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1419      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1420      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1421     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1422     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1423     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1424     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1425     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1426     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1427     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1428   };
1429 
1430 
1431 /* Return the number of multiplications required to calculate
1432    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1433    subroutine of powi_cost.  CACHE is an array indicating
1434    which exponents have already been calculated.  */
1435 
1436 static int
1437 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1438 {
1439   /* If we've already calculated this exponent, then this evaluation
1440      doesn't require any additional multiplications.  */
1441   if (cache[n])
1442     return 0;
1443 
1444   cache[n] = true;
1445   return powi_lookup_cost (n - powi_table[n], cache)
1446 	 + powi_lookup_cost (powi_table[n], cache) + 1;
1447 }
1448 
1449 /* Return the number of multiplications required to calculate
1450    powi(x,n) for an arbitrary x, given the exponent N.  This
1451    function needs to be kept in sync with powi_as_mults below.  */
1452 
1453 static int
1454 powi_cost (HOST_WIDE_INT n)
1455 {
1456   bool cache[POWI_TABLE_SIZE];
1457   unsigned HOST_WIDE_INT digit;
1458   unsigned HOST_WIDE_INT val;
1459   int result;
1460 
1461   if (n == 0)
1462     return 0;
1463 
1464   /* Ignore the reciprocal when calculating the cost.  */
1465   val = absu_hwi (n);
1466 
1467   /* Initialize the exponent cache.  */
1468   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1469   cache[1] = true;
1470 
1471   result = 0;
1472 
1473   while (val >= POWI_TABLE_SIZE)
1474     {
1475       if (val & 1)
1476 	{
1477 	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1478 	  result += powi_lookup_cost (digit, cache)
1479 		    + POWI_WINDOW_SIZE + 1;
1480 	  val >>= POWI_WINDOW_SIZE;
1481 	}
1482       else
1483 	{
1484 	  val >>= 1;
1485 	  result++;
1486 	}
1487     }
1488 
1489   return result + powi_lookup_cost (val, cache);
1490 }
1491 
1492 /* Recursive subroutine of powi_as_mults.  This function takes the
1493    array, CACHE, of already calculated exponents and an exponent N and
1494    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1495 
1496 static tree
1497 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1498 		 unsigned HOST_WIDE_INT n, tree *cache)
1499 {
1500   tree op0, op1, ssa_target;
1501   unsigned HOST_WIDE_INT digit;
1502   gassign *mult_stmt;
1503 
1504   if (n < POWI_TABLE_SIZE && cache[n])
1505     return cache[n];
1506 
1507   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1508 
1509   if (n < POWI_TABLE_SIZE)
1510     {
1511       cache[n] = ssa_target;
1512       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1513       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1514     }
1515   else if (n & 1)
1516     {
1517       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1518       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1519       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1520     }
1521   else
1522     {
1523       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1524       op1 = op0;
1525     }
1526 
1527   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1528   gimple_set_location (mult_stmt, loc);
1529   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1530 
1531   return ssa_target;
1532 }
1533 
1534 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1535    This function needs to be kept in sync with powi_cost above.  */
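/* For instance (a sketch), powi (x, -3) expands into the multiplications
   for x**3 followed by one reciprocal:

	t2 = x * x
	t3 = t2 * x
	r  = 1.0 / t3  */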
1536 
1537 tree
1538 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1539 	       tree arg0, HOST_WIDE_INT n)
1540 {
1541   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1542   gassign *div_stmt;
1543   tree target;
1544 
1545   if (n == 0)
1546     return build_one_cst (type);
1547 
1548   memset (cache, 0, sizeof (cache));
1549   cache[1] = arg0;
1550 
1551   result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1552   if (n >= 0)
1553     return result;
1554 
1555   /* If the original exponent was negative, reciprocate the result.  */
1556   target = make_temp_ssa_name (type, NULL, "powmult");
1557   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1558 				  build_real (type, dconst1), result);
1559   gimple_set_location (div_stmt, loc);
1560   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1561 
1562   return target;
1563 }
1564 
1565 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1566    location info LOC.  If the arguments are appropriate, create an
1567    equivalent sequence of statements prior to GSI using an optimal
1568    number of multiplications, and return an expression holding the
1569    result.  */
1570 
1571 static tree
1572 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1573 			    tree arg0, HOST_WIDE_INT n)
1574 {
1575   if ((n >= -1 && n <= 2)
1576       || (optimize_function_for_speed_p (cfun)
1577 	  && powi_cost (n) <= POWI_MAX_MULTS))
1578     return powi_as_mults (gsi, loc, arg0, n);
1579 
1580   return NULL_TREE;
1581 }
1582 
1583 /* Build a gimple call statement that calls FN with argument ARG.
1584    Set the lhs of the call statement to a fresh SSA name.  Insert the
1585    statement prior to GSI's current position, and return the fresh
1586    SSA name.  */
1587 
1588 static tree
1589 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1590 		       tree fn, tree arg)
1591 {
1592   gcall *call_stmt;
1593   tree ssa_target;
1594 
1595   call_stmt = gimple_build_call (fn, 1, arg);
1596   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1597   gimple_set_lhs (call_stmt, ssa_target);
1598   gimple_set_location (call_stmt, loc);
1599   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600 
1601   return ssa_target;
1602 }
1603 
1604 /* Build a gimple binary operation with the given CODE and arguments
1605    ARG0, ARG1, assigning the result to a new SSA name for variable
1606    TARGET.  Insert the statement prior to GSI's current position, and
1607    return the fresh SSA name.  */
1608 
1609 static tree
1610 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1611 			const char *name, enum tree_code code,
1612 			tree arg0, tree arg1)
1613 {
1614   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1615   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1616   gimple_set_location (stmt, loc);
1617   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1618   return result;
1619 }
1620 
1621 /* Build a gimple reference operation with the given CODE and argument
1622    ARG, assigning the result to a new SSA name of TYPE with NAME.
1623    Insert the statement prior to GSI's current position, and return
1624    the fresh SSA name.  */
1625 
1626 static inline tree
1627 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1628 		      const char *name, enum tree_code code, tree arg0)
1629 {
1630   tree result = make_temp_ssa_name (type, NULL, name);
1631   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1632   gimple_set_location (stmt, loc);
1633   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634   return result;
1635 }
1636 
1637 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1638    prior to GSI's current position, and return the fresh SSA name.  */
1639 
1640 static tree
1641 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642 		       tree type, tree val)
1643 {
1644   tree result = make_ssa_name (type);
1645   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1646   gimple_set_location (stmt, loc);
1647   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1648   return result;
1649 }
1650 
1651 struct pow_synth_sqrt_info
1652 {
1653   bool *factors;
1654   unsigned int deepest;
1655   unsigned int num_mults;
1656 };
1657 
1658 /* Return true iff the real value C can be represented as a
1659    sum of powers of 0.5 up to N.  That is:
1660    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1661    Record in INFO the various parameters of the synthesis algorithm such
1662    as the factors a[i], the maximum 0.5 power and the number of
1663    multiplications that will be required.  */
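/* For instance, for C == 0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3 and
   N >= 3 this returns true with a[1] == 1, a[2] == 0, a[3] == 1,
   deepest == 3 and num_mults == 1: pow (x, 0.625) can then be
   synthesized as sqrt (x) * sqrt (sqrt (sqrt (x))), using one
   multiplication beyond the square roots themselves.  */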
1664 
1665 bool
1666 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1667 				 struct pow_synth_sqrt_info *info)
1668 {
1669   REAL_VALUE_TYPE factor = dconsthalf;
1670   REAL_VALUE_TYPE remainder = c;
1671 
1672   info->deepest = 0;
1673   info->num_mults = 0;
1674   memset (info->factors, 0, n * sizeof (bool));
1675 
1676   for (unsigned i = 0; i < n; i++)
1677     {
1678       REAL_VALUE_TYPE res;
1679 
1680       /* If something inexact happened, bail out now.  */
1681       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1682 	return false;
1683 
1684       /* We have hit zero.  The number is representable as a sum
1685          of powers of 0.5.  */
1686       if (real_equal (&res, &dconst0))
1687 	{
1688 	  info->factors[i] = true;
1689 	  info->deepest = i + 1;
1690 	  return true;
1691 	}
1692       else if (!REAL_VALUE_NEGATIVE (res))
1693 	{
1694 	  remainder = res;
1695 	  info->factors[i] = true;
1696 	  info->num_mults++;
1697 	}
1698       else
1699 	info->factors[i] = false;
1700 
1701       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702     }
1703   return false;
1704 }
1705 
1706 /* Return the tree corresponding to FN being applied
1707    to ARG N times at GSI and LOC.
1708    Look up previous results from CACHE if need be.
1709    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
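/* For example, get_fn_chain (x, 3, gsi, sqrtfn, loc, cache) emits (or
   reuses from CACHE) calls computing sqrt (x), sqrt (sqrt (x)) and
   sqrt (sqrt (sqrt (x))), caching each intermediate result, and returns
   the SSA name holding sqrt (sqrt (sqrt (x))).  */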
1710 
1711 static tree
1712 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1713 	      tree fn, location_t loc, tree *cache)
1714 {
1715   tree res = cache[n];
1716   if (!res)
1717     {
1718       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1719       res = build_and_insert_call (gsi, loc, fn, prev);
1720       cache[n] = res;
1721     }
1722 
1723   return res;
1724 }
1725 
1726 /* Print to STREAM the repeated application of function FNAME to ARG
1727    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1728    "foo (foo (x))".  */
1729 
1730 static void
1731 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1732 		 unsigned int n)
1733 {
1734   if (n == 0)
1735     fprintf (stream, "%s", arg);
1736   else
1737     {
1738       fprintf (stream, "%s (", fname);
1739       print_nested_fn (stream, fname, arg, n - 1);
1740       fprintf (stream, ")");
1741     }
1742 }
1743 
1744 /* Print to STREAM the fractional sequence of sqrt chains
1745    applied to ARG, described by INFO.  Used for the dump file.  */
1746 
1747 static void
1748 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1749 			        struct pow_synth_sqrt_info *info)
1750 {
1751   for (unsigned int i = 0; i < info->deepest; i++)
1752     {
1753       bool is_set = info->factors[i];
1754       if (is_set)
1755 	{
1756 	  print_nested_fn (stream, "sqrt", arg, i + 1);
1757 	  if (i != info->deepest - 1)
1758 	    fprintf (stream, " * ");
1759 	}
1760     }
1761 }
1762 
1763 /* Print to STREAM a representation of raising ARG to an integer
1764    power N.  Used for the dump file.  */
1765 
1766 static void
1767 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768 {
1769   if (n > 1)
1770     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1771   else if (n == 1)
1772     fprintf (stream, "%s", arg);
1773 }
1774 
1775 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1776    square roots.  Place at GSI and LOC.  Limit the maximum depth
1777    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1778    result of the expanded sequence or NULL_TREE if the expansion failed.
1779 
1780    This routine assumes that ARG1 is a real number with a fractional part
1781    (the integer exponent case will have been handled earlier in
1782    gimple_expand_builtin_pow).
1783 
1784    For ARG1 > 0.0:
1785    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1786      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1787                     FRAC_PART == ARG1 - WHOLE_PART:
1788      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1789      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1790      if it can be expressed as such, that is if FRAC_PART satisfies:
1791      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1792      where integer a[i] is either 0 or 1.
1793 
1794      Example:
1795      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1796        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797 
1798    For ARG1 < 0.0 there are two approaches:
1799    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1800          is calculated as above.
1801 
1802      Example:
1803      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1804        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805 
1806    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1807            FRAC_PART  := ARG1 - WHOLE_PART
1808      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1809      Example:
1810      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1811        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812 
1813    For ARG1 < 0.0 we choose between (A) and (B) depending on
1814    how many multiplications we'd have to do.
1815    So, for the example in (B): POW (x, -5.875), if we were to
1816    follow algorithm (A) we would produce:
1817    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1818    which contains more multiplications than approach (B).
1819 
1820    Hopefully, this approach will eliminate potentially expensive POW library
1821    calls when unsafe floating point math is enabled and allow the compiler to
1822    further optimise the multiplies, square roots and divides produced by this
1823    function.  */
1824 
1825 static tree
1826 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1827 		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828 {
1829   tree type = TREE_TYPE (arg0);
1830   machine_mode mode = TYPE_MODE (type);
1831   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1832   bool one_over = true;
1833 
1834   if (!sqrtfn)
1835     return NULL_TREE;
1836 
1837   if (TREE_CODE (arg1) != REAL_CST)
1838     return NULL_TREE;
1839 
1840   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841 
1842   gcc_assert (max_depth > 0);
1843   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844 
1845   struct pow_synth_sqrt_info synth_info;
1846   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1847   synth_info.deepest = 0;
1848   synth_info.num_mults = 0;
1849 
1850   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1851   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852 
1853   /* The whole and fractional parts of exp.  */
1854   REAL_VALUE_TYPE whole_part;
1855   REAL_VALUE_TYPE frac_part;
1856 
1857   real_floor (&whole_part, mode, &exp);
1858   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1859 
1860 
1861   REAL_VALUE_TYPE ceil_whole = dconst0;
1862   REAL_VALUE_TYPE ceil_fract = dconst0;
1863 
1864   if (neg_exp)
1865     {
1866       real_ceil (&ceil_whole, mode, &exp);
1867       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1868     }
1869 
1870   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1871     return NULL_TREE;
1872 
1873   /* Check whether it's more profitable to not use 1.0 / ...  */
1874   if (neg_exp)
1875     {
1876       struct pow_synth_sqrt_info alt_synth_info;
1877       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1878       alt_synth_info.deepest = 0;
1879       alt_synth_info.num_mults = 0;
1880 
1881       if (representable_as_half_series_p (ceil_fract, max_depth,
1882 					   &alt_synth_info)
1883 	  && alt_synth_info.deepest <= synth_info.deepest
1884 	  && alt_synth_info.num_mults < synth_info.num_mults)
1885 	{
1886 	  whole_part = ceil_whole;
1887 	  frac_part = ceil_fract;
1888 	  synth_info.deepest = alt_synth_info.deepest;
1889 	  synth_info.num_mults = alt_synth_info.num_mults;
1890 	  memcpy (synth_info.factors, alt_synth_info.factors,
1891 		  (max_depth + 1) * sizeof (bool));
1892 	  one_over = false;
1893 	}
1894     }
1895 
1896   HOST_WIDE_INT n = real_to_integer (&whole_part);
1897   REAL_VALUE_TYPE cint;
1898   real_from_integer (&cint, VOIDmode, n, SIGNED);
1899 
1900   if (!real_identical (&whole_part, &cint))
1901     return NULL_TREE;
1902 
1903   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1904     return NULL_TREE;
1905 
1906   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1907 
1908   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909 
1910   /* Calculate the integer part of the exponent.  */
1911   if (n > 1)
1912     {
1913       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914       if (!integer_res)
1915 	return NULL_TREE;
1916     }
1917 
1918   if (dump_file)
1919     {
1920       char string[64];
1921 
1922       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1923       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924 
1925       if (neg_exp)
1926 	{
1927 	  if (one_over)
1928 	    {
1929 	      fprintf (dump_file, "1.0 / (");
1930 	      dump_integer_part (dump_file, "x", n);
1931 	      if (n > 0)
1932 	        fprintf (dump_file, " * ");
1933 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 	      fprintf (dump_file, ")");
1935 	    }
1936 	  else
1937 	    {
1938 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1939 	      fprintf (dump_file, " / (");
1940 	      dump_integer_part (dump_file, "x", n);
1941 	      fprintf (dump_file, ")");
1942 	    }
1943 	}
1944       else
1945 	{
1946 	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1947 	  if (n > 0)
1948 	    fprintf (dump_file, " * ");
1949 	  dump_integer_part (dump_file, "x", n);
1950 	}
1951 
1952       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1953     }
1954 
1955 
1956   tree fract_res = NULL_TREE;
1957   cache[0] = arg0;
1958 
1959   /* Calculate the fractional part of the exponent.  */
1960   for (unsigned i = 0; i < synth_info.deepest; i++)
1961     {
1962       if (synth_info.factors[i])
1963 	{
1964 	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965 
1966 	  if (!fract_res)
1967 	      fract_res = sqrt_chain;
1968 
1969 	  else
1970 	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1971 					   fract_res, sqrt_chain);
1972 	}
1973     }
1974 
1975   tree res = NULL_TREE;
1976 
1977   if (neg_exp)
1978     {
1979       if (one_over)
1980 	{
1981 	  if (n > 0)
1982 	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1983 					   fract_res, integer_res);
1984 	  else
1985 	    res = fract_res;
1986 
1987 	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1988 					  build_real (type, dconst1), res);
1989 	}
1990       else
1991 	{
1992 	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1993 					 fract_res, integer_res);
1994 	}
1995     }
1996   else
1997     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1998 				   fract_res, integer_res);
1999   return res;
2000 }
2001 
2002 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2003    with location info LOC.  If possible, create an equivalent and
2004    less expensive sequence of statements prior to GSI, and return an
2005    expression holding the result.  */
2006 
2007 static tree
2008 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2009 			   tree arg0, tree arg1)
2010 {
2011   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2012   REAL_VALUE_TYPE c2, dconst3;
2013   HOST_WIDE_INT n;
2014   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2015   machine_mode mode;
2016   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2017   bool hw_sqrt_exists, c_is_int, c2_is_int;
2018 
2019   dconst1_4 = dconst1;
2020   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2021 
2022   /* If the exponent isn't a constant, there's nothing of interest
2023      to be done.  */
2024   if (TREE_CODE (arg1) != REAL_CST)
2025     return NULL_TREE;
2026 
2027   /* Don't perform the operation if flag_signaling_nans is on
2028      and the operand is a signaling NaN.  */
2029   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2030       && ((TREE_CODE (arg0) == REAL_CST
2031 	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2032 	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2033     return NULL_TREE;
2034 
2035   /* If the exponent is equivalent to an integer, expand to an optimal
2036      multiplication sequence when profitable.  */
2037   c = TREE_REAL_CST (arg1);
2038   n = real_to_integer (&c);
2039   real_from_integer (&cint, VOIDmode, n, SIGNED);
2040   c_is_int = real_identical (&c, &cint);
2041 
2042   if (c_is_int
2043       && ((n >= -1 && n <= 2)
2044 	  || (flag_unsafe_math_optimizations
2045 	      && speed_p
2046 	      && powi_cost (n) <= POWI_MAX_MULTS)))
2047     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048 
2049   /* Attempt various optimizations using sqrt and cbrt.  */
2050   type = TREE_TYPE (arg0);
2051   mode = TYPE_MODE (type);
2052   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053 
2054   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2055      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2056      sqrt(-0) = -0.  */
2057   if (sqrtfn
2058       && real_equal (&c, &dconsthalf)
2059       && !HONOR_SIGNED_ZEROS (mode))
2060     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061 
2062   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063 
2064   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2065      optimizations since 1./3. is not exactly representable.  If x
2066      is negative and finite, the correct value of pow(x,1./3.) is
2067      a NaN with the "invalid" exception raised, because the value
2068      of 1./3. actually has an even denominator.  The correct value
2069      of cbrt(x) is a negative real value.  */
2070   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071   dconst1_3 = real_value_truncate (mode, dconst_third ());
2072 
2073   if (flag_unsafe_math_optimizations
2074       && cbrtfn
2075       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076       && real_equal (&c, &dconst1_3))
2077     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078 
2079   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2080      if we don't have a hardware sqrt insn.  */
2081   dconst1_6 = dconst1_3;
2082   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2083 
2084   if (flag_unsafe_math_optimizations
2085       && sqrtfn
2086       && cbrtfn
2087       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088       && speed_p
2089       && hw_sqrt_exists
2090       && real_equal (&c, &dconst1_6))
2091     {
2092       /* sqrt(x)  */
2093       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094 
2095       /* cbrt(sqrt(x))  */
2096       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2097     }
2098 
2099 
2100   /* Attempt to expand the POW as a product of square root chains.
2101      Expand the 0.25 case even when optimising for size.  */
2102   if (flag_unsafe_math_optimizations
2103       && sqrtfn
2104       && hw_sqrt_exists
2105       && (speed_p || real_equal (&c, &dconst1_4))
2106       && !HONOR_SIGNED_ZEROS (mode))
2107     {
2108       unsigned int max_depth = speed_p
2109 				? param_max_pow_sqrt_depth
2110 				: 2;
2111 
2112       tree expand_with_sqrts
2113 	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114 
2115       if (expand_with_sqrts)
2116 	return expand_with_sqrts;
2117     }
2118 
2119   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120   n = real_to_integer (&c2);
2121   real_from_integer (&cint, VOIDmode, n, SIGNED);
2122   c2_is_int = real_identical (&c2, &cint);
2123 
2124   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125 
2126      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2127      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2128 
2129      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2130      different from pow(x, 1./3.) due to rounding and behavior with
2131      negative x, we need to constrain this transformation to unsafe
2132      math and positive x or finite math.  */
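  /* As an illustration, pow (x, 5./3.) gives n == 5 here and (under
     -funsafe-math-optimizations and the conditions below) is expanded
     roughly as:
       cbrt_x = cbrt (x);
       t = cbrt_x * cbrt_x;
       result = x * t;
     i.e. powi (x, 5/3) * powi (cbrt (x), 5%3).  */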
2133   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135   real_round (&c2, mode, &c2);
2136   n = real_to_integer (&c2);
2137   real_from_integer (&cint, VOIDmode, n, SIGNED);
2138   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139   real_convert (&c2, mode, &c2);
2140 
2141   if (flag_unsafe_math_optimizations
2142       && cbrtfn
2143       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144       && real_identical (&c2, &c)
2145       && !c2_is_int
2146       && optimize_function_for_speed_p (cfun)
2147       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148     {
2149       tree powi_x_ndiv3 = NULL_TREE;
2150 
2151       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2152          possible or profitable, give up.  Skip the degenerate case when
2153          abs(n) < 3, where the result is always 1.  */
2154       if (absu_hwi (n) >= 3)
2155 	{
2156 	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157 						     abs_hwi (n / 3));
2158 	  if (!powi_x_ndiv3)
2159 	    return NULL_TREE;
2160 	}
2161 
2162       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2163          as that creates an unnecessary variable.  Instead, just produce
2164          either cbrt(x) or cbrt(x) * cbrt(x).  */
2165       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166 
2167       if (absu_hwi (n) % 3 == 1)
2168 	powi_cbrt_x = cbrt_x;
2169       else
2170 	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171 					      cbrt_x, cbrt_x);
2172 
2173       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2174       if (absu_hwi (n) < 3)
2175 	result = powi_cbrt_x;
2176       else
2177 	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178 					 powi_x_ndiv3, powi_cbrt_x);
2179 
2180       /* If n is negative, reciprocate the result.  */
2181       if (n < 0)
2182 	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183 					 build_real (type, dconst1), result);
2184 
2185       return result;
2186     }
2187 
2188   /* No optimizations succeeded.  */
2189   return NULL_TREE;
2190 }
2191 
2192 /* ARG is the argument to a cabs builtin call in GSI with location info
2193    LOC.  Create a sequence of statements prior to GSI that calculates
2194    sqrt(R*R + I*I), where R and I are the real and imaginary components
2195    of ARG, respectively.  Return an expression holding the result.  */
2196 
2197 static tree
2198 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199 {
2200   tree real_part, imag_part, addend1, addend2, sum, result;
2201   tree type = TREE_TYPE (TREE_TYPE (arg));
2202   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2203   machine_mode mode = TYPE_MODE (type);
2204 
2205   if (!flag_unsafe_math_optimizations
2206       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2207       || !sqrtfn
2208       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2209     return NULL_TREE;
2210 
2211   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 				    REALPART_EXPR, arg);
2213   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 				    real_part, real_part);
2215   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216 				    IMAGPART_EXPR, arg);
2217   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218 				    imag_part, imag_part);
2219   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2220   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221 
2222   return result;
2223 }
2224 
2225 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2226    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2227    an optimal number of multiplies, when n is a constant.  */
2228 
2229 namespace {
2230 
2231 const pass_data pass_data_cse_sincos =
2232 {
2233   GIMPLE_PASS, /* type */
2234   "sincos", /* name */
2235   OPTGROUP_NONE, /* optinfo_flags */
2236   TV_TREE_SINCOS, /* tv_id */
2237   PROP_ssa, /* properties_required */
2238   PROP_gimple_opt_math, /* properties_provided */
2239   0, /* properties_destroyed */
2240   0, /* todo_flags_start */
2241   TODO_update_ssa, /* todo_flags_finish */
2242 };
2243 
2244 class pass_cse_sincos : public gimple_opt_pass
2245 {
2246 public:
2247   pass_cse_sincos (gcc::context *ctxt)
2248     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2249   {}
2250 
2251   /* opt_pass methods: */
2252   virtual bool gate (function *)
2253     {
2254       /* We no longer require either sincos or cexp, since powi expansion
2255 	 piggybacks on this pass.  */
2256       return optimize;
2257     }
2258 
2259   virtual unsigned int execute (function *);
2260 
2261 }; // class pass_cse_sincos
2262 
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2265 {
2266   basic_block bb;
2267   bool cfg_changed = false;
2268 
2269   calculate_dominance_info (CDI_DOMINATORS);
2270   memset (&sincos_stats, 0, sizeof (sincos_stats));
2271 
2272   FOR_EACH_BB_FN (bb, fun)
2273     {
2274       gimple_stmt_iterator gsi;
2275       bool cleanup_eh = false;
2276 
2277       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278         {
2279 	  gimple *stmt = gsi_stmt (gsi);
2280 
2281 	  /* Only the last stmt in a bb could throw, no need to call
2282 	     gimple_purge_dead_eh_edges if we change something in the middle
2283 	     of a basic block.  */
2284 	  cleanup_eh = false;
2285 
2286 	  if (is_gimple_call (stmt)
2287 	      && gimple_call_lhs (stmt))
2288 	    {
2289 	      tree arg, arg0, arg1, result;
2290 	      HOST_WIDE_INT n;
2291 	      location_t loc;
2292 
2293 	      switch (gimple_call_combined_fn (stmt))
2294 		{
2295 		CASE_CFN_COS:
2296 		CASE_CFN_SIN:
2297 		CASE_CFN_CEXPI:
2298 		  arg = gimple_call_arg (stmt, 0);
2299 		  /* Make sure we have either sincos or cexp.  */
2300 		  if (!targetm.libc_has_function (function_c99_math_complex,
2301 						  TREE_TYPE (arg))
2302 		      && !targetm.libc_has_function (function_sincos,
2303 						     TREE_TYPE (arg)))
2304 		    break;
2305 
2306 		  if (TREE_CODE (arg) == SSA_NAME)
2307 		    cfg_changed |= execute_cse_sincos_1 (arg);
2308 		  break;
2309 
2310 		CASE_CFN_POW:
2311 		  arg0 = gimple_call_arg (stmt, 0);
2312 		  arg1 = gimple_call_arg (stmt, 1);
2313 
2314 		  loc = gimple_location (stmt);
2315 		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316 
2317 		  if (result)
2318 		    {
2319 		      tree lhs = gimple_get_lhs (stmt);
2320 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2321 		      gimple_set_location (new_stmt, loc);
2322 		      unlink_stmt_vdef (stmt);
2323 		      gsi_replace (&gsi, new_stmt, true);
2324 		      cleanup_eh = true;
2325 		      if (gimple_vdef (stmt))
2326 			release_ssa_name (gimple_vdef (stmt));
2327 		    }
2328 		  break;
2329 
2330 		CASE_CFN_POWI:
2331 		  arg0 = gimple_call_arg (stmt, 0);
2332 		  arg1 = gimple_call_arg (stmt, 1);
2333 		  loc = gimple_location (stmt);
2334 
2335 		  if (real_minus_onep (arg0))
2336 		    {
2337                       tree t0, t1, cond, one, minus_one;
2338 		      gassign *stmt;
2339 
2340 		      t0 = TREE_TYPE (arg0);
2341 		      t1 = TREE_TYPE (arg1);
2342 		      one = build_real (t0, dconst1);
2343 		      minus_one = build_real (t0, dconstm1);
2344 
2345 		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2346 		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2347 						  arg1, build_int_cst (t1, 1));
2348 		      gimple_set_location (stmt, loc);
2349 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350 
2351 		      result = make_temp_ssa_name (t0, NULL, "powi");
2352 		      stmt = gimple_build_assign (result, COND_EXPR, cond,
2353 						  minus_one, one);
2354 		      gimple_set_location (stmt, loc);
2355 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356 		    }
2357 		  else
2358 		    {
2359 		      if (!tree_fits_shwi_p (arg1))
2360 			break;
2361 
2362 		      n = tree_to_shwi (arg1);
2363 		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2364 		    }
2365 
2366 		  if (result)
2367 		    {
2368 		      tree lhs = gimple_get_lhs (stmt);
2369 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2370 		      gimple_set_location (new_stmt, loc);
2371 		      unlink_stmt_vdef (stmt);
2372 		      gsi_replace (&gsi, new_stmt, true);
2373 		      cleanup_eh = true;
2374 		      if (gimple_vdef (stmt))
2375 			release_ssa_name (gimple_vdef (stmt));
2376 		    }
2377 		  break;
2378 
2379 		CASE_CFN_CABS:
2380 		  arg0 = gimple_call_arg (stmt, 0);
2381 		  loc = gimple_location (stmt);
2382 		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383 
2384 		  if (result)
2385 		    {
2386 		      tree lhs = gimple_get_lhs (stmt);
2387 		      gassign *new_stmt = gimple_build_assign (lhs, result);
2388 		      gimple_set_location (new_stmt, loc);
2389 		      unlink_stmt_vdef (stmt);
2390 		      gsi_replace (&gsi, new_stmt, true);
2391 		      cleanup_eh = true;
2392 		      if (gimple_vdef (stmt))
2393 			release_ssa_name (gimple_vdef (stmt));
2394 		    }
2395 		  break;
2396 
2397 		default:;
2398 		}
2399 	    }
2400 	}
2401       if (cleanup_eh)
2402 	cfg_changed |= gimple_purge_dead_eh_edges (bb);
2403     }
2404 
2405   statistics_counter_event (fun, "sincos statements inserted",
2406 			    sincos_stats.inserted);
2407   statistics_counter_event (fun, "conv statements removed",
2408 			    sincos_stats.conv_removed);
2409 
2410   return cfg_changed ? TODO_cleanup_cfg : 0;
2411 }
2412 
2413 } // anon namespace
2414 
2415 gimple_opt_pass *
2416 make_pass_cse_sincos (gcc::context *ctxt)
2417 {
2418   return new pass_cse_sincos (ctxt);
2419 }
2420 
2421 /* Return true if STMT is a type conversion operation that can be stripped
2422    when used in a widening multiply operation.  */
2423 static bool
2424 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425 {
2426   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427 
2428   if (TREE_CODE (result_type) == INTEGER_TYPE)
2429     {
2430       tree op_type;
2431       tree inner_op_type;
2432 
2433       if (!CONVERT_EXPR_CODE_P (rhs_code))
2434 	return false;
2435 
2436       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437 
2438       /* If the type of OP has the same precision as the result, then
2439 	 we can strip this conversion.  The multiply operation will be
2440 	 selected to create the correct extension as a by-product.  */
2441       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2442 	return true;
2443 
2444       /* We can also strip a conversion if it preserves the signed-ness of
2445 	 the operation and doesn't narrow the range.  */
2446       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447 
2448       /* If the inner-most type is unsigned, then we can strip any
2449 	 intermediate widening operation.  If it's signed, then the
2450 	 intermediate widening operation must also be signed.  */
2451       if ((TYPE_UNSIGNED (inner_op_type)
2452 	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2453 	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2454 	return true;
2455 
2456       return false;
2457     }
2458 
2459   return rhs_code == FIXED_CONVERT_EXPR;
2460 }
2461 
2462 /* Return true if RHS is a suitable operand for a widening multiplication,
2463    assuming a target type of TYPE.
2464    There are two cases:
2465 
2466      - RHS makes some value at least twice as wide.  Store that value
2467        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468 
2469      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2470        but leave *TYPE_OUT untouched.  */
2471 
2472 static bool
2473 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2474 			tree *new_rhs_out)
2475 {
2476   gimple *stmt;
2477   tree type1, rhs1;
2478 
2479   if (TREE_CODE (rhs) == SSA_NAME)
2480     {
2481       stmt = SSA_NAME_DEF_STMT (rhs);
2482       if (is_gimple_assign (stmt))
2483 	{
2484 	  if (! widening_mult_conversion_strippable_p (type, stmt))
2485 	    rhs1 = rhs;
2486 	  else
2487 	    {
2488 	      rhs1 = gimple_assign_rhs1 (stmt);
2489 
2490 	      if (TREE_CODE (rhs1) == INTEGER_CST)
2491 		{
2492 		  *new_rhs_out = rhs1;
2493 		  *type_out = NULL;
2494 		  return true;
2495 		}
2496 	    }
2497 	}
2498       else
2499 	rhs1 = rhs;
2500 
2501       type1 = TREE_TYPE (rhs1);
2502 
2503       if (TREE_CODE (type1) != TREE_CODE (type)
2504 	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2505 	return false;
2506 
2507       *new_rhs_out = rhs1;
2508       *type_out = type1;
2509       return true;
2510     }
2511 
2512   if (TREE_CODE (rhs) == INTEGER_CST)
2513     {
2514       *new_rhs_out = rhs;
2515       *type_out = NULL;
2516       return true;
2517     }
2518 
2519   return false;
2520 }
2521 
2522 /* Return true if STMT performs a widening multiplication, assuming the
2523    output type is TYPE.  If so, store the unwidened types of the operands
2524    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2525    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2526    and *TYPE2_OUT would give the operands of the multiplication.  */
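/* For instance, assuming long is twice as wide as int, for the GIMPLE
   sequence
     _1 = (long) a_2;
     _3 = (long) b_4;
     res_5 = _1 * _3;
   this returns true for the multiplication statement, with *TYPE1_OUT
   == *TYPE2_OUT == int and *RHS1_OUT and *RHS2_OUT set to the unwidened
   operands a_2 and b_4.  */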
2527 
2528 static bool
2529 is_widening_mult_p (gimple *stmt,
2530 		    tree *type1_out, tree *rhs1_out,
2531 		    tree *type2_out, tree *rhs2_out)
2532 {
2533   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534 
2535   if (TREE_CODE (type) == INTEGER_TYPE)
2536     {
2537       if (TYPE_OVERFLOW_TRAPS (type))
2538 	return false;
2539     }
2540   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2541     return false;
2542 
2543   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2544 			       rhs1_out))
2545     return false;
2546 
2547   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2548 			       rhs2_out))
2549     return false;
2550 
2551   if (*type1_out == NULL)
2552     {
2553       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2554 	return false;
2555       *type1_out = *type2_out;
2556     }
2557 
2558   if (*type2_out == NULL)
2559     {
2560       if (!int_fits_type_p (*rhs2_out, *type1_out))
2561 	return false;
2562       *type2_out = *type1_out;
2563     }
2564 
2565   /* Ensure that the larger of the two operands comes first. */
2566   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567     {
2568       std::swap (*type1_out, *type2_out);
2569       std::swap (*rhs1_out, *rhs2_out);
2570     }
2571 
2572   return true;
2573 }
2574 
2575 /* Check to see if the CALL statement is an invocation of copysign
2576    with 1. being the first argument.  */
2577 static bool
2578 is_copysign_call_with_1 (gimple *call)
2579 {
2580   gcall *c = dyn_cast <gcall *> (call);
2581   if (! c)
2582     return false;
2583 
2584   enum combined_fn code = gimple_call_combined_fn (c);
2585 
2586   if (code == CFN_LAST)
2587     return false;
2588 
2589   if (builtin_fn_p (code))
2590     {
2591       switch (as_builtin_fn (code))
2592 	{
2593 	CASE_FLT_FN (BUILT_IN_COPYSIGN):
2594 	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2595 	  return real_onep (gimple_call_arg (c, 0));
2596 	default:
2597 	  return false;
2598 	}
2599     }
2600 
2601   if (internal_fn_p (code))
2602     {
2603       switch (as_internal_fn (code))
2604 	{
2605 	case IFN_COPYSIGN:
2606 	  return real_onep (gimple_call_arg (c, 0));
2607 	default:
2608 	  return false;
2609 	}
2610     }
2611 
2612    return false;
2613 }
2614 
2615 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2616    This only happens when the xorsign optab is defined.  If the
2617    pattern is not a xorsign pattern or if expansion fails, FALSE is
2618    returned; otherwise TRUE is returned.  */
2619 static bool
2620 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621 {
2622   tree treeop0, treeop1, lhs, type;
2623   location_t loc = gimple_location (stmt);
2624   lhs = gimple_assign_lhs (stmt);
2625   treeop0 = gimple_assign_rhs1 (stmt);
2626   treeop1 = gimple_assign_rhs2 (stmt);
2627   type = TREE_TYPE (lhs);
2628   machine_mode mode = TYPE_MODE (type);
2629 
2630   if (HONOR_SNANS (type))
2631     return false;
2632 
2633   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634     {
2635       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2636       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637 	{
2638 	  call0 = SSA_NAME_DEF_STMT (treeop1);
2639 	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2640 	    return false;
2641 
2642 	  treeop1 = treeop0;
2643 	}
2644 	if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2645 	  return false;
2646 
2647 	gcall *c = as_a<gcall*> (call0);
2648 	treeop0 = gimple_call_arg (c, 1);
2649 
2650 	gcall *call_stmt
2651 	  = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2652 	gimple_set_lhs (call_stmt, lhs);
2653 	gimple_set_location (call_stmt, loc);
2654 	gsi_replace (gsi, call_stmt, true);
2655 	return true;
2656     }
2657 
2658   return false;
2659 }
2660 
2661 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2662    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2663    value is true iff we converted the statement.  */
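/* For example, on a target providing a widening multiply instruction
   and assuming long is twice as wide as int,
     int a, b;
     long c = (long) a * (long) b;
   has its full-width MULT_EXPR rewritten in place into a WIDEN_MULT_EXPR
   of the narrow operands, so a single widening multiply is emitted
   instead of two extensions and a full-width multiply.  */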
2664 
2665 static bool
2666 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667 {
2668   tree lhs, rhs1, rhs2, type, type1, type2;
2669   enum insn_code handler;
2670   scalar_int_mode to_mode, from_mode, actual_mode;
2671   optab op;
2672   int actual_precision;
2673   location_t loc = gimple_location (stmt);
2674   bool from_unsigned1, from_unsigned2;
2675 
2676   lhs = gimple_assign_lhs (stmt);
2677   type = TREE_TYPE (lhs);
2678   if (TREE_CODE (type) != INTEGER_TYPE)
2679     return false;
2680 
2681   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2682     return false;
2683 
2684   /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
2685      avoid the transform.  */
2686   if ((TREE_CODE (rhs1) == SSA_NAME
2687        && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2688       || (TREE_CODE (rhs2) == SSA_NAME
2689 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2690     return false;
2691 
2692   to_mode = SCALAR_INT_TYPE_MODE (type);
2693   from_mode = SCALAR_INT_TYPE_MODE (type1);
2694   if (to_mode == from_mode)
2695     return false;
2696 
2697   from_unsigned1 = TYPE_UNSIGNED (type1);
2698   from_unsigned2 = TYPE_UNSIGNED (type2);
2699 
2700   if (from_unsigned1 && from_unsigned2)
2701     op = umul_widen_optab;
2702   else if (!from_unsigned1 && !from_unsigned2)
2703     op = smul_widen_optab;
2704   else
2705     op = usmul_widen_optab;
2706 
2707   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2708 						  &actual_mode);
2709 
2710   if (handler == CODE_FOR_nothing)
2711     {
2712       if (op != smul_widen_optab)
2713 	{
2714 	  /* We can use a signed multiply with unsigned types as long as
2715 	     there is a wider mode to use, or it is the smaller of the two
2716 	     types that is unsigned.  Note that type1 >= type2, always.  */
2717 	  if ((TYPE_UNSIGNED (type1)
2718 	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2719 	      || (TYPE_UNSIGNED (type2)
2720 		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2721 	    {
2722 	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2723 		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2724 		return false;
2725 	    }
2726 
2727 	  op = smul_widen_optab;
2728 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2729 							  from_mode,
2730 							  &actual_mode);
2731 
2732 	  if (handler == CODE_FOR_nothing)
2733 	    return false;
2734 
2735 	  from_unsigned1 = from_unsigned2 = false;
2736 	}
2737       else
2738 	{
2739 	  /* Expand can synthesize smul_widen_optab if the target
2740 	     supports umul_widen_optab.  */
2741 	  op = umul_widen_optab;
2742 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2743 							  from_mode,
2744 							  &actual_mode);
2745 	  if (handler == CODE_FOR_nothing)
2746 	    return false;
2747 	}
2748     }
2749 
2750   /* Ensure that the inputs to the handler are in the correct precision
2751      for the opcode.  This will be the full mode size.  */
2752   actual_precision = GET_MODE_PRECISION (actual_mode);
2753   if (2 * actual_precision > TYPE_PRECISION (type))
2754     return false;
2755   if (actual_precision != TYPE_PRECISION (type1)
2756       || from_unsigned1 != TYPE_UNSIGNED (type1))
2757     rhs1 = build_and_insert_cast (gsi, loc,
2758 				  build_nonstandard_integer_type
2759 				    (actual_precision, from_unsigned1), rhs1);
2760   if (actual_precision != TYPE_PRECISION (type2)
2761       || from_unsigned2 != TYPE_UNSIGNED (type2))
2762     rhs2 = build_and_insert_cast (gsi, loc,
2763 				  build_nonstandard_integer_type
2764 				    (actual_precision, from_unsigned2), rhs2);
2765 
2766   /* Handle constants.  */
2767   if (TREE_CODE (rhs1) == INTEGER_CST)
2768     rhs1 = fold_convert (type1, rhs1);
2769   if (TREE_CODE (rhs2) == INTEGER_CST)
2770     rhs2 = fold_convert (type2, rhs2);
2771 
2772   gimple_assign_set_rhs1 (stmt, rhs1);
2773   gimple_assign_set_rhs2 (stmt, rhs2);
2774   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2775   update_stmt (stmt);
2776   widen_mul_stats.widen_mults_inserted++;
2777   return true;
2778 }
2779 
2780 /* Process a single gimple statement STMT, which is found at the
2781    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2782    rhs (given by CODE), and try to convert it into a
2783    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2784    is true iff we converted the statement.  */
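/* Continuing the example above, a statement such as
     acc = acc + (long) a * (long) b;
   where the multiplication is (or can be converted to) a widening
   multiply may be rewritten into a single
   WIDEN_MULT_PLUS_EXPR <a, b, acc>, provided the target implements the
   corresponding multiply-and-accumulate optab.  */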
2785 
2786 static bool
2787 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2788 			    enum tree_code code)
2789 {
2790   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2791   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2792   tree type, type1, type2, optype;
2793   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2794   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2795   optab this_optab;
2796   enum tree_code wmult_code;
2797   enum insn_code handler;
2798   scalar_mode to_mode, from_mode, actual_mode;
2799   location_t loc = gimple_location (stmt);
2800   int actual_precision;
2801   bool from_unsigned1, from_unsigned2;
2802 
2803   lhs = gimple_assign_lhs (stmt);
2804   type = TREE_TYPE (lhs);
2805   if ((TREE_CODE (type) != INTEGER_TYPE
2806        && TREE_CODE (type) != FIXED_POINT_TYPE)
2807       || !type_has_mode_precision_p (type))
2808     return false;
2809 
2810   if (code == MINUS_EXPR)
2811     wmult_code = WIDEN_MULT_MINUS_EXPR;
2812   else
2813     wmult_code = WIDEN_MULT_PLUS_EXPR;
2814 
2815   rhs1 = gimple_assign_rhs1 (stmt);
2816   rhs2 = gimple_assign_rhs2 (stmt);
2817 
2818   if (TREE_CODE (rhs1) == SSA_NAME)
2819     {
2820       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2821       if (is_gimple_assign (rhs1_stmt))
2822 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2823     }
2824 
2825   if (TREE_CODE (rhs2) == SSA_NAME)
2826     {
2827       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2828       if (is_gimple_assign (rhs2_stmt))
2829 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2830     }
2831 
2832   /* Allow for one conversion statement between the multiply
2833      and addition/subtraction statement.  If there is more than
2834      one conversion then we assume they would invalidate this
2835      transformation.  If that's not the case then they should have
2836      been folded before now.  */
2837   if (CONVERT_EXPR_CODE_P (rhs1_code))
2838     {
2839       conv1_stmt = rhs1_stmt;
2840       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2841       if (TREE_CODE (rhs1) == SSA_NAME)
2842 	{
2843 	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2844 	  if (is_gimple_assign (rhs1_stmt))
2845 	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2846 	}
2847       else
2848 	return false;
2849     }
2850   if (CONVERT_EXPR_CODE_P (rhs2_code))
2851     {
2852       conv2_stmt = rhs2_stmt;
2853       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2854       if (TREE_CODE (rhs2) == SSA_NAME)
2855 	{
2856 	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2857 	  if (is_gimple_assign (rhs2_stmt))
2858 	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2859 	}
2860       else
2861 	return false;
2862     }
2863 
2864   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2865      is_widening_mult_p, but we still need the rhs values it returns.
2866 
2867      It might also appear that it would be sufficient to use the existing
2868      operands of the widening multiply, but that would limit the choice of
2869      multiply-and-accumulate instructions.
2870 
2871      If the widened-multiplication result has more than one use, it is
2872      probably wiser not to do the conversion.  Also restrict this operation
2873      to a single basic block to avoid moving the multiply to a different block
2874      with a higher execution frequency.  */
2875   if (code == PLUS_EXPR
2876       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2877     {
2878       if (!has_single_use (rhs1)
2879 	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2880 	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2881 				  &type2, &mult_rhs2))
2882 	return false;
2883       add_rhs = rhs2;
2884       conv_stmt = conv1_stmt;
2885     }
2886   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2887     {
2888       if (!has_single_use (rhs2)
2889 	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2890 	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2891 				  &type2, &mult_rhs2))
2892 	return false;
2893       add_rhs = rhs1;
2894       conv_stmt = conv2_stmt;
2895     }
2896   else
2897     return false;
2898 
2899   to_mode = SCALAR_TYPE_MODE (type);
2900   from_mode = SCALAR_TYPE_MODE (type1);
2901   if (to_mode == from_mode)
2902     return false;
2903 
2904   from_unsigned1 = TYPE_UNSIGNED (type1);
2905   from_unsigned2 = TYPE_UNSIGNED (type2);
2906   optype = type1;
2907 
2908   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2909   if (from_unsigned1 != from_unsigned2)
2910     {
2911       if (!INTEGRAL_TYPE_P (type))
2912 	return false;
2913       /* We can use a signed multiply with unsigned types as long as
2914 	 there is a wider mode to use, or it is the smaller of the two
2915 	 types that is unsigned.  Note that type1 >= type2, always.  */
2916       if ((from_unsigned1
2917 	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2918 	  || (from_unsigned2
2919 	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2920 	{
2921 	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2922 	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2923 	    return false;
2924 	}
2925 
2926       from_unsigned1 = from_unsigned2 = false;
2927       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2928 					       false);
2929     }
2930 
2931   /* If there was a conversion between the multiply and addition
2932      then we need to make sure it fits a multiply-and-accumulate.
2933      There should be a single mode change which does not change the
2934      value.  */
2935   if (conv_stmt)
2936     {
2937       /* We use the original, unmodified data types for this.  */
2938       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2939       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2940       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2941       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2942 
2943       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2944 	{
2945 	  /* Conversion is a truncate.  */
2946 	  if (TYPE_PRECISION (to_type) < data_size)
2947 	    return false;
2948 	}
2949       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2950 	{
2951 	  /* Conversion is an extend.  Check it's the right sort.  */
2952 	  if (TYPE_UNSIGNED (from_type) != is_unsigned
2953 	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2954 	    return false;
2955 	}
2956       /* else convert is a no-op for our purposes.  */
2957     }
2958 
2959   /* Verify that the machine can perform a widening multiply
2960      accumulate in this mode/signedness combination, otherwise
2961      this transformation is likely to pessimize code.  */
2962   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2963   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2964 						  from_mode, &actual_mode);
2965 
2966   if (handler == CODE_FOR_nothing)
2967     return false;
2968 
2969   /* Ensure that the inputs to the handler are in the correct precision
2970      for the opcode.  This will be the full mode size.  */
2971   actual_precision = GET_MODE_PRECISION (actual_mode);
2972   if (actual_precision != TYPE_PRECISION (type1)
2973       || from_unsigned1 != TYPE_UNSIGNED (type1))
2974     mult_rhs1 = build_and_insert_cast (gsi, loc,
2975 				       build_nonstandard_integer_type
2976 				         (actual_precision, from_unsigned1),
2977 				       mult_rhs1);
2978   if (actual_precision != TYPE_PRECISION (type2)
2979       || from_unsigned2 != TYPE_UNSIGNED (type2))
2980     mult_rhs2 = build_and_insert_cast (gsi, loc,
2981 				       build_nonstandard_integer_type
2982 					 (actual_precision, from_unsigned2),
2983 				       mult_rhs2);
2984 
2985   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2986     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2987 
2988   /* Handle constants.  */
2989   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2990     mult_rhs1 = fold_convert (type1, mult_rhs1);
2991   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2992     mult_rhs2 = fold_convert (type2, mult_rhs2);
2993 
2994   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2995 				  add_rhs);
2996   update_stmt (gsi_stmt (*gsi));
2997   widen_mul_stats.maccs_inserted++;
2998   return true;
2999 }
3000 
3001 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3002    OP2 and which we know is used in statements that can be, together with the
3003    multiplication, converted to FMAs, perform the transformation.  */
3004 
3005 static void
3006 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3007 {
3008   tree type = TREE_TYPE (mul_result);
3009   gimple *use_stmt;
3010   imm_use_iterator imm_iter;
3011   gcall *fma_stmt;
3012 
3013   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3014     {
3015       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3016       tree addop, mulop1 = op1, result = mul_result;
3017       bool negate_p = false;
3018       gimple_seq seq = NULL;
3019 
3020       if (is_gimple_debug (use_stmt))
3021 	continue;
3022 
3023       if (is_gimple_assign (use_stmt)
3024 	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3025 	{
3026 	  result = gimple_assign_lhs (use_stmt);
3027 	  use_operand_p use_p;
3028 	  gimple *neguse_stmt;
3029 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3030 	  gsi_remove (&gsi, true);
3031 	  release_defs (use_stmt);
3032 
3033 	  use_stmt = neguse_stmt;
3034 	  gsi = gsi_for_stmt (use_stmt);
3035 	  negate_p = true;
3036 	}
3037 
3038       tree cond, else_value, ops[3];
3039       tree_code code;
3040       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3041 					      ops, &else_value))
3042 	gcc_unreachable ();
3043       addop = ops[0] == result ? ops[1] : ops[0];
3044 
3045       if (code == MINUS_EXPR)
3046 	{
3047 	  if (ops[0] == result)
3048 	    /* a * b - c -> a * b + (-c)  */
3049 	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3050 	  else
3051 	    /* a - b * c -> (-b) * c + a */
3052 	    negate_p = !negate_p;
3053 	}
3054 
3055       if (negate_p)
3056 	mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3057 
3058       if (seq)
3059 	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3060 
3061       if (cond)
3062 	fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3063 					       op2, addop, else_value);
3064       else
3065 	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3066       gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3067       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3068 								   use_stmt));
3069       gsi_replace (&gsi, fma_stmt, true);
3070       /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3071 	 regardless of where the negation occurs.  */
3072       gimple *orig_stmt = gsi_stmt (gsi);
3073       if (fold_stmt (&gsi, follow_all_ssa_edges))
3074 	{
3075 	  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3076 	    gcc_unreachable ();
3077 	  update_stmt (gsi_stmt (gsi));
3078 	}
3079 
3080       if (dump_file && (dump_flags & TDF_DETAILS))
3081 	{
3082 	  fprintf (dump_file, "Generated FMA ");
3083 	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3084 	  fprintf (dump_file, "\n");
3085 	}
3086 
3087       /* If the FMA result is negated in a single use, fold the negation
3088 	 too.  */
3089       orig_stmt = gsi_stmt (gsi);
3090       use_operand_p use_p;
3091       gimple *neg_stmt;
3092       if (is_gimple_call (orig_stmt)
3093 	  && gimple_call_internal_p (orig_stmt)
3094 	  && gimple_call_lhs (orig_stmt)
3095 	  && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3096 	  && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3097 	  && is_gimple_assign (neg_stmt)
3098 	  && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3099 	  && !stmt_could_throw_p (cfun, neg_stmt))
3100 	{
3101 	  gsi = gsi_for_stmt (neg_stmt);
3102 	  if (fold_stmt (&gsi, follow_all_ssa_edges))
3103 	    {
3104 	      if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3105 		gcc_unreachable ();
3106 	      update_stmt (gsi_stmt (gsi));
3107 	      if (dump_file && (dump_flags & TDF_DETAILS))
3108 		{
3109 		  fprintf (dump_file, "Folded FMA negation ");
3110 		  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3111 		  fprintf (dump_file, "\n");
3112 		}
3113 	    }
3114 	}
3115 
3116       widen_mul_stats.fmas_inserted++;
3117     }
3118 }
3119 
3120 /* Data necessary to perform the actual transformation from a multiplication
3121    and an addition to an FMA after decision is taken it should be done and to
3122    then delete the multiplication statement from the function IL.  */
3123 
3124 struct fma_transformation_info
3125 {
3126   gimple *mul_stmt;
3127   tree mul_result;
3128   tree op1;
3129   tree op2;
3130 };
3131 
3132 /* Structure containing the current state of FMA deferring, i.e. whether we are
3133    deferring, whether to continue deferring, and all data necessary to come
3134    back and perform all deferred transformations.  */
3135 
3136 class fma_deferring_state
3137 {
3138 public:
3139   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3140      do any deferring.  */
3141 
3142   fma_deferring_state (bool perform_deferring)
3143     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3144       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3145 
3146   /* List of FMA candidates for which the transformation has been determined
3147      to be possible but which, at this point in the BB analysis, we do not
3148      consider beneficial.  */
3149   auto_vec<fma_transformation_info, 8> m_candidates;
3150 
3151   /* Set of results of multiplications that are part of already deferred FMA
3152      candidates.  */
3153   hash_set<tree> m_mul_result_set;
3154 
3155   /* The PHI that supposedly feeds back the result of one FMA to another across
3156      a loop boundary.  */
3157   gphi *m_initial_phi;
3158 
3159   /* Result of the last produced FMA candidate or NULL if there has not been
3160      one.  */
3161   tree m_last_result;
3162 
3163   /* If true, deferring might still be profitable.  If false, transform all
3164      candidates and no longer defer.  */
3165   bool m_deferring_p;
3166 };
3167 
3168 /* Transform all deferred FMA candidates and mark STATE as no longer
3169    deferring.  */
3170 
3171 static void
3172 cancel_fma_deferring (fma_deferring_state *state)
3173 {
3174   if (!state->m_deferring_p)
3175     return;
3176 
3177   for (unsigned i = 0; i < state->m_candidates.length (); i++)
3178     {
3179       if (dump_file && (dump_flags & TDF_DETAILS))
3180 	fprintf (dump_file, "Generating deferred FMA\n");
3181 
3182       const fma_transformation_info &fti = state->m_candidates[i];
3183       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3184 
3185       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3186       gsi_remove (&gsi, true);
3187       release_defs (fti.mul_stmt);
3188     }
3189   state->m_deferring_p = false;
3190 }
3191 
3192 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3193    Otherwise return NULL.  */
3194 
3195 static gphi *
3196 result_of_phi (tree op)
3197 {
3198   if (TREE_CODE (op) != SSA_NAME)
3199     return NULL;
3200 
3201   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3202 }
3203 
3204 /* After processing statements of a BB and recording STATE, return true if the
3205    initial phi is fed by the last FMA candidate result or one such result from
3206    previously processed BBs marked in LAST_RESULT_SET.  */
3207 
3208 static bool
3209 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3210 				      hash_set<tree> *last_result_set)
3211 {
3212   ssa_op_iter iter;
3213   use_operand_p use;
3214   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3215     {
3216       tree t = USE_FROM_PTR (use);
3217       if (t == state->m_last_result
3218 	  || last_result_set->contains (t))
3219 	return true;
3220     }
3221 
3222   return false;
3223 }
3224 
3225 /* Combine the multiplication at MUL_STMT with operands OP1 and OP2
3226    with uses in additions and subtractions to form fused multiply-add
3227    operations.  Returns true if successful and MUL_STMT should be removed.
3228    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3229    on MUL_COND, otherwise it is unconditional.
3230 
3231    If STATE indicates that we are deferring FMA transformation, that means
3232    that we do not produce FMAs for basic blocks which look like:
3233 
3234     <bb 6>
3235     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3236     _65 = _14 * _16;
3237     accumulator_66 = _65 + accumulator_111;
3238 
3239   or its unrolled version, i.e. with several FMA candidates that feed the
3240   result of one into the addend of another.  Instead, we add them to a list
3241   in STATE and if we later discover an FMA candidate that is not part of such
3242   a chain, we go back and perform all the past deferred candidates.  */
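
/* Editorial illustration (not part of the original sources): the deferred
   case typically comes from a reduction loop such as

     float acc = 0.0f;
     for (int i = 0; i < n; i++)
       acc += a[i] * b[i];

   where each FMA would feed the addend of the next one through the
   accumulator PHI, forming one long dependency chain; on targets that set
   param_avoid_fma_max_bits a separate multiply and add may be cheaper
   there.  */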
3243 
3244 static bool
3245 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3246 		     fma_deferring_state *state, tree mul_cond = NULL_TREE)
3247 {
3248   tree mul_result = gimple_get_lhs (mul_stmt);
3249   /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
3250      if the statement was left just for the side-effects.  */
3251   if (!mul_result)
3252     return false;
3253   tree type = TREE_TYPE (mul_result);
3254   gimple *use_stmt, *neguse_stmt;
3255   use_operand_p use_p;
3256   imm_use_iterator imm_iter;
3257 
3258   if (FLOAT_TYPE_P (type)
3259       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3260     return false;
3261 
3262   /* We don't want to do bitfield reduction ops.  */
3263   if (INTEGRAL_TYPE_P (type)
3264       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3265     return false;
3266 
3267   /* If the target doesn't support it, don't generate it.  We assume that
3268      if fma isn't available then fms, fnma or fnms are not either.  */
3269   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3270   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3271     return false;
3272 
3273   /* If the multiplication has zero uses, it is probably kept around because
3274      of -fnon-call-exceptions.  Don't optimize it away in that case;
3275      that is DCE's job.  */
3276   if (has_zero_uses (mul_result))
3277     return false;
3278 
3279   bool check_defer
3280     = (state->m_deferring_p
3281        && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3282 		    param_avoid_fma_max_bits));
3283   bool defer = check_defer;
3284   bool seen_negate_p = false;
3285   /* Make sure that the multiplication statement becomes dead after
3286      the transformation, thus that all uses are transformed to FMAs.
3287      This means we assume that an FMA operation has the same cost
3288      as an addition.  */
3289   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3290     {
3291       tree result = mul_result;
3292       bool negate_p = false;
3293 
3294       use_stmt = USE_STMT (use_p);
3295 
3296       if (is_gimple_debug (use_stmt))
3297 	continue;
3298 
3299       /* For now restrict this operation to single basic blocks.  In theory
3300 	 we would want to support sinking the multiplication in
3301 	 m = a*b;
3302 	 if ()
3303 	   ma = m + c;
3304 	 else
3305 	   d = m;
3306 	 to form an fma in the then block and sink the multiplication to the
3307 	 else block.  */
3308       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3309 	return false;
3310 
3311       /* A negate on the multiplication leads to FNMA.  */
3312       if (is_gimple_assign (use_stmt)
3313 	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3314 	{
3315 	  ssa_op_iter iter;
3316 	  use_operand_p usep;
3317 
3318 	  /* If (due to earlier missed optimizations) we have two
3319 	     negates of the same value, treat them as equivalent
3320 	     to a single negate with multiple uses.  */
3321 	  if (seen_negate_p)
3322 	    return false;
3323 
3324 	  result = gimple_assign_lhs (use_stmt);
3325 
3326 	  /* Make sure the negate statement becomes dead with this
3327 	     single transformation.  */
3328 	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
3329 			       &use_p, &neguse_stmt))
3330 	    return false;
3331 
3332 	  /* Make sure the multiplication isn't also used on that stmt.  */
3333 	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3334 	    if (USE_FROM_PTR (usep) == mul_result)
3335 	      return false;
3336 
3337 	  /* Re-validate.  */
3338 	  use_stmt = neguse_stmt;
3339 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3340 	    return false;
3341 
3342 	  negate_p = seen_negate_p = true;
3343 	}
3344 
3345       tree cond, else_value, ops[3];
3346       tree_code code;
3347       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3348 					      &else_value))
3349 	return false;
3350 
3351       switch (code)
3352 	{
3353 	case MINUS_EXPR:
3354 	  if (ops[1] == result)
3355 	    negate_p = !negate_p;
3356 	  break;
3357 	case PLUS_EXPR:
3358 	  break;
3359 	default:
3360 	  /* FMA can only be formed from PLUS and MINUS.  */
3361 	  return false;
3362 	}
3363 
3364       if (mul_cond && cond != mul_cond)
3365 	return false;
3366 
3367       if (cond)
3368 	{
3369 	  if (cond == result || else_value == result)
3370 	    return false;
3371 	  if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3372 	    return false;
3373 	}
3374 
3375       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3376 	 we'll visit later, we might be able to get a more profitable
3377 	 match with fnma.
3378 	 OTOH, if we don't, a negate / fma pair likely has lower latency
3379 	 than a mult / subtract pair.  */
3380       if (code == MINUS_EXPR
3381 	  && !negate_p
3382 	  && ops[0] == result
3383 	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3384 	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3385 	  && TREE_CODE (ops[1]) == SSA_NAME
3386 	  && has_single_use (ops[1]))
3387 	{
3388 	  gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3389 	  if (is_gimple_assign (stmt2)
3390 	      && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3391 	    return false;
3392 	}
3393 
3394       /* We can't handle a * b + a * b.  */
3395       if (ops[0] == ops[1])
3396 	return false;
3397       /* If deferring, make sure we are not looking at an instruction that
3398 	 wouldn't have existed if we were not.  */
3399       if (state->m_deferring_p
3400 	  && (state->m_mul_result_set.contains (ops[0])
3401 	      || state->m_mul_result_set.contains (ops[1])))
3402 	return false;
3403 
3404       if (check_defer)
3405 	{
3406 	  tree use_lhs = gimple_get_lhs (use_stmt);
3407 	  if (state->m_last_result)
3408 	    {
3409 	      if (ops[1] == state->m_last_result
3410 		  || ops[0] == state->m_last_result)
3411 		defer = true;
3412 	      else
3413 		defer = false;
3414 	    }
3415 	  else
3416 	    {
3417 	      gcc_checking_assert (!state->m_initial_phi);
3418 	      gphi *phi;
3419 	      if (ops[0] == result)
3420 		phi = result_of_phi (ops[1]);
3421 	      else
3422 		{
3423 		  gcc_assert (ops[1] == result);
3424 		  phi = result_of_phi (ops[0]);
3425 		}
3426 
3427 	      if (phi)
3428 		{
3429 		  state->m_initial_phi = phi;
3430 		  defer = true;
3431 		}
3432 	      else
3433 		defer = false;
3434 	    }
3435 
3436 	  state->m_last_result = use_lhs;
3437 	  check_defer = false;
3438 	}
3439       else
3440 	defer = false;
3441 
3442       /* While it is possible to validate whether or not the exact form that
3443 	 we've recognized is available in the backend, the assumption is that
3444 	 if the deferring logic above did not trigger, the transformation is
3445 	 never a loss.  For instance, suppose the target only has the plain FMA
3446 	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3447 	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
3448          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3449 	 form the two NEGs are independent and could be run in parallel.  */
3450     }
3451 
3452   if (defer)
3453     {
3454       fma_transformation_info fti;
3455       fti.mul_stmt = mul_stmt;
3456       fti.mul_result = mul_result;
3457       fti.op1 = op1;
3458       fti.op2 = op2;
3459       state->m_candidates.safe_push (fti);
3460       state->m_mul_result_set.add (mul_result);
3461 
3462       if (dump_file && (dump_flags & TDF_DETAILS))
3463 	{
3464 	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
3465 	  print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3466 	  fprintf (dump_file, "\n");
3467 	}
3468 
3469       return false;
3470     }
3471   else
3472     {
3473       if (state->m_deferring_p)
3474 	cancel_fma_deferring (state);
3475       convert_mult_to_fma_1 (mul_result, op1, op2);
3476       return true;
3477     }
3478 }
3479 
3480 
3481 /* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3482    a check for non-zero like:
3483    _1 = x_4(D) * y_5(D);
3484    *res_7(D) = _1;
3485    if (x_4(D) != 0)
3486      goto <bb 3>; [50.00%]
3487    else
3488      goto <bb 4>; [50.00%]
3489 
3490    <bb 3> [local count: 536870913]:
3491    _2 = _1 / x_4(D);
3492    _9 = _2 != y_5(D);
3493    _10 = (int) _9;
3494 
3495    <bb 4> [local count: 1073741824]:
3496    # iftmp.0_3 = PHI <_10(3), 0(2)>
3497    then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3498    optimize the x_4(D) != 0 condition to 1.  */
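
/* Editorial illustration (not part of the original sources): the guarded
   division above is roughly what a check-by-division expansion of
   __builtin_mul_overflow (x, y, res) looks like at the source level:

     *res = x * y;
     ovf = x != 0 && *res / x != y;

   Once the multiplication has been rewritten to use .MUL_OVERFLOW, the
   x != 0 guard is redundant and the condition can be folded to a
   constant.  */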
3499 
3500 static void
3501 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3502 			       gimple *div_stmt, bool *cfg_changed)
3503 {
3504   basic_block bb = gimple_bb (cond_stmt);
3505   if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3506     return;
3507   edge pred_edge = single_pred_edge (bb);
3508   basic_block pred_bb = pred_edge->src;
3509   if (EDGE_COUNT (pred_bb->succs) != 2)
3510     return;
3511   edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3512   edge other_succ_edge = NULL;
3513   if (gimple_code (cond_stmt) == GIMPLE_COND)
3514     {
3515       if (EDGE_COUNT (bb->succs) != 2)
3516 	return;
3517       other_succ_edge = EDGE_SUCC (bb, 0);
3518       if (gimple_cond_code (cond_stmt) == NE_EXPR)
3519 	{
3520 	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3521 	    other_succ_edge = EDGE_SUCC (bb, 1);
3522 	}
3523       else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3524 	other_succ_edge = EDGE_SUCC (bb, 0);
3525       if (other_edge->dest != other_succ_edge->dest)
3526 	return;
3527     }
3528   else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3529     return;
3530   gimple *zero_cond = last_stmt (pred_bb);
3531   if (zero_cond == NULL
3532       || gimple_code (zero_cond) != GIMPLE_COND
3533       || (gimple_cond_code (zero_cond)
3534 	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3535       || !integer_zerop (gimple_cond_rhs (zero_cond)))
3536     return;
3537   tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3538   if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3539     return;
3540   if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3541     {
3542       /* Allow the divisor to be the result of a same-precision cast
3543 	 from zero_cond_lhs.  */
3544       tree rhs2 = gimple_assign_rhs2 (div_stmt);
3545       if (TREE_CODE (rhs2) != SSA_NAME)
3546 	return;
3547       gimple *g = SSA_NAME_DEF_STMT (rhs2);
3548       if (!gimple_assign_cast_p (g)
3549 	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3550 	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3551 	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3552 	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
3553 	return;
3554     }
3555   gimple_stmt_iterator gsi = gsi_after_labels (bb);
3556   mul_stmts.quick_push (div_stmt);
3557   if (is_gimple_debug (gsi_stmt (gsi)))
3558     gsi_next_nondebug (&gsi);
3559   unsigned cast_count = 0;
3560   while (gsi_stmt (gsi) != cond_stmt)
3561     {
3562       /* If the original mul_stmt has a single use, allow it in the same bb;
3563 	 we are then looking just at __builtin_mul_overflow_p.
3564 	 Though, in that case the original mul_stmt will be replaced
3565 	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3566       gimple *mul_stmt;
3567       unsigned int i;
3568       bool ok = false;
3569       FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3570 	{
3571 	  if (gsi_stmt (gsi) == mul_stmt)
3572 	    {
3573 	      ok = true;
3574 	      break;
3575 	    }
3576 	}
3577       if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3578 	ok = true;
3579       if (!ok)
3580 	return;
3581       gsi_next_nondebug (&gsi);
3582     }
3583   if (gimple_code (cond_stmt) == GIMPLE_COND)
3584     {
3585       basic_block succ_bb = other_edge->dest;
3586       for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3587 	   gsi_next (&gpi))
3588 	{
3589 	  gphi *phi = gpi.phi ();
3590 	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3591 	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3592 	  if (!operand_equal_p (v1, v2, 0))
3593 	    return;
3594 	}
3595     }
3596   else
3597     {
3598       tree lhs = gimple_assign_lhs (cond_stmt);
3599       if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3600 	return;
3601       gsi_next_nondebug (&gsi);
3602       if (!gsi_end_p (gsi))
3603 	{
3604 	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3605 	    return;
3606 	  gimple *cast_stmt = gsi_stmt (gsi);
3607 	  if (!gimple_assign_cast_p (cast_stmt))
3608 	    return;
3609 	  tree new_lhs = gimple_assign_lhs (cast_stmt);
3610 	  gsi_next_nondebug (&gsi);
3611 	  if (!gsi_end_p (gsi)
3612 	      || !new_lhs
3613 	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3614 	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3615 	    return;
3616 	  lhs = new_lhs;
3617 	}
3618       edge succ_edge = single_succ_edge (bb);
3619       basic_block succ_bb = succ_edge->dest;
3620       gsi = gsi_start_phis (succ_bb);
3621       if (gsi_end_p (gsi))
3622 	return;
3623       gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3624       gsi_next (&gsi);
3625       if (!gsi_end_p (gsi))
3626 	return;
3627       if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3628 	return;
3629       tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3630       if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3631 	{
3632 	  tree cond = gimple_assign_rhs1 (cond_stmt);
3633 	  if (TREE_CODE (cond) == NE_EXPR)
3634 	    {
3635 	      if (!operand_equal_p (other_val,
3636 				    gimple_assign_rhs3 (cond_stmt), 0))
3637 		return;
3638 	    }
3639 	  else if (!operand_equal_p (other_val,
3640 				     gimple_assign_rhs2 (cond_stmt), 0))
3641 	    return;
3642 	}
3643       else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3644 	{
3645 	  if (!integer_zerop (other_val))
3646 	    return;
3647 	}
3648       else if (!integer_onep (other_val))
3649 	return;
3650     }
3651   gcond *zero_gcond = as_a <gcond *> (zero_cond);
3652   if (pred_edge->flags & EDGE_TRUE_VALUE)
3653     gimple_cond_make_true (zero_gcond);
3654   else
3655     gimple_cond_make_false (zero_gcond);
3656   update_stmt (zero_cond);
3657   *cfg_changed = true;
3658 }
3659 
3660 /* Helper function for arith_overflow_check_p.  Return true
3661    if VAL1 is equal to VAL2 cast to the corresponding integral type
3662    with the other signedness, or vice versa.  */
3663 
3664 static bool
3665 arith_cast_equal_p (tree val1, tree val2)
3666 {
3667   if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3668     return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3669   else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3670     return false;
3671   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3672       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3673     return true;
3674   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3675       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3676     return true;
3677   return false;
3678 }
3679 
3680 /* Helper function of match_arith_overflow.  Return 1
3681    if USE_STMT is an unsigned overflow check ovf != 0 for
3682    STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3683    and 0 otherwise.  */
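
/* Editorial illustration (not part of the original sources): the unsigned
   overflow checks recognized below include, e.g.,

     r = a + b;  if (r < a)    overflow     (return 1)
     r = a + b;  if (a <= r)   no overflow  (return -1)
     r = a - b;  if (r > a)    overflow     (return 1)

   and, for MULT_EXPR, a division of the product back by one operand
   compared against the other:  _1 = r / a;  _1 != b.  */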
3684 
3685 static int
3686 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3687 			tree maxval, tree *other)
3688 {
3689   enum tree_code ccode = ERROR_MARK;
3690   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3691   enum tree_code code = gimple_assign_rhs_code (stmt);
3692   tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3693   tree rhs1 = gimple_assign_rhs1 (stmt);
3694   tree rhs2 = gimple_assign_rhs2 (stmt);
3695   tree multop = NULL_TREE, divlhs = NULL_TREE;
3696   gimple *cur_use_stmt = use_stmt;
3697 
3698   if (code == MULT_EXPR)
3699     {
3700       if (!is_gimple_assign (use_stmt))
3701 	return 0;
3702       if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3703 	return 0;
3704       if (gimple_assign_rhs1 (use_stmt) != lhs)
3705 	return 0;
3706       if (cast_stmt)
3707 	{
3708 	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3709 	    multop = rhs2;
3710 	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3711 	    multop = rhs1;
3712 	  else
3713 	    return 0;
3714 	}
3715       else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3716 	multop = rhs2;
3717       else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3718 	multop = rhs1;
3719       else
3720 	return 0;
3721       if (stmt_ends_bb_p (use_stmt))
3722 	return 0;
3723       divlhs = gimple_assign_lhs (use_stmt);
3724       if (!divlhs)
3725 	return 0;
3726       use_operand_p use;
3727       if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3728 	return 0;
3729     }
3730   if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3731     {
3732       ccode = gimple_cond_code (cur_use_stmt);
3733       crhs1 = gimple_cond_lhs (cur_use_stmt);
3734       crhs2 = gimple_cond_rhs (cur_use_stmt);
3735     }
3736   else if (is_gimple_assign (cur_use_stmt))
3737     {
3738       if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3739 	{
3740 	  ccode = gimple_assign_rhs_code (cur_use_stmt);
3741 	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3742 	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3743 	}
3744       else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3745 	{
3746 	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
3747 	  if (COMPARISON_CLASS_P (cond))
3748 	    {
3749 	      ccode = TREE_CODE (cond);
3750 	      crhs1 = TREE_OPERAND (cond, 0);
3751 	      crhs2 = TREE_OPERAND (cond, 1);
3752 	    }
3753 	  else
3754 	    return 0;
3755 	}
3756       else
3757 	return 0;
3758     }
3759   else
3760     return 0;
3761 
3762   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3763     return 0;
3764 
3765   switch (ccode)
3766     {
3767     case GT_EXPR:
3768     case LE_EXPR:
3769       if (maxval)
3770 	{
3771 	  /* r = a + b; r > maxval or r <= maxval  */
3772 	  if (crhs1 == lhs
3773 	      && TREE_CODE (crhs2) == INTEGER_CST
3774 	      && tree_int_cst_equal (crhs2, maxval))
3775 	    return ccode == GT_EXPR ? 1 : -1;
3776 	  break;
3777 	}
3778       /* r = a - b; r > a or r <= a
3779 	 r = a + b; a > r or a <= r or b > r or b <= r.  */
3780       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3781 	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3782 	      && crhs2 == lhs))
3783 	return ccode == GT_EXPR ? 1 : -1;
3784       /* r = ~a; b > r or b <= r.  */
3785       if (code == BIT_NOT_EXPR && crhs2 == lhs)
3786 	{
3787 	  if (other)
3788 	    *other = crhs1;
3789 	  return ccode == GT_EXPR ? 1 : -1;
3790 	}
3791       break;
3792     case LT_EXPR:
3793     case GE_EXPR:
3794       if (maxval)
3795 	break;
3796       /* r = a - b; a < r or a >= r
3797 	 r = a + b; r < a or r >= a or r < b or r >= b.  */
3798       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3799 	  || (code == PLUS_EXPR && crhs1 == lhs
3800 	      && (crhs2 == rhs1 || crhs2 == rhs2)))
3801 	return ccode == LT_EXPR ? 1 : -1;
3802       /* r = ~a; r < b or r >= b.  */
3803       if (code == BIT_NOT_EXPR && crhs1 == lhs)
3804 	{
3805 	  if (other)
3806 	    *other = crhs2;
3807 	  return ccode == LT_EXPR ? 1 : -1;
3808 	}
3809       break;
3810     case EQ_EXPR:
3811     case NE_EXPR:
3812       /* r = a * b; _1 = r / a; _1 == b
3813 	 r = a * b; _1 = r / b; _1 == a
3814 	 r = a * b; _1 = r / a; _1 != b
3815 	 r = a * b; _1 = r / b; _1 != a.  */
3816       if (code == MULT_EXPR)
3817 	{
3818 	  if (cast_stmt)
3819 	    {
3820 	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3821 		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3822 		{
3823 		  use_stmt = cur_use_stmt;
3824 		  return ccode == NE_EXPR ? 1 : -1;
3825 		}
3826 	    }
3827 	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3828 		   || (crhs2 == divlhs && crhs1 == multop))
3829 	    {
3830 	      use_stmt = cur_use_stmt;
3831 	      return ccode == NE_EXPR ? 1 : -1;
3832 	    }
3833 	}
3834       break;
3835     default:
3836       break;
3837     }
3838   return 0;
3839 }
3840 
3841 /* Recognize for unsigned x
3842    x = y - z;
3843    if (x > y)
3844    where there are other uses of x and replace it with
3845    _7 = .SUB_OVERFLOW (y, z);
3846    x = REALPART_EXPR <_7>;
3847    _8 = IMAGPART_EXPR <_7>;
3848    if (_8)
3849    and similarly for addition.
3850 
3851    Also recognize:
3852    yc = (type) y;
3853    zc = (type) z;
3854    x = yc + zc;
3855    if (x > max)
3856    where y and z have unsigned types with maximum max
3857    and there are other uses of x and all of those cast x
3858    back to that unsigned type and again replace it with
3859    _7 = .ADD_OVERFLOW (y, z);
3860    _9 = REALPART_EXPR <_7>;
3861    _8 = IMAGPART_EXPR <_7>;
3862    if (_8)
3863    and replace (utype) x with _9.
3864 
3865    Also recognize:
3866    x = ~z;
3867    if (y > x)
3868    and replace it with
3869    _7 = .ADD_OVERFLOW (y, z);
3870    _8 = IMAGPART_EXPR <_7>;
3871    if (_8)
3872 
3873    And also recognize:
3874    z = x * y;
3875    if (x != 0)
3876      goto <bb 3>; [50.00%]
3877    else
3878      goto <bb 4>; [50.00%]
3879 
3880    <bb 3> [local count: 536870913]:
3881    _2 = z / x;
3882    _9 = _2 != y;
3883    _10 = (int) _9;
3884 
3885    <bb 4> [local count: 1073741824]:
3886    # iftmp.0_3 = PHI <_10(3), 0(2)>
3887    and replace it with
3888    _7 = .MUL_OVERFLOW (x, y);
3889    z = IMAGPART_EXPR <_7>;
3890    _8 = IMAGPART_EXPR <_7>;
3891    _9 = _8 != 0;
3892    iftmp.0_3 = (int) _9;  */
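
/* Editorial illustration (not part of the original sources): the first
   pattern typically arises from source code such as

     unsigned sub (unsigned y, unsigned z, unsigned *out)
     {
       unsigned x = y - z;
       *out = x;
       return x > y;   // borrow check
     }

   which is rewritten to a single .SUB_OVERFLOW call whose IMAGPART_EXPR
   provides the overflow flag.  */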
3893 
3894 static bool
3895 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3896 		      enum tree_code code, bool *cfg_changed)
3897 {
3898   tree lhs = gimple_assign_lhs (stmt);
3899   tree type = TREE_TYPE (lhs);
3900   use_operand_p use_p;
3901   imm_use_iterator iter;
3902   bool use_seen = false;
3903   bool ovf_use_seen = false;
3904   gimple *use_stmt;
3905   gimple *add_stmt = NULL;
3906   bool add_first = false;
3907   gimple *cond_stmt = NULL;
3908   gimple *cast_stmt = NULL;
3909   tree cast_lhs = NULL_TREE;
3910 
3911   gcc_checking_assert (code == PLUS_EXPR
3912 		       || code == MINUS_EXPR
3913 		       || code == MULT_EXPR
3914 		       || code == BIT_NOT_EXPR);
3915   if (!INTEGRAL_TYPE_P (type)
3916       || !TYPE_UNSIGNED (type)
3917       || has_zero_uses (lhs)
3918       || (code != PLUS_EXPR
3919 	  && code != MULT_EXPR
3920 	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3921 			    TYPE_MODE (type)) == CODE_FOR_nothing))
3922     return false;
3923 
3924   tree rhs1 = gimple_assign_rhs1 (stmt);
3925   tree rhs2 = gimple_assign_rhs2 (stmt);
3926   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3927     {
3928       use_stmt = USE_STMT (use_p);
3929       if (is_gimple_debug (use_stmt))
3930 	continue;
3931 
3932       tree other = NULL_TREE;
3933       if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3934 	{
3935 	  if (code == BIT_NOT_EXPR)
3936 	    {
3937 	      gcc_assert (other);
3938 	      if (TREE_CODE (other) != SSA_NAME)
3939 		return false;
3940 	      if (rhs2 == NULL)
3941 		rhs2 = other;
3942 	      else
3943 		return false;
3944 	      cond_stmt = use_stmt;
3945 	    }
3946 	  ovf_use_seen = true;
3947 	}
3948       else
3949 	{
3950 	  use_seen = true;
3951 	  if (code == MULT_EXPR
3952 	      && cast_stmt == NULL
3953 	      && gimple_assign_cast_p (use_stmt))
3954 	    {
3955 	      cast_lhs = gimple_assign_lhs (use_stmt);
3956 	      if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3957 		  && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3958 		  && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3959 		      == TYPE_PRECISION (TREE_TYPE (lhs))))
3960 		cast_stmt = use_stmt;
3961 	      else
3962 		cast_lhs = NULL_TREE;
3963 	    }
3964 	}
3965       if (ovf_use_seen && use_seen)
3966 	break;
3967     }
3968 
3969   if (!ovf_use_seen
3970       && code == MULT_EXPR
3971       && cast_stmt)
3972     {
3973       if (TREE_CODE (rhs1) != SSA_NAME
3974 	  || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3975 	return false;
3976       FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3977 	{
3978 	  use_stmt = USE_STMT (use_p);
3979 	  if (is_gimple_debug (use_stmt))
3980 	    continue;
3981 
3982 	  if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3983 				      NULL_TREE, NULL))
3984 	    ovf_use_seen = true;
3985 	}
3986     }
3987   else
3988     {
3989       cast_stmt = NULL;
3990       cast_lhs = NULL_TREE;
3991     }
3992 
3993   tree maxval = NULL_TREE;
3994   if (!ovf_use_seen
3995       || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3996       || (code == PLUS_EXPR
3997 	  && optab_handler (uaddv4_optab,
3998 			    TYPE_MODE (type)) == CODE_FOR_nothing)
3999       || (code == MULT_EXPR
4000 	  && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4001 			    TYPE_MODE (type)) == CODE_FOR_nothing))
4002     {
4003       if (code != PLUS_EXPR)
4004 	return false;
4005       if (TREE_CODE (rhs1) != SSA_NAME
4006 	  || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4007 	return false;
4008       rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4009       tree type1 = TREE_TYPE (rhs1);
4010       if (!INTEGRAL_TYPE_P (type1)
4011 	  || !TYPE_UNSIGNED (type1)
4012 	  || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4013 	  || (TYPE_PRECISION (type1)
4014 	      != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4015 	return false;
4016       if (TREE_CODE (rhs2) == INTEGER_CST)
4017 	{
4018 	  if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4019 	  			    TYPE_PRECISION (type1),
4020 				    UNSIGNED), 0))
4021 	    return false;
4022 	  rhs2 = fold_convert (type1, rhs2);
4023 	}
4024       else
4025 	{
4026 	  if (TREE_CODE (rhs2) != SSA_NAME
4027 	      || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4028 	    return false;
4029 	  rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4030 	  tree type2 = TREE_TYPE (rhs2);
4031 	  if (!INTEGRAL_TYPE_P (type2)
4032 	      || !TYPE_UNSIGNED (type2)
4033 	      || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4034 	      || (TYPE_PRECISION (type2)
4035 		  != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4036 	    return false;
4037 	}
4038       if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4039 	type = type1;
4040       else
4041 	type = TREE_TYPE (rhs2);
4042 
4043       if (TREE_CODE (type) != INTEGER_TYPE
4044 	  || optab_handler (uaddv4_optab,
4045 			    TYPE_MODE (type)) == CODE_FOR_nothing)
4046 	return false;
4047 
4048       maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4049 						      UNSIGNED));
4050       ovf_use_seen = false;
4051       use_seen = false;
4052       basic_block use_bb = NULL;
4053       FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4054 	{
4055 	  use_stmt = USE_STMT (use_p);
4056 	  if (is_gimple_debug (use_stmt))
4057 	    continue;
4058 
4059 	  if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4060 	    {
4061 	      ovf_use_seen = true;
4062 	      use_bb = gimple_bb (use_stmt);
4063 	    }
4064 	  else
4065 	    {
4066 	      if (!gimple_assign_cast_p (use_stmt)
4067 		  || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4068 		return false;
4069 	      tree use_lhs = gimple_assign_lhs (use_stmt);
4070 	      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4071 		  || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4072 		      > TYPE_PRECISION (type)))
4073 		return false;
4074 	      use_seen = true;
4075 	    }
4076 	}
4077       if (!ovf_use_seen)
4078 	return false;
4079       if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4080 	{
4081 	  if (!use_seen)
4082 	    return false;
4083 	  tree new_rhs1 = make_ssa_name (type);
4084 	  gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4085 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4086 	  rhs1 = new_rhs1;
4087 	}
4088       else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4089 	{
4090 	  if (!use_seen)
4091 	    return false;
4092 	  tree new_rhs2 = make_ssa_name (type);
4093 	  gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4094 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4095 	  rhs2 = new_rhs2;
4096 	}
4097       else if (!use_seen)
4098 	{
4099 	  /* If there are no uses of the wider addition, check whether
4100 	     forwprop has created a narrower addition.
4101 	     Require it to be in the same bb as the overflow check.  */
4102 	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4103 	    {
4104 	      use_stmt = USE_STMT (use_p);
4105 	      if (is_gimple_debug (use_stmt))
4106 		continue;
4107 
4108 	      if (use_stmt == stmt)
4109 		continue;
4110 
4111 	      if (!is_gimple_assign (use_stmt)
4112 		  || gimple_bb (use_stmt) != use_bb
4113 		  || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4114 		continue;
4115 
4116 	      if (gimple_assign_rhs1 (use_stmt) == rhs1)
4117 		{
4118 		  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4119 					rhs2, 0))
4120 		    continue;
4121 		}
4122 	      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4123 		{
4124 		  if (gimple_assign_rhs1 (use_stmt) != rhs2)
4125 		    continue;
4126 		}
4127 	      else
4128 		continue;
4129 
4130 	      add_stmt = use_stmt;
4131 	      break;
4132 	    }
4133 	  if (add_stmt == NULL)
4134 	    return false;
4135 
4136 	  /* If stmt and add_stmt are in the same bb, we need to find out
4137 	     which one is earlier.  If they are in different bbs, we've
4138 	     checked add_stmt is in the same bb as one of the uses of the
4139 	     stmt lhs, so stmt needs to dominate add_stmt too.  */
4140 	  if (gimple_bb (stmt) == gimple_bb (add_stmt))
4141 	    {
4142 	      gimple_stmt_iterator gsif = *gsi;
4143 	      gimple_stmt_iterator gsib = *gsi;
4144 	      int i;
4145 	      /* Search both forward and backward from stmt and have a small
4146 		 upper bound.  */
4147 	      for (i = 0; i < 128; i++)
4148 		{
4149 		  if (!gsi_end_p (gsib))
4150 		    {
4151 		      gsi_prev_nondebug (&gsib);
4152 		      if (gsi_stmt (gsib) == add_stmt)
4153 			{
4154 			  add_first = true;
4155 			  break;
4156 			}
4157 		    }
4158 		  else if (gsi_end_p (gsif))
4159 		    break;
4160 		  if (!gsi_end_p (gsif))
4161 		    {
4162 		      gsi_next_nondebug (&gsif);
4163 		      if (gsi_stmt (gsif) == add_stmt)
4164 			break;
4165 		    }
4166 		}
4167 	      if (i == 128)
4168 		return false;
4169 	      if (add_first)
4170 		*gsi = gsi_for_stmt (add_stmt);
4171 	    }
4172 	}
4173     }
4174 
4175   if (code == BIT_NOT_EXPR)
4176     *gsi = gsi_for_stmt (cond_stmt);
4177 
4178   auto_vec<gimple *, 8> mul_stmts;
4179   if (code == MULT_EXPR && cast_stmt)
4180     {
4181       type = TREE_TYPE (cast_lhs);
4182       gimple *g = SSA_NAME_DEF_STMT (rhs1);
4183       if (gimple_assign_cast_p (g)
4184 	  && useless_type_conversion_p (type,
4185 					TREE_TYPE (gimple_assign_rhs1 (g)))
4186 	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4187 	rhs1 = gimple_assign_rhs1 (g);
4188       else
4189 	{
4190 	  g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4191 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
4192 	  rhs1 = gimple_assign_lhs (g);
4193 	  mul_stmts.quick_push (g);
4194 	}
4195       if (TREE_CODE (rhs2) == INTEGER_CST)
4196 	rhs2 = fold_convert (type, rhs2);
4197       else
4198 	{
4199 	  g = SSA_NAME_DEF_STMT (rhs2);
4200 	  if (gimple_assign_cast_p (g)
4201 	      && useless_type_conversion_p (type,
4202 					    TREE_TYPE (gimple_assign_rhs1 (g)))
4203 	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4204 	    rhs2 = gimple_assign_rhs1 (g);
4205 	  else
4206 	    {
4207 	      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4208 	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
4209 	      rhs2 = gimple_assign_lhs (g);
4210 	      mul_stmts.quick_push (g);
4211 	    }
4212 	}
4213     }
4214   tree ctype = build_complex_type (type);
4215   gcall *g = gimple_build_call_internal (code == MULT_EXPR
4216 					 ? IFN_MUL_OVERFLOW
4217 					 : code != MINUS_EXPR
4218 					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4219 					 2, rhs1, rhs2);
4220   tree ctmp = make_ssa_name (ctype);
4221   gimple_call_set_lhs (g, ctmp);
4222   gsi_insert_before (gsi, g, GSI_SAME_STMT);
4223   tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4224   gassign *g2;
4225   if (code != BIT_NOT_EXPR)
4226     {
4227       g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4228 				build1 (REALPART_EXPR, type, ctmp));
4229       if (maxval || cast_stmt)
4230 	{
4231 	  gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4232 	  if (add_first)
4233 	    *gsi = gsi_for_stmt (stmt);
4234 	}
4235       else
4236 	gsi_replace (gsi, g2, true);
4237       if (code == MULT_EXPR)
4238 	{
4239 	  mul_stmts.quick_push (g);
4240 	  mul_stmts.quick_push (g2);
4241 	  if (cast_stmt)
4242 	    {
4243 	      g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4244 	      gsi_replace (gsi, g2, true);
4245 	      mul_stmts.quick_push (g2);
4246 	    }
4247 	}
4248     }
4249   tree ovf = make_ssa_name (type);
4250   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4251 			    build1 (IMAGPART_EXPR, type, ctmp));
4252   if (code != BIT_NOT_EXPR)
4253     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4254   else
4255     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4256   if (code == MULT_EXPR)
4257     mul_stmts.quick_push (g2);
4258 
4259   FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4260     {
4261       if (is_gimple_debug (use_stmt))
4262 	continue;
4263 
4264       gimple *orig_use_stmt = use_stmt;
4265       int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4266 					    maxval, NULL);
4267       if (ovf_use == 0)
4268 	{
4269 	  gcc_assert (code != BIT_NOT_EXPR);
4270 	  if (maxval)
4271 	    {
4272 	      tree use_lhs = gimple_assign_lhs (use_stmt);
4273 	      gimple_assign_set_rhs1 (use_stmt, new_lhs);
4274 	      if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4275 					     TREE_TYPE (new_lhs)))
4276 		gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4277 	      update_stmt (use_stmt);
4278 	    }
4279 	  continue;
4280 	}
4281       if (gimple_code (use_stmt) == GIMPLE_COND)
4282 	{
4283 	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
4284 	  gimple_cond_set_lhs (cond_stmt, ovf);
4285 	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4286 	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4287 	}
4288       else
4289 	{
4290 	  gcc_checking_assert (is_gimple_assign (use_stmt));
4291 	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4292 	    {
4293 	      gimple_assign_set_rhs1 (use_stmt, ovf);
4294 	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4295 	      gimple_assign_set_rhs_code (use_stmt,
4296 					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4297 	    }
4298 	  else
4299 	    {
4300 	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4301 				   == COND_EXPR);
4302 	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4303 				  boolean_type_node, ovf,
4304 				  build_int_cst (type, 0));
4305 	      gimple_assign_set_rhs1 (use_stmt, cond);
4306 	    }
4307 	}
4308       update_stmt (use_stmt);
4309       if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4310 	{
4311 	  gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4312 	  maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4313 					 cfg_changed);
4314 	  gsi_remove (&gsi2, true);
4315 	  release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4316 	}
4317     }
4318   if (maxval)
4319     {
4320       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4321       gsi_remove (&gsi2, true);
4322       if (add_stmt)
4323 	{
4324 	  gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4325 					   new_lhs);
4326 	  gsi2 = gsi_for_stmt (add_stmt);
4327 	  gsi_replace (&gsi2, g, true);
4328 	}
4329     }
4330   else if (code == BIT_NOT_EXPR)
4331     {
4332       *gsi = gsi_for_stmt (stmt);
4333       gsi_remove (gsi, true);
4334       release_ssa_name (lhs);
4335       return true;
4336     }
4337   return false;
4338 }
4339 
4340 /* Return true if target has support for divmod.  */
4341 
4342 static bool
4343 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4344 {
4345   /* If target supports hardware divmod insn, use it for divmod.  */
4346   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4347     return true;
4348 
4349   /* Check if libfunc for divmod is available.  */
4350   rtx libfunc = optab_libfunc (divmod_optab, mode);
4351   if (libfunc != NULL_RTX)
4352     {
4353       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4354 	 we don't want to use the libfunc even if it exists for the given mode.  */
4355       machine_mode div_mode;
4356       FOR_EACH_MODE_FROM (div_mode, mode)
4357 	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4358 	  return false;
4359 
4360       return targetm.expand_divmod_libfunc != NULL;
4361     }
4362 
4363   return false;
4364 }
4365 
4366 /* Check if stmt is candidate for divmod transform.  */
4367 
4368 static bool
4369 divmod_candidate_p (gassign *stmt)
4370 {
4371   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4372   machine_mode mode = TYPE_MODE (type);
4373   optab divmod_optab, div_optab;
4374 
4375   if (TYPE_UNSIGNED (type))
4376     {
4377       divmod_optab = udivmod_optab;
4378       div_optab = udiv_optab;
4379     }
4380   else
4381     {
4382       divmod_optab = sdivmod_optab;
4383       div_optab = sdiv_optab;
4384     }
4385 
4386   tree op1 = gimple_assign_rhs1 (stmt);
4387   tree op2 = gimple_assign_rhs2 (stmt);
4388 
4389   /* Disable the transform if either operand is a constant, since
4390      division-by-constant may have specialized expansion.  */
4391   if (CONSTANT_CLASS_P (op1))
4392     return false;
4393 
4394   if (CONSTANT_CLASS_P (op2))
4395     {
4396       if (integer_pow2p (op2))
4397 	return false;
4398 
4399       if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4400 	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
4401 	return false;
4402 
4403       /* If the divisor is not a power of 2 and the precision is wider than
4404 	 a HWI, expand_divmod punts on that, so in that case it is better
4405 	 to use divmod optab or libfunc.  Similarly if choose_multiplier
4406 	 might need pre/post shifts of BITS_PER_WORD or more.  */
4407     }
4408 
4409   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4410      expand using the [su]divv optabs.  */
4411   if (TYPE_OVERFLOW_TRAPS (type))
4412     return false;
4413 
4414   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4415     return false;
4416 
4417   return true;
4418 }
4419 
4420 /* This function looks for:
4421    t1 = a TRUNC_DIV_EXPR b;
4422    t2 = a TRUNC_MOD_EXPR b;
4423    and transforms it to the following sequence:
4424    complex_tmp = DIVMOD (a, b);
4425    t1 = REALPART_EXPR (complex_tmp);
4426    t2 = IMAGPART_EXPR (complex_tmp);
4427    For conditions enabling the transform see divmod_candidate_p().
4428 
4429    The pass has three parts:
4430    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4431       other trunc_div_expr and trunc_mod_expr stmts.
4432    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4433       to stmts vector.
4434    3) Insert DIVMOD call just before top_stmt and update entries in
4435       stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4436       IMAGPART_EXPR for mod).  */
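
/* Editorial illustration (not part of the original sources): a typical
   candidate is

     q = a / b;
     r = a % b;

   with identical operands; both statements are rewritten to read the
   REALPART_EXPR and IMAGPART_EXPR of a single DIVMOD result when the
   target provides a divmod instruction or libfunc.  */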
4437 
4438 static bool
4439 convert_to_divmod (gassign *stmt)
4440 {
4441   if (stmt_can_throw_internal (cfun, stmt)
4442       || !divmod_candidate_p (stmt))
4443     return false;
4444 
4445   tree op1 = gimple_assign_rhs1 (stmt);
4446   tree op2 = gimple_assign_rhs2 (stmt);
4447 
4448   imm_use_iterator use_iter;
4449   gimple *use_stmt;
4450   auto_vec<gimple *> stmts;
4451 
4452   gimple *top_stmt = stmt;
4453   basic_block top_bb = gimple_bb (stmt);
4454 
4455   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4456      at least stmt and possibly other trunc_div/trunc_mod stmts
4457      having the same operands as stmt.  */
4458 
4459   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4460     {
4461       if (is_gimple_assign (use_stmt)
4462 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4463 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4464 	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4465 	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4466 	{
4467 	  if (stmt_can_throw_internal (cfun, use_stmt))
4468 	    continue;
4469 
4470 	  basic_block bb = gimple_bb (use_stmt);
4471 
4472 	  if (bb == top_bb)
4473 	    {
4474 	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4475 		top_stmt = use_stmt;
4476 	    }
4477 	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4478 	    {
4479 	      top_bb = bb;
4480 	      top_stmt = use_stmt;
4481 	    }
4482 	}
4483     }
4484 
4485   tree top_op1 = gimple_assign_rhs1 (top_stmt);
4486   tree top_op2 = gimple_assign_rhs2 (top_stmt);
4487 
4488   stmts.safe_push (top_stmt);
4489   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4490 
4491   /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4492      to the stmts vector.  The 2nd loop will always add stmt to the stmts
4493      vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4494      2nd loop ends up adding at least one trunc_mod_expr stmt.  */
4495 
4496   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4497     {
4498       if (is_gimple_assign (use_stmt)
4499 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4500 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4501 	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4502 	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4503 	{
4504 	  if (use_stmt == top_stmt
4505 	      || stmt_can_throw_internal (cfun, use_stmt)
4506 	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4507 	    continue;
4508 
4509 	  stmts.safe_push (use_stmt);
4510 	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4511 	    div_seen = true;
4512 	}
4513     }
4514 
4515   if (!div_seen)
4516     return false;
4517 
4518   /* Part 3: Create a call to the internal fn DIVMOD:
4519      divmod_tmp = DIVMOD (op1, op2).  */
4520 
4521   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4522   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4523 				 call_stmt, "divmod_tmp");
4524   gimple_call_set_lhs (call_stmt, res);
4525   /* We rejected throwing statements above.  */
4526   gimple_call_set_nothrow (call_stmt, true);
4527 
4528   /* Insert the call before top_stmt.  */
4529   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4530   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4531 
4532   widen_mul_stats.divmod_calls_inserted++;
4533 
4534   /* Update all statements in stmts vector:
4535      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4536      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4537 
4538   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4539     {
4540       tree new_rhs;
4541 
4542       switch (gimple_assign_rhs_code (use_stmt))
4543 	{
4544 	  case TRUNC_DIV_EXPR:
4545 	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4546 	    break;
4547 
4548 	  case TRUNC_MOD_EXPR:
4549 	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4550 	    break;
4551 
4552 	  default:
4553 	    gcc_unreachable ();
4554 	}
4555 
4556       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4557       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4558       update_stmt (use_stmt);
4559     }
4560 
4561   return true;
4562 }
4563 
4564 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4565    its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
4566    value is true iff we converted the statement.  */
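
/* Editorial illustration (not part of the original sources): on a target
   with 32-bit int, a typical candidate is

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   where the WIDEN_MULT_EXPR followed by a right shift of at least the
   narrow precision becomes a single MULT_HIGHPART_EXPR.  */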
4567 
4568 static bool
4569 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4570 {
4571   tree lhs = gimple_assign_lhs (stmt);
4572   tree stype = TREE_TYPE (lhs);
4573   tree sarg0 = gimple_assign_rhs1 (stmt);
4574   tree sarg1 = gimple_assign_rhs2 (stmt);
4575 
4576   if (TREE_CODE (stype) != INTEGER_TYPE
4577       || TREE_CODE (sarg1) != INTEGER_CST
4578       || TREE_CODE (sarg0) != SSA_NAME
4579       || !tree_fits_uhwi_p (sarg1)
4580       || !has_single_use (sarg0))
4581     return false;
4582 
4583   gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4584   if (!def)
4585     return false;
4586 
4587   enum tree_code mcode = gimple_assign_rhs_code (def);
4588   if (mcode == NOP_EXPR)
4589     {
4590       tree tmp = gimple_assign_rhs1 (def);
4591       if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4592 	return false;
4593       def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4594       if (!def)
4595 	return false;
4596       mcode = gimple_assign_rhs_code (def);
4597     }
4598 
4599   if (mcode != WIDEN_MULT_EXPR
4600       || gimple_bb (def) != gimple_bb (stmt))
4601     return false;
4602   tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4603   if (TREE_CODE (mtype) != INTEGER_TYPE
4604       || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4605     return false;
4606 
4607   tree mop1 = gimple_assign_rhs1 (def);
4608   tree mop2 = gimple_assign_rhs2 (def);
4609   tree optype = TREE_TYPE (mop1);
4610   bool unsignedp = TYPE_UNSIGNED (optype);
4611   unsigned int prec = TYPE_PRECISION (optype);
4612 
4613   if (unsignedp != TYPE_UNSIGNED (mtype)
4614       || TYPE_PRECISION (mtype) != 2 * prec)
4615     return false;
4616 
4617   unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4618   if (bits < prec || bits >= 2 * prec)
4619     return false;
4620 
4621   /* For the time being, require operands to have the same sign.  */
4622   if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4623     return false;
4624 
4625   machine_mode mode = TYPE_MODE (optype);
4626   optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4627   if (optab_handler (tab, mode) == CODE_FOR_nothing)
4628     return false;
4629 
4630   location_t loc = gimple_location (stmt);
4631   tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4632 					   MULT_HIGHPART_EXPR, mop1, mop2);
4633   tree highpart2 = highpart1;
4634   tree ntype = optype;
4635 
4636   if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4637     {
4638       ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4639 				    : signed_type_for (optype);
4640       highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4641     }
4642   if (bits > prec)
4643     highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4644 					RSHIFT_EXPR, highpart2,
4645 					build_int_cst (ntype, bits - prec));
4646 
4647   gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4648   gsi_replace (gsi, new_stmt, true);
4649 
4650   widen_mul_stats.highpart_mults_inserted++;
4651   return true;
4652 }
4653 
4654 /* If target has spaceship<MODE>3 expander, pattern recognize
4655    <bb 2> [local count: 1073741824]:
4656    if (a_2(D) == b_3(D))
4657      goto <bb 6>; [34.00%]
4658    else
4659      goto <bb 3>; [66.00%]
4660 
4661    <bb 3> [local count: 708669601]:
4662    if (a_2(D) < b_3(D))
4663      goto <bb 6>; [1.04%]
4664    else
4665      goto <bb 4>; [98.96%]
4666 
4667    <bb 4> [local count: 701299439]:
4668    if (a_2(D) > b_3(D))
4669      goto <bb 5>; [48.89%]
4670    else
4671      goto <bb 6>; [51.11%]
4672 
4673    <bb 5> [local count: 342865295]:
4674 
4675    <bb 6> [local count: 1073741824]:
4676    and turn it into:
4677    <bb 2> [local count: 1073741824]:
4678    _1 = .SPACESHIP (a_2(D), b_3(D));
4679    if (_1 == 0)
4680      goto <bb 6>; [34.00%]
4681    else
4682      goto <bb 3>; [66.00%]
4683 
4684    <bb 3> [local count: 708669601]:
4685    if (_1 == -1)
4686      goto <bb 6>; [1.04%]
4687    else
4688      goto <bb 4>; [98.96%]
4689 
4690    <bb 4> [local count: 701299439]:
4691    if (_1 == 1)
4692      goto <bb 5>; [48.89%]
4693    else
4694      goto <bb 6>; [51.11%]
4695 
4696    <bb 5> [local count: 342865295]:
4697 
4698    <bb 6> [local count: 1073741824]:
4699    so that the backend can emit optimal comparison and
4700    conditional jump sequence.  */
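
/* Editorial illustration (not part of the original sources): the comparison
   ladder above is roughly what the C++ three-way comparison

     auto c = a <=> b;   // a, b of floating-point type

   lowers to, so recognizing it lets a single .SPACESHIP call, whose result
   is in the range [-1, 2] (2 standing for unordered operands), drive the
   conditional jumps.  */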
4701 
4702 static void
4703 optimize_spaceship (gimple *stmt)
4704 {
4705   enum tree_code code = gimple_cond_code (stmt);
4706   if (code != EQ_EXPR && code != NE_EXPR)
4707     return;
4708   tree arg1 = gimple_cond_lhs (stmt);
4709   tree arg2 = gimple_cond_rhs (stmt);
4710   if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4711       || optab_handler (spaceship_optab,
4712 			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4713       || operand_equal_p (arg1, arg2, 0))
4714     return;
4715 
4716   basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4717   edge em1 = NULL, e1 = NULL, e2 = NULL;
4718   bb1 = EDGE_SUCC (bb0, 1)->dest;
4719   if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4720     bb1 = EDGE_SUCC (bb0, 0)->dest;
4721 
4722   gimple *g = last_stmt (bb1);
4723   if (g == NULL
4724       || gimple_code (g) != GIMPLE_COND
4725       || !single_pred_p (bb1)
4726       || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4727 	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4728 	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4729 	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4730       || !cond_only_block_p (bb1))
4731     return;
4732 
4733   enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4734 			  ? LT_EXPR : GT_EXPR);
4735   switch (gimple_cond_code (g))
4736     {
4737     case LT_EXPR:
4738     case LE_EXPR:
4739       break;
4740     case GT_EXPR:
4741     case GE_EXPR:
4742       ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4743       break;
4744     default:
4745       return;
4746     }
4747 
4748   for (int i = 0; i < 2; ++i)
4749     {
4750       /* With NaNs, </<=/>/>= are false, so we need to look for the
4751 	 third comparison on the false edge from whatever non-equality
4752 	 comparison the second comparison is.  */
4753       if (HONOR_NANS (TREE_TYPE (arg1))
4754 	  && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4755 	continue;
4756 
4757       bb2 = EDGE_SUCC (bb1, i)->dest;
4758       g = last_stmt (bb2);
4759       if (g == NULL
4760 	  || gimple_code (g) != GIMPLE_COND
4761 	  || !single_pred_p (bb2)
4762 	  || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4763 	      ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4764 	      : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4765 		 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4766 	  || !cond_only_block_p (bb2)
4767 	  || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4768 	continue;
4769 
4770       enum tree_code ccode2
4771 	= (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4772       switch (gimple_cond_code (g))
4773 	{
4774 	case LT_EXPR:
4775 	case LE_EXPR:
4776 	  break;
4777 	case GT_EXPR:
4778 	case GE_EXPR:
4779 	  ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4780 	  break;
4781 	default:
4782 	  continue;
4783 	}
4784       if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4785 	continue;
4786 
4787       if ((ccode == LT_EXPR)
4788 	  ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4789 	{
4790 	  em1 = EDGE_SUCC (bb1, 1 - i);
4791 	  e1 = EDGE_SUCC (bb2, 0);
4792 	  e2 = EDGE_SUCC (bb2, 1);
4793 	  if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4794 	    std::swap (e1, e2);
4795 	}
4796       else
4797 	{
4798 	  e1 = EDGE_SUCC (bb1, 1 - i);
4799 	  em1 = EDGE_SUCC (bb2, 0);
4800 	  e2 = EDGE_SUCC (bb2, 1);
4801 	  if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4802 	    std::swap (em1, e2);
4803 	}
4804       break;
4805     }
4806 
4807   if (em1 == NULL)
4808     {
4809       if ((ccode == LT_EXPR)
4810 	  ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4811 	{
4812 	  em1 = EDGE_SUCC (bb1, 1);
4813 	  e1 = EDGE_SUCC (bb1, 0);
4814 	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4815 	}
4816       else
4817 	{
4818 	  em1 = EDGE_SUCC (bb1, 0);
4819 	  e1 = EDGE_SUCC (bb1, 1);
4820 	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4821 	}
4822     }
4823 
4824   g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4825   tree lhs = make_ssa_name (integer_type_node);
4826   gimple_call_set_lhs (g, lhs);
4827   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4828   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4829 
4830   gcond *cond = as_a <gcond *> (stmt);
4831   gimple_cond_set_lhs (cond, lhs);
4832   gimple_cond_set_rhs (cond, integer_zero_node);
4833   update_stmt (stmt);
4834 
4835   g = last_stmt (bb1);
4836   cond = as_a <gcond *> (g);
4837   gimple_cond_set_lhs (cond, lhs);
4838   if (em1->src == bb1 && e2 != em1)
4839     {
4840       gimple_cond_set_rhs (cond, integer_minus_one_node);
4841       gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4842 				  ? EQ_EXPR : NE_EXPR);
4843     }
4844   else
4845     {
4846       gcc_assert (e1->src == bb1 && e2 != e1);
4847       gimple_cond_set_rhs (cond, integer_one_node);
4848       gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4849 				  ? EQ_EXPR : NE_EXPR);
4850     }
4851   update_stmt (g);
4852 
4853   if (e2 != e1 && e2 != em1)
4854     {
4855       g = last_stmt (bb2);
4856       cond = as_a <gcond *> (g);
4857       gimple_cond_set_lhs (cond, lhs);
4858       if (em1->src == bb2)
4859 	gimple_cond_set_rhs (cond, integer_minus_one_node);
4860       else
4861 	{
4862 	  gcc_assert (e1->src == bb2);
4863 	  gimple_cond_set_rhs (cond, integer_one_node);
4864 	}
4865       gimple_cond_set_code (cond,
4866 			    (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4867       update_stmt (g);
4868     }
4869 
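  /* The IFN_SPACESHIP result is -1, 0 or 1 for ordered operands and, for
     floating point, 2 when they compare unordered, so record the [-1, 2]
     value range on LHS for later range-based optimizations.  */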
4870   wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4871   wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4872   set_range_info (lhs, VR_RANGE, wm1, w2);
4873 }
4874 
4875 
4876 /* Find integer multiplications where the operands are extended from
4877    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4878    or MULT_HIGHPART_EXPR where appropriate.  */
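
/* For instance (a minimal sketch; whether either rewrite actually happens
   depends on the target providing widening-multiply or highpart-multiply
   patterns for the modes involved):

     uint64_t f (uint32_t a, uint32_t b)
     { return (uint64_t) a * b; }		becomes WIDEN_MULT_EXPR <a, b>

     uint32_t g (uint32_t a, uint32_t b)
     { return ((uint64_t) a * b) >> 32; }	becomes MULT_HIGHPART_EXPR <a, b>  */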
4879 
4880 namespace {
4881 
4882 const pass_data pass_data_optimize_widening_mul =
4883 {
4884   GIMPLE_PASS, /* type */
4885   "widening_mul", /* name */
4886   OPTGROUP_NONE, /* optinfo_flags */
4887   TV_TREE_WIDEN_MUL, /* tv_id */
4888   PROP_ssa, /* properties_required */
4889   0, /* properties_provided */
4890   0, /* properties_destroyed */
4891   0, /* todo_flags_start */
4892   TODO_update_ssa, /* todo_flags_finish */
4893 };
4894 
4895 class pass_optimize_widening_mul : public gimple_opt_pass
4896 {
4897 public:
4898   pass_optimize_widening_mul (gcc::context *ctxt)
4899     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4900   {}
4901 
4902   /* opt_pass methods: */
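  /* flag_expensive_optimizations is enabled by default at -O2 and above,
     so in practice the pass runs whenever optimizing at those levels.  */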
4903   virtual bool gate (function *)
4904     {
4905       return flag_expensive_optimizations && optimize;
4906     }
4907 
4908   virtual unsigned int execute (function *);
4909 
4910 }; // class pass_optimize_widening_mul
4911 
4912 /* Walker class to perform the transformation in reverse dominance order.  */
4913 
4914 class math_opts_dom_walker : public dom_walker
4915 {
4916 public:
4917   /* Constructor.  CFG_CHANGED_P is a pointer to a boolean flag that will
4918      be set if walking modifies the CFG.  */
4919 
4920   math_opts_dom_walker (bool *cfg_changed_p)
4921     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4922       m_cfg_changed_p (cfg_changed_p) {}
4923 
4924   /* The actual actions performed in the walk.  */
4925 
4926   virtual void after_dom_children (basic_block);
4927 
4928   /* Set of results of chains of multiply and add statement combinations that
4929      were not transformed into FMAs because of active deferring.  */
4930   hash_set<tree> m_last_result_set;
4931 
4932   /* Pointer to a flag of the user that needs to be set if CFG has been
4933      modified.  */
4934   bool *m_cfg_changed_p;
4935 };
4936 
4937 void
4938 math_opts_dom_walker::after_dom_children (basic_block bb)
4939 {
4940   gimple_stmt_iterator gsi;
4941 
4942   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4943 
4944   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4945     {
4946       gimple *stmt = gsi_stmt (gsi);
4947       enum tree_code code;
4948 
4949       if (is_gimple_assign (stmt))
4950 	{
4951 	  code = gimple_assign_rhs_code (stmt);
4952 	  switch (code)
4953 	    {
4954 	    case MULT_EXPR:
4955 	      if (!convert_mult_to_widen (stmt, &gsi)
4956 		  && !convert_expand_mult_copysign (stmt, &gsi)
4957 		  && convert_mult_to_fma (stmt,
4958 					  gimple_assign_rhs1 (stmt),
4959 					  gimple_assign_rhs2 (stmt),
4960 					  &fma_state))
4961 		{
4962 		  gsi_remove (&gsi, true);
4963 		  release_defs (stmt);
4964 		  continue;
4965 		}
4966 	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4967 	      break;
4968 
4969 	    case PLUS_EXPR:
4970 	    case MINUS_EXPR:
4971 	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
4972 		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4973 	      break;
4974 
4975 	    case BIT_NOT_EXPR:
4976 	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4977 		continue;
4978 	      break;
4979 
4980 	    case TRUNC_MOD_EXPR:
4981 	      convert_to_divmod (as_a<gassign *> (stmt));
4982 	      break;
4983 
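	    /* A right shift of a widened multiplication by the narrower
	       precision can be rewritten as a highpart multiply.  */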
4984 	    case RSHIFT_EXPR:
4985 	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4986 	      break;
4987 
4988 	    default:;
4989 	    }
4990 	}
4991       else if (is_gimple_call (stmt))
4992 	{
4993 	  switch (gimple_call_combined_fn (stmt))
4994 	    {
4995 	    CASE_CFN_POW:
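	      /* pow (x, 2.0) is handled as x * x so that the square can be
		 fused into an FMA when its result feeds an addition.  */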
4996 	      if (gimple_call_lhs (stmt)
4997 		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4998 		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4999 				 &dconst2)
5000 		  && convert_mult_to_fma (stmt,
5001 					  gimple_call_arg (stmt, 0),
5002 					  gimple_call_arg (stmt, 0),
5003 					  &fma_state))
5004 		{
5005 		  unlink_stmt_vdef (stmt);
5006 		  if (gsi_remove (&gsi, true)
5007 		      && gimple_purge_dead_eh_edges (bb))
5008 		    *m_cfg_changed_p = true;
5009 		  release_defs (stmt);
5010 		  continue;
5011 		}
5012 	      break;
5013 
5014 	    case CFN_COND_MUL:
5015 	      if (convert_mult_to_fma (stmt,
5016 				       gimple_call_arg (stmt, 1),
5017 				       gimple_call_arg (stmt, 2),
5018 				       &fma_state,
5019 				       gimple_call_arg (stmt, 0)))
5020 
5021 		{
5022 		  gsi_remove (&gsi, true);
5023 		  release_defs (stmt);
5024 		  continue;
5025 		}
5026 	      break;
5027 
5028 	    case CFN_LAST:
5029 	      cancel_fma_deferring (&fma_state);
5030 	      break;
5031 
5032 	    default:
5033 	      break;
5034 	    }
5035 	}
5036       else if (gimple_code (stmt) == GIMPLE_COND)
5037 	optimize_spaceship (stmt);
5038       gsi_next (&gsi);
5039     }
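  /* If, at the end of the block, FMA formation is still being deferred and
     the deferred chain started at a loop PHI, remember the last result when
     it feeds that PHI (keeping the chain FMA-free); otherwise cancel the
     deferral, which emits the FMAs that were held back.  */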
5040   if (fma_state.m_deferring_p
5041       && fma_state.m_initial_phi)
5042     {
5043       gcc_checking_assert (fma_state.m_last_result);
5044       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5045 						 &m_last_result_set))
5046 	cancel_fma_deferring (&fma_state);
5047       else
5048 	m_last_result_set.add (fma_state.m_last_result);
5049     }
5050 }
5051 
5052 
5053 unsigned int
5054 pass_optimize_widening_mul::execute (function *fun)
5055 {
5056   bool cfg_changed = false;
5057 
5058   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5059   calculate_dominance_info (CDI_DOMINATORS);
5060   renumber_gimple_stmt_uids (cfun);
5061 
5062   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5063 
5064   statistics_counter_event (fun, "widening multiplications inserted",
5065 			    widen_mul_stats.widen_mults_inserted);
5066   statistics_counter_event (fun, "widening maccs inserted",
5067 			    widen_mul_stats.maccs_inserted);
5068   statistics_counter_event (fun, "fused multiply-adds inserted",
5069 			    widen_mul_stats.fmas_inserted);
5070   statistics_counter_event (fun, "divmod calls inserted",
5071 			    widen_mul_stats.divmod_calls_inserted);
5072   statistics_counter_event (fun, "highpart multiplications inserted",
5073 			    widen_mul_stats.highpart_mults_inserted);
5074 
5075   return cfg_changed ? TODO_cleanup_cfg : 0;
5076 }
5077 
5078 } // anon namespace
5079 
5080 gimple_opt_pass *
5081 make_pass_optimize_widening_mul (gcc::context *ctxt)
5082 {
5083   return new pass_optimize_widening_mul (ctxt);
5084 }
5085