xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-math-opts.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /* Global, SSA-based optimizations using mathematical identities.
2    Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* The first mini-pass in this file tries to CSE reciprocal
21    operations.  These are common in sequences such as this one:
22 
23 	modulus = sqrt(x*x + y*y + z*z);
24 	x = x / modulus;
25 	y = y / modulus;
26 	z = z / modulus;
27 
28    that can be optimized to
29 
30 	modulus = sqrt(x*x + y*y + z*z);
31         rmodulus = 1.0 / modulus;
32 	x = x * rmodulus;
33 	y = y * rmodulus;
34 	z = z * rmodulus;
35 
36    We do this for loop invariant divisors, and with this pass whenever
37    we notice that a division has the same divisor multiple times.
38 
39    Of course, like in PRE, we don't insert a division if a dominator
40    already has one.  However, this cannot be done as an extension of
41    PRE for several reasons.
42 
43    First of all, experiments showed that the transformation is not
44    always useful if there are only two divisions by the same divisor.
45    This is probably because modern processors can pipeline the
46    divisions; on older, in-order processors it should still be
47    effective to optimize two divisions by the same number.  The
48    threshold is a param, and it is called N in the remainder of
49    this comment.
50 
51    Second, if trapping math is active, we have less freedom on where
52    to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions cannot trap, we can instead insert them
54    elsewhere, in blocks that are common dominators of those that
55    contain the division.)
56 
57    We really don't want to compute the reciprocal unless a division will
58    be found.  To do this, we won't insert the division in a basic block
59    that has less than N divisions *post-dominating* it.
60 
61    The algorithm constructs a subset of the dominator tree, holding the
62    blocks containing the divisions and their common dominators,
63    and walks it twice.  The first walk is in post-order, and it annotates
64    each block with the number of divisions that post-dominate it: this
65    gives information on where divisions can be inserted profitably.
66    The second walk is in pre-order, and it inserts divisions as explained
67    above, and replaces divisions by multiplications.
68 
69    In the best case, the cost of the pass is O(n_statements).  In the
70    worst-case, the cost is due to creating the dominator tree subset,
71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
72    for n_statements / n_basic_blocks statements.  So, the amortized cost
73    of creating the dominator tree subset is O(n_basic_blocks) and the
74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
75 
76    More practically, the cost will be small because there are few
77    divisions, and they tend to be in the same basic block, so insert_bb
78    is called very few times.
79 
80    If we did this using domwalk.c, an efficient implementation would have
81    to work on all the variables in a single pass, because we could not
82    work on just a subset of the dominator tree, as we do now, and the
83    cost would also be something like O(n_statements * n_basic_blocks).
84    The data structures would be more complex in order to work on all the
85    variables in a single pass.  */
86 
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
118 
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121    division.  */
122 struct occurrence {
123   /* The basic block represented by this structure.  */
124   basic_block bb;
125 
126   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127      inserted in BB.  */
128   tree recip_def;
129 
130   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
131      was inserted in BB.  */
132   gimple *recip_def_stmt;
133 
134   /* Pointer to a list of "struct occurrence"s for blocks dominated
135      by BB.  */
136   struct occurrence *children;
137 
138   /* Pointer to the next "struct occurrence" in the list of blocks
139      sharing a common dominator.  */
140   struct occurrence *next;
141 
142   /* The number of divisions that are in BB before compute_merit.  The
143      number of divisions that are in BB or post-dominate it after
144      compute_merit.  */
145   int num_divisions;
146 
147   /* True if the basic block has a division, false if it is a common
148      dominator for basic blocks that do.  If it is false and trapping
149      math is active, BB is not a candidate for inserting a reciprocal.  */
150   bool bb_has_division;
151 };
152 
153 static struct
154 {
155   /* Number of 1.0/X ops inserted.  */
156   int rdivs_inserted;
157 
158   /* Number of 1.0/FUNC ops inserted.  */
159   int rfuncs_inserted;
160 } reciprocal_stats;
161 
162 static struct
163 {
164   /* Number of cexpi calls inserted.  */
165   int inserted;
166 } sincos_stats;
167 
168 static struct
169 {
170   /* Number of hand-written 16-bit nop / bswaps found.  */
171   int found_16bit;
172 
173   /* Number of hand-written 32-bit nop / bswaps found.  */
174   int found_32bit;
175 
176   /* Number of hand-written 64-bit nop / bswaps found.  */
177   int found_64bit;
178 } nop_stats, bswap_stats;
179 
180 static struct
181 {
182   /* Number of widening multiplication ops inserted.  */
183   int widen_mults_inserted;
184 
185   /* Number of integer multiply-and-accumulate ops inserted.  */
186   int maccs_inserted;
187 
188   /* Number of fp fused multiply-add ops inserted.  */
189   int fmas_inserted;
190 
191   /* Number of divmod calls inserted.  */
192   int divmod_calls_inserted;
193 } widen_mul_stats;
194 
195 /* The instance of "struct occurrence" representing the highest
196    interesting block in the dominator tree.  */
197 static struct occurrence *occ_head;
198 
199 /* Allocation pool for getting instances of "struct occurrence".  */
200 static object_allocator<occurrence> *occ_pool;
201 
202 
203 
204 /* Allocate and return a new struct occurrence for basic block BB,
205    whose children list is headed by CHILDREN.  */
206 static struct occurrence *
207 occ_new (basic_block bb, struct occurrence *children)
208 {
209   struct occurrence *occ;
210 
211   bb->aux = occ = occ_pool->allocate ();
212   memset (occ, 0, sizeof (struct occurrence));
213 
214   occ->bb = bb;
215   occ->children = children;
216   return occ;
217 }
218 
219 
220 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
221    list of "struct occurrence"s, one per basic block, having IDOM as
222    their common dominator.
223 
224    We try to insert NEW_OCC as deep as possible in the tree, and we also
225    insert any other block that is a common dominator for BB and one
226    block already in the tree.  */
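/* Illustrative example (not part of the original comment): if, say, blocks
   B1 and B2 both contain divisions by the same divisor and their nearest
   common dominator D is not yet in the tree, the "dom != idom" case below
   creates an occurrence for D and links the occurrences for B1 and B2
   underneath it as its children.  */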
227 
228 static void
229 insert_bb (struct occurrence *new_occ, basic_block idom,
230 	   struct occurrence **p_head)
231 {
232   struct occurrence *occ, **p_occ;
233 
234   for (p_occ = p_head; (occ = *p_occ) != NULL; )
235     {
236       basic_block bb = new_occ->bb, occ_bb = occ->bb;
237       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
238       if (dom == bb)
239 	{
240 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
241 	     from its list.  */
242 	  *p_occ = occ->next;
243 	  occ->next = new_occ->children;
244 	  new_occ->children = occ;
245 
246 	  /* Try the next block (it may as well be dominated by BB).  */
247 	}
248 
249       else if (dom == occ_bb)
250 	{
251 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
252 	  insert_bb (new_occ, dom, &occ->children);
253 	  return;
254 	}
255 
256       else if (dom != idom)
257 	{
258 	  gcc_assert (!dom->aux);
259 
260 	  /* There is a dominator between IDOM and BB, add it and make
261 	     two children out of NEW_OCC and OCC.  First, remove OCC from
262 	     its list.	*/
263 	  *p_occ = occ->next;
264 	  new_occ->next = occ;
265 	  occ->next = NULL;
266 
267 	  /* None of the previous blocks has DOM as a dominator: if we tail
268 	     recursed, we would reexamine them uselessly. Just switch BB with
269 	     DOM, and go on looking for blocks dominated by DOM.  */
270           new_occ = occ_new (dom, new_occ);
271 	}
272 
273       else
274 	{
275 	  /* Nothing special, go on with the next element.  */
276 	  p_occ = &occ->next;
277 	}
278     }
279 
280   /* No place was found deeper in the tree.  Make BB a child of IDOM.  */
281   new_occ->next = *p_head;
282   *p_head = new_occ;
283 }
284 
285 /* Register that we found a division in BB.  */
286 
287 static inline void
288 register_division_in (basic_block bb)
289 {
290   struct occurrence *occ;
291 
292   occ = (struct occurrence *) bb->aux;
293   if (!occ)
294     {
295       occ = occ_new (bb, NULL);
296       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
297     }
298 
299   occ->bb_has_division = true;
300   occ->num_divisions++;
301 }
302 
303 
304 /* Compute the number of divisions that postdominate each block in OCC and
305    its children.  */
306 
307 static void
308 compute_merit (struct occurrence *occ)
309 {
310   struct occurrence *occ_child;
311   basic_block dom = occ->bb;
312 
313   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
314     {
315       basic_block bb;
316       if (occ_child->children)
317         compute_merit (occ_child);
318 
319       if (flag_exceptions)
320 	bb = single_noncomplex_succ (dom);
321       else
322 	bb = dom;
323 
324       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
325         occ->num_divisions += occ_child->num_divisions;
326     }
327 }
328 
329 
330 /* Return whether USE_STMT is a floating-point division by DEF.  */
331 static inline bool
332 is_division_by (gimple *use_stmt, tree def)
333 {
334   return is_gimple_assign (use_stmt)
335 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
336 	 && gimple_assign_rhs2 (use_stmt) == def
337 	 /* Do not recognize x / x as valid division, as we are getting
338 	    confused later by replacing all immediate uses of x in such
339 	    a stmt.  */
340 	 && gimple_assign_rhs1 (use_stmt) != def
341 	 && !stmt_can_throw_internal (use_stmt);
342 }
343 
344 /* Walk the subset of the dominator tree rooted at OCC, setting the
345    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
346    the given basic block.  The field may be left NULL, of course,
347    if it is not possible or profitable to do the optimization.
348 
349    DEF_GSI is an iterator pointing at the statement defining DEF.
350    If RECIP_DEF is set, a dominator already has a computation that can
351    be used.  */
352 
353 static void
354 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
355 		    tree def, tree recip_def, int threshold)
356 {
357   tree type;
358   gassign *new_stmt;
359   gimple_stmt_iterator gsi;
360   struct occurrence *occ_child;
361 
362   if (!recip_def
363       && (occ->bb_has_division || !flag_trapping_math)
364       && occ->num_divisions >= threshold)
365     {
366       /* Make a variable with the replacement and substitute it.  */
367       type = TREE_TYPE (def);
368       recip_def = create_tmp_reg (type, "reciptmp");
369       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
370 				      build_one_cst (type), def);
371 
372       if (occ->bb_has_division)
373         {
374           /* Case 1: insert before an existing division.  */
375           gsi = gsi_after_labels (occ->bb);
376           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
377 	    gsi_next (&gsi);
378 
379           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
380         }
381       else if (def_gsi && occ->bb == def_gsi->bb)
382         {
383           /* Case 2: insert right after the definition.  Note that this will
384 	     never happen if the definition statement can throw, because in
385 	     that case the sole successor of the statement's basic block will
386 	     dominate all the uses as well.  */
387           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
388         }
389       else
390         {
391           /* Case 3: insert in a basic block not containing defs/uses.  */
392           gsi = gsi_after_labels (occ->bb);
393           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
394         }
395 
396       reciprocal_stats.rdivs_inserted++;
397 
398       occ->recip_def_stmt = new_stmt;
399     }
400 
401   occ->recip_def = recip_def;
402   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
403     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
404 }
405 
406 
407 /* Replace the division at USE_P with a multiplication by the reciprocal, if
408    possible.  */
409 
410 static inline void
411 replace_reciprocal (use_operand_p use_p)
412 {
413   gimple *use_stmt = USE_STMT (use_p);
414   basic_block bb = gimple_bb (use_stmt);
415   struct occurrence *occ = (struct occurrence *) bb->aux;
416 
417   if (optimize_bb_for_speed_p (bb)
418       && occ->recip_def && use_stmt != occ->recip_def_stmt)
419     {
420       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
421       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
422       SET_USE (use_p, occ->recip_def);
423       fold_stmt_inplace (&gsi);
424       update_stmt (use_stmt);
425     }
426 }
427 
428 
429 /* Free OCC and return one more "struct occurrence" to be freed.  */
430 
431 static struct occurrence *
432 free_bb (struct occurrence *occ)
433 {
434   struct occurrence *child, *next;
435 
436   /* First get the two pointers hanging off OCC.  */
437   next = occ->next;
438   child = occ->children;
439   occ->bb->aux = NULL;
440   occ_pool->remove (occ);
441 
442   /* Now ensure that we don't recurse unless it is necessary.  */
443   if (!child)
444     return next;
445   else
446     {
447       while (next)
448 	next = free_bb (next);
449 
450       return child;
451     }
452 }
453 
454 
455 /* Look for floating-point divisions among DEF's uses, and try to
456    replace them by multiplications with the reciprocal.  Add
457    as many statements computing the reciprocal as needed.
458 
459    DEF must be a GIMPLE register of a floating-point type.  */
460 
461 static void
462 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
463 {
464   use_operand_p use_p;
465   imm_use_iterator use_iter;
466   struct occurrence *occ;
467   int count = 0, threshold;
468 
469   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
470 
471   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
472     {
473       gimple *use_stmt = USE_STMT (use_p);
474       if (is_division_by (use_stmt, def))
475 	{
476 	  register_division_in (gimple_bb (use_stmt));
477 	  count++;
478 	}
479     }
480 
481   /* Do the expensive part only if we can hope to optimize something.  */
482   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
483   if (count >= threshold)
484     {
485       gimple *use_stmt;
486       for (occ = occ_head; occ; occ = occ->next)
487 	{
488 	  compute_merit (occ);
489 	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
490 	}
491 
492       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
493 	{
494 	  if (is_division_by (use_stmt, def))
495 	    {
496 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
497 		replace_reciprocal (use_p);
498 	    }
499 	}
500     }
501 
502   for (occ = occ_head; occ; )
503     occ = free_bb (occ);
504 
505   occ_head = NULL;
506 }
507 
508 /* Return an internal function that implements the reciprocal of CALL,
509    or IFN_LAST if there is no such function that the target supports.  */
510 
511 internal_fn
512 internal_fn_reciprocal (gcall *call)
513 {
514   internal_fn ifn;
515 
516   switch (gimple_call_combined_fn (call))
517     {
518     CASE_CFN_SQRT:
519       ifn = IFN_RSQRT;
520       break;
521 
522     default:
523       return IFN_LAST;
524     }
525 
526   tree_pair types = direct_internal_fn_types (ifn, call);
527   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
528     return IFN_LAST;
529 
530   return ifn;
531 }
532 
533 /* Go through all the floating-point SSA_NAMEs, and call
534    execute_cse_reciprocals_1 on each of them.  */
535 namespace {
536 
537 const pass_data pass_data_cse_reciprocals =
538 {
539   GIMPLE_PASS, /* type */
540   "recip", /* name */
541   OPTGROUP_NONE, /* optinfo_flags */
542   TV_NONE, /* tv_id */
543   PROP_ssa, /* properties_required */
544   0, /* properties_provided */
545   0, /* properties_destroyed */
546   0, /* todo_flags_start */
547   TODO_update_ssa, /* todo_flags_finish */
548 };
549 
550 class pass_cse_reciprocals : public gimple_opt_pass
551 {
552 public:
553   pass_cse_reciprocals (gcc::context *ctxt)
554     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
555   {}
556 
557   /* opt_pass methods: */
558   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
559   virtual unsigned int execute (function *);
560 
561 }; // class pass_cse_reciprocals
562 
563 unsigned int
564 pass_cse_reciprocals::execute (function *fun)
565 {
566   basic_block bb;
567   tree arg;
568 
569   occ_pool = new object_allocator<occurrence> ("dominators for recip");
570 
571   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
572   calculate_dominance_info (CDI_DOMINATORS);
573   calculate_dominance_info (CDI_POST_DOMINATORS);
574 
575   if (flag_checking)
576     FOR_EACH_BB_FN (bb, fun)
577       gcc_assert (!bb->aux);
578 
579   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
580     if (FLOAT_TYPE_P (TREE_TYPE (arg))
581 	&& is_gimple_reg (arg))
582       {
583 	tree name = ssa_default_def (fun, arg);
584 	if (name)
585 	  execute_cse_reciprocals_1 (NULL, name);
586       }
587 
588   FOR_EACH_BB_FN (bb, fun)
589     {
590       tree def;
591 
592       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
593 	   gsi_next (&gsi))
594 	{
595 	  gphi *phi = gsi.phi ();
596 	  def = PHI_RESULT (phi);
597 	  if (! virtual_operand_p (def)
598 	      && FLOAT_TYPE_P (TREE_TYPE (def)))
599 	    execute_cse_reciprocals_1 (NULL, def);
600 	}
601 
602       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
603 	   gsi_next (&gsi))
604         {
605 	  gimple *stmt = gsi_stmt (gsi);
606 
607 	  if (gimple_has_lhs (stmt)
608 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
609 	      && FLOAT_TYPE_P (TREE_TYPE (def))
610 	      && TREE_CODE (def) == SSA_NAME)
611 	    execute_cse_reciprocals_1 (&gsi, def);
612 	}
613 
614       if (optimize_bb_for_size_p (bb))
615         continue;
616 
617       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
618       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
619 	   gsi_next (&gsi))
620         {
621 	  gimple *stmt = gsi_stmt (gsi);
622 
623 	  if (is_gimple_assign (stmt)
624 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
625 	    {
626 	      tree arg1 = gimple_assign_rhs2 (stmt);
627 	      gimple *stmt1;
628 
629 	      if (TREE_CODE (arg1) != SSA_NAME)
630 		continue;
631 
632 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
633 
634 	      if (is_gimple_call (stmt1)
635 		  && gimple_call_lhs (stmt1))
636 		{
637 		  bool fail;
638 		  imm_use_iterator ui;
639 		  use_operand_p use_p;
640 		  tree fndecl = NULL_TREE;
641 
642 		  gcall *call = as_a <gcall *> (stmt1);
643 		  internal_fn ifn = internal_fn_reciprocal (call);
644 		  if (ifn == IFN_LAST)
645 		    {
646 		      fndecl = gimple_call_fndecl (call);
647 		      if (!fndecl
648 			  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
649 			continue;
650 		      fndecl = targetm.builtin_reciprocal (fndecl);
651 		      if (!fndecl)
652 			continue;
653 		    }
654 
655 		  /* Check that all uses of the SSA name are divisions,
656 		     otherwise replacing the defining statement will do
657 		     the wrong thing.  */
658 		  fail = false;
659 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
660 		    {
661 		      gimple *stmt2 = USE_STMT (use_p);
662 		      if (is_gimple_debug (stmt2))
663 			continue;
664 		      if (!is_gimple_assign (stmt2)
665 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
666 			  || gimple_assign_rhs1 (stmt2) == arg1
667 			  || gimple_assign_rhs2 (stmt2) != arg1)
668 			{
669 			  fail = true;
670 			  break;
671 			}
672 		    }
673 		  if (fail)
674 		    continue;
675 
676 		  gimple_replace_ssa_lhs (call, arg1);
677 		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
678 		    {
679 		      auto_vec<tree, 4> args;
680 		      for (unsigned int i = 0;
681 			   i < gimple_call_num_args (call); i++)
682 			args.safe_push (gimple_call_arg (call, i));
683 		      gcall *stmt2;
684 		      if (ifn == IFN_LAST)
685 			stmt2 = gimple_build_call_vec (fndecl, args);
686 		      else
687 			stmt2 = gimple_build_call_internal_vec (ifn, args);
688 		      gimple_call_set_lhs (stmt2, arg1);
689 		      if (gimple_vdef (call))
690 			{
691 			  gimple_set_vdef (stmt2, gimple_vdef (call));
692 			  SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
693 			}
694 		      gimple_set_vuse (stmt2, gimple_vuse (call));
695 		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
696 		      gsi_replace (&gsi2, stmt2, true);
697 		    }
698 		  else
699 		    {
700 		      if (ifn == IFN_LAST)
701 			gimple_call_set_fndecl (call, fndecl);
702 		      else
703 			gimple_call_set_internal_fn (call, ifn);
704 		      update_stmt (call);
705 		    }
706 		  reciprocal_stats.rfuncs_inserted++;
707 
708 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
709 		    {
710 		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
711 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
712 		      fold_stmt_inplace (&gsi);
713 		      update_stmt (stmt);
714 		    }
715 		}
716 	    }
717 	}
718     }
719 
720   statistics_counter_event (fun, "reciprocal divs inserted",
721 			    reciprocal_stats.rdivs_inserted);
722   statistics_counter_event (fun, "reciprocal functions inserted",
723 			    reciprocal_stats.rfuncs_inserted);
724 
725   free_dominance_info (CDI_DOMINATORS);
726   free_dominance_info (CDI_POST_DOMINATORS);
727   delete occ_pool;
728   return 0;
729 }
730 
731 } // anon namespace
732 
733 gimple_opt_pass *
734 make_pass_cse_reciprocals (gcc::context *ctxt)
735 {
736   return new pass_cse_reciprocals (ctxt);
737 }
738 
739 /* Records an occurrence at statement USE_STMT in the vector of statements
740    STMTS if its basic block is dominated by *TOP_BB, dominates it, or
741    *TOP_BB is not yet initialized.  Returns true if the occurrence was
742    pushed on the vector.  Adjusts *TOP_BB to be the basic block dominating
743    all statements in the vector.  */
744 
745 static bool
746 maybe_record_sincos (vec<gimple *> *stmts,
747 		     basic_block *top_bb, gimple *use_stmt)
748 {
749   basic_block use_bb = gimple_bb (use_stmt);
750   if (*top_bb
751       && (*top_bb == use_bb
752 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
753     stmts->safe_push (use_stmt);
754   else if (!*top_bb
755 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
756     {
757       stmts->safe_push (use_stmt);
758       *top_bb = use_bb;
759     }
760   else
761     return false;
762 
763   return true;
764 }
765 
766 /* Look for sin, cos and cexpi calls with the same argument NAME and,
767    if there is more than one, create a single call to cexpi and CSE the
768    result.  We first walk over all immediate uses of the argument,
769    collecting the statements that we can CSE into a vector; in a second
770    pass we replace each statement's rhs with a REALPART or IMAGPART
771    expression on the result of the cexpi call, which is inserted before
772    the use statement that dominates all other candidates.  */
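/* Illustrative example (assumed dump syntax, not part of the original
   comment): given
     s = sinf (a);
     c = cosf (a);
   the pass rewrites the calls as
     t = __builtin_cexpif (a);
     s = IMAGPART_EXPR <t>;
     c = REALPART_EXPR <t>;
   with the cexpi call inserted in the block that dominates both uses.  */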
773 
774 static bool
775 execute_cse_sincos_1 (tree name)
776 {
777   gimple_stmt_iterator gsi;
778   imm_use_iterator use_iter;
779   tree fndecl, res, type;
780   gimple *def_stmt, *use_stmt, *stmt;
781   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
782   auto_vec<gimple *> stmts;
783   basic_block top_bb = NULL;
784   int i;
785   bool cfg_changed = false;
786 
787   type = TREE_TYPE (name);
788   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
789     {
790       if (gimple_code (use_stmt) != GIMPLE_CALL
791 	  || !gimple_call_lhs (use_stmt))
792 	continue;
793 
794       switch (gimple_call_combined_fn (use_stmt))
795 	{
796 	CASE_CFN_COS:
797 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
798 	  break;
799 
800 	CASE_CFN_SIN:
801 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
802 	  break;
803 
804 	CASE_CFN_CEXPI:
805 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
806 	  break;
807 
808 	default:;
809 	}
810     }
811 
812   if (seen_cos + seen_sin + seen_cexpi <= 1)
813     return false;
814 
815   /* Simply insert cexpi at the beginning of top_bb but not earlier than
816      the name def statement.  */
817   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
818   if (!fndecl)
819     return false;
820   stmt = gimple_build_call (fndecl, 1, name);
821   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
822   gimple_call_set_lhs (stmt, res);
823 
824   def_stmt = SSA_NAME_DEF_STMT (name);
825   if (!SSA_NAME_IS_DEFAULT_DEF (name)
826       && gimple_code (def_stmt) != GIMPLE_PHI
827       && gimple_bb (def_stmt) == top_bb)
828     {
829       gsi = gsi_for_stmt (def_stmt);
830       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
831     }
832   else
833     {
834       gsi = gsi_after_labels (top_bb);
835       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
836     }
837   sincos_stats.inserted++;
838 
839   /* And adjust the recorded old call sites.  */
840   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
841     {
842       tree rhs = NULL;
843 
844       switch (gimple_call_combined_fn (use_stmt))
845 	{
846 	CASE_CFN_COS:
847 	  rhs = fold_build1 (REALPART_EXPR, type, res);
848 	  break;
849 
850 	CASE_CFN_SIN:
851 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
852 	  break;
853 
854 	CASE_CFN_CEXPI:
855 	  rhs = res;
856 	  break;
857 
858 	default:;
859 	  gcc_unreachable ();
860 	}
861 
862 	/* Replace call with a copy.  */
863 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
864 
865 	gsi = gsi_for_stmt (use_stmt);
866 	gsi_replace (&gsi, stmt, true);
867 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
868 	  cfg_changed = true;
869     }
870 
871   return cfg_changed;
872 }
873 
874 /* To evaluate powi(x,n), the floating point value x raised to the
875    constant integer exponent n, we use a hybrid algorithm that
876    combines the "window method" with look-up tables.  For an
877    introduction to exponentiation algorithms and "addition chains",
878    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
879    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
880    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
881    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
882 
883 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
884    multiplications to inline before calling the system library's pow
885    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
886    so this default never requires calling pow, powf or powl.  */
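/* (Illustrative note, not from the original sources: a plain
   square-and-multiply scheme needs bits(n)-1 squarings plus at most
   bits(n)-1 extra multiplies for the set bits of n, which is where the
   2*bits(n)-2 worst case above comes from.)  */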
887 
888 #ifndef POWI_MAX_MULTS
889 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
890 #endif
891 
892 /* The size of the "optimal power tree" lookup table.  All
893    exponents less than this value are simply looked up in the
894    powi_table below.  This threshold is also used to size the
895    cache of pseudo registers that hold intermediate results.  */
896 #define POWI_TABLE_SIZE 256
897 
898 /* The size, in bits, of the window used in the "window method"
899    exponentiation algorithm.  This is equivalent to a radix of
900    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
901 #define POWI_WINDOW_SIZE 3
902 
903 /* The following table is an efficient representation of an
904    "optimal power tree".  For each value, i, the corresponding
905    value, j, in the table states that an optimal evaluation
906    sequence for calculating pow(x,i) can be found by evaluating
907    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
908    100 integers is given in Knuth's "Seminumerical algorithms".  */
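/* Worked example (illustrative, not part of the original comment):
   powi_table[15] is 9, so x**15 is evaluated as x**9 * x**6; recursing,
   x**9 = x**6 * x**3, x**6 = x**3 * x**3, x**3 = x**2 * x and x**2 = x * x,
   five multiplications in total, versus six for the plain binary
   square-and-multiply method.  */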
909 
910 static const unsigned char powi_table[POWI_TABLE_SIZE] =
911   {
912       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
913       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
914       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
915      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
916      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
917      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
918      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
919      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
920      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
921      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
922      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
923      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
924      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
925      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
926      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
927      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
928      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
929      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
930      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
931      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
932      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
933      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
934      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
935      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
936      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
937     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
938     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
939     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
940     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
941     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
942     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
943     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
944   };
945 
946 
947 /* Return the number of multiplications required to calculate
948    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
949    subroutine of powi_cost.  CACHE is an array indicating
950    which exponents have already been calculated.  */
951 
952 static int
953 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
954 {
955   /* If we've already calculated this exponent, then this evaluation
956      doesn't require any additional multiplications.  */
957   if (cache[n])
958     return 0;
959 
960   cache[n] = true;
961   return powi_lookup_cost (n - powi_table[n], cache)
962 	 + powi_lookup_cost (powi_table[n], cache) + 1;
963 }
964 
965 /* Return the number of multiplications required to calculate
966    powi(x,n) for an arbitrary x, given the exponent N.  This
967    function needs to be kept in sync with powi_as_mults below.  */
968 
969 static int
970 powi_cost (HOST_WIDE_INT n)
971 {
972   bool cache[POWI_TABLE_SIZE];
973   unsigned HOST_WIDE_INT digit;
974   unsigned HOST_WIDE_INT val;
975   int result;
976 
977   if (n == 0)
978     return 0;
979 
980   /* Ignore the reciprocal when calculating the cost.  */
981   val = (n < 0) ? -n : n;
982 
983   /* Initialize the exponent cache.  */
984   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
985   cache[1] = true;
986 
987   result = 0;
988 
989   while (val >= POWI_TABLE_SIZE)
990     {
991       if (val & 1)
992 	{
993 	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
994 	  result += powi_lookup_cost (digit, cache)
995 		    + POWI_WINDOW_SIZE + 1;
996 	  val >>= POWI_WINDOW_SIZE;
997 	}
998       else
999 	{
1000 	  val >>= 1;
1001 	  result++;
1002 	}
1003     }
1004 
1005   return result + powi_lookup_cost (val, cache);
1006 }
1007 
1008 /* Recursive subroutine of powi_as_mults.  This function takes the
1009    array, CACHE, of already calculated exponents and an exponent N and
1010    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1011 
1012 static tree
1013 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1014 		 HOST_WIDE_INT n, tree *cache)
1015 {
1016   tree op0, op1, ssa_target;
1017   unsigned HOST_WIDE_INT digit;
1018   gassign *mult_stmt;
1019 
1020   if (n < POWI_TABLE_SIZE && cache[n])
1021     return cache[n];
1022 
1023   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1024 
1025   if (n < POWI_TABLE_SIZE)
1026     {
1027       cache[n] = ssa_target;
1028       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1029       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1030     }
1031   else if (n & 1)
1032     {
1033       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1034       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1035       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1036     }
1037   else
1038     {
1039       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1040       op1 = op0;
1041     }
1042 
1043   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1044   gimple_set_location (mult_stmt, loc);
1045   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1046 
1047   return ssa_target;
1048 }
1049 
1050 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1051    This function needs to be kept in sync with powi_cost above.  */
1052 
1053 static tree
1054 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1055 	       tree arg0, HOST_WIDE_INT n)
1056 {
1057   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1058   gassign *div_stmt;
1059   tree target;
1060 
1061   if (n == 0)
1062     return build_real (type, dconst1);
1063 
1064   memset (cache, 0,  sizeof (cache));
1065   cache[1] = arg0;
1066 
1067   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1068   if (n >= 0)
1069     return result;
1070 
1071   /* If the original exponent was negative, reciprocate the result.  */
1072   target = make_temp_ssa_name (type, NULL, "powmult");
1073   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1074 				  build_real (type, dconst1), result);
1075   gimple_set_location (div_stmt, loc);
1076   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1077 
1078   return target;
1079 }
1080 
1081 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1082    location info LOC.  If the arguments are appropriate, create an
1083    equivalent sequence of statements prior to GSI using an optimal
1084    number of multiplications, and return an expession holding the
1085    result.  */
1086 
1087 static tree
1088 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1089 			    tree arg0, HOST_WIDE_INT n)
1090 {
1091   /* Avoid the largest negative number, for which negating N would overflow.  */
1092   if (n != -n
1093       && ((n >= -1 && n <= 2)
1094 	  || (optimize_function_for_speed_p (cfun)
1095 	      && powi_cost (n) <= POWI_MAX_MULTS)))
1096     return powi_as_mults (gsi, loc, arg0, n);
1097 
1098   return NULL_TREE;
1099 }
1100 
1101 /* Build a gimple call statement that calls FN with argument ARG.
1102    Set the lhs of the call statement to a fresh SSA name.  Insert the
1103    statement prior to GSI's current position, and return the fresh
1104    SSA name.  */
1105 
1106 static tree
1107 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1108 		       tree fn, tree arg)
1109 {
1110   gcall *call_stmt;
1111   tree ssa_target;
1112 
1113   call_stmt = gimple_build_call (fn, 1, arg);
1114   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1115   gimple_set_lhs (call_stmt, ssa_target);
1116   gimple_set_location (call_stmt, loc);
1117   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1118 
1119   return ssa_target;
1120 }
1121 
1122 /* Build a gimple binary operation with the given CODE and arguments
1123    ARG0, ARG1, assigning the result to a new SSA name for variable
1124    TARGET.  Insert the statement prior to GSI's current position, and
1125    return the fresh SSA name.  */
1126 
1127 static tree
1128 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1129 			const char *name, enum tree_code code,
1130 			tree arg0, tree arg1)
1131 {
1132   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1133   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1134   gimple_set_location (stmt, loc);
1135   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1136   return result;
1137 }
1138 
1139 /* Build a gimple reference operation with the given CODE and argument
1140    ARG0, assigning the result to a new SSA name of TYPE with NAME.
1141    Insert the statement prior to GSI's current position, and return
1142    the fresh SSA name.  */
1143 
1144 static inline tree
1145 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1146 		      const char *name, enum tree_code code, tree arg0)
1147 {
1148   tree result = make_temp_ssa_name (type, NULL, name);
1149   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1150   gimple_set_location (stmt, loc);
1151   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1152   return result;
1153 }
1154 
1155 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1156    prior to GSI's current position, and return the fresh SSA name.  */
1157 
1158 static tree
1159 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1160 		       tree type, tree val)
1161 {
1162   tree result = make_ssa_name (type);
1163   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1164   gimple_set_location (stmt, loc);
1165   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1166   return result;
1167 }
1168 
1169 struct pow_synth_sqrt_info
1170 {
1171   bool *factors;
1172   unsigned int deepest;
1173   unsigned int num_mults;
1174 };
1175 
1176 /* Return true iff the real value C can be represented as a
1177    sum of powers of 0.5 up to N.  That is:
1178    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1179    Record in INFO the various parameters of the synthesis algorithm such
1180    as the factors a[i], the maximum 0.5 power and the number of
1181    multiplications that will be required.  */
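/* Illustrative example (not part of the original comment): for C = 0.625
   = 0.5**1 + 0.5**3 and N >= 3 this returns true with factors[] =
   { 1, 0, 1 }, deepest = 3 and num_mults = 1, corresponding to
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */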
1182 
1183 bool
1184 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1185 				 struct pow_synth_sqrt_info *info)
1186 {
1187   REAL_VALUE_TYPE factor = dconsthalf;
1188   REAL_VALUE_TYPE remainder = c;
1189 
1190   info->deepest = 0;
1191   info->num_mults = 0;
1192   memset (info->factors, 0, n * sizeof (bool));
1193 
1194   for (unsigned i = 0; i < n; i++)
1195     {
1196       REAL_VALUE_TYPE res;
1197 
1198       /* If something inexact happened, bail out now.  */
1199       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1200 	return false;
1201 
1202       /* We have hit zero.  The number is representable as a sum
1203          of powers of 0.5.  */
1204       if (real_equal (&res, &dconst0))
1205 	{
1206 	  info->factors[i] = true;
1207 	  info->deepest = i + 1;
1208 	  return true;
1209 	}
1210       else if (!REAL_VALUE_NEGATIVE (res))
1211 	{
1212 	  remainder = res;
1213 	  info->factors[i] = true;
1214 	  info->num_mults++;
1215 	}
1216       else
1217 	info->factors[i] = false;
1218 
1219       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1220     }
1221   return false;
1222 }
1223 
1224 /* Return the tree corresponding to FN being applied
1225    to ARG N times at GSI and LOC.
1226    Look up previous results from CACHE if need be.
1227    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1228 
1229 static tree
1230 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1231 	      tree fn, location_t loc, tree *cache)
1232 {
1233   tree res = cache[n];
1234   if (!res)
1235     {
1236       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1237       res = build_and_insert_call (gsi, loc, fn, prev);
1238       cache[n] = res;
1239     }
1240 
1241   return res;
1242 }
1243 
1244 /* Print to STREAM the repeated application of function FNAME to ARG
1245    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1246    "foo (foo (x))".  */
1247 
1248 static void
1249 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1250 		 unsigned int n)
1251 {
1252   if (n == 0)
1253     fprintf (stream, "%s", arg);
1254   else
1255     {
1256       fprintf (stream, "%s (", fname);
1257       print_nested_fn (stream, fname, arg, n - 1);
1258       fprintf (stream, ")");
1259     }
1260 }
1261 
1262 /* Print to STREAM the fractional sequence of sqrt chains
1263    applied to ARG, described by INFO.  Used for the dump file.  */
1264 
1265 static void
1266 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1267 			        struct pow_synth_sqrt_info *info)
1268 {
1269   for (unsigned int i = 0; i < info->deepest; i++)
1270     {
1271       bool is_set = info->factors[i];
1272       if (is_set)
1273 	{
1274 	  print_nested_fn (stream, "sqrt", arg, i + 1);
1275 	  if (i != info->deepest - 1)
1276 	    fprintf (stream, " * ");
1277 	}
1278     }
1279 }
1280 
1281 /* Print to STREAM a representation of raising ARG to an integer
1282    power N.  Used for the dump file.  */
1283 
1284 static void
1285 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1286 {
1287   if (n > 1)
1288     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1289   else if (n == 1)
1290     fprintf (stream, "%s", arg);
1291 }
1292 
1293 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1294    square roots.  Place at GSI and LOC.  Limit the maximum depth
1295    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1296    result of the expanded sequence or NULL_TREE if the expansion failed.
1297 
1298    This routine assumes that ARG1 is a real number with a fractional part
1299    (the integer exponent case will have been handled earlier in
1300    gimple_expand_builtin_pow).
1301 
1302    For ARG1 > 0.0:
1303    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1304      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1305                     FRAC_PART == ARG1 - WHOLE_PART:
1306      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1307      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1308      if it can be expressed as such, that is if FRAC_PART satisfies:
1309      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1310      where integer a[i] is either 0 or 1.
1311 
1312      Example:
1313      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1314        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1315 
1316    For ARG1 < 0.0 there are two approaches:
1317    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1318          is calculated as above.
1319 
1320      Example:
1321      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1322        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1323 
1324    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1325            FRAC_PART  := ARG1 - WHOLE_PART
1326      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1327      Example:
1328      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1329        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1330 
1331    For ARG1 < 0.0 we choose between (A) and (B) depending on
1332    how many multiplications we'd have to do.
1333    So, for the example in (B): POW (x, -5.875), if we were to
1334    follow algorithm (A) we would produce:
1335    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1336    which contains more multiplications than approach (B).
1337 
1338    Hopefully, this approach will eliminate potentially expensive POW library
1339    calls when unsafe floating point math is enabled and allow the compiler to
1340    further optimise the multiplies, square roots and divides produced by this
1341    function.  */
1342 
1343 static tree
1344 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1345 		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1346 {
1347   tree type = TREE_TYPE (arg0);
1348   machine_mode mode = TYPE_MODE (type);
1349   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1350   bool one_over = true;
1351 
1352   if (!sqrtfn)
1353     return NULL_TREE;
1354 
1355   if (TREE_CODE (arg1) != REAL_CST)
1356     return NULL_TREE;
1357 
1358   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1359 
1360   gcc_assert (max_depth > 0);
1361   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1362 
1363   struct pow_synth_sqrt_info synth_info;
1364   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1365   synth_info.deepest = 0;
1366   synth_info.num_mults = 0;
1367 
1368   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1369   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1370 
1371   /* The whole and fractional parts of exp.  */
1372   REAL_VALUE_TYPE whole_part;
1373   REAL_VALUE_TYPE frac_part;
1374 
1375   real_floor (&whole_part, mode, &exp);
1376   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1377 
1378 
1379   REAL_VALUE_TYPE ceil_whole = dconst0;
1380   REAL_VALUE_TYPE ceil_fract = dconst0;
1381 
1382   if (neg_exp)
1383     {
1384       real_ceil (&ceil_whole, mode, &exp);
1385       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1386     }
1387 
1388   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1389     return NULL_TREE;
1390 
1391   /* Check whether it's more profitable to not use 1.0 / ...  */
1392   if (neg_exp)
1393     {
1394       struct pow_synth_sqrt_info alt_synth_info;
1395       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1396       alt_synth_info.deepest = 0;
1397       alt_synth_info.num_mults = 0;
1398 
1399       if (representable_as_half_series_p (ceil_fract, max_depth,
1400 					   &alt_synth_info)
1401 	  && alt_synth_info.deepest <= synth_info.deepest
1402 	  && alt_synth_info.num_mults < synth_info.num_mults)
1403 	{
1404 	  whole_part = ceil_whole;
1405 	  frac_part = ceil_fract;
1406 	  synth_info.deepest = alt_synth_info.deepest;
1407 	  synth_info.num_mults = alt_synth_info.num_mults;
1408 	  memcpy (synth_info.factors, alt_synth_info.factors,
1409 		  (max_depth + 1) * sizeof (bool));
1410 	  one_over = false;
1411 	}
1412     }
1413 
1414   HOST_WIDE_INT n = real_to_integer (&whole_part);
1415   REAL_VALUE_TYPE cint;
1416   real_from_integer (&cint, VOIDmode, n, SIGNED);
1417 
1418   if (!real_identical (&whole_part, &cint))
1419     return NULL_TREE;
1420 
1421   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1422     return NULL_TREE;
1423 
1424   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1425 
1426   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1427 
1428   /* Calculate the integer part of the exponent.  */
1429   if (n > 1)
1430     {
1431       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1432       if (!integer_res)
1433 	return NULL_TREE;
1434     }
1435 
1436   if (dump_file)
1437     {
1438       char string[64];
1439 
1440       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1441       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1442 
1443       if (neg_exp)
1444 	{
1445 	  if (one_over)
1446 	    {
1447 	      fprintf (dump_file, "1.0 / (");
1448 	      dump_integer_part (dump_file, "x", n);
1449 	      if (n > 0)
1450 	        fprintf (dump_file, " * ");
1451 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1452 	      fprintf (dump_file, ")");
1453 	    }
1454 	  else
1455 	    {
1456 	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1457 	      fprintf (dump_file, " / (");
1458 	      dump_integer_part (dump_file, "x", n);
1459 	      fprintf (dump_file, ")");
1460 	    }
1461 	}
1462       else
1463 	{
1464 	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1465 	  if (n > 0)
1466 	    fprintf (dump_file, " * ");
1467 	  dump_integer_part (dump_file, "x", n);
1468 	}
1469 
1470       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1471     }
1472 
1473 
1474   tree fract_res = NULL_TREE;
1475   cache[0] = arg0;
1476 
1477   /* Calculate the fractional part of the exponent.  */
1478   for (unsigned i = 0; i < synth_info.deepest; i++)
1479     {
1480       if (synth_info.factors[i])
1481 	{
1482 	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1483 
1484 	  if (!fract_res)
1485 	      fract_res = sqrt_chain;
1486 
1487 	  else
1488 	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1489 					   fract_res, sqrt_chain);
1490 	}
1491     }
1492 
1493   tree res = NULL_TREE;
1494 
1495   if (neg_exp)
1496     {
1497       if (one_over)
1498 	{
1499 	  if (n > 0)
1500 	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1501 					   fract_res, integer_res);
1502 	  else
1503 	    res = fract_res;
1504 
1505 	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1506 					  build_real (type, dconst1), res);
1507 	}
1508       else
1509 	{
1510 	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1511 					 fract_res, integer_res);
1512 	}
1513     }
1514   else
1515     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1516 				   fract_res, integer_res);
1517   return res;
1518 }
1519 
1520 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1521    with location info LOC.  If possible, create an equivalent and
1522    less expensive sequence of statements prior to GSI, and return an
1523    expression holding the result.  */
1524 
1525 static tree
1526 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1527 			   tree arg0, tree arg1)
1528 {
1529   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1530   REAL_VALUE_TYPE c2, dconst3;
1531   HOST_WIDE_INT n;
1532   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1533   machine_mode mode;
1534   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1535   bool hw_sqrt_exists, c_is_int, c2_is_int;
1536 
1537   dconst1_4 = dconst1;
1538   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1539 
1540   /* If the exponent isn't a constant, there's nothing of interest
1541      to be done.  */
1542   if (TREE_CODE (arg1) != REAL_CST)
1543     return NULL_TREE;
1544 
1545   /* Don't perform the operation if flag_signaling_nans is on
1546      and the operand is a signaling NaN.  */
1547   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1548       && ((TREE_CODE (arg0) == REAL_CST
1549 	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1550 	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1551     return NULL_TREE;
1552 
1553   /* If the exponent is equivalent to an integer, expand to an optimal
1554      multiplication sequence when profitable.  */
1555   c = TREE_REAL_CST (arg1);
1556   n = real_to_integer (&c);
1557   real_from_integer (&cint, VOIDmode, n, SIGNED);
1558   c_is_int = real_identical (&c, &cint);
1559 
1560   if (c_is_int
1561       && ((n >= -1 && n <= 2)
1562 	  || (flag_unsafe_math_optimizations
1563 	      && speed_p
1564 	      && powi_cost (n) <= POWI_MAX_MULTS)))
1565     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1566 
1567   /* Attempt various optimizations using sqrt and cbrt.  */
1568   type = TREE_TYPE (arg0);
1569   mode = TYPE_MODE (type);
1570   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1571 
1572   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1573      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1574      sqrt(-0) = -0.  */
1575   if (sqrtfn
1576       && real_equal (&c, &dconsthalf)
1577       && !HONOR_SIGNED_ZEROS (mode))
1578     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1579 
1580   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1581 
1582   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1583      optimizations since 1./3. is not exactly representable.  If x
1584      is negative and finite, the correct value of pow(x,1./3.) is
1585      a NaN with the "invalid" exception raised, because the value
1586      of 1./3. actually has an even denominator.  The correct value
1587      of cbrt(x) is a negative real value.  */
1588   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1589   dconst1_3 = real_value_truncate (mode, dconst_third ());
1590 
1591   if (flag_unsafe_math_optimizations
1592       && cbrtfn
1593       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1594       && real_equal (&c, &dconst1_3))
1595     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1596 
1597   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1598      if we don't have a hardware sqrt insn.  */
1599   dconst1_6 = dconst1_3;
1600   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1601 
1602   if (flag_unsafe_math_optimizations
1603       && sqrtfn
1604       && cbrtfn
1605       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1606       && speed_p
1607       && hw_sqrt_exists
1608       && real_equal (&c, &dconst1_6))
1609     {
1610       /* sqrt(x)  */
1611       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1612 
1613       /* cbrt(sqrt(x))  */
1614       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1615     }
1616 
1617 
1618   /* Attempt to expand the POW as a product of square root chains.
1619      Expand the 0.25 case even when optimising for size.  */
1620   if (flag_unsafe_math_optimizations
1621       && sqrtfn
1622       && hw_sqrt_exists
1623       && (speed_p || real_equal (&c, &dconst1_4))
1624       && !HONOR_SIGNED_ZEROS (mode))
1625     {
1626       unsigned int max_depth = speed_p
1627 				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1628 				: 2;
1629 
1630       tree expand_with_sqrts
1631 	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1632 
1633       if (expand_with_sqrts)
1634 	return expand_with_sqrts;
1635     }
1636 
1637   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1638   n = real_to_integer (&c2);
1639   real_from_integer (&cint, VOIDmode, n, SIGNED);
1640   c2_is_int = real_identical (&c2, &cint);
1641 
1642   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1643 
1644      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1645      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1646 
1647      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1648      different from pow(x, 1./3.) due to rounding and behavior with
1649      negative x, we must constrain this transformation to unsafe math
1650      and to either nonnegative x or finite math (no NaNs).  */
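  /* Worked example (illustrative): c = 5./3. gives n = 5, yielding
     powi (x, 1) * cbrt (x) * cbrt (x); c = -4./3. gives n = -4, yielding
     1.0 / (x * cbrt (x)).  */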
1651   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1652   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1653   real_round (&c2, mode, &c2);
1654   n = real_to_integer (&c2);
1655   real_from_integer (&cint, VOIDmode, n, SIGNED);
1656   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1657   real_convert (&c2, mode, &c2);
1658 
1659   if (flag_unsafe_math_optimizations
1660       && cbrtfn
1661       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1662       && real_identical (&c2, &c)
1663       && !c2_is_int
1664       && optimize_function_for_speed_p (cfun)
1665       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1666     {
1667       tree powi_x_ndiv3 = NULL_TREE;
1668 
1669       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1670          possible or profitable, give up.  Skip the degenerate case when
1671          abs(n) < 3, where the result is always 1.  */
1672       if (absu_hwi (n) >= 3)
1673 	{
1674 	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1675 						     abs_hwi (n / 3));
1676 	  if (!powi_x_ndiv3)
1677 	    return NULL_TREE;
1678 	}
1679 
1680       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1681          as that creates an unnecessary variable.  Instead, just produce
1682          either cbrt(x) or cbrt(x) * cbrt(x).  */
1683       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1684 
1685       if (absu_hwi (n) % 3 == 1)
1686 	powi_cbrt_x = cbrt_x;
1687       else
1688 	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1689 					      cbrt_x, cbrt_x);
1690 
1691       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1692       if (absu_hwi (n) < 3)
1693 	result = powi_cbrt_x;
1694       else
1695 	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1696 					 powi_x_ndiv3, powi_cbrt_x);
1697 
1698       /* If n is negative, reciprocate the result.  */
1699       if (n < 0)
1700 	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1701 					 build_real (type, dconst1), result);
1702 
1703       return result;
1704     }
1705 
1706   /* No optimizations succeeded.  */
1707   return NULL_TREE;
1708 }
1709 
1710 /* ARG is the argument to a cabs builtin call in GSI with location info
1711    LOC.  Create a sequence of statements prior to GSI that calculates
1712    sqrt(R*R + I*I), where R and I are the real and imaginary components
1713    of ARG, respectively.  Return an expression holding the result.  */
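/* The expansion below is guarded by flag_unsafe_math_optimizations because
   the naive sqrt (R*R + I*I) formula can overflow or underflow for extreme
   components where a library cabs implementation would still return an
   accurate result.  */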
1714 
1715 static tree
1716 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1717 {
1718   tree real_part, imag_part, addend1, addend2, sum, result;
1719   tree type = TREE_TYPE (TREE_TYPE (arg));
1720   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1721   machine_mode mode = TYPE_MODE (type);
1722 
1723   if (!flag_unsafe_math_optimizations
1724       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1725       || !sqrtfn
1726       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1727     return NULL_TREE;
1728 
1729   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1730 				    REALPART_EXPR, arg);
1731   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1732 				    real_part, real_part);
1733   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1734 				    IMAGPART_EXPR, arg);
1735   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1736 				    imag_part, imag_part);
1737   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1738   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1739 
1740   return result;
1741 }
1742 
1743 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1744    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1745    an optimal number of multiplies, when n is a constant.  */
1746 
1747 namespace {
1748 
1749 const pass_data pass_data_cse_sincos =
1750 {
1751   GIMPLE_PASS, /* type */
1752   "sincos", /* name */
1753   OPTGROUP_NONE, /* optinfo_flags */
1754   TV_NONE, /* tv_id */
1755   PROP_ssa, /* properties_required */
1756   PROP_gimple_opt_math, /* properties_provided */
1757   0, /* properties_destroyed */
1758   0, /* todo_flags_start */
1759   TODO_update_ssa, /* todo_flags_finish */
1760 };
1761 
1762 class pass_cse_sincos : public gimple_opt_pass
1763 {
1764 public:
1765   pass_cse_sincos (gcc::context *ctxt)
1766     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1767   {}
1768 
1769   /* opt_pass methods: */
1770   virtual bool gate (function *)
1771     {
1772       /* We no longer require either sincos or cexp, since powi expansion
1773 	 piggybacks on this pass.  */
1774       return optimize;
1775     }
1776 
1777   virtual unsigned int execute (function *);
1778 
1779 }; // class pass_cse_sincos
1780 
1781 unsigned int
1782 pass_cse_sincos::execute (function *fun)
1783 {
1784   basic_block bb;
1785   bool cfg_changed = false;
1786 
1787   calculate_dominance_info (CDI_DOMINATORS);
1788   memset (&sincos_stats, 0, sizeof (sincos_stats));
1789 
1790   FOR_EACH_BB_FN (bb, fun)
1791     {
1792       gimple_stmt_iterator gsi;
1793       bool cleanup_eh = false;
1794 
1795       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1796         {
1797 	  gimple *stmt = gsi_stmt (gsi);
1798 
1799 	  /* Only the last stmt in a bb could throw, so there is no need to
1800 	     call gimple_purge_dead_eh_edges if we change something in the
1801 	     middle of a basic block.  */
1802 	  cleanup_eh = false;
1803 
1804 	  if (is_gimple_call (stmt)
1805 	      && gimple_call_lhs (stmt))
1806 	    {
1807 	      tree arg, arg0, arg1, result;
1808 	      HOST_WIDE_INT n;
1809 	      location_t loc;
1810 
1811 	      switch (gimple_call_combined_fn (stmt))
1812 		{
1813 		CASE_CFN_COS:
1814 		CASE_CFN_SIN:
1815 		CASE_CFN_CEXPI:
1816 		  /* Make sure we have either sincos or cexp.  */
1817 		  if (!targetm.libc_has_function (function_c99_math_complex)
1818 		      && !targetm.libc_has_function (function_sincos))
1819 		    break;
1820 
1821 		  arg = gimple_call_arg (stmt, 0);
1822 		  if (TREE_CODE (arg) == SSA_NAME)
1823 		    cfg_changed |= execute_cse_sincos_1 (arg);
1824 		  break;
1825 
1826 		CASE_CFN_POW:
1827 		  arg0 = gimple_call_arg (stmt, 0);
1828 		  arg1 = gimple_call_arg (stmt, 1);
1829 
1830 		  loc = gimple_location (stmt);
1831 		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1832 
1833 		  if (result)
1834 		    {
1835 		      tree lhs = gimple_get_lhs (stmt);
1836 		      gassign *new_stmt = gimple_build_assign (lhs, result);
1837 		      gimple_set_location (new_stmt, loc);
1838 		      unlink_stmt_vdef (stmt);
1839 		      gsi_replace (&gsi, new_stmt, true);
1840 		      cleanup_eh = true;
1841 		      if (gimple_vdef (stmt))
1842 			release_ssa_name (gimple_vdef (stmt));
1843 		    }
1844 		  break;
1845 
1846 		CASE_CFN_POWI:
1847 		  arg0 = gimple_call_arg (stmt, 0);
1848 		  arg1 = gimple_call_arg (stmt, 1);
1849 		  loc = gimple_location (stmt);
1850 
1851 		  if (real_minus_onep (arg0))
1852 		    {
1853                       tree t0, t1, cond, one, minus_one;
1854 		      gassign *stmt;
1855 
1856 		      t0 = TREE_TYPE (arg0);
1857 		      t1 = TREE_TYPE (arg1);
1858 		      one = build_real (t0, dconst1);
1859 		      minus_one = build_real (t0, dconstm1);
1860 
1861 		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1862 		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1863 						  arg1, build_int_cst (t1, 1));
1864 		      gimple_set_location (stmt, loc);
1865 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1866 
1867 		      result = make_temp_ssa_name (t0, NULL, "powi");
1868 		      stmt = gimple_build_assign (result, COND_EXPR, cond,
1869 						  minus_one, one);
1870 		      gimple_set_location (stmt, loc);
1871 		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1872 		    }
1873 		  else
1874 		    {
1875 		      if (!tree_fits_shwi_p (arg1))
1876 			break;
1877 
1878 		      n = tree_to_shwi (arg1);
1879 		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1880 		    }
1881 
1882 		  if (result)
1883 		    {
1884 		      tree lhs = gimple_get_lhs (stmt);
1885 		      gassign *new_stmt = gimple_build_assign (lhs, result);
1886 		      gimple_set_location (new_stmt, loc);
1887 		      unlink_stmt_vdef (stmt);
1888 		      gsi_replace (&gsi, new_stmt, true);
1889 		      cleanup_eh = true;
1890 		      if (gimple_vdef (stmt))
1891 			release_ssa_name (gimple_vdef (stmt));
1892 		    }
1893 		  break;
1894 
1895 		CASE_CFN_CABS:
1896 		  arg0 = gimple_call_arg (stmt, 0);
1897 		  loc = gimple_location (stmt);
1898 		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1899 
1900 		  if (result)
1901 		    {
1902 		      tree lhs = gimple_get_lhs (stmt);
1903 		      gassign *new_stmt = gimple_build_assign (lhs, result);
1904 		      gimple_set_location (new_stmt, loc);
1905 		      unlink_stmt_vdef (stmt);
1906 		      gsi_replace (&gsi, new_stmt, true);
1907 		      cleanup_eh = true;
1908 		      if (gimple_vdef (stmt))
1909 			release_ssa_name (gimple_vdef (stmt));
1910 		    }
1911 		  break;
1912 
1913 		default:;
1914 		}
1915 	    }
1916 	}
1917       if (cleanup_eh)
1918 	cfg_changed |= gimple_purge_dead_eh_edges (bb);
1919     }
1920 
1921   statistics_counter_event (fun, "sincos statements inserted",
1922 			    sincos_stats.inserted);
1923 
1924   return cfg_changed ? TODO_cleanup_cfg : 0;
1925 }
1926 
1927 } // anon namespace
1928 
1929 gimple_opt_pass *
1930 make_pass_cse_sincos (gcc::context *ctxt)
1931 {
1932   return new pass_cse_sincos (ctxt);
1933 }
1934 
1935 /* A symbolic number structure is used to detect byte permutation and selection
1936    patterns of a source.  To achieve that, its field N contains an artificial
1937    number consisting of BITS_PER_MARKER sized markers tracking where each
1938    byte comes from in the source:
1939 
1940    0	   - target byte has the value 0
1941    FF	   - target byte has an unknown value (eg. due to sign extension)
1942    1..size - marker value is one plus the source byte index (1 for the lsb).
1943 
1944    To detect permutations on memory sources (arrays and structures), a symbolic
1945    number is also associated:
1946    - a base address BASE_ADDR and an OFFSET giving the address of the source;
1947    - a range which gives the difference between the highest and lowest accessed
1948      memory location to make such a symbolic number;
1949    - the address SRC of the source element of lowest address as a convenience
1950      to easily get BASE_ADDR + offset + lowest bytepos.
1951 
1952    Note 1: the range is different from size as size reflects the size of the
1953    type of the current expression.  For instance, for an array char a[],
1954    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
1955    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
1956    time a range of 1.
1957 
1958    Note 2: for non-memory sources, range holds the same value as size.
1959 
1960    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
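/* Worked example (illustrative): on a little-endian target, computing
   a[0] | a[1] << 8 | a[2] << 16 | a[3] << 24 from byte loads builds the
   symbolic number 0x04030201 (a plain 32-bit load), whereas
   a[3] | a[2] << 8 | a[1] << 16 | a[0] << 24 builds 0x01020304 (a bswap).  */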
1961 
1962 struct symbolic_number {
1963   uint64_t n;
1964   tree type;
1965   tree base_addr;
1966   tree offset;
1967   HOST_WIDE_INT bytepos;
1968   tree src;
1969   tree alias_set;
1970   tree vuse;
1971   unsigned HOST_WIDE_INT range;
1972 };
1973 
1974 #define BITS_PER_MARKER 8
1975 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1976 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1977 #define HEAD_MARKER(n, size) \
1978   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1979 
1980 /* The number which the find_bswap_or_nop_1 result should match in
1981    order to have a nop.  The number is masked according to the size of
1982    the symbolic number before using it.  */
1983 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1984   (uint64_t)0x08070605 << 32 | 0x04030201)
1985 
1986 /* The number which the find_bswap_or_nop_1 result should match in
1987    order to have a byte swap.  The number is masked according to the
1988    size of the symbolic number before using it.  */
1989 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1990   (uint64_t)0x01020304 << 32 | 0x05060708)
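/* For example (illustrative): for a 4-byte access, find_bswap_or_nop shifts
   and masks these down to 0x01020304 (byte swap) and 0x04030201 (nop)
   respectively.  */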
1991 
1992 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1993    number N.  Return false if the requested operation is not permitted
1994    on a symbolic number.  */
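/* For instance (illustrative): shifting the 4-byte symbolic number 0x04030201
   right by 8 bits yields 0x00040302 for an unsigned type, but 0xff040302 for
   a signed type, since the bytes shifted in by the arithmetic shift depend on
   the sign and are therefore unknown.  */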
1995 
1996 static inline bool
1997 do_shift_rotate (enum tree_code code,
1998 		 struct symbolic_number *n,
1999 		 int count)
2000 {
2001   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2002   unsigned head_marker;
2003 
2004   if (count % BITS_PER_UNIT != 0)
2005     return false;
2006   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
2007 
2008   /* Zero out the extra bits of N in order to avoid them being shifted
2009      into the significant bits.  */
2010   if (size < 64 / BITS_PER_MARKER)
2011     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2012 
2013   switch (code)
2014     {
2015     case LSHIFT_EXPR:
2016       n->n <<= count;
2017       break;
2018     case RSHIFT_EXPR:
2019       head_marker = HEAD_MARKER (n->n, size);
2020       n->n >>= count;
2021       /* Arithmetic shift of signed type: result is dependent on the value.  */
2022       if (!TYPE_UNSIGNED (n->type) && head_marker)
2023 	for (i = 0; i < count / BITS_PER_MARKER; i++)
2024 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2025 		  << ((size - 1 - i) * BITS_PER_MARKER);
2026       break;
2027     case LROTATE_EXPR:
2028       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
2029       break;
2030     case RROTATE_EXPR:
2031       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
2032       break;
2033     default:
2034       return false;
2035     }
2036   /* Zero unused bits for size.  */
2037   if (size < 64 / BITS_PER_MARKER)
2038     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2039   return true;
2040 }
2041 
2042 /* Perform sanity checking for the symbolic number N and the gimple
2043    statement STMT.  */
2044 
2045 static inline bool
2046 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
2047 {
2048   tree lhs_type;
2049 
2050   lhs_type = gimple_expr_type (stmt);
2051 
2052   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2053     return false;
2054 
2055   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2056     return false;
2057 
2058   return true;
2059 }
2060 
2061 /* Initialize the symbolic number N for the bswap pass from the base element
2062    SRC manipulated by the bitwise OR expression.  */
2063 
2064 static bool
2065 init_symbolic_number (struct symbolic_number *n, tree src)
2066 {
2067   int size;
2068 
2069   if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
2070     return false;
2071 
2072   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2073   n->src = src;
2074 
2075   /* Set up the symbolic number N by setting each byte to a value between 1 and
2076      the byte size of SRC.  The highest order byte is set to that size and the
2077      lowest order byte to 1.  */
2078   n->type = TREE_TYPE (src);
2079   size = TYPE_PRECISION (n->type);
2080   if (size % BITS_PER_UNIT != 0)
2081     return false;
2082   size /= BITS_PER_UNIT;
2083   if (size > 64 / BITS_PER_MARKER)
2084     return false;
2085   n->range = size;
2086   n->n = CMPNOP;
2087 
2088   if (size < 64 / BITS_PER_MARKER)
2089     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2090 
2091   return true;
2092 }
2093 
2094 /* Check if STMT might be a byte swap or a nop from a memory source and return
2095    the answer.  If so, REF is that memory source and the base of the memory
2096    area accessed and the offset of the access from that base are recorded in N.  */
2097 
2098 bool
2099 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
2100 {
2101   /* Leaf node is an array or component ref.  Memorize its base and
2102      offset from base to compare to other such leaf nodes.  */
2103   HOST_WIDE_INT bitsize, bitpos;
2104   machine_mode mode;
2105   int unsignedp, reversep, volatilep;
2106   tree offset, base_addr;
2107 
2108   /* Not prepared to handle PDP endian.  */
2109   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2110     return false;
2111 
2112   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2113     return false;
2114 
2115   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2116 				   &unsignedp, &reversep, &volatilep);
2117 
2118   if (TREE_CODE (base_addr) == MEM_REF)
2119     {
2120       offset_int bit_offset = 0;
2121       tree off = TREE_OPERAND (base_addr, 1);
2122 
2123       if (!integer_zerop (off))
2124 	{
2125 	  offset_int boff, coff = mem_ref_offset (base_addr);
2126 	  boff = coff << LOG2_BITS_PER_UNIT;
2127 	  bit_offset += boff;
2128 	}
2129 
2130       base_addr = TREE_OPERAND (base_addr, 0);
2131 
2132       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
2133       if (wi::neg_p (bit_offset))
2134 	{
2135 	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2136 	  offset_int tem = bit_offset.and_not (mask);
2137 	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2138 	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
2139 	  bit_offset -= tem;
2140 	  tem >>= LOG2_BITS_PER_UNIT;
2141 	  if (offset)
2142 	    offset = size_binop (PLUS_EXPR, offset,
2143 				    wide_int_to_tree (sizetype, tem));
2144 	  else
2145 	    offset = wide_int_to_tree (sizetype, tem);
2146 	}
2147 
2148       bitpos += bit_offset.to_shwi ();
2149     }
2150 
2151   if (bitpos % BITS_PER_UNIT)
2152     return false;
2153   if (bitsize % BITS_PER_UNIT)
2154     return false;
2155   if (reversep)
2156     return false;
2157 
2158   if (!init_symbolic_number (n, ref))
2159     return false;
2160   n->base_addr = base_addr;
2161   n->offset = offset;
2162   n->bytepos = bitpos / BITS_PER_UNIT;
2163   n->alias_set = reference_alias_ptr_type (ref);
2164   n->vuse = gimple_vuse (stmt);
2165   return true;
2166 }
2167 
2168 /* Compute the symbolic number N representing the result of a bitwise OR on two
2169    symbolic numbers N1 and N2 whose source statements are respectively
2170    SOURCE_STMT1 and SOURCE_STMT2.  */
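/* For example (illustrative): merging the numbers for a[0] and a[1] << 8 in
   a[0] | a[1] << 8 produces a single symbolic number describing a two-byte
   access starting at the base address of a.  */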
2171 
2172 static gimple *
2173 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
2174 			gimple *source_stmt2, struct symbolic_number *n2,
2175 			struct symbolic_number *n)
2176 {
2177   int i, size;
2178   uint64_t mask;
2179   gimple *source_stmt;
2180   struct symbolic_number *n_start;
2181 
2182   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
2183   if (TREE_CODE (rhs1) == BIT_FIELD_REF
2184       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2185     rhs1 = TREE_OPERAND (rhs1, 0);
2186   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
2187   if (TREE_CODE (rhs2) == BIT_FIELD_REF
2188       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
2189     rhs2 = TREE_OPERAND (rhs2, 0);
2190 
2191   /* Sources are different; cancel bswap if they are not memory locations with
2192      the same base (array, structure, ...).  */
2193   if (rhs1 != rhs2)
2194     {
2195       uint64_t inc;
2196       HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2197       struct symbolic_number *toinc_n_ptr, *n_end;
2198       basic_block bb1, bb2;
2199 
2200       if (!n1->base_addr || !n2->base_addr
2201 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2202 	return NULL;
2203 
2204       if (!n1->offset != !n2->offset
2205 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2206 	return NULL;
2207 
2208       if (n1->bytepos < n2->bytepos)
2209 	{
2210 	  n_start = n1;
2211 	  start_sub = n2->bytepos - n1->bytepos;
2212 	}
2213       else
2214 	{
2215 	  n_start = n2;
2216 	  start_sub = n1->bytepos - n2->bytepos;
2217 	}
2218 
2219       bb1 = gimple_bb (source_stmt1);
2220       bb2 = gimple_bb (source_stmt2);
2221       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
2222 	source_stmt = source_stmt1;
2223       else
2224 	source_stmt = source_stmt2;
2225 
2226       /* Find the highest address at which a load is performed and
2227 	 compute related info.  */
2228       end1 = n1->bytepos + (n1->range - 1);
2229       end2 = n2->bytepos + (n2->range - 1);
2230       if (end1 < end2)
2231 	{
2232 	  end = end2;
2233 	  end_sub = end2 - end1;
2234 	}
2235       else
2236 	{
2237 	  end = end1;
2238 	  end_sub = end1 - end2;
2239 	}
2240       n_end = (end2 > end1) ? n2 : n1;
2241 
2242       /* Find the symbolic number whose lsb is the most significant.  */
2243       if (BYTES_BIG_ENDIAN)
2244 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
2245       else
2246 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
2247 
2248       n->range = end - n_start->bytepos + 1;
2249 
2250       /* Check that the range of memory covered can be represented by
2251 	 a symbolic number.  */
2252       if (n->range > 64 / BITS_PER_MARKER)
2253 	return NULL;
2254 
2255       /* Reinterpret the byte markers in the symbolic number holding the
2256 	 value of higher weight according to the target endianness.  */
2257       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2258       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2259       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2260 	{
2261 	  unsigned marker
2262 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2263 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
2264 	    toinc_n_ptr->n += inc;
2265 	}
2266     }
2267   else
2268     {
2269       n->range = n1->range;
2270       n_start = n1;
2271       source_stmt = source_stmt1;
2272     }
2273 
2274   if (!n1->alias_set
2275       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2276     n->alias_set = n1->alias_set;
2277   else
2278     n->alias_set = ptr_type_node;
2279   n->vuse = n_start->vuse;
2280   n->base_addr = n_start->base_addr;
2281   n->offset = n_start->offset;
2282   n->src = n_start->src;
2283   n->bytepos = n_start->bytepos;
2284   n->type = n_start->type;
2285   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2286 
2287   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2288     {
2289       uint64_t masked1, masked2;
2290 
2291       masked1 = n1->n & mask;
2292       masked2 = n2->n & mask;
2293       if (masked1 && masked2 && masked1 != masked2)
2294 	return NULL;
2295     }
2296   n->n = n1->n | n2->n;
2297 
2298   return source_stmt;
2299 }
2300 
2301 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2302    the operation given by the rhs of STMT on the result.  If the operation
2303    could successfully be executed the function returns a gimple stmt whose
2304    rhs's first tree is the expression of the source operand; otherwise it
2305    returns NULL.  */
2306 
2307 static gimple *
2308 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
2309 {
2310   enum tree_code code;
2311   tree rhs1, rhs2 = NULL;
2312   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
2313   enum gimple_rhs_class rhs_class;
2314 
2315   if (!limit || !is_gimple_assign (stmt))
2316     return NULL;
2317 
2318   rhs1 = gimple_assign_rhs1 (stmt);
2319 
2320   if (find_bswap_or_nop_load (stmt, rhs1, n))
2321     return stmt;
2322 
2323   /* Handle BIT_FIELD_REF.  */
2324   if (TREE_CODE (rhs1) == BIT_FIELD_REF
2325       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2326     {
2327       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
2328       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
2329       if (bitpos % BITS_PER_UNIT == 0
2330 	  && bitsize % BITS_PER_UNIT == 0
2331 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
2332 	{
2333 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
2334 	  if (BYTES_BIG_ENDIAN)
2335 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
2336 
2337 	  /* Shift.  */
2338 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
2339 	    return NULL;
2340 
2341 	  /* Mask.  */
2342 	  uint64_t mask = 0;
2343 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2344 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
2345 	       i++, tmp <<= BITS_PER_UNIT)
2346 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2347 	  n->n &= mask;
2348 
2349 	  /* Convert.  */
2350 	  n->type = TREE_TYPE (rhs1);
2351 	  if (!n->base_addr)
2352 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2353 
2354 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
2355 	}
2356 
2357       return NULL;
2358     }
2359 
2360   if (TREE_CODE (rhs1) != SSA_NAME)
2361     return NULL;
2362 
2363   code = gimple_assign_rhs_code (stmt);
2364   rhs_class = gimple_assign_rhs_class (stmt);
2365   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2366 
2367   if (rhs_class == GIMPLE_BINARY_RHS)
2368     rhs2 = gimple_assign_rhs2 (stmt);
2369 
2370   /* Handle unary rhs and binary rhs with integer constants as second
2371      operand.  */
2372 
2373   if (rhs_class == GIMPLE_UNARY_RHS
2374       || (rhs_class == GIMPLE_BINARY_RHS
2375 	  && TREE_CODE (rhs2) == INTEGER_CST))
2376     {
2377       if (code != BIT_AND_EXPR
2378 	  && code != LSHIFT_EXPR
2379 	  && code != RSHIFT_EXPR
2380 	  && code != LROTATE_EXPR
2381 	  && code != RROTATE_EXPR
2382 	  && !CONVERT_EXPR_CODE_P (code))
2383 	return NULL;
2384 
2385       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2386 
2387       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2388 	 we have to initialize the symbolic number.  */
2389       if (!source_stmt1)
2390 	{
2391 	  if (gimple_assign_load_p (stmt)
2392 	      || !init_symbolic_number (n, rhs1))
2393 	    return NULL;
2394 	  source_stmt1 = stmt;
2395 	}
2396 
2397       switch (code)
2398 	{
2399 	case BIT_AND_EXPR:
2400 	  {
2401 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2402 	    uint64_t val = int_cst_value (rhs2), mask = 0;
2403 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2404 
2405 	    /* Only constants masking full bytes are allowed.  */
2406 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2407 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
2408 		return NULL;
2409 	      else if (val & tmp)
2410 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2411 
2412 	    n->n &= mask;
2413 	  }
2414 	  break;
2415 	case LSHIFT_EXPR:
2416 	case RSHIFT_EXPR:
2417 	case LROTATE_EXPR:
2418 	case RROTATE_EXPR:
2419 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2420 	    return NULL;
2421 	  break;
2422 	CASE_CONVERT:
2423 	  {
2424 	    int i, type_size, old_type_size;
2425 	    tree type;
2426 
2427 	    type = gimple_expr_type (stmt);
2428 	    type_size = TYPE_PRECISION (type);
2429 	    if (type_size % BITS_PER_UNIT != 0)
2430 	      return NULL;
2431 	    type_size /= BITS_PER_UNIT;
2432 	    if (type_size > 64 / BITS_PER_MARKER)
2433 	      return NULL;
2434 
2435 	    /* Sign extension: result is dependent on the value.  */
2436 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2437 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2438 		&& HEAD_MARKER (n->n, old_type_size))
2439 	      for (i = 0; i < type_size - old_type_size; i++)
2440 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2441 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
2442 
2443 	    if (type_size < 64 / BITS_PER_MARKER)
2444 	      {
2445 		/* If STMT casts to a smaller type mask out the bits not
2446 		   belonging to the target type.  */
2447 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2448 	      }
2449 	    n->type = type;
2450 	    if (!n->base_addr)
2451 	      n->range = type_size;
2452 	  }
2453 	  break;
2454 	default:
2455 	  return NULL;
2456 	};
2457       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2458     }
2459 
2460   /* Handle binary rhs.  */
2461 
2462   if (rhs_class == GIMPLE_BINARY_RHS)
2463     {
2464       struct symbolic_number n1, n2;
2465       gimple *source_stmt, *source_stmt2;
2466 
2467       if (code != BIT_IOR_EXPR)
2468 	return NULL;
2469 
2470       if (TREE_CODE (rhs2) != SSA_NAME)
2471 	return NULL;
2472 
2473       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2474 
2475       switch (code)
2476 	{
2477 	case BIT_IOR_EXPR:
2478 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2479 
2480 	  if (!source_stmt1)
2481 	    return NULL;
2482 
2483 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2484 
2485 	  if (!source_stmt2)
2486 	    return NULL;
2487 
2488 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2489 	    return NULL;
2490 
2491 	  if (!n1.vuse != !n2.vuse
2492 	      || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2493 	    return NULL;
2494 
2495 	  source_stmt
2496 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2497 
2498 	  if (!source_stmt)
2499 	    return NULL;
2500 
2501 	  if (!verify_symbolic_number_p (n, stmt))
2502 	    return NULL;
2503 
2504 	  break;
2505 	default:
2506 	  return NULL;
2507 	}
2508       return source_stmt;
2509     }
2510   return NULL;
2511 }
2512 
2513 /* Check if STMT completes a bswap implementation or a read in a given
2514    endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
2515    accordingly.  It also sets N to represent the kind of operations
2516    performed: size of the resulting expression and whether it works on
2517    a memory source, and if so the alias set and vuse.  Finally, the
2518    function returns a stmt whose rhs's first tree is the source
2519    expression.  */
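/* For instance (illustrative), for a 32-bit unsigned x the sequence

     res = (x >> 24)
	   | ((x >> 8) & 0xff00)
	   | ((x << 8) & 0xff0000)
	   | (x << 24);

   is detected here as a byte swap and can later be rewritten as a call to
   __builtin_bswap32 when the target provides a bswap instruction pattern.  */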
2520 
2521 static gimple *
2522 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
2523 {
2524   unsigned rsize;
2525   uint64_t tmpn, mask;
2526   /* The number which the find_bswap_or_nop_1 result should match in order
2527      to have a full byte swap.  The number is shifted to the right
2528      according to the size of the symbolic number before using it.  */
2529   uint64_t cmpxchg = CMPXCHG;
2530   uint64_t cmpnop = CMPNOP;
2531 
2532   gimple *ins_stmt;
2533   int limit;
2534 
2535   /* The last parameter determines the search depth limit.  It usually
2536      correlates directly to the number n of bytes to be touched.  We
2537      increase that number by log2(n) + 1 here in order to also
2538      cover signed -> unsigned conversions of the src operand as can be seen
2539      in libgcc, and for the initial shift/and operation of the src operand.  */
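  /* Worked example (illustrative): for a 4-byte expression the limit is
     4 + 1 + ceil_log2 (4) = 7 statements.  */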
2540   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2541   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2542   ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2543 
2544   if (!ins_stmt)
2545     return NULL;
2546 
2547   /* Find real size of result (highest non-zero byte).  */
2548   if (n->base_addr)
2549     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2550   else
2551     rsize = n->range;
2552 
2553   /* Zero out the bits corresponding to untouched bytes in the original
2554      gimple expression.  */
2555   if (n->range < (int) sizeof (int64_t))
2556     {
2557       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2558       cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2559       cmpnop &= mask;
2560     }
2561 
2562   /* Zero out the bits corresponding to unused bytes in the result of the
2563      gimple expression.  */
2564   if (rsize < n->range)
2565     {
2566       if (BYTES_BIG_ENDIAN)
2567 	{
2568 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2569 	  cmpxchg &= mask;
2570 	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
2571 	}
2572       else
2573 	{
2574 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2575 	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
2576 	  cmpnop &= mask;
2577 	}
2578       n->range = rsize;
2579     }
2580 
2581   /* A complete byte swap should make the symbolic number start with
2582      the largest digit in the highest order byte.  An unchanged symbolic
2583      number indicates a read with the same endianness as the target.  */
2584   if (n->n == cmpnop)
2585     *bswap = false;
2586   else if (n->n == cmpxchg)
2587     *bswap = true;
2588   else
2589     return NULL;
2590 
2591   /* A nop from a non-memory source is useless bit manipulation; skip it.  */
2592   if (!n->base_addr && n->n == cmpnop)
2593     return NULL;
2594 
2595   n->range *= BITS_PER_UNIT;
2596   return ins_stmt;
2597 }
2598 
2599 namespace {
2600 
2601 const pass_data pass_data_optimize_bswap =
2602 {
2603   GIMPLE_PASS, /* type */
2604   "bswap", /* name */
2605   OPTGROUP_NONE, /* optinfo_flags */
2606   TV_NONE, /* tv_id */
2607   PROP_ssa, /* properties_required */
2608   0, /* properties_provided */
2609   0, /* properties_destroyed */
2610   0, /* todo_flags_start */
2611   0, /* todo_flags_finish */
2612 };
2613 
2614 class pass_optimize_bswap : public gimple_opt_pass
2615 {
2616 public:
2617   pass_optimize_bswap (gcc::context *ctxt)
2618     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2619   {}
2620 
2621   /* opt_pass methods: */
2622   virtual bool gate (function *)
2623     {
2624       return flag_expensive_optimizations && optimize;
2625     }
2626 
2627   virtual unsigned int execute (function *);
2628 
2629 }; // class pass_optimize_bswap
2630 
2631 /* Perform the bswap optimization: replace the expression computed in the rhs
2632    of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2633    Which of these alternatives replaces the rhs is given by N->base_addr (non
2634    null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
2635    load to perform are also given in N, while the builtin bswap invocation is
2636    given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
2637    the load statements used to construct the rhs in CUR_STMT and N->range gives
2638    the size of the rhs expression for maintaining some statistics.
2639 
2640    Note that if the replacement involves a load, CUR_STMT is moved just before
2641    INS_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2642    changing basic block.  */
2643 
2644 static bool
2645 bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
2646 	       tree bswap_type, tree load_type, struct symbolic_number *n,
2647 	       bool bswap)
2648 {
2649   gimple_stmt_iterator gsi;
2650   tree src, tmp, tgt;
2651   gimple *bswap_stmt;
2652 
2653   gsi = gsi_for_stmt (cur_stmt);
2654   src = n->src;
2655   tgt = gimple_assign_lhs (cur_stmt);
2656 
2657   /* Need to load the value from memory first.  */
2658   if (n->base_addr)
2659     {
2660       gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
2661       tree addr_expr, addr_tmp, val_expr, val_tmp;
2662       tree load_offset_ptr, aligned_load_type;
2663       gimple *addr_stmt, *load_stmt;
2664       unsigned align;
2665       HOST_WIDE_INT load_offset = 0;
2666       basic_block ins_bb, cur_bb;
2667 
2668       ins_bb = gimple_bb (ins_stmt);
2669       cur_bb = gimple_bb (cur_stmt);
2670       if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
2671 	return false;
2672 
2673       align = get_object_alignment (src);
2674 
2675       /* Move cur_stmt just before one of the loads of the original
2676 	 statement to ensure it has the same VUSE.  See PR61517 for what
2677 	 could go wrong.  */
2678       if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
2679 	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
2680       gsi_move_before (&gsi, &gsi_ins);
2681       gsi = gsi_for_stmt (cur_stmt);
2682 
2683       /* Compute address to load from and cast according to the size
2684 	 of the load.  */
2685       addr_expr = build_fold_addr_expr (unshare_expr (src));
2686       if (is_gimple_mem_ref_addr (addr_expr))
2687 	addr_tmp = addr_expr;
2688       else
2689 	{
2690 	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2691 					 "load_src");
2692 	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2693 	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2694 	}
2695 
2696       /* Perform the load.  */
2697       aligned_load_type = load_type;
2698       if (align < TYPE_ALIGN (load_type))
2699 	aligned_load_type = build_aligned_type (load_type, align);
2700       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2701       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2702 			      load_offset_ptr);
2703 
2704       if (!bswap)
2705 	{
2706 	  if (n->range == 16)
2707 	    nop_stats.found_16bit++;
2708 	  else if (n->range == 32)
2709 	    nop_stats.found_32bit++;
2710 	  else
2711 	    {
2712 	      gcc_assert (n->range == 64);
2713 	      nop_stats.found_64bit++;
2714 	    }
2715 
2716 	  /* Convert the result of the load if necessary.  */
2717 	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2718 	    {
2719 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2720 					    "load_dst");
2721 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
2722 	      gimple_set_vuse (load_stmt, n->vuse);
2723 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2724 	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2725 	    }
2726 	  else
2727 	    {
2728 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2729 	      gimple_set_vuse (cur_stmt, n->vuse);
2730 	    }
2731 	  update_stmt (cur_stmt);
2732 
2733 	  if (dump_file)
2734 	    {
2735 	      fprintf (dump_file,
2736 		       "%d bit load in target endianness found at: ",
2737 		       (int) n->range);
2738 	      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2739 	    }
2740 	  return true;
2741 	}
2742       else
2743 	{
2744 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2745 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
2746 	  gimple_set_vuse (load_stmt, n->vuse);
2747 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2748 	}
2749       src = val_tmp;
2750     }
2751   else if (TREE_CODE (src) == BIT_FIELD_REF)
2752     src = TREE_OPERAND (src, 0);
2753 
2754   if (n->range == 16)
2755     bswap_stats.found_16bit++;
2756   else if (n->range == 32)
2757     bswap_stats.found_32bit++;
2758   else
2759     {
2760       gcc_assert (n->range == 64);
2761       bswap_stats.found_64bit++;
2762     }
2763 
2764   tmp = src;
2765 
2766   /* Convert the src expression if necessary.  */
2767   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2768     {
2769       gimple *convert_stmt;
2770 
2771       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2772       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2773       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2774     }
2775 
2776   /* Canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
2777      values are handled this way, since rotating a 2N-bit value by N bits is
2778      generally not equivalent to a bswap.  Consider for instance 0x01020304
2779      r>> 16, which gives 0x03040102 while a bswap for that value is 0x04030201.  */
2780   if (bswap && n->range == 16)
2781     {
2782       tree count = build_int_cst (NULL, BITS_PER_UNIT);
2783       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2784       bswap_stmt = gimple_build_assign (NULL, src);
2785     }
2786   else
2787     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2788 
2789   tmp = tgt;
2790 
2791   /* Convert the result if necessary.  */
2792   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2793     {
2794       gimple *convert_stmt;
2795 
2796       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2797       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2798       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2799     }
2800 
2801   gimple_set_lhs (bswap_stmt, tmp);
2802 
2803   if (dump_file)
2804     {
2805       fprintf (dump_file, "%d bit bswap implementation found at: ",
2806 	       (int) n->range);
2807       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2808     }
2809 
2810   gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2811   gsi_remove (&gsi, true);
2812   return true;
2813 }
2814 
2815 /* Find manual byte swap implementations as well as loads in a given
2816    endianness.  Byte swaps are turned into a bswap builtin invocation,
2817    while endian loads are converted to a bswap builtin invocation or a
2818    simple load according to the target endianness.  */
2819 
2820 unsigned int
2821 pass_optimize_bswap::execute (function *fun)
2822 {
2823   basic_block bb;
2824   bool bswap32_p, bswap64_p;
2825   bool changed = false;
2826   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2827 
2828   if (BITS_PER_UNIT != 8)
2829     return 0;
2830 
2831   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2832 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2833   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2834 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2835 		   || (bswap32_p && word_mode == SImode)));
2836 
2837   /* Determine the argument type of the builtins.  The code later on
2838      assumes that the return and argument type are the same.  */
2839   if (bswap32_p)
2840     {
2841       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2842       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2843     }
2844 
2845   if (bswap64_p)
2846     {
2847       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2848       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2849     }
2850 
2851   memset (&nop_stats, 0, sizeof (nop_stats));
2852   memset (&bswap_stats, 0, sizeof (bswap_stats));
2853   calculate_dominance_info (CDI_DOMINATORS);
2854 
2855   FOR_EACH_BB_FN (bb, fun)
2856     {
2857       gimple_stmt_iterator gsi;
2858 
2859       /* We do a reverse scan for bswap patterns to make sure we get the
2860 	 widest match. As bswap pattern matching doesn't handle previously
2861 	 inserted smaller bswap replacements as sub-patterns, the wider
2862 	 variant wouldn't be detected.  */
2863       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2864         {
2865 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
2866 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2867 	  enum tree_code code;
2868 	  struct symbolic_number n;
2869 	  bool bswap;
2870 
2871 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2872 	     might be moved to a different basic block by bswap_replace and gsi
2873 	     must not point to it in that case.  Doing the gsi_prev here makes
2874 	     sure that gsi points to the statement previous to cur_stmt while
2875 	     still making sure that all statements in this basic block are
2876 	     considered.  */
2877 	  gsi_prev (&gsi);
2878 
2879 	  if (!is_gimple_assign (cur_stmt))
2880 	    continue;
2881 
2882 	  code = gimple_assign_rhs_code (cur_stmt);
2883 	  switch (code)
2884 	    {
2885 	    case LROTATE_EXPR:
2886 	    case RROTATE_EXPR:
2887 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2888 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2889 		     % BITS_PER_UNIT)
2890 		continue;
2891 	      /* Fall through.  */
2892 	    case BIT_IOR_EXPR:
2893 	      break;
2894 	    default:
2895 	      continue;
2896 	    }
2897 
2898 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2899 
2900 	  if (!ins_stmt)
2901 	    continue;
2902 
2903 	  switch (n.range)
2904 	    {
2905 	    case 16:
2906 	      /* Already in canonical form, nothing to do.  */
2907 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2908 		continue;
2909 	      load_type = bswap_type = uint16_type_node;
2910 	      break;
2911 	    case 32:
2912 	      load_type = uint32_type_node;
2913 	      if (bswap32_p)
2914 		{
2915 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2916 		  bswap_type = bswap32_type;
2917 		}
2918 	      break;
2919 	    case 64:
2920 	      load_type = uint64_type_node;
2921 	      if (bswap64_p)
2922 		{
2923 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2924 		  bswap_type = bswap64_type;
2925 		}
2926 	      break;
2927 	    default:
2928 	      continue;
2929 	    }
2930 
2931 	  if (bswap && !fndecl && n.range != 16)
2932 	    continue;
2933 
2934 	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
2935 			     &n, bswap))
2936 	    changed = true;
2937 	}
2938     }
2939 
2940   statistics_counter_event (fun, "16-bit nop implementations found",
2941 			    nop_stats.found_16bit);
2942   statistics_counter_event (fun, "32-bit nop implementations found",
2943 			    nop_stats.found_32bit);
2944   statistics_counter_event (fun, "64-bit nop implementations found",
2945 			    nop_stats.found_64bit);
2946   statistics_counter_event (fun, "16-bit bswap implementations found",
2947 			    bswap_stats.found_16bit);
2948   statistics_counter_event (fun, "32-bit bswap implementations found",
2949 			    bswap_stats.found_32bit);
2950   statistics_counter_event (fun, "64-bit bswap implementations found",
2951 			    bswap_stats.found_64bit);
2952 
2953   return (changed ? TODO_update_ssa : 0);
2954 }
2955 
2956 } // anon namespace
2957 
2958 gimple_opt_pass *
2959 make_pass_optimize_bswap (gcc::context *ctxt)
2960 {
2961   return new pass_optimize_bswap (ctxt);
2962 }
2963 
2964 /* Return true if STMT is a type conversion operation that can be stripped
2965    when used in a widening multiply operation.  */
2966 static bool
2967 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2968 {
2969   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2970 
2971   if (TREE_CODE (result_type) == INTEGER_TYPE)
2972     {
2973       tree op_type;
2974       tree inner_op_type;
2975 
2976       if (!CONVERT_EXPR_CODE_P (rhs_code))
2977 	return false;
2978 
2979       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2980 
2981       /* If the type of OP has the same precision as the result, then
2982 	 we can strip this conversion.  The multiply operation will be
2983 	 selected to create the correct extension as a by-product.  */
2984       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2985 	return true;
2986 
2987       /* We can also strip a conversion if it preserves the signed-ness of
2988 	 the operation and doesn't narrow the range.  */
2989       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2990 
2991       /* If the inner-most type is unsigned, then we can strip any
2992 	 intermediate widening operation.  If it's signed, then the
2993 	 intermediate widening operation must also be signed.  */
2994       if ((TYPE_UNSIGNED (inner_op_type)
2995 	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2996 	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2997 	return true;
2998 
2999       return false;
3000     }
3001 
3002   return rhs_code == FIXED_CONVERT_EXPR;
3003 }
3004 
3005 /* Return true if RHS is a suitable operand for a widening multiplication,
3006    assuming a target type of TYPE.
3007    There are two cases:
3008 
3009      - RHS makes some value at least twice as wide.  Store that value
3010        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
3011 
3012      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
3013        but leave *TYPE_OUT untouched.  */
3014 
3015 static bool
3016 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
3017 			tree *new_rhs_out)
3018 {
3019   gimple *stmt;
3020   tree type1, rhs1;
3021 
3022   if (TREE_CODE (rhs) == SSA_NAME)
3023     {
3024       stmt = SSA_NAME_DEF_STMT (rhs);
3025       if (is_gimple_assign (stmt))
3026 	{
3027 	  if (! widening_mult_conversion_strippable_p (type, stmt))
3028 	    rhs1 = rhs;
3029 	  else
3030 	    {
3031 	      rhs1 = gimple_assign_rhs1 (stmt);
3032 
3033 	      if (TREE_CODE (rhs1) == INTEGER_CST)
3034 		{
3035 		  *new_rhs_out = rhs1;
3036 		  *type_out = NULL;
3037 		  return true;
3038 		}
3039 	    }
3040 	}
3041       else
3042 	rhs1 = rhs;
3043 
3044       type1 = TREE_TYPE (rhs1);
3045 
3046       if (TREE_CODE (type1) != TREE_CODE (type)
3047 	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
3048 	return false;
3049 
3050       *new_rhs_out = rhs1;
3051       *type_out = type1;
3052       return true;
3053     }
3054 
3055   if (TREE_CODE (rhs) == INTEGER_CST)
3056     {
3057       *new_rhs_out = rhs;
3058       *type_out = NULL;
3059       return true;
3060     }
3061 
3062   return false;
3063 }
3064 
3065 /* Return true if STMT performs a widening multiplication, assuming the
3066    output type is TYPE.  If so, store the unwidened types of the operands
3067    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
3068    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
3069    and *TYPE2_OUT would give the operands of the multiplication.  */
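/* Illustrative example: if STMT multiplies two operands that were each widened
   from a 32-bit int to a 64-bit long, this returns true with *TYPE1_OUT and
   *TYPE2_OUT set to the 32-bit type and *RHS1_OUT and *RHS2_OUT set to the
   original unwidened operands.  */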
3070 
3071 static bool
3072 is_widening_mult_p (gimple *stmt,
3073 		    tree *type1_out, tree *rhs1_out,
3074 		    tree *type2_out, tree *rhs2_out)
3075 {
3076   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3077 
3078   if (TREE_CODE (type) == INTEGER_TYPE)
3079     {
3080       if (TYPE_OVERFLOW_TRAPS (type))
3081 	return false;
3082     }
3083   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
3084     return false;
3085 
3086   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
3087 			       rhs1_out))
3088     return false;
3089 
3090   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
3091 			       rhs2_out))
3092     return false;
3093 
3094   if (*type1_out == NULL)
3095     {
3096       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
3097 	return false;
3098       *type1_out = *type2_out;
3099     }
3100 
3101   if (*type2_out == NULL)
3102     {
3103       if (!int_fits_type_p (*rhs2_out, *type1_out))
3104 	return false;
3105       *type2_out = *type1_out;
3106     }
3107 
3108   /* Ensure that the larger of the two operands comes first. */
3109   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3110     {
3111       std::swap (*type1_out, *type2_out);
3112       std::swap (*rhs1_out, *rhs2_out);
3113     }
3114 
3115   return true;
3116 }
3117 
3118 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3119    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
3120    value is true iff we converted the statement.  */
3121 
3122 static bool
3123 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
3124 {
3125   tree lhs, rhs1, rhs2, type, type1, type2;
3126   enum insn_code handler;
3127   machine_mode to_mode, from_mode, actual_mode;
3128   optab op;
3129   int actual_precision;
3130   location_t loc = gimple_location (stmt);
3131   bool from_unsigned1, from_unsigned2;
3132 
3133   lhs = gimple_assign_lhs (stmt);
3134   type = TREE_TYPE (lhs);
3135   if (TREE_CODE (type) != INTEGER_TYPE)
3136     return false;
3137 
3138   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3139     return false;
3140 
3141   to_mode = TYPE_MODE (type);
3142   from_mode = TYPE_MODE (type1);
3143   from_unsigned1 = TYPE_UNSIGNED (type1);
3144   from_unsigned2 = TYPE_UNSIGNED (type2);
3145 
3146   if (from_unsigned1 && from_unsigned2)
3147     op = umul_widen_optab;
3148   else if (!from_unsigned1 && !from_unsigned2)
3149     op = smul_widen_optab;
3150   else
3151     op = usmul_widen_optab;
3152 
3153   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3154 						  0, &actual_mode);
3155 
3156   if (handler == CODE_FOR_nothing)
3157     {
3158       if (op != smul_widen_optab)
3159 	{
3160 	  /* We can use a signed multiply with unsigned types as long as
3161 	     there is a wider mode to use, or it is the smaller of the two
3162 	     types that is unsigned.  Note that type1 >= type2, always.  */
3163 	  if ((TYPE_UNSIGNED (type1)
3164 	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3165 	      || (TYPE_UNSIGNED (type2)
3166 		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3167 	    {
3168 	      from_mode = GET_MODE_WIDER_MODE (from_mode);
3169 	      if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3170 		return false;
3171 	    }
3172 
3173 	  op = smul_widen_optab;
3174 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
3175 							  from_mode, 0,
3176 							  &actual_mode);
3177 
3178 	  if (handler == CODE_FOR_nothing)
3179 	    return false;
3180 
3181 	  from_unsigned1 = from_unsigned2 = false;
3182 	}
3183       else
3184 	return false;
3185     }
3186 
3187   /* Ensure that the inputs to the handler are in the correct precision
3188      for the opcode.  This will be the full mode size.  */
3189   actual_precision = GET_MODE_PRECISION (actual_mode);
3190   if (2 * actual_precision > TYPE_PRECISION (type))
3191     return false;
3192   if (actual_precision != TYPE_PRECISION (type1)
3193       || from_unsigned1 != TYPE_UNSIGNED (type1))
3194     rhs1 = build_and_insert_cast (gsi, loc,
3195 				  build_nonstandard_integer_type
3196 				    (actual_precision, from_unsigned1), rhs1);
3197   if (actual_precision != TYPE_PRECISION (type2)
3198       || from_unsigned2 != TYPE_UNSIGNED (type2))
3199     rhs2 = build_and_insert_cast (gsi, loc,
3200 				  build_nonstandard_integer_type
3201 				    (actual_precision, from_unsigned2), rhs2);
3202 
3203   /* Handle constants.  */
3204   if (TREE_CODE (rhs1) == INTEGER_CST)
3205     rhs1 = fold_convert (type1, rhs1);
3206   if (TREE_CODE (rhs2) == INTEGER_CST)
3207     rhs2 = fold_convert (type2, rhs2);
3208 
3209   gimple_assign_set_rhs1 (stmt, rhs1);
3210   gimple_assign_set_rhs2 (stmt, rhs2);
3211   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3212   update_stmt (stmt);
3213   widen_mul_stats.widen_mults_inserted++;
3214   return true;
3215 }
3216 
3217 /* Process a single gimple statement STMT, which is found at the
3218    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
3219    rhs (given by CODE), and try to convert it into a
3220    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
3221    is true iff we converted the statement.  */
3222 
3223 static bool
3224 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
3225 			    enum tree_code code)
3226 {
3227   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
3228   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
3229   tree type, type1, type2, optype;
3230   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3231   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3232   optab this_optab;
3233   enum tree_code wmult_code;
3234   enum insn_code handler;
3235   machine_mode to_mode, from_mode, actual_mode;
3236   location_t loc = gimple_location (stmt);
3237   int actual_precision;
3238   bool from_unsigned1, from_unsigned2;
3239 
3240   lhs = gimple_assign_lhs (stmt);
3241   type = TREE_TYPE (lhs);
3242   if (TREE_CODE (type) != INTEGER_TYPE
3243       && TREE_CODE (type) != FIXED_POINT_TYPE)
3244     return false;
3245 
3246   if (code == MINUS_EXPR)
3247     wmult_code = WIDEN_MULT_MINUS_EXPR;
3248   else
3249     wmult_code = WIDEN_MULT_PLUS_EXPR;
3250 
3251   rhs1 = gimple_assign_rhs1 (stmt);
3252   rhs2 = gimple_assign_rhs2 (stmt);
3253 
3254   if (TREE_CODE (rhs1) == SSA_NAME)
3255     {
3256       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3257       if (is_gimple_assign (rhs1_stmt))
3258 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3259     }
3260 
3261   if (TREE_CODE (rhs2) == SSA_NAME)
3262     {
3263       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3264       if (is_gimple_assign (rhs2_stmt))
3265 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3266     }
3267 
3268   /* Allow for one conversion statement between the multiply
3269      and addition/subtraction statement.  If there is more than
3270      one conversion then we assume they would invalidate this
3271      transformation.  If that's not the case then they should have
3272      been folded before now.  */
3273   if (CONVERT_EXPR_CODE_P (rhs1_code))
3274     {
3275       conv1_stmt = rhs1_stmt;
3276       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3277       if (TREE_CODE (rhs1) == SSA_NAME)
3278 	{
3279 	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3280 	  if (is_gimple_assign (rhs1_stmt))
3281 	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3282 	}
3283       else
3284 	return false;
3285     }
3286   if (CONVERT_EXPR_CODE_P (rhs2_code))
3287     {
3288       conv2_stmt = rhs2_stmt;
3289       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3290       if (TREE_CODE (rhs2) == SSA_NAME)
3291 	{
3292 	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3293 	  if (is_gimple_assign (rhs2_stmt))
3294 	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3295 	}
3296       else
3297 	return false;
3298     }
3299 
3300   /* If the rhs code is already WIDEN_MULT_EXPR it would seem unnecessary
3301      to call is_widening_mult_p, but we still need the operands it returns.
3302 
3303      It might also appear that it would be sufficient to use the existing
3304      operands of the widening multiply, but that would limit the choice of
3305      multiply-and-accumulate instructions.
3306 
3307      If the widened-multiplication result has more than one use, it is
3308      probably wiser not to do the conversion.  */
3309   if (code == PLUS_EXPR
3310       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3311     {
3312       if (!has_single_use (rhs1)
3313 	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3314 				  &type2, &mult_rhs2))
3315 	return false;
3316       add_rhs = rhs2;
3317       conv_stmt = conv1_stmt;
3318     }
3319   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3320     {
3321       if (!has_single_use (rhs2)
3322 	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3323 				  &type2, &mult_rhs2))
3324 	return false;
3325       add_rhs = rhs1;
3326       conv_stmt = conv2_stmt;
3327     }
3328   else
3329     return false;
3330 
3331   to_mode = TYPE_MODE (type);
3332   from_mode = TYPE_MODE (type1);
3333   from_unsigned1 = TYPE_UNSIGNED (type1);
3334   from_unsigned2 = TYPE_UNSIGNED (type2);
3335   optype = type1;
3336 
3337   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
3338   if (from_unsigned1 != from_unsigned2)
3339     {
3340       if (!INTEGRAL_TYPE_P (type))
3341 	return false;
3342       /* We can use a signed multiply with unsigned types as long as
3343 	 there is a wider mode to use, or it is the smaller of the two
3344 	 types that is unsigned.  Note that type1 >= type2, always.  */
3345       if ((from_unsigned1
3346 	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3347 	  || (from_unsigned2
3348 	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3349 	{
3350 	  from_mode = GET_MODE_WIDER_MODE (from_mode);
3351 	  if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3352 	    return false;
3353 	}
3354 
3355       from_unsigned1 = from_unsigned2 = false;
3356       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3357 					       false);
3358     }
3359 
3360   /* If there was a conversion between the multiply and addition
3361      then we need to make sure it fits a multiply-and-accumulate.
3362      There should be a single mode change which does not change the
3363      value.  */
3364   if (conv_stmt)
3365     {
3366       /* We use the original, unmodified data types for this.  */
3367       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3368       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3369       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3370       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3371 
3372       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3373 	{
3374 	  /* Conversion is a truncate.  */
3375 	  if (TYPE_PRECISION (to_type) < data_size)
3376 	    return false;
3377 	}
3378       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3379 	{
3380 	  /* Conversion is an extend.  Check it's the right sort.  */
3381 	  if (TYPE_UNSIGNED (from_type) != is_unsigned
3382 	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3383 	    return false;
3384 	}
3385       /* else convert is a no-op for our purposes.  */
3386     }
3387 
3388   /* Verify that the machine can perform a widening multiply
3389      accumulate in this mode/signedness combination, otherwise
3390      this transformation is likely to pessimize code.  */
3391   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3392   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3393 						  from_mode, 0, &actual_mode);
3394 
3395   if (handler == CODE_FOR_nothing)
3396     return false;
3397 
3398   /* Ensure that the inputs to the handler are in the correct precision
3399      for the opcode.  This will be the full mode size.  */
3400   actual_precision = GET_MODE_PRECISION (actual_mode);
3401   if (actual_precision != TYPE_PRECISION (type1)
3402       || from_unsigned1 != TYPE_UNSIGNED (type1))
3403     mult_rhs1 = build_and_insert_cast (gsi, loc,
3404 				       build_nonstandard_integer_type
3405 				         (actual_precision, from_unsigned1),
3406 				       mult_rhs1);
3407   if (actual_precision != TYPE_PRECISION (type2)
3408       || from_unsigned2 != TYPE_UNSIGNED (type2))
3409     mult_rhs2 = build_and_insert_cast (gsi, loc,
3410 				       build_nonstandard_integer_type
3411 					 (actual_precision, from_unsigned2),
3412 				       mult_rhs2);
3413 
3414   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3415     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3416 
3417   /* Handle constants.  */
3418   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3419     mult_rhs1 = fold_convert (type1, mult_rhs1);
3420   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3421     mult_rhs2 = fold_convert (type2, mult_rhs2);
3422 
3423   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3424 				  add_rhs);
3425   update_stmt (gsi_stmt (*gsi));
3426   widen_mul_stats.maccs_inserted++;
3427   return true;
3428 }
3429 
3430 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3431    with uses in additions and subtractions to form fused multiply-add
3432    operations.  Returns true if successful and MUL_STMT should be removed.  */
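/* Illustrative sketch only (assumes the target provides an fma pattern
   for the mode and FP contraction is not disabled):

     double f (double a, double b, double c)
     {
       double m = a * b;
       return m + c;
     }

   Provided m has no other uses, the addition is rewritten to
   FMA_EXPR <a, b, c> and the multiplication becomes dead.  */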
3433 
3434 static bool
3435 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
3436 {
3437   tree mul_result = gimple_get_lhs (mul_stmt);
3438   tree type = TREE_TYPE (mul_result);
3439   gimple *use_stmt, *neguse_stmt;
3440   gassign *fma_stmt;
3441   use_operand_p use_p;
3442   imm_use_iterator imm_iter;
3443 
3444   if (FLOAT_TYPE_P (type)
3445       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3446     return false;
3447 
3448   /* We don't want to do bitfield reduction ops.  */
3449   if (INTEGRAL_TYPE_P (type)
3450       && (TYPE_PRECISION (type)
3451 	  != GET_MODE_PRECISION (TYPE_MODE (type))
3452 	  || TYPE_OVERFLOW_TRAPS (type)))
3453     return false;
3454 
3455   /* If the target doesn't support it, don't generate it.  We assume that
3456      if fma isn't available then fms, fnma or fnms are not either.  */
3457   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3458     return false;
3459 
3460   /* If the multiplication has zero uses, it is kept around probably because
3461      of -fnon-call-exceptions.  Don't optimize it away in that case,
3462      that is DCE's job.  */
3463   if (has_zero_uses (mul_result))
3464     return false;
3465 
3466   /* Make sure that the multiplication statement becomes dead after
3467      the transformation, i.e. that all uses are transformed to FMAs.
3468      This means we assume that an FMA operation has the same cost
3469      as an addition.  */
3470   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3471     {
3472       enum tree_code use_code;
3473       tree result = mul_result;
3474       bool negate_p = false;
3475 
3476       use_stmt = USE_STMT (use_p);
3477 
3478       if (is_gimple_debug (use_stmt))
3479 	continue;
3480 
3481       /* For now restrict this operation to a single basic block.  In theory
3482 	 we would want to support sinking the multiplication in
3483 	 m = a*b;
3484 	 if ()
3485 	   ma = m + c;
3486 	 else
3487 	   d = m;
3488 	 to form an fma in the then block and sink the multiplication to the
3489 	 else block.  */
3490       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3491 	return false;
3492 
3493       if (!is_gimple_assign (use_stmt))
3494 	return false;
3495 
3496       use_code = gimple_assign_rhs_code (use_stmt);
3497 
3498       /* A negate on the multiplication leads to FNMA.  */
3499       if (use_code == NEGATE_EXPR)
3500 	{
3501 	  ssa_op_iter iter;
3502 	  use_operand_p usep;
3503 
3504 	  result = gimple_assign_lhs (use_stmt);
3505 
3506 	  /* Make sure the negate statement becomes dead with this
3507 	     single transformation.  */
3508 	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
3509 			       &use_p, &neguse_stmt))
3510 	    return false;
3511 
3512 	  /* Make sure the multiplication isn't also used on that stmt.  */
3513 	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3514 	    if (USE_FROM_PTR (usep) == mul_result)
3515 	      return false;
3516 
3517 	  /* Re-validate.  */
3518 	  use_stmt = neguse_stmt;
3519 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3520 	    return false;
3521 	  if (!is_gimple_assign (use_stmt))
3522 	    return false;
3523 
3524 	  use_code = gimple_assign_rhs_code (use_stmt);
3525 	  negate_p = true;
3526 	}
3527 
3528       switch (use_code)
3529 	{
3530 	case MINUS_EXPR:
3531 	  if (gimple_assign_rhs2 (use_stmt) == result)
3532 	    negate_p = !negate_p;
3533 	  break;
3534 	case PLUS_EXPR:
3535 	  break;
3536 	default:
3537 	  /* FMA can only be formed from PLUS and MINUS.  */
3538 	  return false;
3539 	}
3540 
3541       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3542 	 by a MULT_EXPR that we'll visit later, we might be able to
3543 	 get a more profitable match with fnma.
3544 	 OTOH, if we don't, a negate / fma pair likely has lower latency
3545 	 than a mult / subtract pair.  */
3546       if (use_code == MINUS_EXPR && !negate_p
3547 	  && gimple_assign_rhs1 (use_stmt) == result
3548 	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3549 	  && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3550 	{
3551 	  tree rhs2 = gimple_assign_rhs2 (use_stmt);
3552 
3553 	  if (TREE_CODE (rhs2) == SSA_NAME)
3554 	    {
3555 	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
3556 	      if (has_single_use (rhs2)
3557 		  && is_gimple_assign (stmt2)
3558 		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3559 	      return false;
3560 	    }
3561 	}
3562 
3563       /* We can't handle a * b + a * b.  */
3564       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3565 	return false;
3566 
3567       /* While it is possible to validate whether or not the exact form
3568 	 that we've recognized is available in the backend, the assumption
3569 	 is that the transformation is never a loss.  For instance, suppose
3570 	 the target only has the plain FMA pattern available.  Consider
3571 	 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3572 	 is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
3573 	 still have 3 operations, but in the FMA form the two NEGs are
3574 	 independent and could be run in parallel.  */
3575     }
3576 
3577   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3578     {
3579       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3580       enum tree_code use_code;
3581       tree addop, mulop1 = op1, result = mul_result;
3582       bool negate_p = false;
3583 
3584       if (is_gimple_debug (use_stmt))
3585 	continue;
3586 
3587       use_code = gimple_assign_rhs_code (use_stmt);
3588       if (use_code == NEGATE_EXPR)
3589 	{
3590 	  result = gimple_assign_lhs (use_stmt);
3591 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3592 	  gsi_remove (&gsi, true);
3593 	  release_defs (use_stmt);
3594 
3595 	  use_stmt = neguse_stmt;
3596 	  gsi = gsi_for_stmt (use_stmt);
3597 	  use_code = gimple_assign_rhs_code (use_stmt);
3598 	  negate_p = true;
3599 	}
3600 
3601       if (gimple_assign_rhs1 (use_stmt) == result)
3602 	{
3603 	  addop = gimple_assign_rhs2 (use_stmt);
3604 	  /* a * b - c -> a * b + (-c)  */
3605 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3606 	    addop = force_gimple_operand_gsi (&gsi,
3607 					      build1 (NEGATE_EXPR,
3608 						      type, addop),
3609 					      true, NULL_TREE, true,
3610 					      GSI_SAME_STMT);
3611 	}
3612       else
3613 	{
3614 	  addop = gimple_assign_rhs1 (use_stmt);
3615 	  /* a - b * c -> (-b) * c + a */
3616 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3617 	    negate_p = !negate_p;
3618 	}
3619 
3620       if (negate_p)
3621 	mulop1 = force_gimple_operand_gsi (&gsi,
3622 					   build1 (NEGATE_EXPR,
3623 						   type, mulop1),
3624 					   true, NULL_TREE, true,
3625 					   GSI_SAME_STMT);
3626 
3627       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3628 				      FMA_EXPR, mulop1, op2, addop);
3629       gsi_replace (&gsi, fma_stmt, true);
3630       widen_mul_stats.fmas_inserted++;
3631     }
3632 
3633   return true;
3634 }
3635 
3636 
3637 /* Helper function of match_uaddsub_overflow.  Return 1
3638    if USE_STMT is an unsigned overflow check of the form ovf != 0
3639    for STMT, -1 if USE_STMT is an unsigned overflow check of the
3640    form ovf == 0, and 0 otherwise.  */
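/* For example (a sketch, with unsigned SSA names): if STMT is
     r_1 = x_2 - y_3;
   then a USE_STMT of the form  if (r_1 > x_2)  is an overflow check and
   we return 1, while  if (r_1 <= x_2)  returns -1; any unrelated use
   returns 0.  */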
3641 
3642 static int
3643 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3644 {
3645   enum tree_code ccode = ERROR_MARK;
3646   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3647   if (gimple_code (use_stmt) == GIMPLE_COND)
3648     {
3649       ccode = gimple_cond_code (use_stmt);
3650       crhs1 = gimple_cond_lhs (use_stmt);
3651       crhs2 = gimple_cond_rhs (use_stmt);
3652     }
3653   else if (is_gimple_assign (use_stmt))
3654     {
3655       if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3656 	{
3657 	  ccode = gimple_assign_rhs_code (use_stmt);
3658 	  crhs1 = gimple_assign_rhs1 (use_stmt);
3659 	  crhs2 = gimple_assign_rhs2 (use_stmt);
3660 	}
3661       else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3662 	{
3663 	  tree cond = gimple_assign_rhs1 (use_stmt);
3664 	  if (COMPARISON_CLASS_P (cond))
3665 	    {
3666 	      ccode = TREE_CODE (cond);
3667 	      crhs1 = TREE_OPERAND (cond, 0);
3668 	      crhs2 = TREE_OPERAND (cond, 1);
3669 	    }
3670 	  else
3671 	    return 0;
3672 	}
3673       else
3674 	return 0;
3675     }
3676   else
3677     return 0;
3678 
3679   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3680     return 0;
3681 
3682   enum tree_code code = gimple_assign_rhs_code (stmt);
3683   tree lhs = gimple_assign_lhs (stmt);
3684   tree rhs1 = gimple_assign_rhs1 (stmt);
3685   tree rhs2 = gimple_assign_rhs2 (stmt);
3686 
3687   switch (ccode)
3688     {
3689     case GT_EXPR:
3690     case LE_EXPR:
3691       /* r = a - b; r > a or r <= a
3692 	 r = a + b; a > r or a <= r or b > r or b <= r.  */
3693       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3694 	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3695 	      && crhs2 == lhs))
3696 	return ccode == GT_EXPR ? 1 : -1;
3697       break;
3698     case LT_EXPR:
3699     case GE_EXPR:
3700       /* r = a - b; a < r or a >= r
3701 	 r = a + b; r < a or r >= a or r < b or r >= b.  */
3702       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3703 	  || (code == PLUS_EXPR && crhs1 == lhs
3704 	      && (crhs2 == rhs1 || crhs2 == rhs2)))
3705 	return ccode == LT_EXPR ? 1 : -1;
3706       break;
3707     default:
3708       break;
3709     }
3710   return 0;
3711 }
3712 
3713 /* Recognize for unsigned x
3714    x = y - z;
3715    if (x > y)
3716    where there are other uses of x and replace it with
3717    _7 = SUB_OVERFLOW (y, z);
3718    x = REALPART_EXPR <_7>;
3719    _8 = IMAGPART_EXPR <_7>;
3720    if (_8)
3721    and similarly for addition.  */
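/* Source-level sketch (illustrative only):

     unsigned int
     sub_check (unsigned int y, unsigned int z, unsigned int *ovf)
     {
       unsigned int x = y - z;
       *ovf = x > y;
       return x;
     }

   assuming usubv4_optab has a handler for the mode (checked below);
   the subtraction and the overflow test are then carried out by a
   single IFN_SUB_OVERFLOW call.  */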
3722 
3723 static bool
3724 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3725 			enum tree_code code)
3726 {
3727   tree lhs = gimple_assign_lhs (stmt);
3728   tree type = TREE_TYPE (lhs);
3729   use_operand_p use_p;
3730   imm_use_iterator iter;
3731   bool use_seen = false;
3732   bool ovf_use_seen = false;
3733   gimple *use_stmt;
3734 
3735   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3736   if (!INTEGRAL_TYPE_P (type)
3737       || !TYPE_UNSIGNED (type)
3738       || has_zero_uses (lhs)
3739       || has_single_use (lhs)
3740       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3741 			TYPE_MODE (type)) == CODE_FOR_nothing)
3742     return false;
3743 
3744   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3745     {
3746       use_stmt = USE_STMT (use_p);
3747       if (is_gimple_debug (use_stmt))
3748 	continue;
3749 
3750       if (uaddsub_overflow_check_p (stmt, use_stmt))
3751 	ovf_use_seen = true;
3752       else
3753 	use_seen = true;
3754       if (ovf_use_seen && use_seen)
3755 	break;
3756     }
3757 
3758   if (!ovf_use_seen || !use_seen)
3759     return false;
3760 
3761   tree ctype = build_complex_type (type);
3762   tree rhs1 = gimple_assign_rhs1 (stmt);
3763   tree rhs2 = gimple_assign_rhs2 (stmt);
3764   gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3765 					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3766 					 2, rhs1, rhs2);
3767   tree ctmp = make_ssa_name (ctype);
3768   gimple_call_set_lhs (g, ctmp);
3769   gsi_insert_before (gsi, g, GSI_SAME_STMT);
3770   gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3771 				     build1 (REALPART_EXPR, type, ctmp));
3772   gsi_replace (gsi, g2, true);
3773   tree ovf = make_ssa_name (type);
3774   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3775 			    build1 (IMAGPART_EXPR, type, ctmp));
3776   gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3777 
3778   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3779     {
3780       if (is_gimple_debug (use_stmt))
3781 	continue;
3782 
3783       int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3784       if (ovf_use == 0)
3785 	continue;
3786       if (gimple_code (use_stmt) == GIMPLE_COND)
3787 	{
3788 	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
3789 	  gimple_cond_set_lhs (cond_stmt, ovf);
3790 	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3791 	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3792 	}
3793       else
3794 	{
3795 	  gcc_checking_assert (is_gimple_assign (use_stmt));
3796 	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3797 	    {
3798 	      gimple_assign_set_rhs1 (use_stmt, ovf);
3799 	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3800 	      gimple_assign_set_rhs_code (use_stmt,
3801 					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3802 	    }
3803 	  else
3804 	    {
3805 	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3806 				   == COND_EXPR);
3807 	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3808 				  boolean_type_node, ovf,
3809 				  build_int_cst (type, 0));
3810 	      gimple_assign_set_rhs1 (use_stmt, cond);
3811 	    }
3812 	}
3813       update_stmt (use_stmt);
3814     }
3815   return true;
3816 }
3817 
3818 /* Return true if target has support for divmod.  */
3819 
3820 static bool
3821 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3822 {
3823   /* If target supports hardware divmod insn, use it for divmod.  */
3824   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3825     return true;
3826 
3827   /* Check if libfunc for divmod is available.  */
3828   rtx libfunc = optab_libfunc (divmod_optab, mode);
3829   if (libfunc != NULL_RTX)
3830     {
3831       /* If optab_handler exists for div_optab, perhaps in a wider mode,
3832 	 we don't want to use the libfunc even if it exists for this mode.  */
3833       for (machine_mode div_mode = mode;
3834 	   div_mode != VOIDmode;
3835 	   div_mode = GET_MODE_WIDER_MODE (div_mode))
3836 	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3837 	  return false;
3838 
3839       return targetm.expand_divmod_libfunc != NULL;
3840     }
3841 
3842   return false;
3843 }
3844 
3845 /* Check if stmt is candidate for divmod transform.  */
3846 
3847 static bool
3848 divmod_candidate_p (gassign *stmt)
3849 {
3850   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3851   enum machine_mode mode = TYPE_MODE (type);
3852   optab divmod_optab, div_optab;
3853 
3854   if (TYPE_UNSIGNED (type))
3855     {
3856       divmod_optab = udivmod_optab;
3857       div_optab = udiv_optab;
3858     }
3859   else
3860     {
3861       divmod_optab = sdivmod_optab;
3862       div_optab = sdiv_optab;
3863     }
3864 
3865   tree op1 = gimple_assign_rhs1 (stmt);
3866   tree op2 = gimple_assign_rhs2 (stmt);
3867 
3868   /* Disable the transform if either is a constant, since division-by-constant
3869      may have specialized expansion.  */
3870   if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3871     return false;
3872 
3873   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3874      expand using the [su]divv optabs.  */
3875   if (TYPE_OVERFLOW_TRAPS (type))
3876     return false;
3877 
3878   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3879     return false;
3880 
3881   return true;
3882 }
3883 
3884 /* This function looks for:
3885    t1 = a TRUNC_DIV_EXPR b;
3886    t2 = a TRUNC_MOD_EXPR b;
3887    and transforms it to the following sequence:
3888    complex_tmp = DIVMOD (a, b);
3889    t1 = REALPART_EXPR(complex_tmp);
3890    t2 = IMAGPART_EXPR(complex_tmp);
3891    For conditions enabling the transform see divmod_candidate_p().
3892 
3893    The pass has three parts:
3894    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
3895       other trunc_div_expr and trunc_mod_expr stmts.
3896    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3897       to stmts vector.
3898    3) Insert DIVMOD call just before top_stmt and update entries in
3899       stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
3900       IMAGPART_EXPR for mod).  */
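/* Source-level sketch (illustrative only):

     void
     qr (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }

   Assuming neither operand is constant and target_supports_divmod_p
   holds, both statements are rewritten to use one DIVMOD (a, b) result:
   the quotient from its REALPART_EXPR, the remainder from its
   IMAGPART_EXPR.  */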
3901 
3902 static bool
3903 convert_to_divmod (gassign *stmt)
3904 {
3905   if (stmt_can_throw_internal (stmt)
3906       || !divmod_candidate_p (stmt))
3907     return false;
3908 
3909   tree op1 = gimple_assign_rhs1 (stmt);
3910   tree op2 = gimple_assign_rhs2 (stmt);
3911 
3912   imm_use_iterator use_iter;
3913   gimple *use_stmt;
3914   auto_vec<gimple *> stmts;
3915 
3916   gimple *top_stmt = stmt;
3917   basic_block top_bb = gimple_bb (stmt);
3918 
3919   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3920      at least stmt itself and possibly other trunc_div/trunc_mod stmts
3921      having the same operands as stmt.  */
3922 
3923   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3924     {
3925       if (is_gimple_assign (use_stmt)
3926 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3927 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3928 	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3929 	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3930 	{
3931 	  if (stmt_can_throw_internal (use_stmt))
3932 	    continue;
3933 
3934 	  basic_block bb = gimple_bb (use_stmt);
3935 
3936 	  if (bb == top_bb)
3937 	    {
3938 	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3939 		top_stmt = use_stmt;
3940 	    }
3941 	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3942 	    {
3943 	      top_bb = bb;
3944 	      top_stmt = use_stmt;
3945 	    }
3946 	}
3947     }
3948 
3949   tree top_op1 = gimple_assign_rhs1 (top_stmt);
3950   tree top_op2 = gimple_assign_rhs2 (top_stmt);
3951 
3952   stmts.safe_push (top_stmt);
3953   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3954 
3955   /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3956      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3957      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3958      2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
3959 
3960   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3961     {
3962       if (is_gimple_assign (use_stmt)
3963 	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3964 	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3965 	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3966 	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3967 	{
3968 	  if (use_stmt == top_stmt
3969 	      || stmt_can_throw_internal (use_stmt)
3970 	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3971 	    continue;
3972 
3973 	  stmts.safe_push (use_stmt);
3974 	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3975 	    div_seen = true;
3976 	}
3977     }
3978 
3979   if (!div_seen)
3980     return false;
3981 
3982   /* Part 3: Create a call to the internal fn DIVMOD:
3983      divmod_tmp = DIVMOD (op1, op2).  */
3984 
3985   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3986   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3987 				 call_stmt, "divmod_tmp");
3988   gimple_call_set_lhs (call_stmt, res);
3989 
3990   /* Insert the call before top_stmt.  */
3991   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3992   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3993 
3994   widen_mul_stats.divmod_calls_inserted++;
3995 
3996   /* Update all statements in stmts vector:
3997      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3998      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
3999 
4000   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4001     {
4002       tree new_rhs;
4003 
4004       switch (gimple_assign_rhs_code (use_stmt))
4005 	{
4006 	  case TRUNC_DIV_EXPR:
4007 	    new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4008 	    break;
4009 
4010 	  case TRUNC_MOD_EXPR:
4011 	    new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4012 	    break;
4013 
4014 	  default:
4015 	    gcc_unreachable ();
4016 	}
4017 
4018       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4019       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4020       update_stmt (use_stmt);
4021     }
4022 
4023   return true;
4024 }
4025 
4026 /* Find integer multiplications where the operands are extended from
4027    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4028    where appropriate.  */
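/* Illustrative sketch only (assumes 16-bit short, 32-bit int and a
   widening multiply instruction for that mode pair on the target):

     int wmul (short a, short b)
     {
       return (int) a * (int) b;
     }

   The MULT_EXPR on the extended operands is replaced by
   WIDEN_MULT_EXPR <a, b> operating directly on the short inputs.  */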
4029 
4030 namespace {
4031 
4032 const pass_data pass_data_optimize_widening_mul =
4033 {
4034   GIMPLE_PASS, /* type */
4035   "widening_mul", /* name */
4036   OPTGROUP_NONE, /* optinfo_flags */
4037   TV_NONE, /* tv_id */
4038   PROP_ssa, /* properties_required */
4039   0, /* properties_provided */
4040   0, /* properties_destroyed */
4041   0, /* todo_flags_start */
4042   TODO_update_ssa, /* todo_flags_finish */
4043 };
4044 
4045 class pass_optimize_widening_mul : public gimple_opt_pass
4046 {
4047 public:
4048   pass_optimize_widening_mul (gcc::context *ctxt)
4049     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4050   {}
4051 
4052   /* opt_pass methods: */
4053   virtual bool gate (function *)
4054     {
4055       return flag_expensive_optimizations && optimize;
4056     }
4057 
4058   virtual unsigned int execute (function *);
4059 
4060 }; // class pass_optimize_widening_mul
4061 
4062 unsigned int
4063 pass_optimize_widening_mul::execute (function *fun)
4064 {
4065   basic_block bb;
4066   bool cfg_changed = false;
4067 
4068   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4069   calculate_dominance_info (CDI_DOMINATORS);
4070   renumber_gimple_stmt_uids ();
4071 
4072   FOR_EACH_BB_FN (bb, fun)
4073     {
4074       gimple_stmt_iterator gsi;
4075 
4076       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4077         {
4078 	  gimple *stmt = gsi_stmt (gsi);
4079 	  enum tree_code code;
4080 
4081 	  if (is_gimple_assign (stmt))
4082 	    {
4083 	      code = gimple_assign_rhs_code (stmt);
4084 	      switch (code)
4085 		{
4086 		case MULT_EXPR:
4087 		  if (!convert_mult_to_widen (stmt, &gsi)
4088 		      && convert_mult_to_fma (stmt,
4089 					      gimple_assign_rhs1 (stmt),
4090 					      gimple_assign_rhs2 (stmt)))
4091 		    {
4092 		      gsi_remove (&gsi, true);
4093 		      release_defs (stmt);
4094 		      continue;
4095 		    }
4096 		  break;
4097 
4098 		case PLUS_EXPR:
4099 		case MINUS_EXPR:
4100 		  if (!convert_plusminus_to_widen (&gsi, stmt, code))
4101 		    match_uaddsub_overflow (&gsi, stmt, code);
4102 		  break;
4103 
4104 		case TRUNC_MOD_EXPR:
4105 		  convert_to_divmod (as_a<gassign *> (stmt));
4106 		  break;
4107 
4108 		default:;
4109 		}
4110 	    }
4111 	  else if (is_gimple_call (stmt)
4112 		   && gimple_call_lhs (stmt))
4113 	    {
4114 	      tree fndecl = gimple_call_fndecl (stmt);
4115 	      if (fndecl
4116 		  && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
4117 		{
4118 		  switch (DECL_FUNCTION_CODE (fndecl))
4119 		    {
4120 		      case BUILT_IN_POWF:
4121 		      case BUILT_IN_POW:
4122 		      case BUILT_IN_POWL:
4123 			if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4124 			    && real_equal
4125 			         (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4126 				  &dconst2)
4127 			    && convert_mult_to_fma (stmt,
4128 						    gimple_call_arg (stmt, 0),
4129 						    gimple_call_arg (stmt, 0)))
4130 			  {
4131 			    unlink_stmt_vdef (stmt);
4132 			    if (gsi_remove (&gsi, true)
4133 				&& gimple_purge_dead_eh_edges (bb))
4134 			      cfg_changed = true;
4135 			    release_defs (stmt);
4136 			    continue;
4137 			  }
4138 			  break;
4139 
4140 		      default:;
4141 		    }
4142 		}
4143 	    }
4144 	  gsi_next (&gsi);
4145 	}
4146     }
4147 
4148   statistics_counter_event (fun, "widening multiplications inserted",
4149 			    widen_mul_stats.widen_mults_inserted);
4150   statistics_counter_event (fun, "widening maccs inserted",
4151 			    widen_mul_stats.maccs_inserted);
4152   statistics_counter_event (fun, "fused multiply-adds inserted",
4153 			    widen_mul_stats.fmas_inserted);
4154   statistics_counter_event (fun, "divmod calls inserted",
4155 			    widen_mul_stats.divmod_calls_inserted);
4156 
4157   return cfg_changed ? TODO_cleanup_cfg : 0;
4158 }
4159 
4160 } // anon namespace
4161 
4162 gimple_opt_pass *
4163 make_pass_optimize_widening_mul (gcc::context *ctxt)
4164 {
4165   return new pass_optimize_widening_mul (ctxt);
4166 }
4167