xref: /dflybsd-src/contrib/gcc-4.7/gcc/tree-ssa-math-opts.c (revision 81fc95a5293ee307c688a350a3feb4734aaddbb4)
1e4b17023SJohn Marino /* Global, SSA-based optimizations using mathematical identities.
2*5ce9237cSJohn Marino    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3e4b17023SJohn Marino    Free Software Foundation, Inc.
4e4b17023SJohn Marino 
5e4b17023SJohn Marino This file is part of GCC.
6e4b17023SJohn Marino 
7e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it
8e4b17023SJohn Marino under the terms of the GNU General Public License as published by the
9e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any
10e4b17023SJohn Marino later version.
11e4b17023SJohn Marino 
12e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT
13e4b17023SJohn Marino ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15e4b17023SJohn Marino for more details.
16e4b17023SJohn Marino 
17e4b17023SJohn Marino You should have received a copy of the GNU General Public License
18e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
19e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
20e4b17023SJohn Marino 
21e4b17023SJohn Marino /* Currently, the only mini-pass in this file tries to CSE reciprocal
22e4b17023SJohn Marino    operations.  These are common in sequences such as this one:
23e4b17023SJohn Marino 
24e4b17023SJohn Marino 	modulus = sqrt(x*x + y*y + z*z);
25e4b17023SJohn Marino 	x = x / modulus;
26e4b17023SJohn Marino 	y = y / modulus;
27e4b17023SJohn Marino 	z = z / modulus;
28e4b17023SJohn Marino 
29e4b17023SJohn Marino    that can be optimized to
30e4b17023SJohn Marino 
31e4b17023SJohn Marino 	modulus = sqrt(x*x + y*y + z*z);
32e4b17023SJohn Marino         rmodulus = 1.0 / modulus;
33e4b17023SJohn Marino 	x = x * rmodulus;
34e4b17023SJohn Marino 	y = y * rmodulus;
35e4b17023SJohn Marino 	z = z * rmodulus;
36e4b17023SJohn Marino 
37e4b17023SJohn Marino    We do this for loop invariant divisors, and with this pass whenever
38e4b17023SJohn Marino    we notice that a division has the same divisor multiple times.
39e4b17023SJohn Marino 
40e4b17023SJohn Marino    Of course, like in PRE, we don't insert a division if a dominator
41e4b17023SJohn Marino    already has one.  However, this cannot be done as an extension of
42e4b17023SJohn Marino    PRE for several reasons.
43e4b17023SJohn Marino 
44e4b17023SJohn Marino    First of all, with some experiments it was found out that the
45e4b17023SJohn Marino    transformation is not always useful if there are only two divisions
46e4b17023SJohn Marino    hy the same divisor.  This is probably because modern processors
47e4b17023SJohn Marino    can pipeline the divisions; on older, in-order processors it should
48e4b17023SJohn Marino    still be effective to optimize two divisions by the same number.
49e4b17023SJohn Marino    We make this a param, and it shall be called N in the remainder of
50e4b17023SJohn Marino    this comment.
51e4b17023SJohn Marino 
52e4b17023SJohn Marino    Second, if trapping math is active, we have less freedom on where
53e4b17023SJohn Marino    to insert divisions: we can only do so in basic blocks that already
54e4b17023SJohn Marino    contain one.  (If divisions don't trap, instead, we can insert
55e4b17023SJohn Marino    divisions elsewhere, which will be in blocks that are common dominators
56e4b17023SJohn Marino    of those that have the division).
57e4b17023SJohn Marino 
58e4b17023SJohn Marino    We really don't want to compute the reciprocal unless a division will
59e4b17023SJohn Marino    be found.  To do this, we won't insert the division in a basic block
60e4b17023SJohn Marino    that has less than N divisions *post-dominating* it.
61e4b17023SJohn Marino 
62e4b17023SJohn Marino    The algorithm constructs a subset of the dominator tree, holding the
63e4b17023SJohn Marino    blocks containing the divisions and the common dominators to them,
64e4b17023SJohn Marino    and walk it twice.  The first walk is in post-order, and it annotates
65e4b17023SJohn Marino    each block with the number of divisions that post-dominate it: this
66e4b17023SJohn Marino    gives information on where divisions can be inserted profitably.
67e4b17023SJohn Marino    The second walk is in pre-order, and it inserts divisions as explained
68e4b17023SJohn Marino    above, and replaces divisions by multiplications.
69e4b17023SJohn Marino 
70e4b17023SJohn Marino    In the best case, the cost of the pass is O(n_statements).  In the
71e4b17023SJohn Marino    worst-case, the cost is due to creating the dominator tree subset,
72e4b17023SJohn Marino    with a cost of O(n_basic_blocks ^ 2); however this can only happen
73e4b17023SJohn Marino    for n_statements / n_basic_blocks statements.  So, the amortized cost
74e4b17023SJohn Marino    of creating the dominator tree subset is O(n_basic_blocks) and the
75e4b17023SJohn Marino    worst-case cost of the pass is O(n_statements * n_basic_blocks).
76e4b17023SJohn Marino 
77e4b17023SJohn Marino    More practically, the cost will be small because there are few
78e4b17023SJohn Marino    divisions, and they tend to be in the same basic block, so insert_bb
79e4b17023SJohn Marino    is called very few times.
80e4b17023SJohn Marino 
81e4b17023SJohn Marino    If we did this using domwalk.c, an efficient implementation would have
82e4b17023SJohn Marino    to work on all the variables in a single pass, because we could not
83e4b17023SJohn Marino    work on just a subset of the dominator tree, as we do now, and the
84e4b17023SJohn Marino    cost would also be something like O(n_statements * n_basic_blocks).
85e4b17023SJohn Marino    The data structures would be more complex in order to work on all the
86e4b17023SJohn Marino    variables in a single pass.  */
87e4b17023SJohn Marino 
88e4b17023SJohn Marino #include "config.h"
89e4b17023SJohn Marino #include "system.h"
90e4b17023SJohn Marino #include "coretypes.h"
91e4b17023SJohn Marino #include "tm.h"
92e4b17023SJohn Marino #include "flags.h"
93e4b17023SJohn Marino #include "tree.h"
94e4b17023SJohn Marino #include "tree-flow.h"
95e4b17023SJohn Marino #include "timevar.h"
96e4b17023SJohn Marino #include "tree-pass.h"
97e4b17023SJohn Marino #include "alloc-pool.h"
98e4b17023SJohn Marino #include "basic-block.h"
99e4b17023SJohn Marino #include "target.h"
100e4b17023SJohn Marino #include "gimple-pretty-print.h"
101e4b17023SJohn Marino 
102e4b17023SJohn Marino /* FIXME: RTL headers have to be included here for optabs.  */
103e4b17023SJohn Marino #include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
104e4b17023SJohn Marino #include "expr.h"		/* Because optabs.h wants sepops.  */
105e4b17023SJohn Marino #include "optabs.h"
106e4b17023SJohn Marino 
107e4b17023SJohn Marino /* This structure represents one basic block that either computes a
108e4b17023SJohn Marino    division, or is a common dominator for basic block that compute a
109e4b17023SJohn Marino    division.  */
110e4b17023SJohn Marino struct occurrence {
111e4b17023SJohn Marino   /* The basic block represented by this structure.  */
112e4b17023SJohn Marino   basic_block bb;
113e4b17023SJohn Marino 
114e4b17023SJohn Marino   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
115e4b17023SJohn Marino      inserted in BB.  */
116e4b17023SJohn Marino   tree recip_def;
117e4b17023SJohn Marino 
118e4b17023SJohn Marino   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
119e4b17023SJohn Marino      was inserted in BB.  */
120e4b17023SJohn Marino   gimple recip_def_stmt;
121e4b17023SJohn Marino 
122e4b17023SJohn Marino   /* Pointer to a list of "struct occurrence"s for blocks dominated
123e4b17023SJohn Marino      by BB.  */
124e4b17023SJohn Marino   struct occurrence *children;
125e4b17023SJohn Marino 
126e4b17023SJohn Marino   /* Pointer to the next "struct occurrence"s in the list of blocks
127e4b17023SJohn Marino      sharing a common dominator.  */
128e4b17023SJohn Marino   struct occurrence *next;
129e4b17023SJohn Marino 
130e4b17023SJohn Marino   /* The number of divisions that are in BB before compute_merit.  The
131e4b17023SJohn Marino      number of divisions that are in BB or post-dominate it after
132e4b17023SJohn Marino      compute_merit.  */
133e4b17023SJohn Marino   int num_divisions;
134e4b17023SJohn Marino 
135e4b17023SJohn Marino   /* True if the basic block has a division, false if it is a common
136e4b17023SJohn Marino      dominator for basic blocks that do.  If it is false and trapping
137e4b17023SJohn Marino      math is active, BB is not a candidate for inserting a reciprocal.  */
138e4b17023SJohn Marino   bool bb_has_division;
139e4b17023SJohn Marino };
140e4b17023SJohn Marino 
141e4b17023SJohn Marino static struct
142e4b17023SJohn Marino {
143e4b17023SJohn Marino   /* Number of 1.0/X ops inserted.  */
144e4b17023SJohn Marino   int rdivs_inserted;
145e4b17023SJohn Marino 
146e4b17023SJohn Marino   /* Number of 1.0/FUNC ops inserted.  */
147e4b17023SJohn Marino   int rfuncs_inserted;
148e4b17023SJohn Marino } reciprocal_stats;
149e4b17023SJohn Marino 
150e4b17023SJohn Marino static struct
151e4b17023SJohn Marino {
152e4b17023SJohn Marino   /* Number of cexpi calls inserted.  */
153e4b17023SJohn Marino   int inserted;
154e4b17023SJohn Marino } sincos_stats;
155e4b17023SJohn Marino 
156e4b17023SJohn Marino static struct
157e4b17023SJohn Marino {
158e4b17023SJohn Marino   /* Number of hand-written 32-bit bswaps found.  */
159e4b17023SJohn Marino   int found_32bit;
160e4b17023SJohn Marino 
161e4b17023SJohn Marino   /* Number of hand-written 64-bit bswaps found.  */
162e4b17023SJohn Marino   int found_64bit;
163e4b17023SJohn Marino } bswap_stats;
164e4b17023SJohn Marino 
165e4b17023SJohn Marino static struct
166e4b17023SJohn Marino {
167e4b17023SJohn Marino   /* Number of widening multiplication ops inserted.  */
168e4b17023SJohn Marino   int widen_mults_inserted;
169e4b17023SJohn Marino 
170e4b17023SJohn Marino   /* Number of integer multiply-and-accumulate ops inserted.  */
171e4b17023SJohn Marino   int maccs_inserted;
172e4b17023SJohn Marino 
173e4b17023SJohn Marino   /* Number of fp fused multiply-add ops inserted.  */
174e4b17023SJohn Marino   int fmas_inserted;
175e4b17023SJohn Marino } widen_mul_stats;
176e4b17023SJohn Marino 
177e4b17023SJohn Marino /* The instance of "struct occurrence" representing the highest
178e4b17023SJohn Marino    interesting block in the dominator tree.  */
179e4b17023SJohn Marino static struct occurrence *occ_head;
180e4b17023SJohn Marino 
181e4b17023SJohn Marino /* Allocation pool for getting instances of "struct occurrence".  */
182e4b17023SJohn Marino static alloc_pool occ_pool;
183e4b17023SJohn Marino 
184e4b17023SJohn Marino 
185e4b17023SJohn Marino 
186e4b17023SJohn Marino /* Allocate and return a new struct occurrence for basic block BB, and
187e4b17023SJohn Marino    whose children list is headed by CHILDREN.  */
188e4b17023SJohn Marino static struct occurrence *
occ_new(basic_block bb,struct occurrence * children)189e4b17023SJohn Marino occ_new (basic_block bb, struct occurrence *children)
190e4b17023SJohn Marino {
191e4b17023SJohn Marino   struct occurrence *occ;
192e4b17023SJohn Marino 
193e4b17023SJohn Marino   bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
194e4b17023SJohn Marino   memset (occ, 0, sizeof (struct occurrence));
195e4b17023SJohn Marino 
196e4b17023SJohn Marino   occ->bb = bb;
197e4b17023SJohn Marino   occ->children = children;
198e4b17023SJohn Marino   return occ;
199e4b17023SJohn Marino }
200e4b17023SJohn Marino 
201e4b17023SJohn Marino 
202e4b17023SJohn Marino /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
203e4b17023SJohn Marino    list of "struct occurrence"s, one per basic block, having IDOM as
204e4b17023SJohn Marino    their common dominator.
205e4b17023SJohn Marino 
206e4b17023SJohn Marino    We try to insert NEW_OCC as deep as possible in the tree, and we also
207e4b17023SJohn Marino    insert any other block that is a common dominator for BB and one
208e4b17023SJohn Marino    block already in the tree.  */
209e4b17023SJohn Marino 
210e4b17023SJohn Marino static void
insert_bb(struct occurrence * new_occ,basic_block idom,struct occurrence ** p_head)211e4b17023SJohn Marino insert_bb (struct occurrence *new_occ, basic_block idom,
212e4b17023SJohn Marino 	   struct occurrence **p_head)
213e4b17023SJohn Marino {
214e4b17023SJohn Marino   struct occurrence *occ, **p_occ;
215e4b17023SJohn Marino 
216e4b17023SJohn Marino   for (p_occ = p_head; (occ = *p_occ) != NULL; )
217e4b17023SJohn Marino     {
218e4b17023SJohn Marino       basic_block bb = new_occ->bb, occ_bb = occ->bb;
219e4b17023SJohn Marino       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
220e4b17023SJohn Marino       if (dom == bb)
221e4b17023SJohn Marino 	{
222e4b17023SJohn Marino 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
223e4b17023SJohn Marino 	     from its list.  */
224e4b17023SJohn Marino 	  *p_occ = occ->next;
225e4b17023SJohn Marino 	  occ->next = new_occ->children;
226e4b17023SJohn Marino 	  new_occ->children = occ;
227e4b17023SJohn Marino 
228e4b17023SJohn Marino 	  /* Try the next block (it may as well be dominated by BB).  */
229e4b17023SJohn Marino 	}
230e4b17023SJohn Marino 
231e4b17023SJohn Marino       else if (dom == occ_bb)
232e4b17023SJohn Marino 	{
233e4b17023SJohn Marino 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
234e4b17023SJohn Marino 	  insert_bb (new_occ, dom, &occ->children);
235e4b17023SJohn Marino 	  return;
236e4b17023SJohn Marino 	}
237e4b17023SJohn Marino 
238e4b17023SJohn Marino       else if (dom != idom)
239e4b17023SJohn Marino 	{
240e4b17023SJohn Marino 	  gcc_assert (!dom->aux);
241e4b17023SJohn Marino 
242e4b17023SJohn Marino 	  /* There is a dominator between IDOM and BB, add it and make
243e4b17023SJohn Marino 	     two children out of NEW_OCC and OCC.  First, remove OCC from
244e4b17023SJohn Marino 	     its list.	*/
245e4b17023SJohn Marino 	  *p_occ = occ->next;
246e4b17023SJohn Marino 	  new_occ->next = occ;
247e4b17023SJohn Marino 	  occ->next = NULL;
248e4b17023SJohn Marino 
249e4b17023SJohn Marino 	  /* None of the previous blocks has DOM as a dominator: if we tail
250e4b17023SJohn Marino 	     recursed, we would reexamine them uselessly. Just switch BB with
251e4b17023SJohn Marino 	     DOM, and go on looking for blocks dominated by DOM.  */
252e4b17023SJohn Marino           new_occ = occ_new (dom, new_occ);
253e4b17023SJohn Marino 	}
254e4b17023SJohn Marino 
255e4b17023SJohn Marino       else
256e4b17023SJohn Marino 	{
257e4b17023SJohn Marino 	  /* Nothing special, go on with the next element.  */
258e4b17023SJohn Marino 	  p_occ = &occ->next;
259e4b17023SJohn Marino 	}
260e4b17023SJohn Marino     }
261e4b17023SJohn Marino 
262e4b17023SJohn Marino   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
263e4b17023SJohn Marino   new_occ->next = *p_head;
264e4b17023SJohn Marino   *p_head = new_occ;
265e4b17023SJohn Marino }
266e4b17023SJohn Marino 
267e4b17023SJohn Marino /* Register that we found a division in BB.  */
268e4b17023SJohn Marino 
269e4b17023SJohn Marino static inline void
register_division_in(basic_block bb)270e4b17023SJohn Marino register_division_in (basic_block bb)
271e4b17023SJohn Marino {
272e4b17023SJohn Marino   struct occurrence *occ;
273e4b17023SJohn Marino 
274e4b17023SJohn Marino   occ = (struct occurrence *) bb->aux;
275e4b17023SJohn Marino   if (!occ)
276e4b17023SJohn Marino     {
277e4b17023SJohn Marino       occ = occ_new (bb, NULL);
278e4b17023SJohn Marino       insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
279e4b17023SJohn Marino     }
280e4b17023SJohn Marino 
281e4b17023SJohn Marino   occ->bb_has_division = true;
282e4b17023SJohn Marino   occ->num_divisions++;
283e4b17023SJohn Marino }
284e4b17023SJohn Marino 
285e4b17023SJohn Marino 
286e4b17023SJohn Marino /* Compute the number of divisions that postdominate each block in OCC and
287e4b17023SJohn Marino    its children.  */
288e4b17023SJohn Marino 
289e4b17023SJohn Marino static void
compute_merit(struct occurrence * occ)290e4b17023SJohn Marino compute_merit (struct occurrence *occ)
291e4b17023SJohn Marino {
292e4b17023SJohn Marino   struct occurrence *occ_child;
293e4b17023SJohn Marino   basic_block dom = occ->bb;
294e4b17023SJohn Marino 
295e4b17023SJohn Marino   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
296e4b17023SJohn Marino     {
297e4b17023SJohn Marino       basic_block bb;
298e4b17023SJohn Marino       if (occ_child->children)
299e4b17023SJohn Marino         compute_merit (occ_child);
300e4b17023SJohn Marino 
301e4b17023SJohn Marino       if (flag_exceptions)
302e4b17023SJohn Marino 	bb = single_noncomplex_succ (dom);
303e4b17023SJohn Marino       else
304e4b17023SJohn Marino 	bb = dom;
305e4b17023SJohn Marino 
306e4b17023SJohn Marino       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
307e4b17023SJohn Marino         occ->num_divisions += occ_child->num_divisions;
308e4b17023SJohn Marino     }
309e4b17023SJohn Marino }
310e4b17023SJohn Marino 
311e4b17023SJohn Marino 
312e4b17023SJohn Marino /* Return whether USE_STMT is a floating-point division by DEF.  */
313e4b17023SJohn Marino static inline bool
is_division_by(gimple use_stmt,tree def)314e4b17023SJohn Marino is_division_by (gimple use_stmt, tree def)
315e4b17023SJohn Marino {
316e4b17023SJohn Marino   return is_gimple_assign (use_stmt)
317e4b17023SJohn Marino 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
318e4b17023SJohn Marino 	 && gimple_assign_rhs2 (use_stmt) == def
319e4b17023SJohn Marino 	 /* Do not recognize x / x as valid division, as we are getting
320e4b17023SJohn Marino 	    confused later by replacing all immediate uses x in such
321e4b17023SJohn Marino 	    a stmt.  */
322e4b17023SJohn Marino 	 && gimple_assign_rhs1 (use_stmt) != def;
323e4b17023SJohn Marino }
324e4b17023SJohn Marino 
325e4b17023SJohn Marino /* Walk the subset of the dominator tree rooted at OCC, setting the
326e4b17023SJohn Marino    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
327e4b17023SJohn Marino    the given basic block.  The field may be left NULL, of course,
328e4b17023SJohn Marino    if it is not possible or profitable to do the optimization.
329e4b17023SJohn Marino 
330e4b17023SJohn Marino    DEF_BSI is an iterator pointing at the statement defining DEF.
331e4b17023SJohn Marino    If RECIP_DEF is set, a dominator already has a computation that can
332e4b17023SJohn Marino    be used.  */
333e4b17023SJohn Marino 
334e4b17023SJohn Marino static void
insert_reciprocals(gimple_stmt_iterator * def_gsi,struct occurrence * occ,tree def,tree recip_def,int threshold)335e4b17023SJohn Marino insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
336e4b17023SJohn Marino 		    tree def, tree recip_def, int threshold)
337e4b17023SJohn Marino {
338e4b17023SJohn Marino   tree type;
339e4b17023SJohn Marino   gimple new_stmt;
340e4b17023SJohn Marino   gimple_stmt_iterator gsi;
341e4b17023SJohn Marino   struct occurrence *occ_child;
342e4b17023SJohn Marino 
343e4b17023SJohn Marino   if (!recip_def
344e4b17023SJohn Marino       && (occ->bb_has_division || !flag_trapping_math)
345e4b17023SJohn Marino       && occ->num_divisions >= threshold)
346e4b17023SJohn Marino     {
347e4b17023SJohn Marino       /* Make a variable with the replacement and substitute it.  */
348e4b17023SJohn Marino       type = TREE_TYPE (def);
349e4b17023SJohn Marino       recip_def = make_rename_temp (type, "reciptmp");
350e4b17023SJohn Marino       new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
351e4b17023SJohn Marino 					       build_one_cst (type), def);
352e4b17023SJohn Marino 
353e4b17023SJohn Marino       if (occ->bb_has_division)
354e4b17023SJohn Marino         {
355e4b17023SJohn Marino           /* Case 1: insert before an existing division.  */
356e4b17023SJohn Marino           gsi = gsi_after_labels (occ->bb);
357e4b17023SJohn Marino           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
358e4b17023SJohn Marino 	    gsi_next (&gsi);
359e4b17023SJohn Marino 
360e4b17023SJohn Marino           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
361e4b17023SJohn Marino         }
362e4b17023SJohn Marino       else if (def_gsi && occ->bb == def_gsi->bb)
363e4b17023SJohn Marino         {
364e4b17023SJohn Marino           /* Case 2: insert right after the definition.  Note that this will
365e4b17023SJohn Marino 	     never happen if the definition statement can throw, because in
366e4b17023SJohn Marino 	     that case the sole successor of the statement's basic block will
367e4b17023SJohn Marino 	     dominate all the uses as well.  */
368e4b17023SJohn Marino           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
369e4b17023SJohn Marino         }
370e4b17023SJohn Marino       else
371e4b17023SJohn Marino         {
372e4b17023SJohn Marino           /* Case 3: insert in a basic block not containing defs/uses.  */
373e4b17023SJohn Marino           gsi = gsi_after_labels (occ->bb);
374e4b17023SJohn Marino           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
375e4b17023SJohn Marino         }
376e4b17023SJohn Marino 
377e4b17023SJohn Marino       reciprocal_stats.rdivs_inserted++;
378e4b17023SJohn Marino 
379e4b17023SJohn Marino       occ->recip_def_stmt = new_stmt;
380e4b17023SJohn Marino     }
381e4b17023SJohn Marino 
382e4b17023SJohn Marino   occ->recip_def = recip_def;
383e4b17023SJohn Marino   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
384e4b17023SJohn Marino     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
385e4b17023SJohn Marino }
386e4b17023SJohn Marino 
387e4b17023SJohn Marino 
388e4b17023SJohn Marino /* Replace the division at USE_P with a multiplication by the reciprocal, if
389e4b17023SJohn Marino    possible.  */
390e4b17023SJohn Marino 
391e4b17023SJohn Marino static inline void
replace_reciprocal(use_operand_p use_p)392e4b17023SJohn Marino replace_reciprocal (use_operand_p use_p)
393e4b17023SJohn Marino {
394e4b17023SJohn Marino   gimple use_stmt = USE_STMT (use_p);
395e4b17023SJohn Marino   basic_block bb = gimple_bb (use_stmt);
396e4b17023SJohn Marino   struct occurrence *occ = (struct occurrence *) bb->aux;
397e4b17023SJohn Marino 
398e4b17023SJohn Marino   if (optimize_bb_for_speed_p (bb)
399e4b17023SJohn Marino       && occ->recip_def && use_stmt != occ->recip_def_stmt)
400e4b17023SJohn Marino     {
401e4b17023SJohn Marino       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
402e4b17023SJohn Marino       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
403e4b17023SJohn Marino       SET_USE (use_p, occ->recip_def);
404e4b17023SJohn Marino       fold_stmt_inplace (&gsi);
405e4b17023SJohn Marino       update_stmt (use_stmt);
406e4b17023SJohn Marino     }
407e4b17023SJohn Marino }
408e4b17023SJohn Marino 
409e4b17023SJohn Marino 
410e4b17023SJohn Marino /* Free OCC and return one more "struct occurrence" to be freed.  */
411e4b17023SJohn Marino 
412e4b17023SJohn Marino static struct occurrence *
free_bb(struct occurrence * occ)413e4b17023SJohn Marino free_bb (struct occurrence *occ)
414e4b17023SJohn Marino {
415e4b17023SJohn Marino   struct occurrence *child, *next;
416e4b17023SJohn Marino 
417e4b17023SJohn Marino   /* First get the two pointers hanging off OCC.  */
418e4b17023SJohn Marino   next = occ->next;
419e4b17023SJohn Marino   child = occ->children;
420e4b17023SJohn Marino   occ->bb->aux = NULL;
421e4b17023SJohn Marino   pool_free (occ_pool, occ);
422e4b17023SJohn Marino 
423e4b17023SJohn Marino   /* Now ensure that we don't recurse unless it is necessary.  */
424e4b17023SJohn Marino   if (!child)
425e4b17023SJohn Marino     return next;
426e4b17023SJohn Marino   else
427e4b17023SJohn Marino     {
428e4b17023SJohn Marino       while (next)
429e4b17023SJohn Marino 	next = free_bb (next);
430e4b17023SJohn Marino 
431e4b17023SJohn Marino       return child;
432e4b17023SJohn Marino     }
433e4b17023SJohn Marino }
434e4b17023SJohn Marino 
435e4b17023SJohn Marino 
436e4b17023SJohn Marino /* Look for floating-point divisions among DEF's uses, and try to
437e4b17023SJohn Marino    replace them by multiplications with the reciprocal.  Add
438e4b17023SJohn Marino    as many statements computing the reciprocal as needed.
439e4b17023SJohn Marino 
440e4b17023SJohn Marino    DEF must be a GIMPLE register of a floating-point type.  */
441e4b17023SJohn Marino 
442e4b17023SJohn Marino static void
execute_cse_reciprocals_1(gimple_stmt_iterator * def_gsi,tree def)443e4b17023SJohn Marino execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
444e4b17023SJohn Marino {
445e4b17023SJohn Marino   use_operand_p use_p;
446e4b17023SJohn Marino   imm_use_iterator use_iter;
447e4b17023SJohn Marino   struct occurrence *occ;
448e4b17023SJohn Marino   int count = 0, threshold;
449e4b17023SJohn Marino 
450e4b17023SJohn Marino   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
451e4b17023SJohn Marino 
452e4b17023SJohn Marino   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
453e4b17023SJohn Marino     {
454e4b17023SJohn Marino       gimple use_stmt = USE_STMT (use_p);
455e4b17023SJohn Marino       if (is_division_by (use_stmt, def))
456e4b17023SJohn Marino 	{
457e4b17023SJohn Marino 	  register_division_in (gimple_bb (use_stmt));
458e4b17023SJohn Marino 	  count++;
459e4b17023SJohn Marino 	}
460e4b17023SJohn Marino     }
461e4b17023SJohn Marino 
462e4b17023SJohn Marino   /* Do the expensive part only if we can hope to optimize something.  */
463e4b17023SJohn Marino   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
464e4b17023SJohn Marino   if (count >= threshold)
465e4b17023SJohn Marino     {
466e4b17023SJohn Marino       gimple use_stmt;
467e4b17023SJohn Marino       for (occ = occ_head; occ; occ = occ->next)
468e4b17023SJohn Marino 	{
469e4b17023SJohn Marino 	  compute_merit (occ);
470e4b17023SJohn Marino 	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
471e4b17023SJohn Marino 	}
472e4b17023SJohn Marino 
473e4b17023SJohn Marino       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
474e4b17023SJohn Marino 	{
475e4b17023SJohn Marino 	  if (is_division_by (use_stmt, def))
476e4b17023SJohn Marino 	    {
477e4b17023SJohn Marino 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
478e4b17023SJohn Marino 		replace_reciprocal (use_p);
479e4b17023SJohn Marino 	    }
480e4b17023SJohn Marino 	}
481e4b17023SJohn Marino     }
482e4b17023SJohn Marino 
483e4b17023SJohn Marino   for (occ = occ_head; occ; )
484e4b17023SJohn Marino     occ = free_bb (occ);
485e4b17023SJohn Marino 
486e4b17023SJohn Marino   occ_head = NULL;
487e4b17023SJohn Marino }
488e4b17023SJohn Marino 
489e4b17023SJohn Marino static bool
gate_cse_reciprocals(void)490e4b17023SJohn Marino gate_cse_reciprocals (void)
491e4b17023SJohn Marino {
492e4b17023SJohn Marino   return optimize && flag_reciprocal_math;
493e4b17023SJohn Marino }
494e4b17023SJohn Marino 
495e4b17023SJohn Marino /* Go through all the floating-point SSA_NAMEs, and call
496e4b17023SJohn Marino    execute_cse_reciprocals_1 on each of them.  */
497e4b17023SJohn Marino static unsigned int
execute_cse_reciprocals(void)498e4b17023SJohn Marino execute_cse_reciprocals (void)
499e4b17023SJohn Marino {
500e4b17023SJohn Marino   basic_block bb;
501e4b17023SJohn Marino   tree arg;
502e4b17023SJohn Marino 
503e4b17023SJohn Marino   occ_pool = create_alloc_pool ("dominators for recip",
504e4b17023SJohn Marino 				sizeof (struct occurrence),
505e4b17023SJohn Marino 				n_basic_blocks / 3 + 1);
506e4b17023SJohn Marino 
507e4b17023SJohn Marino   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
508e4b17023SJohn Marino   calculate_dominance_info (CDI_DOMINATORS);
509e4b17023SJohn Marino   calculate_dominance_info (CDI_POST_DOMINATORS);
510e4b17023SJohn Marino 
511e4b17023SJohn Marino #ifdef ENABLE_CHECKING
512e4b17023SJohn Marino   FOR_EACH_BB (bb)
513e4b17023SJohn Marino     gcc_assert (!bb->aux);
514e4b17023SJohn Marino #endif
515e4b17023SJohn Marino 
516e4b17023SJohn Marino   for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
517e4b17023SJohn Marino     if (gimple_default_def (cfun, arg)
518e4b17023SJohn Marino 	&& FLOAT_TYPE_P (TREE_TYPE (arg))
519e4b17023SJohn Marino 	&& is_gimple_reg (arg))
520e4b17023SJohn Marino       execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
521e4b17023SJohn Marino 
522e4b17023SJohn Marino   FOR_EACH_BB (bb)
523e4b17023SJohn Marino     {
524e4b17023SJohn Marino       gimple_stmt_iterator gsi;
525e4b17023SJohn Marino       gimple phi;
526e4b17023SJohn Marino       tree def;
527e4b17023SJohn Marino 
528e4b17023SJohn Marino       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
529e4b17023SJohn Marino 	{
530e4b17023SJohn Marino 	  phi = gsi_stmt (gsi);
531e4b17023SJohn Marino 	  def = PHI_RESULT (phi);
532e4b17023SJohn Marino 	  if (FLOAT_TYPE_P (TREE_TYPE (def))
533e4b17023SJohn Marino 	      && is_gimple_reg (def))
534e4b17023SJohn Marino 	    execute_cse_reciprocals_1 (NULL, def);
535e4b17023SJohn Marino 	}
536e4b17023SJohn Marino 
537e4b17023SJohn Marino       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
538e4b17023SJohn Marino         {
539e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (gsi);
540e4b17023SJohn Marino 
541e4b17023SJohn Marino 	  if (gimple_has_lhs (stmt)
542e4b17023SJohn Marino 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
543e4b17023SJohn Marino 	      && FLOAT_TYPE_P (TREE_TYPE (def))
544e4b17023SJohn Marino 	      && TREE_CODE (def) == SSA_NAME)
545e4b17023SJohn Marino 	    execute_cse_reciprocals_1 (&gsi, def);
546e4b17023SJohn Marino 	}
547e4b17023SJohn Marino 
548e4b17023SJohn Marino       if (optimize_bb_for_size_p (bb))
549e4b17023SJohn Marino         continue;
550e4b17023SJohn Marino 
551e4b17023SJohn Marino       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
552e4b17023SJohn Marino       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
553e4b17023SJohn Marino         {
554e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (gsi);
555e4b17023SJohn Marino 	  tree fndecl;
556e4b17023SJohn Marino 
557e4b17023SJohn Marino 	  if (is_gimple_assign (stmt)
558e4b17023SJohn Marino 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
559e4b17023SJohn Marino 	    {
560e4b17023SJohn Marino 	      tree arg1 = gimple_assign_rhs2 (stmt);
561e4b17023SJohn Marino 	      gimple stmt1;
562e4b17023SJohn Marino 
563e4b17023SJohn Marino 	      if (TREE_CODE (arg1) != SSA_NAME)
564e4b17023SJohn Marino 		continue;
565e4b17023SJohn Marino 
566e4b17023SJohn Marino 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
567e4b17023SJohn Marino 
568e4b17023SJohn Marino 	      if (is_gimple_call (stmt1)
569e4b17023SJohn Marino 		  && gimple_call_lhs (stmt1)
570e4b17023SJohn Marino 		  && (fndecl = gimple_call_fndecl (stmt1))
571e4b17023SJohn Marino 		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
572e4b17023SJohn Marino 		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
573e4b17023SJohn Marino 		{
574e4b17023SJohn Marino 		  enum built_in_function code;
575e4b17023SJohn Marino 		  bool md_code, fail;
576e4b17023SJohn Marino 		  imm_use_iterator ui;
577e4b17023SJohn Marino 		  use_operand_p use_p;
578e4b17023SJohn Marino 
579e4b17023SJohn Marino 		  code = DECL_FUNCTION_CODE (fndecl);
580e4b17023SJohn Marino 		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
581e4b17023SJohn Marino 
582e4b17023SJohn Marino 		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
583e4b17023SJohn Marino 		  if (!fndecl)
584e4b17023SJohn Marino 		    continue;
585e4b17023SJohn Marino 
586e4b17023SJohn Marino 		  /* Check that all uses of the SSA name are divisions,
587e4b17023SJohn Marino 		     otherwise replacing the defining statement will do
588e4b17023SJohn Marino 		     the wrong thing.  */
589e4b17023SJohn Marino 		  fail = false;
590e4b17023SJohn Marino 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
591e4b17023SJohn Marino 		    {
592e4b17023SJohn Marino 		      gimple stmt2 = USE_STMT (use_p);
593e4b17023SJohn Marino 		      if (is_gimple_debug (stmt2))
594e4b17023SJohn Marino 			continue;
595e4b17023SJohn Marino 		      if (!is_gimple_assign (stmt2)
596e4b17023SJohn Marino 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
597e4b17023SJohn Marino 			  || gimple_assign_rhs1 (stmt2) == arg1
598e4b17023SJohn Marino 			  || gimple_assign_rhs2 (stmt2) != arg1)
599e4b17023SJohn Marino 			{
600e4b17023SJohn Marino 			  fail = true;
601e4b17023SJohn Marino 			  break;
602e4b17023SJohn Marino 			}
603e4b17023SJohn Marino 		    }
604e4b17023SJohn Marino 		  if (fail)
605e4b17023SJohn Marino 		    continue;
606e4b17023SJohn Marino 
607e4b17023SJohn Marino 		  gimple_replace_lhs (stmt1, arg1);
608e4b17023SJohn Marino 		  gimple_call_set_fndecl (stmt1, fndecl);
609e4b17023SJohn Marino 		  update_stmt (stmt1);
610e4b17023SJohn Marino 		  reciprocal_stats.rfuncs_inserted++;
611e4b17023SJohn Marino 
612e4b17023SJohn Marino 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
613e4b17023SJohn Marino 		    {
614e4b17023SJohn Marino 		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
615e4b17023SJohn Marino 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
616e4b17023SJohn Marino 		      fold_stmt_inplace (&gsi);
617e4b17023SJohn Marino 		      update_stmt (stmt);
618e4b17023SJohn Marino 		    }
619e4b17023SJohn Marino 		}
620e4b17023SJohn Marino 	    }
621e4b17023SJohn Marino 	}
622e4b17023SJohn Marino     }
623e4b17023SJohn Marino 
624e4b17023SJohn Marino   statistics_counter_event (cfun, "reciprocal divs inserted",
625e4b17023SJohn Marino 			    reciprocal_stats.rdivs_inserted);
626e4b17023SJohn Marino   statistics_counter_event (cfun, "reciprocal functions inserted",
627e4b17023SJohn Marino 			    reciprocal_stats.rfuncs_inserted);
628e4b17023SJohn Marino 
629e4b17023SJohn Marino   free_dominance_info (CDI_DOMINATORS);
630e4b17023SJohn Marino   free_dominance_info (CDI_POST_DOMINATORS);
631e4b17023SJohn Marino   free_alloc_pool (occ_pool);
632e4b17023SJohn Marino   return 0;
633e4b17023SJohn Marino }
634e4b17023SJohn Marino 
635e4b17023SJohn Marino struct gimple_opt_pass pass_cse_reciprocals =
636e4b17023SJohn Marino {
637e4b17023SJohn Marino  {
638e4b17023SJohn Marino   GIMPLE_PASS,
639e4b17023SJohn Marino   "recip",				/* name */
640e4b17023SJohn Marino   gate_cse_reciprocals,			/* gate */
641e4b17023SJohn Marino   execute_cse_reciprocals,		/* execute */
642e4b17023SJohn Marino   NULL,					/* sub */
643e4b17023SJohn Marino   NULL,					/* next */
644e4b17023SJohn Marino   0,					/* static_pass_number */
645e4b17023SJohn Marino   TV_NONE,				/* tv_id */
646e4b17023SJohn Marino   PROP_ssa,				/* properties_required */
647e4b17023SJohn Marino   0,					/* properties_provided */
648e4b17023SJohn Marino   0,					/* properties_destroyed */
649e4b17023SJohn Marino   0,					/* todo_flags_start */
650e4b17023SJohn Marino   TODO_update_ssa | TODO_verify_ssa
651e4b17023SJohn Marino     | TODO_verify_stmts                /* todo_flags_finish */
652e4b17023SJohn Marino  }
653e4b17023SJohn Marino };
654e4b17023SJohn Marino 
655e4b17023SJohn Marino /* Records an occurrence at statement USE_STMT in the vector of trees
656e4b17023SJohn Marino    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
657e4b17023SJohn Marino    is not yet initialized.  Returns true if the occurrence was pushed on
658e4b17023SJohn Marino    the vector.  Adjusts *TOP_BB to be the basic block dominating all
659e4b17023SJohn Marino    statements in the vector.  */
660e4b17023SJohn Marino 
661e4b17023SJohn Marino static bool
maybe_record_sincos(VEC (gimple,heap)** stmts,basic_block * top_bb,gimple use_stmt)662e4b17023SJohn Marino maybe_record_sincos (VEC(gimple, heap) **stmts,
663e4b17023SJohn Marino 		     basic_block *top_bb, gimple use_stmt)
664e4b17023SJohn Marino {
665e4b17023SJohn Marino   basic_block use_bb = gimple_bb (use_stmt);
666e4b17023SJohn Marino   if (*top_bb
667e4b17023SJohn Marino       && (*top_bb == use_bb
668e4b17023SJohn Marino 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
669e4b17023SJohn Marino     VEC_safe_push (gimple, heap, *stmts, use_stmt);
670e4b17023SJohn Marino   else if (!*top_bb
671e4b17023SJohn Marino 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
672e4b17023SJohn Marino     {
673e4b17023SJohn Marino       VEC_safe_push (gimple, heap, *stmts, use_stmt);
674e4b17023SJohn Marino       *top_bb = use_bb;
675e4b17023SJohn Marino     }
676e4b17023SJohn Marino   else
677e4b17023SJohn Marino     return false;
678e4b17023SJohn Marino 
679e4b17023SJohn Marino   return true;
680e4b17023SJohn Marino }
681e4b17023SJohn Marino 
682e4b17023SJohn Marino /* Look for sin, cos and cexpi calls with the same argument NAME and
683e4b17023SJohn Marino    create a single call to cexpi CSEing the result in this case.
684e4b17023SJohn Marino    We first walk over all immediate uses of the argument collecting
685e4b17023SJohn Marino    statements that we can CSE in a vector and in a second pass replace
686e4b17023SJohn Marino    the statement rhs with a REALPART or IMAGPART expression on the
687e4b17023SJohn Marino    result of the cexpi call we insert before the use statement that
688e4b17023SJohn Marino    dominates all other candidates.  */
689e4b17023SJohn Marino 
690e4b17023SJohn Marino static bool
execute_cse_sincos_1(tree name)691e4b17023SJohn Marino execute_cse_sincos_1 (tree name)
692e4b17023SJohn Marino {
693e4b17023SJohn Marino   gimple_stmt_iterator gsi;
694e4b17023SJohn Marino   imm_use_iterator use_iter;
695e4b17023SJohn Marino   tree fndecl, res, type;
696e4b17023SJohn Marino   gimple def_stmt, use_stmt, stmt;
697e4b17023SJohn Marino   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
698e4b17023SJohn Marino   VEC(gimple, heap) *stmts = NULL;
699e4b17023SJohn Marino   basic_block top_bb = NULL;
700e4b17023SJohn Marino   int i;
701e4b17023SJohn Marino   bool cfg_changed = false;
702e4b17023SJohn Marino 
703e4b17023SJohn Marino   type = TREE_TYPE (name);
704e4b17023SJohn Marino   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
705e4b17023SJohn Marino     {
706e4b17023SJohn Marino       if (gimple_code (use_stmt) != GIMPLE_CALL
707e4b17023SJohn Marino 	  || !gimple_call_lhs (use_stmt)
708e4b17023SJohn Marino 	  || !(fndecl = gimple_call_fndecl (use_stmt))
709e4b17023SJohn Marino 	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
710e4b17023SJohn Marino 	continue;
711e4b17023SJohn Marino 
712e4b17023SJohn Marino       switch (DECL_FUNCTION_CODE (fndecl))
713e4b17023SJohn Marino 	{
714e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_COS):
715e4b17023SJohn Marino 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
716e4b17023SJohn Marino 	  break;
717e4b17023SJohn Marino 
718e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_SIN):
719e4b17023SJohn Marino 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
720e4b17023SJohn Marino 	  break;
721e4b17023SJohn Marino 
722e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_CEXPI):
723e4b17023SJohn Marino 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
724e4b17023SJohn Marino 	  break;
725e4b17023SJohn Marino 
726e4b17023SJohn Marino 	default:;
727e4b17023SJohn Marino 	}
728e4b17023SJohn Marino     }
729e4b17023SJohn Marino 
730e4b17023SJohn Marino   if (seen_cos + seen_sin + seen_cexpi <= 1)
731e4b17023SJohn Marino     {
732e4b17023SJohn Marino       VEC_free(gimple, heap, stmts);
733e4b17023SJohn Marino       return false;
734e4b17023SJohn Marino     }
735e4b17023SJohn Marino 
736e4b17023SJohn Marino   /* Simply insert cexpi at the beginning of top_bb but not earlier than
737e4b17023SJohn Marino      the name def statement.  */
738e4b17023SJohn Marino   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
739e4b17023SJohn Marino   if (!fndecl)
740e4b17023SJohn Marino     return false;
741e4b17023SJohn Marino   res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
742e4b17023SJohn Marino   stmt = gimple_build_call (fndecl, 1, name);
743e4b17023SJohn Marino   res = make_ssa_name (res, stmt);
744e4b17023SJohn Marino   gimple_call_set_lhs (stmt, res);
745e4b17023SJohn Marino 
746e4b17023SJohn Marino   def_stmt = SSA_NAME_DEF_STMT (name);
747e4b17023SJohn Marino   if (!SSA_NAME_IS_DEFAULT_DEF (name)
748e4b17023SJohn Marino       && gimple_code (def_stmt) != GIMPLE_PHI
749e4b17023SJohn Marino       && gimple_bb (def_stmt) == top_bb)
750e4b17023SJohn Marino     {
751e4b17023SJohn Marino       gsi = gsi_for_stmt (def_stmt);
752e4b17023SJohn Marino       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
753e4b17023SJohn Marino     }
754e4b17023SJohn Marino   else
755e4b17023SJohn Marino     {
756e4b17023SJohn Marino       gsi = gsi_after_labels (top_bb);
757e4b17023SJohn Marino       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
758e4b17023SJohn Marino     }
759e4b17023SJohn Marino   update_stmt (stmt);
760e4b17023SJohn Marino   sincos_stats.inserted++;
761e4b17023SJohn Marino 
762e4b17023SJohn Marino   /* And adjust the recorded old call sites.  */
763e4b17023SJohn Marino   for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
764e4b17023SJohn Marino     {
765e4b17023SJohn Marino       tree rhs = NULL;
766e4b17023SJohn Marino       fndecl = gimple_call_fndecl (use_stmt);
767e4b17023SJohn Marino 
768e4b17023SJohn Marino       switch (DECL_FUNCTION_CODE (fndecl))
769e4b17023SJohn Marino 	{
770e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_COS):
771e4b17023SJohn Marino 	  rhs = fold_build1 (REALPART_EXPR, type, res);
772e4b17023SJohn Marino 	  break;
773e4b17023SJohn Marino 
774e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_SIN):
775e4b17023SJohn Marino 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
776e4b17023SJohn Marino 	  break;
777e4b17023SJohn Marino 
778e4b17023SJohn Marino 	CASE_FLT_FN (BUILT_IN_CEXPI):
779e4b17023SJohn Marino 	  rhs = res;
780e4b17023SJohn Marino 	  break;
781e4b17023SJohn Marino 
782e4b17023SJohn Marino 	default:;
783e4b17023SJohn Marino 	  gcc_unreachable ();
784e4b17023SJohn Marino 	}
785e4b17023SJohn Marino 
786e4b17023SJohn Marino 	/* Replace call with a copy.  */
787e4b17023SJohn Marino 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
788e4b17023SJohn Marino 
789e4b17023SJohn Marino 	gsi = gsi_for_stmt (use_stmt);
790e4b17023SJohn Marino 	gsi_replace (&gsi, stmt, true);
791e4b17023SJohn Marino 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
792e4b17023SJohn Marino 	  cfg_changed = true;
793e4b17023SJohn Marino     }
794e4b17023SJohn Marino 
795e4b17023SJohn Marino   VEC_free(gimple, heap, stmts);
796e4b17023SJohn Marino 
797e4b17023SJohn Marino   return cfg_changed;
798e4b17023SJohn Marino }
799e4b17023SJohn Marino 
800e4b17023SJohn Marino /* To evaluate powi(x,n), the floating point value x raised to the
801e4b17023SJohn Marino    constant integer exponent n, we use a hybrid algorithm that
802e4b17023SJohn Marino    combines the "window method" with look-up tables.  For an
803e4b17023SJohn Marino    introduction to exponentiation algorithms and "addition chains",
804e4b17023SJohn Marino    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
805e4b17023SJohn Marino    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
806e4b17023SJohn Marino    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
807e4b17023SJohn Marino    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
808e4b17023SJohn Marino 
809e4b17023SJohn Marino /* Provide a default value for POWI_MAX_MULTS, the maximum number of
810e4b17023SJohn Marino    multiplications to inline before calling the system library's pow
811e4b17023SJohn Marino    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
812e4b17023SJohn Marino    so this default never requires calling pow, powf or powl.  */
813e4b17023SJohn Marino 
814e4b17023SJohn Marino #ifndef POWI_MAX_MULTS
815e4b17023SJohn Marino #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
816e4b17023SJohn Marino #endif
817e4b17023SJohn Marino 
818e4b17023SJohn Marino /* The size of the "optimal power tree" lookup table.  All
819e4b17023SJohn Marino    exponents less than this value are simply looked up in the
820e4b17023SJohn Marino    powi_table below.  This threshold is also used to size the
821e4b17023SJohn Marino    cache of pseudo registers that hold intermediate results.  */
822e4b17023SJohn Marino #define POWI_TABLE_SIZE 256
823e4b17023SJohn Marino 
824e4b17023SJohn Marino /* The size, in bits of the window, used in the "window method"
825e4b17023SJohn Marino    exponentiation algorithm.  This is equivalent to a radix of
826e4b17023SJohn Marino    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
827e4b17023SJohn Marino #define POWI_WINDOW_SIZE 3
828e4b17023SJohn Marino 
829e4b17023SJohn Marino /* The following table is an efficient representation of an
830e4b17023SJohn Marino    "optimal power tree".  For each value, i, the corresponding
831e4b17023SJohn Marino    value, j, in the table states than an optimal evaluation
832e4b17023SJohn Marino    sequence for calculating pow(x,i) can be found by evaluating
833e4b17023SJohn Marino    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
834e4b17023SJohn Marino    100 integers is given in Knuth's "Seminumerical algorithms".  */
835e4b17023SJohn Marino 
836e4b17023SJohn Marino static const unsigned char powi_table[POWI_TABLE_SIZE] =
837e4b17023SJohn Marino   {
838e4b17023SJohn Marino       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
839e4b17023SJohn Marino       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
840e4b17023SJohn Marino       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
841e4b17023SJohn Marino      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
842e4b17023SJohn Marino      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
843e4b17023SJohn Marino      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
844e4b17023SJohn Marino      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
845e4b17023SJohn Marino      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
846e4b17023SJohn Marino      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
847e4b17023SJohn Marino      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
848e4b17023SJohn Marino      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
849e4b17023SJohn Marino      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
850e4b17023SJohn Marino      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
851e4b17023SJohn Marino      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
852e4b17023SJohn Marino      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
853e4b17023SJohn Marino      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
854e4b17023SJohn Marino      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
855e4b17023SJohn Marino      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
856e4b17023SJohn Marino      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
857e4b17023SJohn Marino      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
858e4b17023SJohn Marino      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
859e4b17023SJohn Marino      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
860e4b17023SJohn Marino      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
861e4b17023SJohn Marino      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
862e4b17023SJohn Marino      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
863e4b17023SJohn Marino     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
864e4b17023SJohn Marino     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
865e4b17023SJohn Marino     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
866e4b17023SJohn Marino     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
867e4b17023SJohn Marino     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
868e4b17023SJohn Marino     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
869e4b17023SJohn Marino     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
870e4b17023SJohn Marino   };
871e4b17023SJohn Marino 
872e4b17023SJohn Marino 
873e4b17023SJohn Marino /* Return the number of multiplications required to calculate
874e4b17023SJohn Marino    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
875e4b17023SJohn Marino    subroutine of powi_cost.  CACHE is an array indicating
876e4b17023SJohn Marino    which exponents have already been calculated.  */
877e4b17023SJohn Marino 
878e4b17023SJohn Marino static int
powi_lookup_cost(unsigned HOST_WIDE_INT n,bool * cache)879e4b17023SJohn Marino powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
880e4b17023SJohn Marino {
881e4b17023SJohn Marino   /* If we've already calculated this exponent, then this evaluation
882e4b17023SJohn Marino      doesn't require any additional multiplications.  */
883e4b17023SJohn Marino   if (cache[n])
884e4b17023SJohn Marino     return 0;
885e4b17023SJohn Marino 
886e4b17023SJohn Marino   cache[n] = true;
887e4b17023SJohn Marino   return powi_lookup_cost (n - powi_table[n], cache)
888e4b17023SJohn Marino 	 + powi_lookup_cost (powi_table[n], cache) + 1;
889e4b17023SJohn Marino }
890e4b17023SJohn Marino 
891e4b17023SJohn Marino /* Return the number of multiplications required to calculate
892e4b17023SJohn Marino    powi(x,n) for an arbitrary x, given the exponent N.  This
893e4b17023SJohn Marino    function needs to be kept in sync with powi_as_mults below.  */
894e4b17023SJohn Marino 
895e4b17023SJohn Marino static int
powi_cost(HOST_WIDE_INT n)896e4b17023SJohn Marino powi_cost (HOST_WIDE_INT n)
897e4b17023SJohn Marino {
898e4b17023SJohn Marino   bool cache[POWI_TABLE_SIZE];
899e4b17023SJohn Marino   unsigned HOST_WIDE_INT digit;
900e4b17023SJohn Marino   unsigned HOST_WIDE_INT val;
901e4b17023SJohn Marino   int result;
902e4b17023SJohn Marino 
903e4b17023SJohn Marino   if (n == 0)
904e4b17023SJohn Marino     return 0;
905e4b17023SJohn Marino 
906e4b17023SJohn Marino   /* Ignore the reciprocal when calculating the cost.  */
907e4b17023SJohn Marino   val = (n < 0) ? -n : n;
908e4b17023SJohn Marino 
909e4b17023SJohn Marino   /* Initialize the exponent cache.  */
910e4b17023SJohn Marino   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
911e4b17023SJohn Marino   cache[1] = true;
912e4b17023SJohn Marino 
913e4b17023SJohn Marino   result = 0;
914e4b17023SJohn Marino 
915e4b17023SJohn Marino   while (val >= POWI_TABLE_SIZE)
916e4b17023SJohn Marino     {
917e4b17023SJohn Marino       if (val & 1)
918e4b17023SJohn Marino 	{
919e4b17023SJohn Marino 	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
920e4b17023SJohn Marino 	  result += powi_lookup_cost (digit, cache)
921e4b17023SJohn Marino 		    + POWI_WINDOW_SIZE + 1;
922e4b17023SJohn Marino 	  val >>= POWI_WINDOW_SIZE;
923e4b17023SJohn Marino 	}
924e4b17023SJohn Marino       else
925e4b17023SJohn Marino 	{
926e4b17023SJohn Marino 	  val >>= 1;
927e4b17023SJohn Marino 	  result++;
928e4b17023SJohn Marino 	}
929e4b17023SJohn Marino     }
930e4b17023SJohn Marino 
931e4b17023SJohn Marino   return result + powi_lookup_cost (val, cache);
932e4b17023SJohn Marino }
933e4b17023SJohn Marino 
934e4b17023SJohn Marino /* Recursive subroutine of powi_as_mults.  This function takes the
935e4b17023SJohn Marino    array, CACHE, of already calculated exponents and an exponent N and
936e4b17023SJohn Marino    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
937e4b17023SJohn Marino 
938e4b17023SJohn Marino static tree
powi_as_mults_1(gimple_stmt_iterator * gsi,location_t loc,tree type,HOST_WIDE_INT n,tree * cache,tree target)939e4b17023SJohn Marino powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
940e4b17023SJohn Marino 		 HOST_WIDE_INT n, tree *cache, tree target)
941e4b17023SJohn Marino {
942e4b17023SJohn Marino   tree op0, op1, ssa_target;
943e4b17023SJohn Marino   unsigned HOST_WIDE_INT digit;
944e4b17023SJohn Marino   gimple mult_stmt;
945e4b17023SJohn Marino 
946e4b17023SJohn Marino   if (n < POWI_TABLE_SIZE && cache[n])
947e4b17023SJohn Marino     return cache[n];
948e4b17023SJohn Marino 
949e4b17023SJohn Marino   ssa_target = make_ssa_name (target, NULL);
950e4b17023SJohn Marino 
951e4b17023SJohn Marino   if (n < POWI_TABLE_SIZE)
952e4b17023SJohn Marino     {
953e4b17023SJohn Marino       cache[n] = ssa_target;
954e4b17023SJohn Marino       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache, target);
955e4b17023SJohn Marino       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
956e4b17023SJohn Marino     }
957e4b17023SJohn Marino   else if (n & 1)
958e4b17023SJohn Marino     {
959e4b17023SJohn Marino       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
960e4b17023SJohn Marino       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
961e4b17023SJohn Marino       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
962e4b17023SJohn Marino     }
963e4b17023SJohn Marino   else
964e4b17023SJohn Marino     {
965e4b17023SJohn Marino       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
966e4b17023SJohn Marino       op1 = op0;
967e4b17023SJohn Marino     }
968e4b17023SJohn Marino 
969e4b17023SJohn Marino   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
970e4b17023SJohn Marino   gimple_set_location (mult_stmt, loc);
971e4b17023SJohn Marino   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
972e4b17023SJohn Marino 
973e4b17023SJohn Marino   return ssa_target;
974e4b17023SJohn Marino }
975e4b17023SJohn Marino 
976e4b17023SJohn Marino /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
977e4b17023SJohn Marino    This function needs to be kept in sync with powi_cost above.  */
978e4b17023SJohn Marino 
979e4b17023SJohn Marino static tree
powi_as_mults(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)980e4b17023SJohn Marino powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
981e4b17023SJohn Marino 	       tree arg0, HOST_WIDE_INT n)
982e4b17023SJohn Marino {
983e4b17023SJohn Marino   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
984e4b17023SJohn Marino   gimple div_stmt;
985e4b17023SJohn Marino 
986e4b17023SJohn Marino   if (n == 0)
987e4b17023SJohn Marino     return build_real (type, dconst1);
988e4b17023SJohn Marino 
989e4b17023SJohn Marino   memset (cache, 0,  sizeof (cache));
990e4b17023SJohn Marino   cache[1] = arg0;
991e4b17023SJohn Marino 
992e4b17023SJohn Marino   target = create_tmp_reg (type, "powmult");
993e4b17023SJohn Marino   add_referenced_var (target);
994e4b17023SJohn Marino 
995e4b17023SJohn Marino   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache, target);
996e4b17023SJohn Marino 
997e4b17023SJohn Marino   if (n >= 0)
998e4b17023SJohn Marino     return result;
999e4b17023SJohn Marino 
1000e4b17023SJohn Marino   /* If the original exponent was negative, reciprocate the result.  */
1001e4b17023SJohn Marino   target = make_ssa_name (target, NULL);
1002e4b17023SJohn Marino   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
1003e4b17023SJohn Marino 					   build_real (type, dconst1),
1004e4b17023SJohn Marino 					   result);
1005e4b17023SJohn Marino   gimple_set_location (div_stmt, loc);
1006e4b17023SJohn Marino   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1007e4b17023SJohn Marino 
1008e4b17023SJohn Marino   return target;
1009e4b17023SJohn Marino }
1010e4b17023SJohn Marino 
1011e4b17023SJohn Marino /* ARG0 and N are the two arguments to a powi builtin in GSI with
1012e4b17023SJohn Marino    location info LOC.  If the arguments are appropriate, create an
1013e4b17023SJohn Marino    equivalent sequence of statements prior to GSI using an optimal
1014e4b17023SJohn Marino    number of multiplications, and return an expession holding the
1015e4b17023SJohn Marino    result.  */
1016e4b17023SJohn Marino 
1017e4b17023SJohn Marino static tree
gimple_expand_builtin_powi(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)1018e4b17023SJohn Marino gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1019e4b17023SJohn Marino 			    tree arg0, HOST_WIDE_INT n)
1020e4b17023SJohn Marino {
1021e4b17023SJohn Marino   /* Avoid largest negative number.  */
1022e4b17023SJohn Marino   if (n != -n
1023e4b17023SJohn Marino       && ((n >= -1 && n <= 2)
1024e4b17023SJohn Marino 	  || (optimize_function_for_speed_p (cfun)
1025e4b17023SJohn Marino 	      && powi_cost (n) <= POWI_MAX_MULTS)))
1026e4b17023SJohn Marino     return powi_as_mults (gsi, loc, arg0, n);
1027e4b17023SJohn Marino 
1028e4b17023SJohn Marino   return NULL_TREE;
1029e4b17023SJohn Marino }
1030e4b17023SJohn Marino 
1031e4b17023SJohn Marino /* Build a gimple call statement that calls FN with argument ARG.
1032e4b17023SJohn Marino    Set the lhs of the call statement to a fresh SSA name for
1033e4b17023SJohn Marino    variable VAR.  If VAR is NULL, first allocate it.  Insert the
1034e4b17023SJohn Marino    statement prior to GSI's current position, and return the fresh
1035e4b17023SJohn Marino    SSA name.  */
1036e4b17023SJohn Marino 
1037e4b17023SJohn Marino static tree
build_and_insert_call(gimple_stmt_iterator * gsi,location_t loc,tree * var,tree fn,tree arg)1038e4b17023SJohn Marino build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1039e4b17023SJohn Marino 		       tree *var, tree fn, tree arg)
1040e4b17023SJohn Marino {
1041e4b17023SJohn Marino   gimple call_stmt;
1042e4b17023SJohn Marino   tree ssa_target;
1043e4b17023SJohn Marino 
1044e4b17023SJohn Marino   if (!*var)
1045e4b17023SJohn Marino     {
1046e4b17023SJohn Marino       *var = create_tmp_reg (TREE_TYPE (arg), "powroot");
1047e4b17023SJohn Marino       add_referenced_var (*var);
1048e4b17023SJohn Marino     }
1049e4b17023SJohn Marino 
1050e4b17023SJohn Marino   call_stmt = gimple_build_call (fn, 1, arg);
1051e4b17023SJohn Marino   ssa_target = make_ssa_name (*var, NULL);
1052e4b17023SJohn Marino   gimple_set_lhs (call_stmt, ssa_target);
1053e4b17023SJohn Marino   gimple_set_location (call_stmt, loc);
1054e4b17023SJohn Marino   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1055e4b17023SJohn Marino 
1056e4b17023SJohn Marino   return ssa_target;
1057e4b17023SJohn Marino }
1058e4b17023SJohn Marino 
1059e4b17023SJohn Marino /* Build a gimple binary operation with the given CODE and arguments
1060e4b17023SJohn Marino    ARG0, ARG1, assigning the result to a new SSA name for variable
1061e4b17023SJohn Marino    TARGET.  Insert the statement prior to GSI's current position, and
1062e4b17023SJohn Marino    return the fresh SSA name.*/
1063e4b17023SJohn Marino 
1064e4b17023SJohn Marino static tree
build_and_insert_binop(gimple_stmt_iterator * gsi,location_t loc,tree target,enum tree_code code,tree arg0,tree arg1)1065e4b17023SJohn Marino build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1066e4b17023SJohn Marino 			tree target, enum tree_code code, tree arg0, tree arg1)
1067e4b17023SJohn Marino {
1068e4b17023SJohn Marino   tree result = make_ssa_name (target, NULL);
1069e4b17023SJohn Marino   gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
1070e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1071e4b17023SJohn Marino   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1072e4b17023SJohn Marino   return result;
1073e4b17023SJohn Marino }
1074e4b17023SJohn Marino 
1075e4b17023SJohn Marino /* Build a gimple reference operation with the given CODE and argument
1076e4b17023SJohn Marino    ARG, assigning the result to a new SSA name for variable TARGET.
1077e4b17023SJohn Marino    Insert the statement prior to GSI's current position, and return
1078e4b17023SJohn Marino    the fresh SSA name.  */
1079e4b17023SJohn Marino 
1080e4b17023SJohn Marino static inline tree
build_and_insert_ref(gimple_stmt_iterator * gsi,location_t loc,tree type,tree target,enum tree_code code,tree arg0)1081e4b17023SJohn Marino build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1082e4b17023SJohn Marino 		      tree target, enum tree_code code, tree arg0)
1083e4b17023SJohn Marino {
1084e4b17023SJohn Marino   tree result = make_ssa_name (target, NULL);
1085e4b17023SJohn Marino   gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1086e4b17023SJohn Marino   gimple_set_location (stmt, loc);
1087e4b17023SJohn Marino   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1088e4b17023SJohn Marino   return result;
1089e4b17023SJohn Marino }
1090e4b17023SJohn Marino 
1091e4b17023SJohn Marino /* Build a gimple assignment to cast VAL to TARGET.  Insert the statement
1092e4b17023SJohn Marino    prior to GSI's current position, and return the fresh SSA name.  */
1093e4b17023SJohn Marino 
1094e4b17023SJohn Marino static tree
build_and_insert_cast(gimple_stmt_iterator * gsi,location_t loc,tree target,tree val)1095e4b17023SJohn Marino build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1096e4b17023SJohn Marino 		       tree target, tree val)
1097e4b17023SJohn Marino {
1098e4b17023SJohn Marino   return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
1099e4b17023SJohn Marino }
1100e4b17023SJohn Marino 
1101e4b17023SJohn Marino /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1102e4b17023SJohn Marino    with location info LOC.  If possible, create an equivalent and
1103e4b17023SJohn Marino    less expensive sequence of statements prior to GSI, and return an
1104e4b17023SJohn Marino    expession holding the result.  */
1105e4b17023SJohn Marino 
1106e4b17023SJohn Marino static tree
gimple_expand_builtin_pow(gimple_stmt_iterator * gsi,location_t loc,tree arg0,tree arg1)1107e4b17023SJohn Marino gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1108e4b17023SJohn Marino 			   tree arg0, tree arg1)
1109e4b17023SJohn Marino {
1110e4b17023SJohn Marino   REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
1111e4b17023SJohn Marino   REAL_VALUE_TYPE c2, dconst3;
1112e4b17023SJohn Marino   HOST_WIDE_INT n;
1113e4b17023SJohn Marino   tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
1114e4b17023SJohn Marino   tree target = NULL_TREE;
1115e4b17023SJohn Marino   enum machine_mode mode;
1116*5ce9237cSJohn Marino   bool hw_sqrt_exists, c_is_int, c2_is_int;
1117e4b17023SJohn Marino 
1118e4b17023SJohn Marino   /* If the exponent isn't a constant, there's nothing of interest
1119e4b17023SJohn Marino      to be done.  */
1120e4b17023SJohn Marino   if (TREE_CODE (arg1) != REAL_CST)
1121e4b17023SJohn Marino     return NULL_TREE;
1122e4b17023SJohn Marino 
1123e4b17023SJohn Marino   /* If the exponent is equivalent to an integer, expand to an optimal
1124e4b17023SJohn Marino      multiplication sequence when profitable.  */
1125e4b17023SJohn Marino   c = TREE_REAL_CST (arg1);
1126e4b17023SJohn Marino   n = real_to_integer (&c);
1127e4b17023SJohn Marino   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1128*5ce9237cSJohn Marino   c_is_int = real_identical (&c, &cint);
1129e4b17023SJohn Marino 
1130*5ce9237cSJohn Marino   if (c_is_int
1131e4b17023SJohn Marino       && ((n >= -1 && n <= 2)
1132e4b17023SJohn Marino 	  || (flag_unsafe_math_optimizations
1133e4b17023SJohn Marino 	      && optimize_insn_for_speed_p ()
1134e4b17023SJohn Marino 	      && powi_cost (n) <= POWI_MAX_MULTS)))
1135e4b17023SJohn Marino     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1136e4b17023SJohn Marino 
1137e4b17023SJohn Marino   /* Attempt various optimizations using sqrt and cbrt.  */
1138e4b17023SJohn Marino   type = TREE_TYPE (arg0);
1139e4b17023SJohn Marino   mode = TYPE_MODE (type);
1140e4b17023SJohn Marino   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1141e4b17023SJohn Marino 
1142e4b17023SJohn Marino   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1143e4b17023SJohn Marino      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1144e4b17023SJohn Marino      sqrt(-0) = -0.  */
1145e4b17023SJohn Marino   if (sqrtfn
1146e4b17023SJohn Marino       && REAL_VALUES_EQUAL (c, dconsthalf)
1147e4b17023SJohn Marino       && !HONOR_SIGNED_ZEROS (mode))
1148e4b17023SJohn Marino     return build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1149e4b17023SJohn Marino 
1150e4b17023SJohn Marino   /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
1151e4b17023SJohn Marino      a builtin sqrt instruction is smaller than a call to pow with 0.25,
1152e4b17023SJohn Marino      so do this optimization even if -Os.  Don't do this optimization
1153e4b17023SJohn Marino      if we don't have a hardware sqrt insn.  */
1154e4b17023SJohn Marino   dconst1_4 = dconst1;
1155e4b17023SJohn Marino   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1156e4b17023SJohn Marino   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1157e4b17023SJohn Marino 
1158e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1159e4b17023SJohn Marino       && sqrtfn
1160e4b17023SJohn Marino       && REAL_VALUES_EQUAL (c, dconst1_4)
1161e4b17023SJohn Marino       && hw_sqrt_exists)
1162e4b17023SJohn Marino     {
1163e4b17023SJohn Marino       /* sqrt(x)  */
1164e4b17023SJohn Marino       sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1165e4b17023SJohn Marino 
1166e4b17023SJohn Marino       /* sqrt(sqrt(x))  */
1167e4b17023SJohn Marino       return build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
1168e4b17023SJohn Marino     }
1169e4b17023SJohn Marino 
1170e4b17023SJohn Marino   /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
1171e4b17023SJohn Marino      optimizing for space.  Don't do this optimization if we don't have
1172e4b17023SJohn Marino      a hardware sqrt insn.  */
1173e4b17023SJohn Marino   real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
1174e4b17023SJohn Marino   SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
1175e4b17023SJohn Marino 
1176e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1177e4b17023SJohn Marino       && sqrtfn
1178e4b17023SJohn Marino       && optimize_function_for_speed_p (cfun)
1179e4b17023SJohn Marino       && REAL_VALUES_EQUAL (c, dconst3_4)
1180e4b17023SJohn Marino       && hw_sqrt_exists)
1181e4b17023SJohn Marino     {
1182e4b17023SJohn Marino       /* sqrt(x)  */
1183e4b17023SJohn Marino       sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1184e4b17023SJohn Marino 
1185e4b17023SJohn Marino       /* sqrt(sqrt(x))  */
1186e4b17023SJohn Marino       sqrt_sqrt = build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
1187e4b17023SJohn Marino 
1188e4b17023SJohn Marino       /* sqrt(x) * sqrt(sqrt(x))  */
1189e4b17023SJohn Marino       return build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1190e4b17023SJohn Marino 				     sqrt_arg0, sqrt_sqrt);
1191e4b17023SJohn Marino     }
1192e4b17023SJohn Marino 
1193e4b17023SJohn Marino   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1194e4b17023SJohn Marino      optimizations since 1./3. is not exactly representable.  If x
1195e4b17023SJohn Marino      is negative and finite, the correct value of pow(x,1./3.) is
1196e4b17023SJohn Marino      a NaN with the "invalid" exception raised, because the value
1197e4b17023SJohn Marino      of 1./3. actually has an even denominator.  The correct value
1198e4b17023SJohn Marino      of cbrt(x) is a negative real value.  */
1199e4b17023SJohn Marino   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1200e4b17023SJohn Marino   dconst1_3 = real_value_truncate (mode, dconst_third ());
1201e4b17023SJohn Marino 
1202e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1203e4b17023SJohn Marino       && cbrtfn
1204e4b17023SJohn Marino       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1205e4b17023SJohn Marino       && REAL_VALUES_EQUAL (c, dconst1_3))
1206e4b17023SJohn Marino     return build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
1207e4b17023SJohn Marino 
1208e4b17023SJohn Marino   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1209e4b17023SJohn Marino      if we don't have a hardware sqrt insn.  */
1210e4b17023SJohn Marino   dconst1_6 = dconst1_3;
1211e4b17023SJohn Marino   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1212e4b17023SJohn Marino 
1213e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1214e4b17023SJohn Marino       && sqrtfn
1215e4b17023SJohn Marino       && cbrtfn
1216e4b17023SJohn Marino       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1217e4b17023SJohn Marino       && optimize_function_for_speed_p (cfun)
1218e4b17023SJohn Marino       && hw_sqrt_exists
1219e4b17023SJohn Marino       && REAL_VALUES_EQUAL (c, dconst1_6))
1220e4b17023SJohn Marino     {
1221e4b17023SJohn Marino       /* sqrt(x)  */
1222e4b17023SJohn Marino       sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1223e4b17023SJohn Marino 
1224e4b17023SJohn Marino       /* cbrt(sqrt(x))  */
1225e4b17023SJohn Marino       return build_and_insert_call (gsi, loc, &target, cbrtfn, sqrt_arg0);
1226e4b17023SJohn Marino     }
1227e4b17023SJohn Marino 
1228*5ce9237cSJohn Marino   /* Optimize pow(x,c), where n = 2c for some nonzero integer n
1229*5ce9237cSJohn Marino      and c not an integer, into
1230e4b17023SJohn Marino 
1231e4b17023SJohn Marino        sqrt(x) * powi(x, n/2),                n > 0;
1232e4b17023SJohn Marino        1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
1233e4b17023SJohn Marino 
1234e4b17023SJohn Marino      Do not calculate the powi factor when n/2 = 0.  */
1235e4b17023SJohn Marino   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1236e4b17023SJohn Marino   n = real_to_integer (&c2);
1237e4b17023SJohn Marino   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1238*5ce9237cSJohn Marino   c2_is_int = real_identical (&c2, &cint);
1239e4b17023SJohn Marino 
1240e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1241e4b17023SJohn Marino       && sqrtfn
1242*5ce9237cSJohn Marino       && c2_is_int
1243*5ce9237cSJohn Marino       && !c_is_int
1244*5ce9237cSJohn Marino       && optimize_function_for_speed_p (cfun))
1245e4b17023SJohn Marino     {
1246e4b17023SJohn Marino       tree powi_x_ndiv2 = NULL_TREE;
1247e4b17023SJohn Marino 
1248e4b17023SJohn Marino       /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
1249e4b17023SJohn Marino          possible or profitable, give up.  Skip the degenerate case when
1250e4b17023SJohn Marino          n is 1 or -1, where the result is always 1.  */
1251e4b17023SJohn Marino       if (absu_hwi (n) != 1)
1252e4b17023SJohn Marino 	{
1253e4b17023SJohn Marino 	  powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
1254e4b17023SJohn Marino 						     abs_hwi (n / 2));
1255e4b17023SJohn Marino 	  if (!powi_x_ndiv2)
1256e4b17023SJohn Marino 	    return NULL_TREE;
1257e4b17023SJohn Marino 	}
1258e4b17023SJohn Marino 
1259e4b17023SJohn Marino       /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
1260e4b17023SJohn Marino 	 result of the optimal multiply sequence just calculated.  */
1261e4b17023SJohn Marino       sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1262e4b17023SJohn Marino 
1263e4b17023SJohn Marino       if (absu_hwi (n) == 1)
1264e4b17023SJohn Marino 	result = sqrt_arg0;
1265e4b17023SJohn Marino       else
1266e4b17023SJohn Marino 	result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1267e4b17023SJohn Marino 					 sqrt_arg0, powi_x_ndiv2);
1268e4b17023SJohn Marino 
1269e4b17023SJohn Marino       /* If n is negative, reciprocate the result.  */
1270e4b17023SJohn Marino       if (n < 0)
1271e4b17023SJohn Marino 	result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
1272e4b17023SJohn Marino 					 build_real (type, dconst1), result);
1273e4b17023SJohn Marino       return result;
1274e4b17023SJohn Marino     }
1275e4b17023SJohn Marino 
1276e4b17023SJohn Marino   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1277e4b17023SJohn Marino 
1278e4b17023SJohn Marino      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1279e4b17023SJohn Marino      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1280e4b17023SJohn Marino 
1281e4b17023SJohn Marino      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1282e4b17023SJohn Marino      different from pow(x, 1./3.) due to rounding and behavior with
1283e4b17023SJohn Marino      negative x, we need to constrain this transformation to unsafe
1284e4b17023SJohn Marino      math and positive x or finite math.  */
1285e4b17023SJohn Marino   real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
1286e4b17023SJohn Marino   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1287e4b17023SJohn Marino   real_round (&c2, mode, &c2);
1288e4b17023SJohn Marino   n = real_to_integer (&c2);
1289e4b17023SJohn Marino   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1290e4b17023SJohn Marino   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1291e4b17023SJohn Marino   real_convert (&c2, mode, &c2);
1292e4b17023SJohn Marino 
1293e4b17023SJohn Marino   if (flag_unsafe_math_optimizations
1294e4b17023SJohn Marino       && cbrtfn
1295e4b17023SJohn Marino       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1296e4b17023SJohn Marino       && real_identical (&c2, &c)
1297*5ce9237cSJohn Marino       && !c2_is_int
1298e4b17023SJohn Marino       && optimize_function_for_speed_p (cfun)
1299e4b17023SJohn Marino       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1300e4b17023SJohn Marino     {
1301e4b17023SJohn Marino       tree powi_x_ndiv3 = NULL_TREE;
1302e4b17023SJohn Marino 
1303e4b17023SJohn Marino       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1304e4b17023SJohn Marino          possible or profitable, give up.  Skip the degenerate case when
1305e4b17023SJohn Marino          abs(n) < 3, where the result is always 1.  */
1306e4b17023SJohn Marino       if (absu_hwi (n) >= 3)
1307e4b17023SJohn Marino 	{
1308e4b17023SJohn Marino 	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1309e4b17023SJohn Marino 						     abs_hwi (n / 3));
1310e4b17023SJohn Marino 	  if (!powi_x_ndiv3)
1311e4b17023SJohn Marino 	    return NULL_TREE;
1312e4b17023SJohn Marino 	}
1313e4b17023SJohn Marino 
1314e4b17023SJohn Marino       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1315e4b17023SJohn Marino          as that creates an unnecessary variable.  Instead, just produce
1316e4b17023SJohn Marino          either cbrt(x) or cbrt(x) * cbrt(x).  */
1317e4b17023SJohn Marino       cbrt_x = build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
1318e4b17023SJohn Marino 
1319e4b17023SJohn Marino       if (absu_hwi (n) % 3 == 1)
1320e4b17023SJohn Marino 	powi_cbrt_x = cbrt_x;
1321e4b17023SJohn Marino       else
1322e4b17023SJohn Marino 	powi_cbrt_x = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1323e4b17023SJohn Marino 					      cbrt_x, cbrt_x);
1324e4b17023SJohn Marino 
1325e4b17023SJohn Marino       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1326e4b17023SJohn Marino       if (absu_hwi (n) < 3)
1327e4b17023SJohn Marino 	result = powi_cbrt_x;
1328e4b17023SJohn Marino       else
1329e4b17023SJohn Marino 	result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1330e4b17023SJohn Marino 					 powi_x_ndiv3, powi_cbrt_x);
1331e4b17023SJohn Marino 
1332e4b17023SJohn Marino       /* If n is negative, reciprocate the result.  */
1333e4b17023SJohn Marino       if (n < 0)
1334e4b17023SJohn Marino 	result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
1335e4b17023SJohn Marino 					 build_real (type, dconst1), result);
1336e4b17023SJohn Marino 
1337e4b17023SJohn Marino       return result;
1338e4b17023SJohn Marino     }
1339e4b17023SJohn Marino 
1340e4b17023SJohn Marino   /* No optimizations succeeded.  */
1341e4b17023SJohn Marino   return NULL_TREE;
1342e4b17023SJohn Marino }
1343e4b17023SJohn Marino 
1344e4b17023SJohn Marino /* ARG is the argument to a cabs builtin call in GSI with location info
1345e4b17023SJohn Marino    LOC.  Create a sequence of statements prior to GSI that calculates
1346e4b17023SJohn Marino    sqrt(R*R + I*I), where R and I are the real and imaginary components
1347e4b17023SJohn Marino    of ARG, respectively.  Return an expression holding the result.  */
1348e4b17023SJohn Marino 
1349e4b17023SJohn Marino static tree
gimple_expand_builtin_cabs(gimple_stmt_iterator * gsi,location_t loc,tree arg)1350e4b17023SJohn Marino gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1351e4b17023SJohn Marino {
1352e4b17023SJohn Marino   tree target, real_part, imag_part, addend1, addend2, sum, result;
1353e4b17023SJohn Marino   tree type = TREE_TYPE (TREE_TYPE (arg));
1354e4b17023SJohn Marino   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1355e4b17023SJohn Marino   enum machine_mode mode = TYPE_MODE (type);
1356e4b17023SJohn Marino 
1357e4b17023SJohn Marino   if (!flag_unsafe_math_optimizations
1358e4b17023SJohn Marino       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1359e4b17023SJohn Marino       || !sqrtfn
1360e4b17023SJohn Marino       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1361e4b17023SJohn Marino     return NULL_TREE;
1362e4b17023SJohn Marino 
1363e4b17023SJohn Marino   target = create_tmp_reg (type, "cabs");
1364e4b17023SJohn Marino   add_referenced_var (target);
1365e4b17023SJohn Marino 
1366e4b17023SJohn Marino   real_part = build_and_insert_ref (gsi, loc, type, target,
1367e4b17023SJohn Marino 				    REALPART_EXPR, arg);
1368e4b17023SJohn Marino   addend1 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1369e4b17023SJohn Marino 				    real_part, real_part);
1370e4b17023SJohn Marino   imag_part = build_and_insert_ref (gsi, loc, type, target,
1371e4b17023SJohn Marino 				    IMAGPART_EXPR, arg);
1372e4b17023SJohn Marino   addend2 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1373e4b17023SJohn Marino 				    imag_part, imag_part);
1374e4b17023SJohn Marino   sum = build_and_insert_binop (gsi, loc, target, PLUS_EXPR, addend1, addend2);
1375e4b17023SJohn Marino   result = build_and_insert_call (gsi, loc, &target, sqrtfn, sum);
1376e4b17023SJohn Marino 
1377e4b17023SJohn Marino   return result;
1378e4b17023SJohn Marino }
1379e4b17023SJohn Marino 
1380e4b17023SJohn Marino /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1381e4b17023SJohn Marino    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1382e4b17023SJohn Marino    an optimal number of multiplies, when n is a constant.  */
1383e4b17023SJohn Marino 
1384e4b17023SJohn Marino static unsigned int
execute_cse_sincos(void)1385e4b17023SJohn Marino execute_cse_sincos (void)
1386e4b17023SJohn Marino {
1387e4b17023SJohn Marino   basic_block bb;
1388e4b17023SJohn Marino   bool cfg_changed = false;
1389e4b17023SJohn Marino 
1390e4b17023SJohn Marino   calculate_dominance_info (CDI_DOMINATORS);
1391e4b17023SJohn Marino   memset (&sincos_stats, 0, sizeof (sincos_stats));
1392e4b17023SJohn Marino 
1393e4b17023SJohn Marino   FOR_EACH_BB (bb)
1394e4b17023SJohn Marino     {
1395e4b17023SJohn Marino       gimple_stmt_iterator gsi;
1396*5ce9237cSJohn Marino       bool cleanup_eh = false;
1397e4b17023SJohn Marino 
1398e4b17023SJohn Marino       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1399e4b17023SJohn Marino         {
1400e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (gsi);
1401e4b17023SJohn Marino 	  tree fndecl;
1402e4b17023SJohn Marino 
1403*5ce9237cSJohn Marino 	  /* Only the last stmt in a bb could throw, no need to call
1404*5ce9237cSJohn Marino 	     gimple_purge_dead_eh_edges if we change something in the middle
1405*5ce9237cSJohn Marino 	     of a basic block.  */
1406*5ce9237cSJohn Marino 	  cleanup_eh = false;
1407*5ce9237cSJohn Marino 
1408e4b17023SJohn Marino 	  if (is_gimple_call (stmt)
1409e4b17023SJohn Marino 	      && gimple_call_lhs (stmt)
1410e4b17023SJohn Marino 	      && (fndecl = gimple_call_fndecl (stmt))
1411e4b17023SJohn Marino 	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1412e4b17023SJohn Marino 	    {
1413e4b17023SJohn Marino 	      tree arg, arg0, arg1, result;
1414e4b17023SJohn Marino 	      HOST_WIDE_INT n;
1415e4b17023SJohn Marino 	      location_t loc;
1416e4b17023SJohn Marino 
1417e4b17023SJohn Marino 	      switch (DECL_FUNCTION_CODE (fndecl))
1418e4b17023SJohn Marino 		{
1419e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_COS):
1420e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_SIN):
1421e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_CEXPI):
1422e4b17023SJohn Marino 		  /* Make sure we have either sincos or cexp.  */
1423e4b17023SJohn Marino 		  if (!TARGET_HAS_SINCOS && !TARGET_C99_FUNCTIONS)
1424e4b17023SJohn Marino 		    break;
1425e4b17023SJohn Marino 
1426e4b17023SJohn Marino 		  arg = gimple_call_arg (stmt, 0);
1427e4b17023SJohn Marino 		  if (TREE_CODE (arg) == SSA_NAME)
1428e4b17023SJohn Marino 		    cfg_changed |= execute_cse_sincos_1 (arg);
1429e4b17023SJohn Marino 		  break;
1430e4b17023SJohn Marino 
1431e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_POW):
1432e4b17023SJohn Marino 		  arg0 = gimple_call_arg (stmt, 0);
1433e4b17023SJohn Marino 		  arg1 = gimple_call_arg (stmt, 1);
1434e4b17023SJohn Marino 
1435e4b17023SJohn Marino 		  loc = gimple_location (stmt);
1436e4b17023SJohn Marino 		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1437e4b17023SJohn Marino 
1438e4b17023SJohn Marino 		  if (result)
1439e4b17023SJohn Marino 		    {
1440e4b17023SJohn Marino 		      tree lhs = gimple_get_lhs (stmt);
1441e4b17023SJohn Marino 		      gimple new_stmt = gimple_build_assign (lhs, result);
1442e4b17023SJohn Marino 		      gimple_set_location (new_stmt, loc);
1443e4b17023SJohn Marino 		      unlink_stmt_vdef (stmt);
1444e4b17023SJohn Marino 		      gsi_replace (&gsi, new_stmt, true);
1445*5ce9237cSJohn Marino 		      cleanup_eh = true;
1446e4b17023SJohn Marino 		    }
1447e4b17023SJohn Marino 		  break;
1448e4b17023SJohn Marino 
1449e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_POWI):
1450e4b17023SJohn Marino 		  arg0 = gimple_call_arg (stmt, 0);
1451e4b17023SJohn Marino 		  arg1 = gimple_call_arg (stmt, 1);
1452e4b17023SJohn Marino 		  if (!host_integerp (arg1, 0))
1453e4b17023SJohn Marino 		    break;
1454e4b17023SJohn Marino 
1455e4b17023SJohn Marino 		  n = TREE_INT_CST_LOW (arg1);
1456e4b17023SJohn Marino 		  loc = gimple_location (stmt);
1457e4b17023SJohn Marino 		  result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1458e4b17023SJohn Marino 
1459e4b17023SJohn Marino 		  if (result)
1460e4b17023SJohn Marino 		    {
1461e4b17023SJohn Marino 		      tree lhs = gimple_get_lhs (stmt);
1462e4b17023SJohn Marino 		      gimple new_stmt = gimple_build_assign (lhs, result);
1463e4b17023SJohn Marino 		      gimple_set_location (new_stmt, loc);
1464e4b17023SJohn Marino 		      unlink_stmt_vdef (stmt);
1465e4b17023SJohn Marino 		      gsi_replace (&gsi, new_stmt, true);
1466*5ce9237cSJohn Marino 		      cleanup_eh = true;
1467e4b17023SJohn Marino 		    }
1468e4b17023SJohn Marino 		  break;
1469e4b17023SJohn Marino 
1470e4b17023SJohn Marino 		CASE_FLT_FN (BUILT_IN_CABS):
1471e4b17023SJohn Marino 		  arg0 = gimple_call_arg (stmt, 0);
1472e4b17023SJohn Marino 		  loc = gimple_location (stmt);
1473e4b17023SJohn Marino 		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1474e4b17023SJohn Marino 
1475e4b17023SJohn Marino 		  if (result)
1476e4b17023SJohn Marino 		    {
1477e4b17023SJohn Marino 		      tree lhs = gimple_get_lhs (stmt);
1478e4b17023SJohn Marino 		      gimple new_stmt = gimple_build_assign (lhs, result);
1479e4b17023SJohn Marino 		      gimple_set_location (new_stmt, loc);
1480e4b17023SJohn Marino 		      unlink_stmt_vdef (stmt);
1481e4b17023SJohn Marino 		      gsi_replace (&gsi, new_stmt, true);
1482*5ce9237cSJohn Marino 		      cleanup_eh = true;
1483e4b17023SJohn Marino 		    }
1484e4b17023SJohn Marino 		  break;
1485e4b17023SJohn Marino 
1486e4b17023SJohn Marino 		default:;
1487e4b17023SJohn Marino 		}
1488e4b17023SJohn Marino 	    }
1489e4b17023SJohn Marino 	}
1490*5ce9237cSJohn Marino       if (cleanup_eh)
1491*5ce9237cSJohn Marino 	cfg_changed |= gimple_purge_dead_eh_edges (bb);
1492e4b17023SJohn Marino     }
1493e4b17023SJohn Marino 
1494e4b17023SJohn Marino   statistics_counter_event (cfun, "sincos statements inserted",
1495e4b17023SJohn Marino 			    sincos_stats.inserted);
1496e4b17023SJohn Marino 
1497e4b17023SJohn Marino   free_dominance_info (CDI_DOMINATORS);
1498e4b17023SJohn Marino   return cfg_changed ? TODO_cleanup_cfg : 0;
1499e4b17023SJohn Marino }
1500e4b17023SJohn Marino 
1501e4b17023SJohn Marino static bool
gate_cse_sincos(void)1502e4b17023SJohn Marino gate_cse_sincos (void)
1503e4b17023SJohn Marino {
1504e4b17023SJohn Marino   /* We no longer require either sincos or cexp, since powi expansion
1505e4b17023SJohn Marino      piggybacks on this pass.  */
1506e4b17023SJohn Marino   return optimize;
1507e4b17023SJohn Marino }
1508e4b17023SJohn Marino 
1509e4b17023SJohn Marino struct gimple_opt_pass pass_cse_sincos =
1510e4b17023SJohn Marino {
1511e4b17023SJohn Marino  {
1512e4b17023SJohn Marino   GIMPLE_PASS,
1513e4b17023SJohn Marino   "sincos",				/* name */
1514e4b17023SJohn Marino   gate_cse_sincos,			/* gate */
1515e4b17023SJohn Marino   execute_cse_sincos,			/* execute */
1516e4b17023SJohn Marino   NULL,					/* sub */
1517e4b17023SJohn Marino   NULL,					/* next */
1518e4b17023SJohn Marino   0,					/* static_pass_number */
1519e4b17023SJohn Marino   TV_NONE,				/* tv_id */
1520e4b17023SJohn Marino   PROP_ssa,				/* properties_required */
1521e4b17023SJohn Marino   0,					/* properties_provided */
1522e4b17023SJohn Marino   0,					/* properties_destroyed */
1523e4b17023SJohn Marino   0,					/* todo_flags_start */
1524e4b17023SJohn Marino   TODO_update_ssa | TODO_verify_ssa
1525e4b17023SJohn Marino     | TODO_verify_stmts                 /* todo_flags_finish */
1526e4b17023SJohn Marino  }
1527e4b17023SJohn Marino };
1528e4b17023SJohn Marino 
1529e4b17023SJohn Marino /* A symbolic number is used to detect byte permutation and selection
1530e4b17023SJohn Marino    patterns.  Therefore the field N contains an artificial number
1531e4b17023SJohn Marino    consisting of byte size markers:
1532e4b17023SJohn Marino 
1533e4b17023SJohn Marino    0    - byte has the value 0
1534e4b17023SJohn Marino    1..size - byte contains the content of the byte
1535e4b17023SJohn Marino    number indexed with that value minus one  */
1536e4b17023SJohn Marino 
1537e4b17023SJohn Marino struct symbolic_number {
1538e4b17023SJohn Marino   unsigned HOST_WIDEST_INT n;
1539e4b17023SJohn Marino   int size;
1540e4b17023SJohn Marino };
1541e4b17023SJohn Marino 
1542e4b17023SJohn Marino /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1543e4b17023SJohn Marino    number N.  Return false if the requested operation is not permitted
1544e4b17023SJohn Marino    on a symbolic number.  */
1545e4b17023SJohn Marino 
1546e4b17023SJohn Marino static inline bool
do_shift_rotate(enum tree_code code,struct symbolic_number * n,int count)1547e4b17023SJohn Marino do_shift_rotate (enum tree_code code,
1548e4b17023SJohn Marino 		 struct symbolic_number *n,
1549e4b17023SJohn Marino 		 int count)
1550e4b17023SJohn Marino {
1551e4b17023SJohn Marino   if (count % 8 != 0)
1552e4b17023SJohn Marino     return false;
1553e4b17023SJohn Marino 
1554e4b17023SJohn Marino   /* Zero out the extra bits of N in order to avoid them being shifted
1555e4b17023SJohn Marino      into the significant bits.  */
1556e4b17023SJohn Marino   if (n->size < (int)sizeof (HOST_WIDEST_INT))
1557e4b17023SJohn Marino     n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1558e4b17023SJohn Marino 
1559e4b17023SJohn Marino   switch (code)
1560e4b17023SJohn Marino     {
1561e4b17023SJohn Marino     case LSHIFT_EXPR:
1562e4b17023SJohn Marino       n->n <<= count;
1563e4b17023SJohn Marino       break;
1564e4b17023SJohn Marino     case RSHIFT_EXPR:
1565e4b17023SJohn Marino       n->n >>= count;
1566e4b17023SJohn Marino       break;
1567e4b17023SJohn Marino     case LROTATE_EXPR:
1568e4b17023SJohn Marino       n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
1569e4b17023SJohn Marino       break;
1570e4b17023SJohn Marino     case RROTATE_EXPR:
1571e4b17023SJohn Marino       n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
1572e4b17023SJohn Marino       break;
1573e4b17023SJohn Marino     default:
1574e4b17023SJohn Marino       return false;
1575e4b17023SJohn Marino     }
1576e4b17023SJohn Marino   /* Zero unused bits for size.  */
1577e4b17023SJohn Marino   if (n->size < (int)sizeof (HOST_WIDEST_INT))
1578e4b17023SJohn Marino     n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1579e4b17023SJohn Marino   return true;
1580e4b17023SJohn Marino }
1581e4b17023SJohn Marino 
1582e4b17023SJohn Marino /* Perform sanity checking for the symbolic number N and the gimple
1583e4b17023SJohn Marino    statement STMT.  */
1584e4b17023SJohn Marino 
1585e4b17023SJohn Marino static inline bool
verify_symbolic_number_p(struct symbolic_number * n,gimple stmt)1586e4b17023SJohn Marino verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
1587e4b17023SJohn Marino {
1588e4b17023SJohn Marino   tree lhs_type;
1589e4b17023SJohn Marino 
1590e4b17023SJohn Marino   lhs_type = gimple_expr_type (stmt);
1591e4b17023SJohn Marino 
1592e4b17023SJohn Marino   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
1593e4b17023SJohn Marino     return false;
1594e4b17023SJohn Marino 
1595e4b17023SJohn Marino   if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
1596e4b17023SJohn Marino     return false;
1597e4b17023SJohn Marino 
1598e4b17023SJohn Marino   return true;
1599e4b17023SJohn Marino }
1600e4b17023SJohn Marino 
1601e4b17023SJohn Marino /* find_bswap_1 invokes itself recursively with N and tries to perform
1602e4b17023SJohn Marino    the operation given by the rhs of STMT on the result.  If the
1603e4b17023SJohn Marino    operation could successfully be executed the function returns the
1604e4b17023SJohn Marino    tree expression of the source operand and NULL otherwise.  */
1605e4b17023SJohn Marino 
1606e4b17023SJohn Marino static tree
find_bswap_1(gimple stmt,struct symbolic_number * n,int limit)1607e4b17023SJohn Marino find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
1608e4b17023SJohn Marino {
1609e4b17023SJohn Marino   enum tree_code code;
1610e4b17023SJohn Marino   tree rhs1, rhs2 = NULL;
1611e4b17023SJohn Marino   gimple rhs1_stmt, rhs2_stmt;
1612e4b17023SJohn Marino   tree source_expr1;
1613e4b17023SJohn Marino   enum gimple_rhs_class rhs_class;
1614e4b17023SJohn Marino 
1615e4b17023SJohn Marino   if (!limit || !is_gimple_assign (stmt))
1616e4b17023SJohn Marino     return NULL_TREE;
1617e4b17023SJohn Marino 
1618e4b17023SJohn Marino   rhs1 = gimple_assign_rhs1 (stmt);
1619e4b17023SJohn Marino 
1620e4b17023SJohn Marino   if (TREE_CODE (rhs1) != SSA_NAME)
1621e4b17023SJohn Marino     return NULL_TREE;
1622e4b17023SJohn Marino 
1623e4b17023SJohn Marino   code = gimple_assign_rhs_code (stmt);
1624e4b17023SJohn Marino   rhs_class = gimple_assign_rhs_class (stmt);
1625e4b17023SJohn Marino   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1626e4b17023SJohn Marino 
1627e4b17023SJohn Marino   if (rhs_class == GIMPLE_BINARY_RHS)
1628e4b17023SJohn Marino     rhs2 = gimple_assign_rhs2 (stmt);
1629e4b17023SJohn Marino 
1630e4b17023SJohn Marino   /* Handle unary rhs and binary rhs with integer constants as second
1631e4b17023SJohn Marino      operand.  */
1632e4b17023SJohn Marino 
1633e4b17023SJohn Marino   if (rhs_class == GIMPLE_UNARY_RHS
1634e4b17023SJohn Marino       || (rhs_class == GIMPLE_BINARY_RHS
1635e4b17023SJohn Marino 	  && TREE_CODE (rhs2) == INTEGER_CST))
1636e4b17023SJohn Marino     {
1637e4b17023SJohn Marino       if (code != BIT_AND_EXPR
1638e4b17023SJohn Marino 	  && code != LSHIFT_EXPR
1639e4b17023SJohn Marino 	  && code != RSHIFT_EXPR
1640e4b17023SJohn Marino 	  && code != LROTATE_EXPR
1641e4b17023SJohn Marino 	  && code != RROTATE_EXPR
1642e4b17023SJohn Marino 	  && code != NOP_EXPR
1643e4b17023SJohn Marino 	  && code != CONVERT_EXPR)
1644e4b17023SJohn Marino 	return NULL_TREE;
1645e4b17023SJohn Marino 
1646e4b17023SJohn Marino       source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
1647e4b17023SJohn Marino 
1648e4b17023SJohn Marino       /* If find_bswap_1 returned NULL STMT is a leaf node and we have
1649e4b17023SJohn Marino 	 to initialize the symbolic number.  */
1650e4b17023SJohn Marino       if (!source_expr1)
1651e4b17023SJohn Marino 	{
1652e4b17023SJohn Marino 	  /* Set up the symbolic number N by setting each byte to a
1653e4b17023SJohn Marino 	     value between 1 and the byte size of rhs1.  The highest
1654e4b17023SJohn Marino 	     order byte is set to n->size and the lowest order
1655e4b17023SJohn Marino 	     byte to 1.  */
1656e4b17023SJohn Marino 	  n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
1657e4b17023SJohn Marino 	  if (n->size % BITS_PER_UNIT != 0)
1658e4b17023SJohn Marino 	    return NULL_TREE;
1659e4b17023SJohn Marino 	  n->size /= BITS_PER_UNIT;
1660e4b17023SJohn Marino 	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1661e4b17023SJohn Marino 		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
1662e4b17023SJohn Marino 
1663e4b17023SJohn Marino 	  if (n->size < (int)sizeof (HOST_WIDEST_INT))
1664e4b17023SJohn Marino 	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
1665e4b17023SJohn Marino 		     (n->size * BITS_PER_UNIT)) - 1;
1666e4b17023SJohn Marino 
1667e4b17023SJohn Marino 	  source_expr1 = rhs1;
1668e4b17023SJohn Marino 	}
1669e4b17023SJohn Marino 
1670e4b17023SJohn Marino       switch (code)
1671e4b17023SJohn Marino 	{
1672e4b17023SJohn Marino 	case BIT_AND_EXPR:
1673e4b17023SJohn Marino 	  {
1674e4b17023SJohn Marino 	    int i;
1675e4b17023SJohn Marino 	    unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
1676e4b17023SJohn Marino 	    unsigned HOST_WIDEST_INT tmp = val;
1677e4b17023SJohn Marino 
1678e4b17023SJohn Marino 	    /* Only constants masking full bytes are allowed.  */
1679e4b17023SJohn Marino 	    for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
1680e4b17023SJohn Marino 	      if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
1681e4b17023SJohn Marino 		return NULL_TREE;
1682e4b17023SJohn Marino 
1683e4b17023SJohn Marino 	    n->n &= val;
1684e4b17023SJohn Marino 	  }
1685e4b17023SJohn Marino 	  break;
1686e4b17023SJohn Marino 	case LSHIFT_EXPR:
1687e4b17023SJohn Marino 	case RSHIFT_EXPR:
1688e4b17023SJohn Marino 	case LROTATE_EXPR:
1689e4b17023SJohn Marino 	case RROTATE_EXPR:
1690e4b17023SJohn Marino 	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
1691e4b17023SJohn Marino 	    return NULL_TREE;
1692e4b17023SJohn Marino 	  break;
1693e4b17023SJohn Marino 	CASE_CONVERT:
1694e4b17023SJohn Marino 	  {
1695e4b17023SJohn Marino 	    int type_size;
1696e4b17023SJohn Marino 
1697e4b17023SJohn Marino 	    type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1698e4b17023SJohn Marino 	    if (type_size % BITS_PER_UNIT != 0)
1699e4b17023SJohn Marino 	      return NULL_TREE;
1700e4b17023SJohn Marino 
1701e4b17023SJohn Marino 	    if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1702e4b17023SJohn Marino 	      {
1703e4b17023SJohn Marino 		/* If STMT casts to a smaller type mask out the bits not
1704e4b17023SJohn Marino 		   belonging to the target type.  */
1705e4b17023SJohn Marino 		n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1706e4b17023SJohn Marino 	      }
1707e4b17023SJohn Marino 	    n->size = type_size / BITS_PER_UNIT;
1708e4b17023SJohn Marino 	  }
1709e4b17023SJohn Marino 	  break;
1710e4b17023SJohn Marino 	default:
1711e4b17023SJohn Marino 	  return NULL_TREE;
1712e4b17023SJohn Marino 	};
1713e4b17023SJohn Marino       return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1714e4b17023SJohn Marino     }
1715e4b17023SJohn Marino 
1716e4b17023SJohn Marino   /* Handle binary rhs.  */
1717e4b17023SJohn Marino 
1718e4b17023SJohn Marino   if (rhs_class == GIMPLE_BINARY_RHS)
1719e4b17023SJohn Marino     {
1720e4b17023SJohn Marino       struct symbolic_number n1, n2;
1721e4b17023SJohn Marino       tree source_expr2;
1722e4b17023SJohn Marino 
1723e4b17023SJohn Marino       if (code != BIT_IOR_EXPR)
1724e4b17023SJohn Marino 	return NULL_TREE;
1725e4b17023SJohn Marino 
1726e4b17023SJohn Marino       if (TREE_CODE (rhs2) != SSA_NAME)
1727e4b17023SJohn Marino 	return NULL_TREE;
1728e4b17023SJohn Marino 
1729e4b17023SJohn Marino       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1730e4b17023SJohn Marino 
1731e4b17023SJohn Marino       switch (code)
1732e4b17023SJohn Marino 	{
1733e4b17023SJohn Marino 	case BIT_IOR_EXPR:
1734e4b17023SJohn Marino 	  source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1735e4b17023SJohn Marino 
1736e4b17023SJohn Marino 	  if (!source_expr1)
1737e4b17023SJohn Marino 	    return NULL_TREE;
1738e4b17023SJohn Marino 
1739e4b17023SJohn Marino 	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1740e4b17023SJohn Marino 
1741e4b17023SJohn Marino 	  if (source_expr1 != source_expr2
1742e4b17023SJohn Marino 	      || n1.size != n2.size)
1743e4b17023SJohn Marino 	    return NULL_TREE;
1744e4b17023SJohn Marino 
1745e4b17023SJohn Marino 	  n->size = n1.size;
1746e4b17023SJohn Marino 	  n->n = n1.n | n2.n;
1747e4b17023SJohn Marino 
1748e4b17023SJohn Marino 	  if (!verify_symbolic_number_p (n, stmt))
1749e4b17023SJohn Marino 	    return NULL_TREE;
1750e4b17023SJohn Marino 
1751e4b17023SJohn Marino 	  break;
1752e4b17023SJohn Marino 	default:
1753e4b17023SJohn Marino 	  return NULL_TREE;
1754e4b17023SJohn Marino 	}
1755e4b17023SJohn Marino       return source_expr1;
1756e4b17023SJohn Marino     }
1757e4b17023SJohn Marino   return NULL_TREE;
1758e4b17023SJohn Marino }
1759e4b17023SJohn Marino 
1760e4b17023SJohn Marino /* Check if STMT completes a bswap implementation consisting of ORs,
1761e4b17023SJohn Marino    SHIFTs and ANDs.  Return the source tree expression on which the
1762e4b17023SJohn Marino    byte swap is performed and NULL if no bswap was found.  */
1763e4b17023SJohn Marino 
1764e4b17023SJohn Marino static tree
find_bswap(gimple stmt)1765e4b17023SJohn Marino find_bswap (gimple stmt)
1766e4b17023SJohn Marino {
1767e4b17023SJohn Marino /* The number which the find_bswap result should match in order to
1768e4b17023SJohn Marino    have a full byte swap.  The number is shifted to the left according
1769e4b17023SJohn Marino    to the size of the symbolic number before using it.  */
1770e4b17023SJohn Marino   unsigned HOST_WIDEST_INT cmp =
1771e4b17023SJohn Marino     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1772e4b17023SJohn Marino     (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1773e4b17023SJohn Marino 
1774e4b17023SJohn Marino   struct symbolic_number n;
1775e4b17023SJohn Marino   tree source_expr;
1776e4b17023SJohn Marino   int limit;
1777e4b17023SJohn Marino 
1778e4b17023SJohn Marino   /* The last parameter determines the depth search limit.  It usually
1779e4b17023SJohn Marino      correlates directly to the number of bytes to be touched.  We
1780e4b17023SJohn Marino      increase that number by three  here in order to also
1781e4b17023SJohn Marino      cover signed -> unsigned converions of the src operand as can be seen
1782e4b17023SJohn Marino      in libgcc, and for initial shift/and operation of the src operand.  */
1783e4b17023SJohn Marino   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
1784e4b17023SJohn Marino   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
1785e4b17023SJohn Marino   source_expr =  find_bswap_1 (stmt, &n, limit);
1786e4b17023SJohn Marino 
1787e4b17023SJohn Marino   if (!source_expr)
1788e4b17023SJohn Marino     return NULL_TREE;
1789e4b17023SJohn Marino 
1790e4b17023SJohn Marino   /* Zero out the extra bits of N and CMP.  */
1791e4b17023SJohn Marino   if (n.size < (int)sizeof (HOST_WIDEST_INT))
1792e4b17023SJohn Marino     {
1793e4b17023SJohn Marino       unsigned HOST_WIDEST_INT mask =
1794e4b17023SJohn Marino 	((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1795e4b17023SJohn Marino 
1796e4b17023SJohn Marino       n.n &= mask;
1797e4b17023SJohn Marino       cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1798e4b17023SJohn Marino     }
1799e4b17023SJohn Marino 
1800e4b17023SJohn Marino   /* A complete byte swap should make the symbolic number to start
1801e4b17023SJohn Marino      with the largest digit in the highest order byte.  */
1802e4b17023SJohn Marino   if (cmp != n.n)
1803e4b17023SJohn Marino     return NULL_TREE;
1804e4b17023SJohn Marino 
1805e4b17023SJohn Marino   return source_expr;
1806e4b17023SJohn Marino }
1807e4b17023SJohn Marino 
1808e4b17023SJohn Marino /* Find manual byte swap implementations and turn them into a bswap
1809e4b17023SJohn Marino    builtin invokation.  */
1810e4b17023SJohn Marino 
1811e4b17023SJohn Marino static unsigned int
execute_optimize_bswap(void)1812e4b17023SJohn Marino execute_optimize_bswap (void)
1813e4b17023SJohn Marino {
1814e4b17023SJohn Marino   basic_block bb;
1815e4b17023SJohn Marino   bool bswap32_p, bswap64_p;
1816e4b17023SJohn Marino   bool changed = false;
1817e4b17023SJohn Marino   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1818e4b17023SJohn Marino 
1819e4b17023SJohn Marino   if (BITS_PER_UNIT != 8)
1820e4b17023SJohn Marino     return 0;
1821e4b17023SJohn Marino 
1822e4b17023SJohn Marino   if (sizeof (HOST_WIDEST_INT) < 8)
1823e4b17023SJohn Marino     return 0;
1824e4b17023SJohn Marino 
1825e4b17023SJohn Marino   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1826e4b17023SJohn Marino 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1827e4b17023SJohn Marino   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1828e4b17023SJohn Marino 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1829e4b17023SJohn Marino 		   || (bswap32_p && word_mode == SImode)));
1830e4b17023SJohn Marino 
1831e4b17023SJohn Marino   if (!bswap32_p && !bswap64_p)
1832e4b17023SJohn Marino     return 0;
1833e4b17023SJohn Marino 
1834e4b17023SJohn Marino   /* Determine the argument type of the builtins.  The code later on
1835e4b17023SJohn Marino      assumes that the return and argument type are the same.  */
1836e4b17023SJohn Marino   if (bswap32_p)
1837e4b17023SJohn Marino     {
1838e4b17023SJohn Marino       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1839e4b17023SJohn Marino       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1840e4b17023SJohn Marino     }
1841e4b17023SJohn Marino 
1842e4b17023SJohn Marino   if (bswap64_p)
1843e4b17023SJohn Marino     {
1844e4b17023SJohn Marino       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1845e4b17023SJohn Marino       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1846e4b17023SJohn Marino     }
1847e4b17023SJohn Marino 
1848e4b17023SJohn Marino   memset (&bswap_stats, 0, sizeof (bswap_stats));
1849e4b17023SJohn Marino 
1850e4b17023SJohn Marino   FOR_EACH_BB (bb)
1851e4b17023SJohn Marino     {
1852e4b17023SJohn Marino       gimple_stmt_iterator gsi;
1853e4b17023SJohn Marino 
1854e4b17023SJohn Marino       /* We do a reverse scan for bswap patterns to make sure we get the
1855e4b17023SJohn Marino 	 widest match. As bswap pattern matching doesn't handle
1856e4b17023SJohn Marino 	 previously inserted smaller bswap replacements as sub-
1857e4b17023SJohn Marino 	 patterns, the wider variant wouldn't be detected.  */
1858e4b17023SJohn Marino       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1859e4b17023SJohn Marino         {
1860e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (gsi);
1861e4b17023SJohn Marino 	  tree bswap_src, bswap_type;
1862e4b17023SJohn Marino 	  tree bswap_tmp;
1863e4b17023SJohn Marino 	  tree fndecl = NULL_TREE;
1864e4b17023SJohn Marino 	  int type_size;
1865e4b17023SJohn Marino 	  gimple call;
1866e4b17023SJohn Marino 
1867e4b17023SJohn Marino 	  if (!is_gimple_assign (stmt)
1868e4b17023SJohn Marino 	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1869e4b17023SJohn Marino 	    continue;
1870e4b17023SJohn Marino 
1871e4b17023SJohn Marino 	  type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1872e4b17023SJohn Marino 
1873e4b17023SJohn Marino 	  switch (type_size)
1874e4b17023SJohn Marino 	    {
1875e4b17023SJohn Marino 	    case 32:
1876e4b17023SJohn Marino 	      if (bswap32_p)
1877e4b17023SJohn Marino 		{
1878e4b17023SJohn Marino 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1879e4b17023SJohn Marino 		  bswap_type = bswap32_type;
1880e4b17023SJohn Marino 		}
1881e4b17023SJohn Marino 	      break;
1882e4b17023SJohn Marino 	    case 64:
1883e4b17023SJohn Marino 	      if (bswap64_p)
1884e4b17023SJohn Marino 		{
1885e4b17023SJohn Marino 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1886e4b17023SJohn Marino 		  bswap_type = bswap64_type;
1887e4b17023SJohn Marino 		}
1888e4b17023SJohn Marino 	      break;
1889e4b17023SJohn Marino 	    default:
1890e4b17023SJohn Marino 	      continue;
1891e4b17023SJohn Marino 	    }
1892e4b17023SJohn Marino 
1893e4b17023SJohn Marino 	  if (!fndecl)
1894e4b17023SJohn Marino 	    continue;
1895e4b17023SJohn Marino 
1896e4b17023SJohn Marino 	  bswap_src = find_bswap (stmt);
1897e4b17023SJohn Marino 
1898e4b17023SJohn Marino 	  if (!bswap_src)
1899e4b17023SJohn Marino 	    continue;
1900e4b17023SJohn Marino 
1901e4b17023SJohn Marino 	  changed = true;
1902e4b17023SJohn Marino 	  if (type_size == 32)
1903e4b17023SJohn Marino 	    bswap_stats.found_32bit++;
1904e4b17023SJohn Marino 	  else
1905e4b17023SJohn Marino 	    bswap_stats.found_64bit++;
1906e4b17023SJohn Marino 
1907e4b17023SJohn Marino 	  bswap_tmp = bswap_src;
1908e4b17023SJohn Marino 
1909e4b17023SJohn Marino 	  /* Convert the src expression if necessary.  */
1910e4b17023SJohn Marino 	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1911e4b17023SJohn Marino 	    {
1912e4b17023SJohn Marino 	      gimple convert_stmt;
1913e4b17023SJohn Marino 
1914e4b17023SJohn Marino 	      bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1915e4b17023SJohn Marino 	      add_referenced_var (bswap_tmp);
1916e4b17023SJohn Marino 	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1917e4b17023SJohn Marino 
1918e4b17023SJohn Marino 	      convert_stmt = gimple_build_assign_with_ops (
1919e4b17023SJohn Marino 			       CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1920e4b17023SJohn Marino 	      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1921e4b17023SJohn Marino 	    }
1922e4b17023SJohn Marino 
1923e4b17023SJohn Marino 	  call = gimple_build_call (fndecl, 1, bswap_tmp);
1924e4b17023SJohn Marino 
1925e4b17023SJohn Marino 	  bswap_tmp = gimple_assign_lhs (stmt);
1926e4b17023SJohn Marino 
1927e4b17023SJohn Marino 	  /* Convert the result if necessary.  */
1928e4b17023SJohn Marino 	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1929e4b17023SJohn Marino 	    {
1930e4b17023SJohn Marino 	      gimple convert_stmt;
1931e4b17023SJohn Marino 
1932e4b17023SJohn Marino 	      bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1933e4b17023SJohn Marino 	      add_referenced_var (bswap_tmp);
1934e4b17023SJohn Marino 	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1935e4b17023SJohn Marino 	      convert_stmt = gimple_build_assign_with_ops (
1936e4b17023SJohn Marino 		               CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1937e4b17023SJohn Marino 	      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1938e4b17023SJohn Marino 	    }
1939e4b17023SJohn Marino 
1940e4b17023SJohn Marino 	  gimple_call_set_lhs (call, bswap_tmp);
1941e4b17023SJohn Marino 
1942e4b17023SJohn Marino 	  if (dump_file)
1943e4b17023SJohn Marino 	    {
1944e4b17023SJohn Marino 	      fprintf (dump_file, "%d bit bswap implementation found at: ",
1945e4b17023SJohn Marino 		       (int)type_size);
1946e4b17023SJohn Marino 	      print_gimple_stmt (dump_file, stmt, 0, 0);
1947e4b17023SJohn Marino 	    }
1948e4b17023SJohn Marino 
1949e4b17023SJohn Marino 	  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1950e4b17023SJohn Marino 	  gsi_remove (&gsi, true);
1951e4b17023SJohn Marino 	}
1952e4b17023SJohn Marino     }
1953e4b17023SJohn Marino 
1954e4b17023SJohn Marino   statistics_counter_event (cfun, "32-bit bswap implementations found",
1955e4b17023SJohn Marino 			    bswap_stats.found_32bit);
1956e4b17023SJohn Marino   statistics_counter_event (cfun, "64-bit bswap implementations found",
1957e4b17023SJohn Marino 			    bswap_stats.found_64bit);
1958e4b17023SJohn Marino 
1959e4b17023SJohn Marino   return (changed ? TODO_update_ssa | TODO_verify_ssa
1960e4b17023SJohn Marino 	  | TODO_verify_stmts : 0);
1961e4b17023SJohn Marino }
1962e4b17023SJohn Marino 
1963e4b17023SJohn Marino static bool
gate_optimize_bswap(void)1964e4b17023SJohn Marino gate_optimize_bswap (void)
1965e4b17023SJohn Marino {
1966e4b17023SJohn Marino   return flag_expensive_optimizations && optimize;
1967e4b17023SJohn Marino }
1968e4b17023SJohn Marino 
1969e4b17023SJohn Marino struct gimple_opt_pass pass_optimize_bswap =
1970e4b17023SJohn Marino {
1971e4b17023SJohn Marino  {
1972e4b17023SJohn Marino   GIMPLE_PASS,
1973e4b17023SJohn Marino   "bswap",				/* name */
1974e4b17023SJohn Marino   gate_optimize_bswap,                  /* gate */
1975e4b17023SJohn Marino   execute_optimize_bswap,		/* execute */
1976e4b17023SJohn Marino   NULL,					/* sub */
1977e4b17023SJohn Marino   NULL,					/* next */
1978e4b17023SJohn Marino   0,					/* static_pass_number */
1979e4b17023SJohn Marino   TV_NONE,				/* tv_id */
1980e4b17023SJohn Marino   PROP_ssa,				/* properties_required */
1981e4b17023SJohn Marino   0,					/* properties_provided */
1982e4b17023SJohn Marino   0,					/* properties_destroyed */
1983e4b17023SJohn Marino   0,					/* todo_flags_start */
1984e4b17023SJohn Marino   0                                     /* todo_flags_finish */
1985e4b17023SJohn Marino  }
1986e4b17023SJohn Marino };
1987e4b17023SJohn Marino 
1988e4b17023SJohn Marino /* Return true if RHS is a suitable operand for a widening multiplication,
1989e4b17023SJohn Marino    assuming a target type of TYPE.
1990e4b17023SJohn Marino    There are two cases:
1991e4b17023SJohn Marino 
1992e4b17023SJohn Marino      - RHS makes some value at least twice as wide.  Store that value
1993e4b17023SJohn Marino        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
1994e4b17023SJohn Marino 
1995e4b17023SJohn Marino      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
1996e4b17023SJohn Marino        but leave *TYPE_OUT untouched.  */
1997e4b17023SJohn Marino 
1998e4b17023SJohn Marino static bool
is_widening_mult_rhs_p(tree type,tree rhs,tree * type_out,tree * new_rhs_out)1999e4b17023SJohn Marino is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2000e4b17023SJohn Marino 			tree *new_rhs_out)
2001e4b17023SJohn Marino {
2002e4b17023SJohn Marino   gimple stmt;
2003e4b17023SJohn Marino   tree type1, rhs1;
2004e4b17023SJohn Marino   enum tree_code rhs_code;
2005e4b17023SJohn Marino 
2006e4b17023SJohn Marino   if (TREE_CODE (rhs) == SSA_NAME)
2007e4b17023SJohn Marino     {
2008e4b17023SJohn Marino       stmt = SSA_NAME_DEF_STMT (rhs);
2009e4b17023SJohn Marino       if (is_gimple_assign (stmt))
2010e4b17023SJohn Marino 	{
2011e4b17023SJohn Marino 	  rhs_code = gimple_assign_rhs_code (stmt);
2012e4b17023SJohn Marino 	  if (TREE_CODE (type) == INTEGER_TYPE
2013e4b17023SJohn Marino 	      ? !CONVERT_EXPR_CODE_P (rhs_code)
2014e4b17023SJohn Marino 	      : rhs_code != FIXED_CONVERT_EXPR)
2015e4b17023SJohn Marino 	    rhs1 = rhs;
2016e4b17023SJohn Marino 	  else
2017e4b17023SJohn Marino 	    {
2018e4b17023SJohn Marino 	      rhs1 = gimple_assign_rhs1 (stmt);
2019e4b17023SJohn Marino 
2020e4b17023SJohn Marino 	      if (TREE_CODE (rhs1) == INTEGER_CST)
2021e4b17023SJohn Marino 		{
2022e4b17023SJohn Marino 		  *new_rhs_out = rhs1;
2023e4b17023SJohn Marino 		  *type_out = NULL;
2024e4b17023SJohn Marino 		  return true;
2025e4b17023SJohn Marino 		}
2026e4b17023SJohn Marino 	    }
2027e4b17023SJohn Marino 	}
2028e4b17023SJohn Marino       else
2029e4b17023SJohn Marino 	rhs1 = rhs;
2030e4b17023SJohn Marino 
2031e4b17023SJohn Marino       type1 = TREE_TYPE (rhs1);
2032e4b17023SJohn Marino 
2033e4b17023SJohn Marino       if (TREE_CODE (type1) != TREE_CODE (type)
2034e4b17023SJohn Marino 	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2035e4b17023SJohn Marino 	return false;
2036e4b17023SJohn Marino 
2037e4b17023SJohn Marino       *new_rhs_out = rhs1;
2038e4b17023SJohn Marino       *type_out = type1;
2039e4b17023SJohn Marino       return true;
2040e4b17023SJohn Marino     }
2041e4b17023SJohn Marino 
2042e4b17023SJohn Marino   if (TREE_CODE (rhs) == INTEGER_CST)
2043e4b17023SJohn Marino     {
2044e4b17023SJohn Marino       *new_rhs_out = rhs;
2045e4b17023SJohn Marino       *type_out = NULL;
2046e4b17023SJohn Marino       return true;
2047e4b17023SJohn Marino     }
2048e4b17023SJohn Marino 
2049e4b17023SJohn Marino   return false;
2050e4b17023SJohn Marino }
2051e4b17023SJohn Marino 
2052e4b17023SJohn Marino /* Return true if STMT performs a widening multiplication, assuming the
2053e4b17023SJohn Marino    output type is TYPE.  If so, store the unwidened types of the operands
2054e4b17023SJohn Marino    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2055e4b17023SJohn Marino    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2056e4b17023SJohn Marino    and *TYPE2_OUT would give the operands of the multiplication.  */
2057e4b17023SJohn Marino 
2058e4b17023SJohn Marino static bool
is_widening_mult_p(gimple stmt,tree * type1_out,tree * rhs1_out,tree * type2_out,tree * rhs2_out)2059e4b17023SJohn Marino is_widening_mult_p (gimple stmt,
2060e4b17023SJohn Marino 		    tree *type1_out, tree *rhs1_out,
2061e4b17023SJohn Marino 		    tree *type2_out, tree *rhs2_out)
2062e4b17023SJohn Marino {
2063e4b17023SJohn Marino   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2064e4b17023SJohn Marino 
2065e4b17023SJohn Marino   if (TREE_CODE (type) != INTEGER_TYPE
2066e4b17023SJohn Marino       && TREE_CODE (type) != FIXED_POINT_TYPE)
2067e4b17023SJohn Marino     return false;
2068e4b17023SJohn Marino 
2069e4b17023SJohn Marino   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2070e4b17023SJohn Marino 			       rhs1_out))
2071e4b17023SJohn Marino     return false;
2072e4b17023SJohn Marino 
2073e4b17023SJohn Marino   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2074e4b17023SJohn Marino 			       rhs2_out))
2075e4b17023SJohn Marino     return false;
2076e4b17023SJohn Marino 
2077e4b17023SJohn Marino   if (*type1_out == NULL)
2078e4b17023SJohn Marino     {
2079e4b17023SJohn Marino       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2080e4b17023SJohn Marino 	return false;
2081e4b17023SJohn Marino       *type1_out = *type2_out;
2082e4b17023SJohn Marino     }
2083e4b17023SJohn Marino 
2084e4b17023SJohn Marino   if (*type2_out == NULL)
2085e4b17023SJohn Marino     {
2086e4b17023SJohn Marino       if (!int_fits_type_p (*rhs2_out, *type1_out))
2087e4b17023SJohn Marino 	return false;
2088e4b17023SJohn Marino       *type2_out = *type1_out;
2089e4b17023SJohn Marino     }
2090e4b17023SJohn Marino 
2091e4b17023SJohn Marino   /* Ensure that the larger of the two operands comes first. */
2092e4b17023SJohn Marino   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2093e4b17023SJohn Marino     {
2094e4b17023SJohn Marino       tree tmp;
2095e4b17023SJohn Marino       tmp = *type1_out;
2096e4b17023SJohn Marino       *type1_out = *type2_out;
2097e4b17023SJohn Marino       *type2_out = tmp;
2098e4b17023SJohn Marino       tmp = *rhs1_out;
2099e4b17023SJohn Marino       *rhs1_out = *rhs2_out;
2100e4b17023SJohn Marino       *rhs2_out = tmp;
2101e4b17023SJohn Marino     }
2102e4b17023SJohn Marino 
2103e4b17023SJohn Marino   return true;
2104e4b17023SJohn Marino }
2105e4b17023SJohn Marino 
2106e4b17023SJohn Marino /* Process a single gimple statement STMT, which has a MULT_EXPR as
2107e4b17023SJohn Marino    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2108e4b17023SJohn Marino    value is true iff we converted the statement.  */
2109e4b17023SJohn Marino 
2110e4b17023SJohn Marino static bool
convert_mult_to_widen(gimple stmt,gimple_stmt_iterator * gsi)2111e4b17023SJohn Marino convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
2112e4b17023SJohn Marino {
2113e4b17023SJohn Marino   tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
2114e4b17023SJohn Marino   enum insn_code handler;
2115e4b17023SJohn Marino   enum machine_mode to_mode, from_mode, actual_mode;
2116e4b17023SJohn Marino   optab op;
2117e4b17023SJohn Marino   int actual_precision;
2118e4b17023SJohn Marino   location_t loc = gimple_location (stmt);
2119e4b17023SJohn Marino   bool from_unsigned1, from_unsigned2;
2120e4b17023SJohn Marino 
2121e4b17023SJohn Marino   lhs = gimple_assign_lhs (stmt);
2122e4b17023SJohn Marino   type = TREE_TYPE (lhs);
2123e4b17023SJohn Marino   if (TREE_CODE (type) != INTEGER_TYPE)
2124e4b17023SJohn Marino     return false;
2125e4b17023SJohn Marino 
2126e4b17023SJohn Marino   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2127e4b17023SJohn Marino     return false;
2128e4b17023SJohn Marino 
2129e4b17023SJohn Marino   to_mode = TYPE_MODE (type);
2130e4b17023SJohn Marino   from_mode = TYPE_MODE (type1);
2131e4b17023SJohn Marino   from_unsigned1 = TYPE_UNSIGNED (type1);
2132e4b17023SJohn Marino   from_unsigned2 = TYPE_UNSIGNED (type2);
2133e4b17023SJohn Marino 
2134e4b17023SJohn Marino   if (from_unsigned1 && from_unsigned2)
2135e4b17023SJohn Marino     op = umul_widen_optab;
2136e4b17023SJohn Marino   else if (!from_unsigned1 && !from_unsigned2)
2137e4b17023SJohn Marino     op = smul_widen_optab;
2138e4b17023SJohn Marino   else
2139e4b17023SJohn Marino     op = usmul_widen_optab;
2140e4b17023SJohn Marino 
2141e4b17023SJohn Marino   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2142e4b17023SJohn Marino 						  0, &actual_mode);
2143e4b17023SJohn Marino 
2144e4b17023SJohn Marino   if (handler == CODE_FOR_nothing)
2145e4b17023SJohn Marino     {
2146e4b17023SJohn Marino       if (op != smul_widen_optab)
2147e4b17023SJohn Marino 	{
2148e4b17023SJohn Marino 	  /* We can use a signed multiply with unsigned types as long as
2149e4b17023SJohn Marino 	     there is a wider mode to use, or it is the smaller of the two
2150e4b17023SJohn Marino 	     types that is unsigned.  Note that type1 >= type2, always.  */
2151e4b17023SJohn Marino 	  if ((TYPE_UNSIGNED (type1)
2152e4b17023SJohn Marino 	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2153e4b17023SJohn Marino 	      || (TYPE_UNSIGNED (type2)
2154e4b17023SJohn Marino 		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2155e4b17023SJohn Marino 	    {
2156e4b17023SJohn Marino 	      from_mode = GET_MODE_WIDER_MODE (from_mode);
2157e4b17023SJohn Marino 	      if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2158e4b17023SJohn Marino 		return false;
2159e4b17023SJohn Marino 	    }
2160e4b17023SJohn Marino 
2161e4b17023SJohn Marino 	  op = smul_widen_optab;
2162e4b17023SJohn Marino 	  handler = find_widening_optab_handler_and_mode (op, to_mode,
2163e4b17023SJohn Marino 							  from_mode, 0,
2164e4b17023SJohn Marino 							  &actual_mode);
2165e4b17023SJohn Marino 
2166e4b17023SJohn Marino 	  if (handler == CODE_FOR_nothing)
2167e4b17023SJohn Marino 	    return false;
2168e4b17023SJohn Marino 
2169e4b17023SJohn Marino 	  from_unsigned1 = from_unsigned2 = false;
2170e4b17023SJohn Marino 	}
2171e4b17023SJohn Marino       else
2172e4b17023SJohn Marino 	return false;
2173e4b17023SJohn Marino     }
2174e4b17023SJohn Marino 
2175e4b17023SJohn Marino   /* Ensure that the inputs to the handler are in the correct precison
2176e4b17023SJohn Marino      for the opcode.  This will be the full mode size.  */
2177e4b17023SJohn Marino   actual_precision = GET_MODE_PRECISION (actual_mode);
2178e4b17023SJohn Marino   if (actual_precision != TYPE_PRECISION (type1)
2179e4b17023SJohn Marino       || from_unsigned1 != TYPE_UNSIGNED (type1))
2180e4b17023SJohn Marino     {
2181e4b17023SJohn Marino       tmp = create_tmp_var (build_nonstandard_integer_type
2182e4b17023SJohn Marino 				(actual_precision, from_unsigned1),
2183e4b17023SJohn Marino 			    NULL);
2184e4b17023SJohn Marino       rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
2185e4b17023SJohn Marino     }
2186e4b17023SJohn Marino   if (actual_precision != TYPE_PRECISION (type2)
2187e4b17023SJohn Marino       || from_unsigned2 != TYPE_UNSIGNED (type2))
2188e4b17023SJohn Marino     {
2189e4b17023SJohn Marino       /* Reuse the same type info, if possible.  */
2190e4b17023SJohn Marino       if (!tmp || from_unsigned1 != from_unsigned2)
2191e4b17023SJohn Marino 	tmp = create_tmp_var (build_nonstandard_integer_type
2192e4b17023SJohn Marino 				(actual_precision, from_unsigned2),
2193e4b17023SJohn Marino 			      NULL);
2194e4b17023SJohn Marino       rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
2195e4b17023SJohn Marino     }
2196e4b17023SJohn Marino 
2197e4b17023SJohn Marino   /* Handle constants.  */
2198e4b17023SJohn Marino   if (TREE_CODE (rhs1) == INTEGER_CST)
2199e4b17023SJohn Marino     rhs1 = fold_convert (type1, rhs1);
2200e4b17023SJohn Marino   if (TREE_CODE (rhs2) == INTEGER_CST)
2201e4b17023SJohn Marino     rhs2 = fold_convert (type2, rhs2);
2202e4b17023SJohn Marino 
2203e4b17023SJohn Marino   gimple_assign_set_rhs1 (stmt, rhs1);
2204e4b17023SJohn Marino   gimple_assign_set_rhs2 (stmt, rhs2);
2205e4b17023SJohn Marino   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2206e4b17023SJohn Marino   update_stmt (stmt);
2207e4b17023SJohn Marino   widen_mul_stats.widen_mults_inserted++;
2208e4b17023SJohn Marino   return true;
2209e4b17023SJohn Marino }
2210e4b17023SJohn Marino 
2211e4b17023SJohn Marino /* Process a single gimple statement STMT, which is found at the
2212e4b17023SJohn Marino    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2213e4b17023SJohn Marino    rhs (given by CODE), and try to convert it into a
2214e4b17023SJohn Marino    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2215e4b17023SJohn Marino    is true iff we converted the statement.  */
2216e4b17023SJohn Marino 
2217e4b17023SJohn Marino static bool
convert_plusminus_to_widen(gimple_stmt_iterator * gsi,gimple stmt,enum tree_code code)2218e4b17023SJohn Marino convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
2219e4b17023SJohn Marino 			    enum tree_code code)
2220e4b17023SJohn Marino {
2221e4b17023SJohn Marino   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2222e4b17023SJohn Marino   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
2223e4b17023SJohn Marino   tree type, type1, type2, optype, tmp = NULL;
2224e4b17023SJohn Marino   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2225e4b17023SJohn Marino   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2226e4b17023SJohn Marino   optab this_optab;
2227e4b17023SJohn Marino   enum tree_code wmult_code;
2228e4b17023SJohn Marino   enum insn_code handler;
2229e4b17023SJohn Marino   enum machine_mode to_mode, from_mode, actual_mode;
2230e4b17023SJohn Marino   location_t loc = gimple_location (stmt);
2231e4b17023SJohn Marino   int actual_precision;
2232e4b17023SJohn Marino   bool from_unsigned1, from_unsigned2;
2233e4b17023SJohn Marino 
2234e4b17023SJohn Marino   lhs = gimple_assign_lhs (stmt);
2235e4b17023SJohn Marino   type = TREE_TYPE (lhs);
2236e4b17023SJohn Marino   if (TREE_CODE (type) != INTEGER_TYPE
2237e4b17023SJohn Marino       && TREE_CODE (type) != FIXED_POINT_TYPE)
2238e4b17023SJohn Marino     return false;
2239e4b17023SJohn Marino 
2240e4b17023SJohn Marino   if (code == MINUS_EXPR)
2241e4b17023SJohn Marino     wmult_code = WIDEN_MULT_MINUS_EXPR;
2242e4b17023SJohn Marino   else
2243e4b17023SJohn Marino     wmult_code = WIDEN_MULT_PLUS_EXPR;
2244e4b17023SJohn Marino 
2245e4b17023SJohn Marino   rhs1 = gimple_assign_rhs1 (stmt);
2246e4b17023SJohn Marino   rhs2 = gimple_assign_rhs2 (stmt);
2247e4b17023SJohn Marino 
2248e4b17023SJohn Marino   if (TREE_CODE (rhs1) == SSA_NAME)
2249e4b17023SJohn Marino     {
2250e4b17023SJohn Marino       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2251e4b17023SJohn Marino       if (is_gimple_assign (rhs1_stmt))
2252e4b17023SJohn Marino 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2253e4b17023SJohn Marino     }
2254e4b17023SJohn Marino 
2255e4b17023SJohn Marino   if (TREE_CODE (rhs2) == SSA_NAME)
2256e4b17023SJohn Marino     {
2257e4b17023SJohn Marino       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2258e4b17023SJohn Marino       if (is_gimple_assign (rhs2_stmt))
2259e4b17023SJohn Marino 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2260e4b17023SJohn Marino     }
2261e4b17023SJohn Marino 
2262e4b17023SJohn Marino   /* Allow for one conversion statement between the multiply
2263e4b17023SJohn Marino      and addition/subtraction statement.  If there are more than
2264e4b17023SJohn Marino      one conversions then we assume they would invalidate this
2265e4b17023SJohn Marino      transformation.  If that's not the case then they should have
2266e4b17023SJohn Marino      been folded before now.  */
2267e4b17023SJohn Marino   if (CONVERT_EXPR_CODE_P (rhs1_code))
2268e4b17023SJohn Marino     {
2269e4b17023SJohn Marino       conv1_stmt = rhs1_stmt;
2270e4b17023SJohn Marino       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2271e4b17023SJohn Marino       if (TREE_CODE (rhs1) == SSA_NAME)
2272e4b17023SJohn Marino 	{
2273e4b17023SJohn Marino 	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2274e4b17023SJohn Marino 	  if (is_gimple_assign (rhs1_stmt))
2275e4b17023SJohn Marino 	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2276e4b17023SJohn Marino 	}
2277e4b17023SJohn Marino       else
2278e4b17023SJohn Marino 	return false;
2279e4b17023SJohn Marino     }
2280e4b17023SJohn Marino   if (CONVERT_EXPR_CODE_P (rhs2_code))
2281e4b17023SJohn Marino     {
2282e4b17023SJohn Marino       conv2_stmt = rhs2_stmt;
2283e4b17023SJohn Marino       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2284e4b17023SJohn Marino       if (TREE_CODE (rhs2) == SSA_NAME)
2285e4b17023SJohn Marino 	{
2286e4b17023SJohn Marino 	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2287e4b17023SJohn Marino 	  if (is_gimple_assign (rhs2_stmt))
2288e4b17023SJohn Marino 	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2289e4b17023SJohn Marino 	}
2290e4b17023SJohn Marino       else
2291e4b17023SJohn Marino 	return false;
2292e4b17023SJohn Marino     }
2293e4b17023SJohn Marino 
2294e4b17023SJohn Marino   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2295e4b17023SJohn Marino      is_widening_mult_p, but we still need the rhs returns.
2296e4b17023SJohn Marino 
2297e4b17023SJohn Marino      It might also appear that it would be sufficient to use the existing
2298e4b17023SJohn Marino      operands of the widening multiply, but that would limit the choice of
2299e4b17023SJohn Marino      multiply-and-accumulate instructions.  */
2300e4b17023SJohn Marino   if (code == PLUS_EXPR
2301e4b17023SJohn Marino       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2302e4b17023SJohn Marino     {
2303e4b17023SJohn Marino       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2304e4b17023SJohn Marino 			       &type2, &mult_rhs2))
2305e4b17023SJohn Marino 	return false;
2306e4b17023SJohn Marino       add_rhs = rhs2;
2307e4b17023SJohn Marino       conv_stmt = conv1_stmt;
2308e4b17023SJohn Marino     }
2309e4b17023SJohn Marino   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2310e4b17023SJohn Marino     {
2311e4b17023SJohn Marino       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2312e4b17023SJohn Marino 			       &type2, &mult_rhs2))
2313e4b17023SJohn Marino 	return false;
2314e4b17023SJohn Marino       add_rhs = rhs1;
2315e4b17023SJohn Marino       conv_stmt = conv2_stmt;
2316e4b17023SJohn Marino     }
2317e4b17023SJohn Marino   else
2318e4b17023SJohn Marino     return false;
2319e4b17023SJohn Marino 
2320e4b17023SJohn Marino   to_mode = TYPE_MODE (type);
2321e4b17023SJohn Marino   from_mode = TYPE_MODE (type1);
2322e4b17023SJohn Marino   from_unsigned1 = TYPE_UNSIGNED (type1);
2323e4b17023SJohn Marino   from_unsigned2 = TYPE_UNSIGNED (type2);
2324e4b17023SJohn Marino   optype = type1;
2325e4b17023SJohn Marino 
2326e4b17023SJohn Marino   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2327e4b17023SJohn Marino   if (from_unsigned1 != from_unsigned2)
2328e4b17023SJohn Marino     {
2329e4b17023SJohn Marino       if (!INTEGRAL_TYPE_P (type))
2330e4b17023SJohn Marino 	return false;
2331e4b17023SJohn Marino       /* We can use a signed multiply with unsigned types as long as
2332e4b17023SJohn Marino 	 there is a wider mode to use, or it is the smaller of the two
2333e4b17023SJohn Marino 	 types that is unsigned.  Note that type1 >= type2, always.  */
2334e4b17023SJohn Marino       if ((from_unsigned1
2335e4b17023SJohn Marino 	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2336e4b17023SJohn Marino 	  || (from_unsigned2
2337e4b17023SJohn Marino 	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2338e4b17023SJohn Marino 	{
2339e4b17023SJohn Marino 	  from_mode = GET_MODE_WIDER_MODE (from_mode);
2340e4b17023SJohn Marino 	  if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2341e4b17023SJohn Marino 	    return false;
2342e4b17023SJohn Marino 	}
2343e4b17023SJohn Marino 
2344e4b17023SJohn Marino       from_unsigned1 = from_unsigned2 = false;
2345e4b17023SJohn Marino       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2346e4b17023SJohn Marino 					       false);
2347e4b17023SJohn Marino     }
2348e4b17023SJohn Marino 
2349e4b17023SJohn Marino   /* If there was a conversion between the multiply and addition
2350e4b17023SJohn Marino      then we need to make sure it fits a multiply-and-accumulate.
2351e4b17023SJohn Marino      The should be a single mode change which does not change the
2352e4b17023SJohn Marino      value.  */
2353e4b17023SJohn Marino   if (conv_stmt)
2354e4b17023SJohn Marino     {
2355e4b17023SJohn Marino       /* We use the original, unmodified data types for this.  */
2356e4b17023SJohn Marino       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2357e4b17023SJohn Marino       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2358e4b17023SJohn Marino       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2359e4b17023SJohn Marino       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2360e4b17023SJohn Marino 
2361e4b17023SJohn Marino       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2362e4b17023SJohn Marino 	{
2363e4b17023SJohn Marino 	  /* Conversion is a truncate.  */
2364e4b17023SJohn Marino 	  if (TYPE_PRECISION (to_type) < data_size)
2365e4b17023SJohn Marino 	    return false;
2366e4b17023SJohn Marino 	}
2367e4b17023SJohn Marino       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2368e4b17023SJohn Marino 	{
2369e4b17023SJohn Marino 	  /* Conversion is an extend.  Check it's the right sort.  */
2370e4b17023SJohn Marino 	  if (TYPE_UNSIGNED (from_type) != is_unsigned
2371e4b17023SJohn Marino 	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2372e4b17023SJohn Marino 	    return false;
2373e4b17023SJohn Marino 	}
2374e4b17023SJohn Marino       /* else convert is a no-op for our purposes.  */
2375e4b17023SJohn Marino     }
2376e4b17023SJohn Marino 
2377e4b17023SJohn Marino   /* Verify that the machine can perform a widening multiply
2378e4b17023SJohn Marino      accumulate in this mode/signedness combination, otherwise
2379e4b17023SJohn Marino      this transformation is likely to pessimize code.  */
2380e4b17023SJohn Marino   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2381e4b17023SJohn Marino   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2382e4b17023SJohn Marino 						  from_mode, 0, &actual_mode);
2383e4b17023SJohn Marino 
2384e4b17023SJohn Marino   if (handler == CODE_FOR_nothing)
2385e4b17023SJohn Marino     return false;
2386e4b17023SJohn Marino 
2387e4b17023SJohn Marino   /* Ensure that the inputs to the handler are in the correct precison
2388e4b17023SJohn Marino      for the opcode.  This will be the full mode size.  */
2389e4b17023SJohn Marino   actual_precision = GET_MODE_PRECISION (actual_mode);
2390e4b17023SJohn Marino   if (actual_precision != TYPE_PRECISION (type1)
2391e4b17023SJohn Marino       || from_unsigned1 != TYPE_UNSIGNED (type1))
2392e4b17023SJohn Marino     {
2393e4b17023SJohn Marino       tmp = create_tmp_var (build_nonstandard_integer_type
2394e4b17023SJohn Marino 				(actual_precision, from_unsigned1),
2395e4b17023SJohn Marino 			    NULL);
2396e4b17023SJohn Marino       mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
2397e4b17023SJohn Marino     }
2398e4b17023SJohn Marino   if (actual_precision != TYPE_PRECISION (type2)
2399e4b17023SJohn Marino       || from_unsigned2 != TYPE_UNSIGNED (type2))
2400e4b17023SJohn Marino     {
2401e4b17023SJohn Marino       if (!tmp || from_unsigned1 != from_unsigned2)
2402e4b17023SJohn Marino 	tmp = create_tmp_var (build_nonstandard_integer_type
2403e4b17023SJohn Marino 				(actual_precision, from_unsigned2),
2404e4b17023SJohn Marino 			      NULL);
2405e4b17023SJohn Marino       mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
2406e4b17023SJohn Marino     }
2407e4b17023SJohn Marino 
2408e4b17023SJohn Marino   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2409e4b17023SJohn Marino     add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL),
2410e4b17023SJohn Marino 				     add_rhs);
2411e4b17023SJohn Marino 
2412e4b17023SJohn Marino   /* Handle constants.  */
2413e4b17023SJohn Marino   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2414e4b17023SJohn Marino     mult_rhs1 = fold_convert (type1, mult_rhs1);
2415e4b17023SJohn Marino   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2416e4b17023SJohn Marino     mult_rhs2 = fold_convert (type2, mult_rhs2);
2417e4b17023SJohn Marino 
2418e4b17023SJohn Marino   gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
2419e4b17023SJohn Marino 				    add_rhs);
2420e4b17023SJohn Marino   update_stmt (gsi_stmt (*gsi));
2421e4b17023SJohn Marino   widen_mul_stats.maccs_inserted++;
2422e4b17023SJohn Marino   return true;
2423e4b17023SJohn Marino }
2424e4b17023SJohn Marino 
2425e4b17023SJohn Marino /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2426e4b17023SJohn Marino    with uses in additions and subtractions to form fused multiply-add
2427e4b17023SJohn Marino    operations.  Returns true if successful and MUL_STMT should be removed.  */
2428e4b17023SJohn Marino 
2429e4b17023SJohn Marino static bool
convert_mult_to_fma(gimple mul_stmt,tree op1,tree op2)2430e4b17023SJohn Marino convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
2431e4b17023SJohn Marino {
2432e4b17023SJohn Marino   tree mul_result = gimple_get_lhs (mul_stmt);
2433e4b17023SJohn Marino   tree type = TREE_TYPE (mul_result);
2434e4b17023SJohn Marino   gimple use_stmt, neguse_stmt, fma_stmt;
2435e4b17023SJohn Marino   use_operand_p use_p;
2436e4b17023SJohn Marino   imm_use_iterator imm_iter;
2437e4b17023SJohn Marino 
2438e4b17023SJohn Marino   if (FLOAT_TYPE_P (type)
2439e4b17023SJohn Marino       && flag_fp_contract_mode == FP_CONTRACT_OFF)
2440e4b17023SJohn Marino     return false;
2441e4b17023SJohn Marino 
2442e4b17023SJohn Marino   /* We don't want to do bitfield reduction ops.  */
2443e4b17023SJohn Marino   if (INTEGRAL_TYPE_P (type)
2444e4b17023SJohn Marino       && (TYPE_PRECISION (type)
2445e4b17023SJohn Marino 	  != GET_MODE_PRECISION (TYPE_MODE (type))))
2446e4b17023SJohn Marino     return false;
2447e4b17023SJohn Marino 
2448e4b17023SJohn Marino   /* If the target doesn't support it, don't generate it.  We assume that
2449e4b17023SJohn Marino      if fma isn't available then fms, fnma or fnms are not either.  */
2450e4b17023SJohn Marino   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2451e4b17023SJohn Marino     return false;
2452e4b17023SJohn Marino 
2453e4b17023SJohn Marino   /* If the multiplication has zero uses, it is kept around probably because
2454e4b17023SJohn Marino      of -fnon-call-exceptions.  Don't optimize it away in that case,
2455e4b17023SJohn Marino      it is DCE job.  */
2456e4b17023SJohn Marino   if (has_zero_uses (mul_result))
2457e4b17023SJohn Marino     return false;
2458e4b17023SJohn Marino 
2459e4b17023SJohn Marino   /* Make sure that the multiplication statement becomes dead after
2460e4b17023SJohn Marino      the transformation, thus that all uses are transformed to FMAs.
2461e4b17023SJohn Marino      This means we assume that an FMA operation has the same cost
2462e4b17023SJohn Marino      as an addition.  */
2463e4b17023SJohn Marino   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2464e4b17023SJohn Marino     {
2465e4b17023SJohn Marino       enum tree_code use_code;
2466e4b17023SJohn Marino       tree result = mul_result;
2467e4b17023SJohn Marino       bool negate_p = false;
2468e4b17023SJohn Marino 
2469e4b17023SJohn Marino       use_stmt = USE_STMT (use_p);
2470e4b17023SJohn Marino 
2471e4b17023SJohn Marino       if (is_gimple_debug (use_stmt))
2472e4b17023SJohn Marino 	continue;
2473e4b17023SJohn Marino 
2474e4b17023SJohn Marino       /* For now restrict this operations to single basic blocks.  In theory
2475e4b17023SJohn Marino 	 we would want to support sinking the multiplication in
2476e4b17023SJohn Marino 	 m = a*b;
2477e4b17023SJohn Marino 	 if ()
2478e4b17023SJohn Marino 	   ma = m + c;
2479e4b17023SJohn Marino 	 else
2480e4b17023SJohn Marino 	   d = m;
2481e4b17023SJohn Marino 	 to form a fma in the then block and sink the multiplication to the
2482e4b17023SJohn Marino 	 else block.  */
2483e4b17023SJohn Marino       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2484e4b17023SJohn Marino 	return false;
2485e4b17023SJohn Marino 
2486e4b17023SJohn Marino       if (!is_gimple_assign (use_stmt))
2487e4b17023SJohn Marino 	return false;
2488e4b17023SJohn Marino 
2489e4b17023SJohn Marino       use_code = gimple_assign_rhs_code (use_stmt);
2490e4b17023SJohn Marino 
2491e4b17023SJohn Marino       /* A negate on the multiplication leads to FNMA.  */
2492e4b17023SJohn Marino       if (use_code == NEGATE_EXPR)
2493e4b17023SJohn Marino 	{
2494e4b17023SJohn Marino 	  ssa_op_iter iter;
2495e4b17023SJohn Marino 	  use_operand_p usep;
2496e4b17023SJohn Marino 
2497e4b17023SJohn Marino 	  result = gimple_assign_lhs (use_stmt);
2498e4b17023SJohn Marino 
2499e4b17023SJohn Marino 	  /* Make sure the negate statement becomes dead with this
2500e4b17023SJohn Marino 	     single transformation.  */
2501e4b17023SJohn Marino 	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
2502e4b17023SJohn Marino 			       &use_p, &neguse_stmt))
2503e4b17023SJohn Marino 	    return false;
2504e4b17023SJohn Marino 
2505e4b17023SJohn Marino 	  /* Make sure the multiplication isn't also used on that stmt.  */
2506e4b17023SJohn Marino 	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2507e4b17023SJohn Marino 	    if (USE_FROM_PTR (usep) == mul_result)
2508e4b17023SJohn Marino 	      return false;
2509e4b17023SJohn Marino 
2510e4b17023SJohn Marino 	  /* Re-validate.  */
2511e4b17023SJohn Marino 	  use_stmt = neguse_stmt;
2512e4b17023SJohn Marino 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2513e4b17023SJohn Marino 	    return false;
2514e4b17023SJohn Marino 	  if (!is_gimple_assign (use_stmt))
2515e4b17023SJohn Marino 	    return false;
2516e4b17023SJohn Marino 
2517e4b17023SJohn Marino 	  use_code = gimple_assign_rhs_code (use_stmt);
2518e4b17023SJohn Marino 	  negate_p = true;
2519e4b17023SJohn Marino 	}
2520e4b17023SJohn Marino 
2521e4b17023SJohn Marino       switch (use_code)
2522e4b17023SJohn Marino 	{
2523e4b17023SJohn Marino 	case MINUS_EXPR:
2524e4b17023SJohn Marino 	  if (gimple_assign_rhs2 (use_stmt) == result)
2525e4b17023SJohn Marino 	    negate_p = !negate_p;
2526e4b17023SJohn Marino 	  break;
2527e4b17023SJohn Marino 	case PLUS_EXPR:
2528e4b17023SJohn Marino 	  break;
2529e4b17023SJohn Marino 	default:
2530e4b17023SJohn Marino 	  /* FMA can only be formed from PLUS and MINUS.  */
2531e4b17023SJohn Marino 	  return false;
2532e4b17023SJohn Marino 	}
2533e4b17023SJohn Marino 
2534e4b17023SJohn Marino       /* We can't handle a * b + a * b.  */
2535e4b17023SJohn Marino       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2536e4b17023SJohn Marino 	return false;
2537e4b17023SJohn Marino 
2538e4b17023SJohn Marino       /* While it is possible to validate whether or not the exact form
2539e4b17023SJohn Marino 	 that we've recognized is available in the backend, the assumption
2540e4b17023SJohn Marino 	 is that the transformation is never a loss.  For instance, suppose
2541e4b17023SJohn Marino 	 the target only has the plain FMA pattern available.  Consider
2542e4b17023SJohn Marino 	 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2543e4b17023SJohn Marino 	 is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
2544e4b17023SJohn Marino 	 still have 3 operations, but in the FMA form the two NEGs are
2545e4b17023SJohn Marino 	 independant and could be run in parallel.  */
2546e4b17023SJohn Marino     }
2547e4b17023SJohn Marino 
2548e4b17023SJohn Marino   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2549e4b17023SJohn Marino     {
2550e4b17023SJohn Marino       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2551e4b17023SJohn Marino       enum tree_code use_code;
2552e4b17023SJohn Marino       tree addop, mulop1 = op1, result = mul_result;
2553e4b17023SJohn Marino       bool negate_p = false;
2554e4b17023SJohn Marino 
2555e4b17023SJohn Marino       if (is_gimple_debug (use_stmt))
2556e4b17023SJohn Marino 	continue;
2557e4b17023SJohn Marino 
2558e4b17023SJohn Marino       use_code = gimple_assign_rhs_code (use_stmt);
2559e4b17023SJohn Marino       if (use_code == NEGATE_EXPR)
2560e4b17023SJohn Marino 	{
2561e4b17023SJohn Marino 	  result = gimple_assign_lhs (use_stmt);
2562e4b17023SJohn Marino 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2563e4b17023SJohn Marino 	  gsi_remove (&gsi, true);
2564e4b17023SJohn Marino 	  release_defs (use_stmt);
2565e4b17023SJohn Marino 
2566e4b17023SJohn Marino 	  use_stmt = neguse_stmt;
2567e4b17023SJohn Marino 	  gsi = gsi_for_stmt (use_stmt);
2568e4b17023SJohn Marino 	  use_code = gimple_assign_rhs_code (use_stmt);
2569e4b17023SJohn Marino 	  negate_p = true;
2570e4b17023SJohn Marino 	}
2571e4b17023SJohn Marino 
2572e4b17023SJohn Marino       if (gimple_assign_rhs1 (use_stmt) == result)
2573e4b17023SJohn Marino 	{
2574e4b17023SJohn Marino 	  addop = gimple_assign_rhs2 (use_stmt);
2575e4b17023SJohn Marino 	  /* a * b - c -> a * b + (-c)  */
2576e4b17023SJohn Marino 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2577e4b17023SJohn Marino 	    addop = force_gimple_operand_gsi (&gsi,
2578e4b17023SJohn Marino 					      build1 (NEGATE_EXPR,
2579e4b17023SJohn Marino 						      type, addop),
2580e4b17023SJohn Marino 					      true, NULL_TREE, true,
2581e4b17023SJohn Marino 					      GSI_SAME_STMT);
2582e4b17023SJohn Marino 	}
2583e4b17023SJohn Marino       else
2584e4b17023SJohn Marino 	{
2585e4b17023SJohn Marino 	  addop = gimple_assign_rhs1 (use_stmt);
2586e4b17023SJohn Marino 	  /* a - b * c -> (-b) * c + a */
2587e4b17023SJohn Marino 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2588e4b17023SJohn Marino 	    negate_p = !negate_p;
2589e4b17023SJohn Marino 	}
2590e4b17023SJohn Marino 
2591e4b17023SJohn Marino       if (negate_p)
2592e4b17023SJohn Marino 	mulop1 = force_gimple_operand_gsi (&gsi,
2593e4b17023SJohn Marino 					   build1 (NEGATE_EXPR,
2594e4b17023SJohn Marino 						   type, mulop1),
2595e4b17023SJohn Marino 					   true, NULL_TREE, true,
2596e4b17023SJohn Marino 					   GSI_SAME_STMT);
2597e4b17023SJohn Marino 
2598e4b17023SJohn Marino       fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
2599e4b17023SJohn Marino 						gimple_assign_lhs (use_stmt),
2600e4b17023SJohn Marino 						mulop1, op2,
2601e4b17023SJohn Marino 						addop);
2602e4b17023SJohn Marino       gsi_replace (&gsi, fma_stmt, true);
2603e4b17023SJohn Marino       widen_mul_stats.fmas_inserted++;
2604e4b17023SJohn Marino     }
2605e4b17023SJohn Marino 
2606e4b17023SJohn Marino   return true;
2607e4b17023SJohn Marino }
2608e4b17023SJohn Marino 
2609e4b17023SJohn Marino /* Find integer multiplications where the operands are extended from
2610e4b17023SJohn Marino    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
2611e4b17023SJohn Marino    where appropriate.  */
2612e4b17023SJohn Marino 
2613e4b17023SJohn Marino static unsigned int
execute_optimize_widening_mul(void)2614e4b17023SJohn Marino execute_optimize_widening_mul (void)
2615e4b17023SJohn Marino {
2616e4b17023SJohn Marino   basic_block bb;
2617e4b17023SJohn Marino   bool cfg_changed = false;
2618e4b17023SJohn Marino 
2619e4b17023SJohn Marino   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
2620e4b17023SJohn Marino 
2621e4b17023SJohn Marino   FOR_EACH_BB (bb)
2622e4b17023SJohn Marino     {
2623e4b17023SJohn Marino       gimple_stmt_iterator gsi;
2624e4b17023SJohn Marino 
2625e4b17023SJohn Marino       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
2626e4b17023SJohn Marino         {
2627e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (gsi);
2628e4b17023SJohn Marino 	  enum tree_code code;
2629e4b17023SJohn Marino 
2630e4b17023SJohn Marino 	  if (is_gimple_assign (stmt))
2631e4b17023SJohn Marino 	    {
2632e4b17023SJohn Marino 	      code = gimple_assign_rhs_code (stmt);
2633e4b17023SJohn Marino 	      switch (code)
2634e4b17023SJohn Marino 		{
2635e4b17023SJohn Marino 		case MULT_EXPR:
2636e4b17023SJohn Marino 		  if (!convert_mult_to_widen (stmt, &gsi)
2637e4b17023SJohn Marino 		      && convert_mult_to_fma (stmt,
2638e4b17023SJohn Marino 					      gimple_assign_rhs1 (stmt),
2639e4b17023SJohn Marino 					      gimple_assign_rhs2 (stmt)))
2640e4b17023SJohn Marino 		    {
2641e4b17023SJohn Marino 		      gsi_remove (&gsi, true);
2642e4b17023SJohn Marino 		      release_defs (stmt);
2643e4b17023SJohn Marino 		      continue;
2644e4b17023SJohn Marino 		    }
2645e4b17023SJohn Marino 		  break;
2646e4b17023SJohn Marino 
2647e4b17023SJohn Marino 		case PLUS_EXPR:
2648e4b17023SJohn Marino 		case MINUS_EXPR:
2649e4b17023SJohn Marino 		  convert_plusminus_to_widen (&gsi, stmt, code);
2650e4b17023SJohn Marino 		  break;
2651e4b17023SJohn Marino 
2652e4b17023SJohn Marino 		default:;
2653e4b17023SJohn Marino 		}
2654e4b17023SJohn Marino 	    }
2655e4b17023SJohn Marino 	  else if (is_gimple_call (stmt)
2656e4b17023SJohn Marino 		   && gimple_call_lhs (stmt))
2657e4b17023SJohn Marino 	    {
2658e4b17023SJohn Marino 	      tree fndecl = gimple_call_fndecl (stmt);
2659e4b17023SJohn Marino 	      if (fndecl
2660e4b17023SJohn Marino 		  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
2661e4b17023SJohn Marino 		{
2662e4b17023SJohn Marino 		  switch (DECL_FUNCTION_CODE (fndecl))
2663e4b17023SJohn Marino 		    {
2664e4b17023SJohn Marino 		      case BUILT_IN_POWF:
2665e4b17023SJohn Marino 		      case BUILT_IN_POW:
2666e4b17023SJohn Marino 		      case BUILT_IN_POWL:
2667e4b17023SJohn Marino 			if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
2668e4b17023SJohn Marino 			    && REAL_VALUES_EQUAL
2669e4b17023SJohn Marino 			         (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
2670e4b17023SJohn Marino 				  dconst2)
2671e4b17023SJohn Marino 			    && convert_mult_to_fma (stmt,
2672e4b17023SJohn Marino 						    gimple_call_arg (stmt, 0),
2673e4b17023SJohn Marino 						    gimple_call_arg (stmt, 0)))
2674e4b17023SJohn Marino 			  {
2675e4b17023SJohn Marino 			    unlink_stmt_vdef (stmt);
2676e4b17023SJohn Marino 			    gsi_remove (&gsi, true);
2677e4b17023SJohn Marino 			    release_defs (stmt);
2678e4b17023SJohn Marino 			    if (gimple_purge_dead_eh_edges (bb))
2679e4b17023SJohn Marino 			      cfg_changed = true;
2680e4b17023SJohn Marino 			    continue;
2681e4b17023SJohn Marino 			  }
2682e4b17023SJohn Marino 			  break;
2683e4b17023SJohn Marino 
2684e4b17023SJohn Marino 		      default:;
2685e4b17023SJohn Marino 		    }
2686e4b17023SJohn Marino 		}
2687e4b17023SJohn Marino 	    }
2688e4b17023SJohn Marino 	  gsi_next (&gsi);
2689e4b17023SJohn Marino 	}
2690e4b17023SJohn Marino     }
2691e4b17023SJohn Marino 
2692e4b17023SJohn Marino   statistics_counter_event (cfun, "widening multiplications inserted",
2693e4b17023SJohn Marino 			    widen_mul_stats.widen_mults_inserted);
2694e4b17023SJohn Marino   statistics_counter_event (cfun, "widening maccs inserted",
2695e4b17023SJohn Marino 			    widen_mul_stats.maccs_inserted);
2696e4b17023SJohn Marino   statistics_counter_event (cfun, "fused multiply-adds inserted",
2697e4b17023SJohn Marino 			    widen_mul_stats.fmas_inserted);
2698e4b17023SJohn Marino 
2699e4b17023SJohn Marino   return cfg_changed ? TODO_cleanup_cfg : 0;
2700e4b17023SJohn Marino }
2701e4b17023SJohn Marino 
2702e4b17023SJohn Marino static bool
gate_optimize_widening_mul(void)2703e4b17023SJohn Marino gate_optimize_widening_mul (void)
2704e4b17023SJohn Marino {
2705e4b17023SJohn Marino   return flag_expensive_optimizations && optimize;
2706e4b17023SJohn Marino }
2707e4b17023SJohn Marino 
2708e4b17023SJohn Marino struct gimple_opt_pass pass_optimize_widening_mul =
2709e4b17023SJohn Marino {
2710e4b17023SJohn Marino  {
2711e4b17023SJohn Marino   GIMPLE_PASS,
2712e4b17023SJohn Marino   "widening_mul",			/* name */
2713e4b17023SJohn Marino   gate_optimize_widening_mul,		/* gate */
2714e4b17023SJohn Marino   execute_optimize_widening_mul,	/* execute */
2715e4b17023SJohn Marino   NULL,					/* sub */
2716e4b17023SJohn Marino   NULL,					/* next */
2717e4b17023SJohn Marino   0,					/* static_pass_number */
2718e4b17023SJohn Marino   TV_NONE,				/* tv_id */
2719e4b17023SJohn Marino   PROP_ssa,				/* properties_required */
2720e4b17023SJohn Marino   0,					/* properties_provided */
2721e4b17023SJohn Marino   0,					/* properties_destroyed */
2722e4b17023SJohn Marino   0,					/* todo_flags_start */
2723e4b17023SJohn Marino   TODO_verify_ssa
2724e4b17023SJohn Marino   | TODO_verify_stmts
2725e4b17023SJohn Marino   | TODO_update_ssa                     /* todo_flags_finish */
2726e4b17023SJohn Marino  }
2727e4b17023SJohn Marino };
2728