1e4b17023SJohn Marino /* Global, SSA-based optimizations using mathematical identities.
2*5ce9237cSJohn Marino Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3e4b17023SJohn Marino Free Software Foundation, Inc.
4e4b17023SJohn Marino
5e4b17023SJohn Marino This file is part of GCC.
6e4b17023SJohn Marino
7e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it
8e4b17023SJohn Marino under the terms of the GNU General Public License as published by the
9e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any
10e4b17023SJohn Marino later version.
11e4b17023SJohn Marino
12e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT
13e4b17023SJohn Marino ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15e4b17023SJohn Marino for more details.
16e4b17023SJohn Marino
17e4b17023SJohn Marino You should have received a copy of the GNU General Public License
18e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
19e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
20e4b17023SJohn Marino
21e4b17023SJohn Marino /* Currently, the only mini-pass in this file tries to CSE reciprocal
22e4b17023SJohn Marino operations. These are common in sequences such as this one:
23e4b17023SJohn Marino
24e4b17023SJohn Marino modulus = sqrt(x*x + y*y + z*z);
25e4b17023SJohn Marino x = x / modulus;
26e4b17023SJohn Marino y = y / modulus;
27e4b17023SJohn Marino z = z / modulus;
28e4b17023SJohn Marino
29e4b17023SJohn Marino that can be optimized to
30e4b17023SJohn Marino
31e4b17023SJohn Marino modulus = sqrt(x*x + y*y + z*z);
32e4b17023SJohn Marino rmodulus = 1.0 / modulus;
33e4b17023SJohn Marino x = x * rmodulus;
34e4b17023SJohn Marino y = y * rmodulus;
35e4b17023SJohn Marino z = z * rmodulus;
36e4b17023SJohn Marino
37e4b17023SJohn Marino We do this for loop invariant divisors, and with this pass whenever
38e4b17023SJohn Marino we notice that a division has the same divisor multiple times.
39e4b17023SJohn Marino
40e4b17023SJohn Marino Of course, like in PRE, we don't insert a division if a dominator
41e4b17023SJohn Marino already has one. However, this cannot be done as an extension of
42e4b17023SJohn Marino PRE for several reasons.
43e4b17023SJohn Marino
44e4b17023SJohn Marino First of all, with some experiments it was found out that the
45e4b17023SJohn Marino transformation is not always useful if there are only two divisions
46e4b17023SJohn Marino hy the same divisor. This is probably because modern processors
47e4b17023SJohn Marino can pipeline the divisions; on older, in-order processors it should
48e4b17023SJohn Marino still be effective to optimize two divisions by the same number.
49e4b17023SJohn Marino We make this a param, and it shall be called N in the remainder of
50e4b17023SJohn Marino this comment.
51e4b17023SJohn Marino
52e4b17023SJohn Marino Second, if trapping math is active, we have less freedom on where
53e4b17023SJohn Marino to insert divisions: we can only do so in basic blocks that already
54e4b17023SJohn Marino contain one. (If divisions don't trap, instead, we can insert
55e4b17023SJohn Marino divisions elsewhere, which will be in blocks that are common dominators
56e4b17023SJohn Marino of those that have the division).
57e4b17023SJohn Marino
58e4b17023SJohn Marino We really don't want to compute the reciprocal unless a division will
59e4b17023SJohn Marino be found. To do this, we won't insert the division in a basic block
60e4b17023SJohn Marino that has less than N divisions *post-dominating* it.
61e4b17023SJohn Marino
62e4b17023SJohn Marino The algorithm constructs a subset of the dominator tree, holding the
63e4b17023SJohn Marino blocks containing the divisions and the common dominators to them,
64e4b17023SJohn Marino and walk it twice. The first walk is in post-order, and it annotates
65e4b17023SJohn Marino each block with the number of divisions that post-dominate it: this
66e4b17023SJohn Marino gives information on where divisions can be inserted profitably.
67e4b17023SJohn Marino The second walk is in pre-order, and it inserts divisions as explained
68e4b17023SJohn Marino above, and replaces divisions by multiplications.
69e4b17023SJohn Marino
70e4b17023SJohn Marino In the best case, the cost of the pass is O(n_statements). In the
71e4b17023SJohn Marino worst-case, the cost is due to creating the dominator tree subset,
72e4b17023SJohn Marino with a cost of O(n_basic_blocks ^ 2); however this can only happen
73e4b17023SJohn Marino for n_statements / n_basic_blocks statements. So, the amortized cost
74e4b17023SJohn Marino of creating the dominator tree subset is O(n_basic_blocks) and the
75e4b17023SJohn Marino worst-case cost of the pass is O(n_statements * n_basic_blocks).
76e4b17023SJohn Marino
77e4b17023SJohn Marino More practically, the cost will be small because there are few
78e4b17023SJohn Marino divisions, and they tend to be in the same basic block, so insert_bb
79e4b17023SJohn Marino is called very few times.
80e4b17023SJohn Marino
81e4b17023SJohn Marino If we did this using domwalk.c, an efficient implementation would have
82e4b17023SJohn Marino to work on all the variables in a single pass, because we could not
83e4b17023SJohn Marino work on just a subset of the dominator tree, as we do now, and the
84e4b17023SJohn Marino cost would also be something like O(n_statements * n_basic_blocks).
85e4b17023SJohn Marino The data structures would be more complex in order to work on all the
86e4b17023SJohn Marino variables in a single pass. */
87e4b17023SJohn Marino
88e4b17023SJohn Marino #include "config.h"
89e4b17023SJohn Marino #include "system.h"
90e4b17023SJohn Marino #include "coretypes.h"
91e4b17023SJohn Marino #include "tm.h"
92e4b17023SJohn Marino #include "flags.h"
93e4b17023SJohn Marino #include "tree.h"
94e4b17023SJohn Marino #include "tree-flow.h"
95e4b17023SJohn Marino #include "timevar.h"
96e4b17023SJohn Marino #include "tree-pass.h"
97e4b17023SJohn Marino #include "alloc-pool.h"
98e4b17023SJohn Marino #include "basic-block.h"
99e4b17023SJohn Marino #include "target.h"
100e4b17023SJohn Marino #include "gimple-pretty-print.h"
101e4b17023SJohn Marino
102e4b17023SJohn Marino /* FIXME: RTL headers have to be included here for optabs. */
103e4b17023SJohn Marino #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
104e4b17023SJohn Marino #include "expr.h" /* Because optabs.h wants sepops. */
105e4b17023SJohn Marino #include "optabs.h"
106e4b17023SJohn Marino
107e4b17023SJohn Marino /* This structure represents one basic block that either computes a
108e4b17023SJohn Marino division, or is a common dominator for basic block that compute a
109e4b17023SJohn Marino division. */
110e4b17023SJohn Marino struct occurrence {
111e4b17023SJohn Marino /* The basic block represented by this structure. */
112e4b17023SJohn Marino basic_block bb;
113e4b17023SJohn Marino
114e4b17023SJohn Marino /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
115e4b17023SJohn Marino inserted in BB. */
116e4b17023SJohn Marino tree recip_def;
117e4b17023SJohn Marino
118e4b17023SJohn Marino /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
119e4b17023SJohn Marino was inserted in BB. */
120e4b17023SJohn Marino gimple recip_def_stmt;
121e4b17023SJohn Marino
122e4b17023SJohn Marino /* Pointer to a list of "struct occurrence"s for blocks dominated
123e4b17023SJohn Marino by BB. */
124e4b17023SJohn Marino struct occurrence *children;
125e4b17023SJohn Marino
126e4b17023SJohn Marino /* Pointer to the next "struct occurrence"s in the list of blocks
127e4b17023SJohn Marino sharing a common dominator. */
128e4b17023SJohn Marino struct occurrence *next;
129e4b17023SJohn Marino
130e4b17023SJohn Marino /* The number of divisions that are in BB before compute_merit. The
131e4b17023SJohn Marino number of divisions that are in BB or post-dominate it after
132e4b17023SJohn Marino compute_merit. */
133e4b17023SJohn Marino int num_divisions;
134e4b17023SJohn Marino
135e4b17023SJohn Marino /* True if the basic block has a division, false if it is a common
136e4b17023SJohn Marino dominator for basic blocks that do. If it is false and trapping
137e4b17023SJohn Marino math is active, BB is not a candidate for inserting a reciprocal. */
138e4b17023SJohn Marino bool bb_has_division;
139e4b17023SJohn Marino };
140e4b17023SJohn Marino
141e4b17023SJohn Marino static struct
142e4b17023SJohn Marino {
143e4b17023SJohn Marino /* Number of 1.0/X ops inserted. */
144e4b17023SJohn Marino int rdivs_inserted;
145e4b17023SJohn Marino
146e4b17023SJohn Marino /* Number of 1.0/FUNC ops inserted. */
147e4b17023SJohn Marino int rfuncs_inserted;
148e4b17023SJohn Marino } reciprocal_stats;
149e4b17023SJohn Marino
150e4b17023SJohn Marino static struct
151e4b17023SJohn Marino {
152e4b17023SJohn Marino /* Number of cexpi calls inserted. */
153e4b17023SJohn Marino int inserted;
154e4b17023SJohn Marino } sincos_stats;
155e4b17023SJohn Marino
156e4b17023SJohn Marino static struct
157e4b17023SJohn Marino {
158e4b17023SJohn Marino /* Number of hand-written 32-bit bswaps found. */
159e4b17023SJohn Marino int found_32bit;
160e4b17023SJohn Marino
161e4b17023SJohn Marino /* Number of hand-written 64-bit bswaps found. */
162e4b17023SJohn Marino int found_64bit;
163e4b17023SJohn Marino } bswap_stats;
164e4b17023SJohn Marino
165e4b17023SJohn Marino static struct
166e4b17023SJohn Marino {
167e4b17023SJohn Marino /* Number of widening multiplication ops inserted. */
168e4b17023SJohn Marino int widen_mults_inserted;
169e4b17023SJohn Marino
170e4b17023SJohn Marino /* Number of integer multiply-and-accumulate ops inserted. */
171e4b17023SJohn Marino int maccs_inserted;
172e4b17023SJohn Marino
173e4b17023SJohn Marino /* Number of fp fused multiply-add ops inserted. */
174e4b17023SJohn Marino int fmas_inserted;
175e4b17023SJohn Marino } widen_mul_stats;
176e4b17023SJohn Marino
177e4b17023SJohn Marino /* The instance of "struct occurrence" representing the highest
178e4b17023SJohn Marino interesting block in the dominator tree. */
179e4b17023SJohn Marino static struct occurrence *occ_head;
180e4b17023SJohn Marino
181e4b17023SJohn Marino /* Allocation pool for getting instances of "struct occurrence". */
182e4b17023SJohn Marino static alloc_pool occ_pool;
183e4b17023SJohn Marino
184e4b17023SJohn Marino
185e4b17023SJohn Marino
186e4b17023SJohn Marino /* Allocate and return a new struct occurrence for basic block BB, and
187e4b17023SJohn Marino whose children list is headed by CHILDREN. */
188e4b17023SJohn Marino static struct occurrence *
occ_new(basic_block bb,struct occurrence * children)189e4b17023SJohn Marino occ_new (basic_block bb, struct occurrence *children)
190e4b17023SJohn Marino {
191e4b17023SJohn Marino struct occurrence *occ;
192e4b17023SJohn Marino
193e4b17023SJohn Marino bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
194e4b17023SJohn Marino memset (occ, 0, sizeof (struct occurrence));
195e4b17023SJohn Marino
196e4b17023SJohn Marino occ->bb = bb;
197e4b17023SJohn Marino occ->children = children;
198e4b17023SJohn Marino return occ;
199e4b17023SJohn Marino }
200e4b17023SJohn Marino
201e4b17023SJohn Marino
202e4b17023SJohn Marino /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
203e4b17023SJohn Marino list of "struct occurrence"s, one per basic block, having IDOM as
204e4b17023SJohn Marino their common dominator.
205e4b17023SJohn Marino
206e4b17023SJohn Marino We try to insert NEW_OCC as deep as possible in the tree, and we also
207e4b17023SJohn Marino insert any other block that is a common dominator for BB and one
208e4b17023SJohn Marino block already in the tree. */
209e4b17023SJohn Marino
210e4b17023SJohn Marino static void
insert_bb(struct occurrence * new_occ,basic_block idom,struct occurrence ** p_head)211e4b17023SJohn Marino insert_bb (struct occurrence *new_occ, basic_block idom,
212e4b17023SJohn Marino struct occurrence **p_head)
213e4b17023SJohn Marino {
214e4b17023SJohn Marino struct occurrence *occ, **p_occ;
215e4b17023SJohn Marino
216e4b17023SJohn Marino for (p_occ = p_head; (occ = *p_occ) != NULL; )
217e4b17023SJohn Marino {
218e4b17023SJohn Marino basic_block bb = new_occ->bb, occ_bb = occ->bb;
219e4b17023SJohn Marino basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
220e4b17023SJohn Marino if (dom == bb)
221e4b17023SJohn Marino {
222e4b17023SJohn Marino /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
223e4b17023SJohn Marino from its list. */
224e4b17023SJohn Marino *p_occ = occ->next;
225e4b17023SJohn Marino occ->next = new_occ->children;
226e4b17023SJohn Marino new_occ->children = occ;
227e4b17023SJohn Marino
228e4b17023SJohn Marino /* Try the next block (it may as well be dominated by BB). */
229e4b17023SJohn Marino }
230e4b17023SJohn Marino
231e4b17023SJohn Marino else if (dom == occ_bb)
232e4b17023SJohn Marino {
233e4b17023SJohn Marino /* OCC_BB dominates BB. Tail recurse to look deeper. */
234e4b17023SJohn Marino insert_bb (new_occ, dom, &occ->children);
235e4b17023SJohn Marino return;
236e4b17023SJohn Marino }
237e4b17023SJohn Marino
238e4b17023SJohn Marino else if (dom != idom)
239e4b17023SJohn Marino {
240e4b17023SJohn Marino gcc_assert (!dom->aux);
241e4b17023SJohn Marino
242e4b17023SJohn Marino /* There is a dominator between IDOM and BB, add it and make
243e4b17023SJohn Marino two children out of NEW_OCC and OCC. First, remove OCC from
244e4b17023SJohn Marino its list. */
245e4b17023SJohn Marino *p_occ = occ->next;
246e4b17023SJohn Marino new_occ->next = occ;
247e4b17023SJohn Marino occ->next = NULL;
248e4b17023SJohn Marino
249e4b17023SJohn Marino /* None of the previous blocks has DOM as a dominator: if we tail
250e4b17023SJohn Marino recursed, we would reexamine them uselessly. Just switch BB with
251e4b17023SJohn Marino DOM, and go on looking for blocks dominated by DOM. */
252e4b17023SJohn Marino new_occ = occ_new (dom, new_occ);
253e4b17023SJohn Marino }
254e4b17023SJohn Marino
255e4b17023SJohn Marino else
256e4b17023SJohn Marino {
257e4b17023SJohn Marino /* Nothing special, go on with the next element. */
258e4b17023SJohn Marino p_occ = &occ->next;
259e4b17023SJohn Marino }
260e4b17023SJohn Marino }
261e4b17023SJohn Marino
262e4b17023SJohn Marino /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
263e4b17023SJohn Marino new_occ->next = *p_head;
264e4b17023SJohn Marino *p_head = new_occ;
265e4b17023SJohn Marino }
266e4b17023SJohn Marino
267e4b17023SJohn Marino /* Register that we found a division in BB. */
268e4b17023SJohn Marino
269e4b17023SJohn Marino static inline void
register_division_in(basic_block bb)270e4b17023SJohn Marino register_division_in (basic_block bb)
271e4b17023SJohn Marino {
272e4b17023SJohn Marino struct occurrence *occ;
273e4b17023SJohn Marino
274e4b17023SJohn Marino occ = (struct occurrence *) bb->aux;
275e4b17023SJohn Marino if (!occ)
276e4b17023SJohn Marino {
277e4b17023SJohn Marino occ = occ_new (bb, NULL);
278e4b17023SJohn Marino insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
279e4b17023SJohn Marino }
280e4b17023SJohn Marino
281e4b17023SJohn Marino occ->bb_has_division = true;
282e4b17023SJohn Marino occ->num_divisions++;
283e4b17023SJohn Marino }
284e4b17023SJohn Marino
285e4b17023SJohn Marino
286e4b17023SJohn Marino /* Compute the number of divisions that postdominate each block in OCC and
287e4b17023SJohn Marino its children. */
288e4b17023SJohn Marino
289e4b17023SJohn Marino static void
compute_merit(struct occurrence * occ)290e4b17023SJohn Marino compute_merit (struct occurrence *occ)
291e4b17023SJohn Marino {
292e4b17023SJohn Marino struct occurrence *occ_child;
293e4b17023SJohn Marino basic_block dom = occ->bb;
294e4b17023SJohn Marino
295e4b17023SJohn Marino for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
296e4b17023SJohn Marino {
297e4b17023SJohn Marino basic_block bb;
298e4b17023SJohn Marino if (occ_child->children)
299e4b17023SJohn Marino compute_merit (occ_child);
300e4b17023SJohn Marino
301e4b17023SJohn Marino if (flag_exceptions)
302e4b17023SJohn Marino bb = single_noncomplex_succ (dom);
303e4b17023SJohn Marino else
304e4b17023SJohn Marino bb = dom;
305e4b17023SJohn Marino
306e4b17023SJohn Marino if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
307e4b17023SJohn Marino occ->num_divisions += occ_child->num_divisions;
308e4b17023SJohn Marino }
309e4b17023SJohn Marino }
310e4b17023SJohn Marino
311e4b17023SJohn Marino
312e4b17023SJohn Marino /* Return whether USE_STMT is a floating-point division by DEF. */
313e4b17023SJohn Marino static inline bool
is_division_by(gimple use_stmt,tree def)314e4b17023SJohn Marino is_division_by (gimple use_stmt, tree def)
315e4b17023SJohn Marino {
316e4b17023SJohn Marino return is_gimple_assign (use_stmt)
317e4b17023SJohn Marino && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
318e4b17023SJohn Marino && gimple_assign_rhs2 (use_stmt) == def
319e4b17023SJohn Marino /* Do not recognize x / x as valid division, as we are getting
320e4b17023SJohn Marino confused later by replacing all immediate uses x in such
321e4b17023SJohn Marino a stmt. */
322e4b17023SJohn Marino && gimple_assign_rhs1 (use_stmt) != def;
323e4b17023SJohn Marino }
324e4b17023SJohn Marino
325e4b17023SJohn Marino /* Walk the subset of the dominator tree rooted at OCC, setting the
326e4b17023SJohn Marino RECIP_DEF field to a definition of 1.0 / DEF that can be used in
327e4b17023SJohn Marino the given basic block. The field may be left NULL, of course,
328e4b17023SJohn Marino if it is not possible or profitable to do the optimization.
329e4b17023SJohn Marino
330e4b17023SJohn Marino DEF_BSI is an iterator pointing at the statement defining DEF.
331e4b17023SJohn Marino If RECIP_DEF is set, a dominator already has a computation that can
332e4b17023SJohn Marino be used. */
333e4b17023SJohn Marino
334e4b17023SJohn Marino static void
insert_reciprocals(gimple_stmt_iterator * def_gsi,struct occurrence * occ,tree def,tree recip_def,int threshold)335e4b17023SJohn Marino insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
336e4b17023SJohn Marino tree def, tree recip_def, int threshold)
337e4b17023SJohn Marino {
338e4b17023SJohn Marino tree type;
339e4b17023SJohn Marino gimple new_stmt;
340e4b17023SJohn Marino gimple_stmt_iterator gsi;
341e4b17023SJohn Marino struct occurrence *occ_child;
342e4b17023SJohn Marino
343e4b17023SJohn Marino if (!recip_def
344e4b17023SJohn Marino && (occ->bb_has_division || !flag_trapping_math)
345e4b17023SJohn Marino && occ->num_divisions >= threshold)
346e4b17023SJohn Marino {
347e4b17023SJohn Marino /* Make a variable with the replacement and substitute it. */
348e4b17023SJohn Marino type = TREE_TYPE (def);
349e4b17023SJohn Marino recip_def = make_rename_temp (type, "reciptmp");
350e4b17023SJohn Marino new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
351e4b17023SJohn Marino build_one_cst (type), def);
352e4b17023SJohn Marino
353e4b17023SJohn Marino if (occ->bb_has_division)
354e4b17023SJohn Marino {
355e4b17023SJohn Marino /* Case 1: insert before an existing division. */
356e4b17023SJohn Marino gsi = gsi_after_labels (occ->bb);
357e4b17023SJohn Marino while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
358e4b17023SJohn Marino gsi_next (&gsi);
359e4b17023SJohn Marino
360e4b17023SJohn Marino gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
361e4b17023SJohn Marino }
362e4b17023SJohn Marino else if (def_gsi && occ->bb == def_gsi->bb)
363e4b17023SJohn Marino {
364e4b17023SJohn Marino /* Case 2: insert right after the definition. Note that this will
365e4b17023SJohn Marino never happen if the definition statement can throw, because in
366e4b17023SJohn Marino that case the sole successor of the statement's basic block will
367e4b17023SJohn Marino dominate all the uses as well. */
368e4b17023SJohn Marino gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
369e4b17023SJohn Marino }
370e4b17023SJohn Marino else
371e4b17023SJohn Marino {
372e4b17023SJohn Marino /* Case 3: insert in a basic block not containing defs/uses. */
373e4b17023SJohn Marino gsi = gsi_after_labels (occ->bb);
374e4b17023SJohn Marino gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
375e4b17023SJohn Marino }
376e4b17023SJohn Marino
377e4b17023SJohn Marino reciprocal_stats.rdivs_inserted++;
378e4b17023SJohn Marino
379e4b17023SJohn Marino occ->recip_def_stmt = new_stmt;
380e4b17023SJohn Marino }
381e4b17023SJohn Marino
382e4b17023SJohn Marino occ->recip_def = recip_def;
383e4b17023SJohn Marino for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
384e4b17023SJohn Marino insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
385e4b17023SJohn Marino }
386e4b17023SJohn Marino
387e4b17023SJohn Marino
388e4b17023SJohn Marino /* Replace the division at USE_P with a multiplication by the reciprocal, if
389e4b17023SJohn Marino possible. */
390e4b17023SJohn Marino
391e4b17023SJohn Marino static inline void
replace_reciprocal(use_operand_p use_p)392e4b17023SJohn Marino replace_reciprocal (use_operand_p use_p)
393e4b17023SJohn Marino {
394e4b17023SJohn Marino gimple use_stmt = USE_STMT (use_p);
395e4b17023SJohn Marino basic_block bb = gimple_bb (use_stmt);
396e4b17023SJohn Marino struct occurrence *occ = (struct occurrence *) bb->aux;
397e4b17023SJohn Marino
398e4b17023SJohn Marino if (optimize_bb_for_speed_p (bb)
399e4b17023SJohn Marino && occ->recip_def && use_stmt != occ->recip_def_stmt)
400e4b17023SJohn Marino {
401e4b17023SJohn Marino gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
402e4b17023SJohn Marino gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
403e4b17023SJohn Marino SET_USE (use_p, occ->recip_def);
404e4b17023SJohn Marino fold_stmt_inplace (&gsi);
405e4b17023SJohn Marino update_stmt (use_stmt);
406e4b17023SJohn Marino }
407e4b17023SJohn Marino }
408e4b17023SJohn Marino
409e4b17023SJohn Marino
410e4b17023SJohn Marino /* Free OCC and return one more "struct occurrence" to be freed. */
411e4b17023SJohn Marino
412e4b17023SJohn Marino static struct occurrence *
free_bb(struct occurrence * occ)413e4b17023SJohn Marino free_bb (struct occurrence *occ)
414e4b17023SJohn Marino {
415e4b17023SJohn Marino struct occurrence *child, *next;
416e4b17023SJohn Marino
417e4b17023SJohn Marino /* First get the two pointers hanging off OCC. */
418e4b17023SJohn Marino next = occ->next;
419e4b17023SJohn Marino child = occ->children;
420e4b17023SJohn Marino occ->bb->aux = NULL;
421e4b17023SJohn Marino pool_free (occ_pool, occ);
422e4b17023SJohn Marino
423e4b17023SJohn Marino /* Now ensure that we don't recurse unless it is necessary. */
424e4b17023SJohn Marino if (!child)
425e4b17023SJohn Marino return next;
426e4b17023SJohn Marino else
427e4b17023SJohn Marino {
428e4b17023SJohn Marino while (next)
429e4b17023SJohn Marino next = free_bb (next);
430e4b17023SJohn Marino
431e4b17023SJohn Marino return child;
432e4b17023SJohn Marino }
433e4b17023SJohn Marino }
434e4b17023SJohn Marino
435e4b17023SJohn Marino
436e4b17023SJohn Marino /* Look for floating-point divisions among DEF's uses, and try to
437e4b17023SJohn Marino replace them by multiplications with the reciprocal. Add
438e4b17023SJohn Marino as many statements computing the reciprocal as needed.
439e4b17023SJohn Marino
440e4b17023SJohn Marino DEF must be a GIMPLE register of a floating-point type. */
441e4b17023SJohn Marino
442e4b17023SJohn Marino static void
execute_cse_reciprocals_1(gimple_stmt_iterator * def_gsi,tree def)443e4b17023SJohn Marino execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
444e4b17023SJohn Marino {
445e4b17023SJohn Marino use_operand_p use_p;
446e4b17023SJohn Marino imm_use_iterator use_iter;
447e4b17023SJohn Marino struct occurrence *occ;
448e4b17023SJohn Marino int count = 0, threshold;
449e4b17023SJohn Marino
450e4b17023SJohn Marino gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
451e4b17023SJohn Marino
452e4b17023SJohn Marino FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
453e4b17023SJohn Marino {
454e4b17023SJohn Marino gimple use_stmt = USE_STMT (use_p);
455e4b17023SJohn Marino if (is_division_by (use_stmt, def))
456e4b17023SJohn Marino {
457e4b17023SJohn Marino register_division_in (gimple_bb (use_stmt));
458e4b17023SJohn Marino count++;
459e4b17023SJohn Marino }
460e4b17023SJohn Marino }
461e4b17023SJohn Marino
462e4b17023SJohn Marino /* Do the expensive part only if we can hope to optimize something. */
463e4b17023SJohn Marino threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
464e4b17023SJohn Marino if (count >= threshold)
465e4b17023SJohn Marino {
466e4b17023SJohn Marino gimple use_stmt;
467e4b17023SJohn Marino for (occ = occ_head; occ; occ = occ->next)
468e4b17023SJohn Marino {
469e4b17023SJohn Marino compute_merit (occ);
470e4b17023SJohn Marino insert_reciprocals (def_gsi, occ, def, NULL, threshold);
471e4b17023SJohn Marino }
472e4b17023SJohn Marino
473e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
474e4b17023SJohn Marino {
475e4b17023SJohn Marino if (is_division_by (use_stmt, def))
476e4b17023SJohn Marino {
477e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
478e4b17023SJohn Marino replace_reciprocal (use_p);
479e4b17023SJohn Marino }
480e4b17023SJohn Marino }
481e4b17023SJohn Marino }
482e4b17023SJohn Marino
483e4b17023SJohn Marino for (occ = occ_head; occ; )
484e4b17023SJohn Marino occ = free_bb (occ);
485e4b17023SJohn Marino
486e4b17023SJohn Marino occ_head = NULL;
487e4b17023SJohn Marino }
488e4b17023SJohn Marino
489e4b17023SJohn Marino static bool
gate_cse_reciprocals(void)490e4b17023SJohn Marino gate_cse_reciprocals (void)
491e4b17023SJohn Marino {
492e4b17023SJohn Marino return optimize && flag_reciprocal_math;
493e4b17023SJohn Marino }
494e4b17023SJohn Marino
495e4b17023SJohn Marino /* Go through all the floating-point SSA_NAMEs, and call
496e4b17023SJohn Marino execute_cse_reciprocals_1 on each of them. */
497e4b17023SJohn Marino static unsigned int
execute_cse_reciprocals(void)498e4b17023SJohn Marino execute_cse_reciprocals (void)
499e4b17023SJohn Marino {
500e4b17023SJohn Marino basic_block bb;
501e4b17023SJohn Marino tree arg;
502e4b17023SJohn Marino
503e4b17023SJohn Marino occ_pool = create_alloc_pool ("dominators for recip",
504e4b17023SJohn Marino sizeof (struct occurrence),
505e4b17023SJohn Marino n_basic_blocks / 3 + 1);
506e4b17023SJohn Marino
507e4b17023SJohn Marino memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
508e4b17023SJohn Marino calculate_dominance_info (CDI_DOMINATORS);
509e4b17023SJohn Marino calculate_dominance_info (CDI_POST_DOMINATORS);
510e4b17023SJohn Marino
511e4b17023SJohn Marino #ifdef ENABLE_CHECKING
512e4b17023SJohn Marino FOR_EACH_BB (bb)
513e4b17023SJohn Marino gcc_assert (!bb->aux);
514e4b17023SJohn Marino #endif
515e4b17023SJohn Marino
516e4b17023SJohn Marino for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
517e4b17023SJohn Marino if (gimple_default_def (cfun, arg)
518e4b17023SJohn Marino && FLOAT_TYPE_P (TREE_TYPE (arg))
519e4b17023SJohn Marino && is_gimple_reg (arg))
520e4b17023SJohn Marino execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
521e4b17023SJohn Marino
522e4b17023SJohn Marino FOR_EACH_BB (bb)
523e4b17023SJohn Marino {
524e4b17023SJohn Marino gimple_stmt_iterator gsi;
525e4b17023SJohn Marino gimple phi;
526e4b17023SJohn Marino tree def;
527e4b17023SJohn Marino
528e4b17023SJohn Marino for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
529e4b17023SJohn Marino {
530e4b17023SJohn Marino phi = gsi_stmt (gsi);
531e4b17023SJohn Marino def = PHI_RESULT (phi);
532e4b17023SJohn Marino if (FLOAT_TYPE_P (TREE_TYPE (def))
533e4b17023SJohn Marino && is_gimple_reg (def))
534e4b17023SJohn Marino execute_cse_reciprocals_1 (NULL, def);
535e4b17023SJohn Marino }
536e4b17023SJohn Marino
537e4b17023SJohn Marino for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
538e4b17023SJohn Marino {
539e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
540e4b17023SJohn Marino
541e4b17023SJohn Marino if (gimple_has_lhs (stmt)
542e4b17023SJohn Marino && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
543e4b17023SJohn Marino && FLOAT_TYPE_P (TREE_TYPE (def))
544e4b17023SJohn Marino && TREE_CODE (def) == SSA_NAME)
545e4b17023SJohn Marino execute_cse_reciprocals_1 (&gsi, def);
546e4b17023SJohn Marino }
547e4b17023SJohn Marino
548e4b17023SJohn Marino if (optimize_bb_for_size_p (bb))
549e4b17023SJohn Marino continue;
550e4b17023SJohn Marino
551e4b17023SJohn Marino /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
552e4b17023SJohn Marino for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
553e4b17023SJohn Marino {
554e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
555e4b17023SJohn Marino tree fndecl;
556e4b17023SJohn Marino
557e4b17023SJohn Marino if (is_gimple_assign (stmt)
558e4b17023SJohn Marino && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
559e4b17023SJohn Marino {
560e4b17023SJohn Marino tree arg1 = gimple_assign_rhs2 (stmt);
561e4b17023SJohn Marino gimple stmt1;
562e4b17023SJohn Marino
563e4b17023SJohn Marino if (TREE_CODE (arg1) != SSA_NAME)
564e4b17023SJohn Marino continue;
565e4b17023SJohn Marino
566e4b17023SJohn Marino stmt1 = SSA_NAME_DEF_STMT (arg1);
567e4b17023SJohn Marino
568e4b17023SJohn Marino if (is_gimple_call (stmt1)
569e4b17023SJohn Marino && gimple_call_lhs (stmt1)
570e4b17023SJohn Marino && (fndecl = gimple_call_fndecl (stmt1))
571e4b17023SJohn Marino && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
572e4b17023SJohn Marino || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
573e4b17023SJohn Marino {
574e4b17023SJohn Marino enum built_in_function code;
575e4b17023SJohn Marino bool md_code, fail;
576e4b17023SJohn Marino imm_use_iterator ui;
577e4b17023SJohn Marino use_operand_p use_p;
578e4b17023SJohn Marino
579e4b17023SJohn Marino code = DECL_FUNCTION_CODE (fndecl);
580e4b17023SJohn Marino md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
581e4b17023SJohn Marino
582e4b17023SJohn Marino fndecl = targetm.builtin_reciprocal (code, md_code, false);
583e4b17023SJohn Marino if (!fndecl)
584e4b17023SJohn Marino continue;
585e4b17023SJohn Marino
586e4b17023SJohn Marino /* Check that all uses of the SSA name are divisions,
587e4b17023SJohn Marino otherwise replacing the defining statement will do
588e4b17023SJohn Marino the wrong thing. */
589e4b17023SJohn Marino fail = false;
590e4b17023SJohn Marino FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
591e4b17023SJohn Marino {
592e4b17023SJohn Marino gimple stmt2 = USE_STMT (use_p);
593e4b17023SJohn Marino if (is_gimple_debug (stmt2))
594e4b17023SJohn Marino continue;
595e4b17023SJohn Marino if (!is_gimple_assign (stmt2)
596e4b17023SJohn Marino || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
597e4b17023SJohn Marino || gimple_assign_rhs1 (stmt2) == arg1
598e4b17023SJohn Marino || gimple_assign_rhs2 (stmt2) != arg1)
599e4b17023SJohn Marino {
600e4b17023SJohn Marino fail = true;
601e4b17023SJohn Marino break;
602e4b17023SJohn Marino }
603e4b17023SJohn Marino }
604e4b17023SJohn Marino if (fail)
605e4b17023SJohn Marino continue;
606e4b17023SJohn Marino
607e4b17023SJohn Marino gimple_replace_lhs (stmt1, arg1);
608e4b17023SJohn Marino gimple_call_set_fndecl (stmt1, fndecl);
609e4b17023SJohn Marino update_stmt (stmt1);
610e4b17023SJohn Marino reciprocal_stats.rfuncs_inserted++;
611e4b17023SJohn Marino
612e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
613e4b17023SJohn Marino {
614e4b17023SJohn Marino gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
615e4b17023SJohn Marino gimple_assign_set_rhs_code (stmt, MULT_EXPR);
616e4b17023SJohn Marino fold_stmt_inplace (&gsi);
617e4b17023SJohn Marino update_stmt (stmt);
618e4b17023SJohn Marino }
619e4b17023SJohn Marino }
620e4b17023SJohn Marino }
621e4b17023SJohn Marino }
622e4b17023SJohn Marino }
623e4b17023SJohn Marino
624e4b17023SJohn Marino statistics_counter_event (cfun, "reciprocal divs inserted",
625e4b17023SJohn Marino reciprocal_stats.rdivs_inserted);
626e4b17023SJohn Marino statistics_counter_event (cfun, "reciprocal functions inserted",
627e4b17023SJohn Marino reciprocal_stats.rfuncs_inserted);
628e4b17023SJohn Marino
629e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
630e4b17023SJohn Marino free_dominance_info (CDI_POST_DOMINATORS);
631e4b17023SJohn Marino free_alloc_pool (occ_pool);
632e4b17023SJohn Marino return 0;
633e4b17023SJohn Marino }
634e4b17023SJohn Marino
635e4b17023SJohn Marino struct gimple_opt_pass pass_cse_reciprocals =
636e4b17023SJohn Marino {
637e4b17023SJohn Marino {
638e4b17023SJohn Marino GIMPLE_PASS,
639e4b17023SJohn Marino "recip", /* name */
640e4b17023SJohn Marino gate_cse_reciprocals, /* gate */
641e4b17023SJohn Marino execute_cse_reciprocals, /* execute */
642e4b17023SJohn Marino NULL, /* sub */
643e4b17023SJohn Marino NULL, /* next */
644e4b17023SJohn Marino 0, /* static_pass_number */
645e4b17023SJohn Marino TV_NONE, /* tv_id */
646e4b17023SJohn Marino PROP_ssa, /* properties_required */
647e4b17023SJohn Marino 0, /* properties_provided */
648e4b17023SJohn Marino 0, /* properties_destroyed */
649e4b17023SJohn Marino 0, /* todo_flags_start */
650e4b17023SJohn Marino TODO_update_ssa | TODO_verify_ssa
651e4b17023SJohn Marino | TODO_verify_stmts /* todo_flags_finish */
652e4b17023SJohn Marino }
653e4b17023SJohn Marino };
654e4b17023SJohn Marino
655e4b17023SJohn Marino /* Records an occurrence at statement USE_STMT in the vector of trees
656e4b17023SJohn Marino STMTS if it is dominated by *TOP_BB or dominates it or this basic block
657e4b17023SJohn Marino is not yet initialized. Returns true if the occurrence was pushed on
658e4b17023SJohn Marino the vector. Adjusts *TOP_BB to be the basic block dominating all
659e4b17023SJohn Marino statements in the vector. */
660e4b17023SJohn Marino
661e4b17023SJohn Marino static bool
maybe_record_sincos(VEC (gimple,heap)** stmts,basic_block * top_bb,gimple use_stmt)662e4b17023SJohn Marino maybe_record_sincos (VEC(gimple, heap) **stmts,
663e4b17023SJohn Marino basic_block *top_bb, gimple use_stmt)
664e4b17023SJohn Marino {
665e4b17023SJohn Marino basic_block use_bb = gimple_bb (use_stmt);
666e4b17023SJohn Marino if (*top_bb
667e4b17023SJohn Marino && (*top_bb == use_bb
668e4b17023SJohn Marino || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
669e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, use_stmt);
670e4b17023SJohn Marino else if (!*top_bb
671e4b17023SJohn Marino || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
672e4b17023SJohn Marino {
673e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, use_stmt);
674e4b17023SJohn Marino *top_bb = use_bb;
675e4b17023SJohn Marino }
676e4b17023SJohn Marino else
677e4b17023SJohn Marino return false;
678e4b17023SJohn Marino
679e4b17023SJohn Marino return true;
680e4b17023SJohn Marino }
681e4b17023SJohn Marino
682e4b17023SJohn Marino /* Look for sin, cos and cexpi calls with the same argument NAME and
683e4b17023SJohn Marino create a single call to cexpi CSEing the result in this case.
684e4b17023SJohn Marino We first walk over all immediate uses of the argument collecting
685e4b17023SJohn Marino statements that we can CSE in a vector and in a second pass replace
686e4b17023SJohn Marino the statement rhs with a REALPART or IMAGPART expression on the
687e4b17023SJohn Marino result of the cexpi call we insert before the use statement that
688e4b17023SJohn Marino dominates all other candidates. */
689e4b17023SJohn Marino
690e4b17023SJohn Marino static bool
execute_cse_sincos_1(tree name)691e4b17023SJohn Marino execute_cse_sincos_1 (tree name)
692e4b17023SJohn Marino {
693e4b17023SJohn Marino gimple_stmt_iterator gsi;
694e4b17023SJohn Marino imm_use_iterator use_iter;
695e4b17023SJohn Marino tree fndecl, res, type;
696e4b17023SJohn Marino gimple def_stmt, use_stmt, stmt;
697e4b17023SJohn Marino int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
698e4b17023SJohn Marino VEC(gimple, heap) *stmts = NULL;
699e4b17023SJohn Marino basic_block top_bb = NULL;
700e4b17023SJohn Marino int i;
701e4b17023SJohn Marino bool cfg_changed = false;
702e4b17023SJohn Marino
703e4b17023SJohn Marino type = TREE_TYPE (name);
704e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
705e4b17023SJohn Marino {
706e4b17023SJohn Marino if (gimple_code (use_stmt) != GIMPLE_CALL
707e4b17023SJohn Marino || !gimple_call_lhs (use_stmt)
708e4b17023SJohn Marino || !(fndecl = gimple_call_fndecl (use_stmt))
709e4b17023SJohn Marino || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
710e4b17023SJohn Marino continue;
711e4b17023SJohn Marino
712e4b17023SJohn Marino switch (DECL_FUNCTION_CODE (fndecl))
713e4b17023SJohn Marino {
714e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_COS):
715e4b17023SJohn Marino seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
716e4b17023SJohn Marino break;
717e4b17023SJohn Marino
718e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_SIN):
719e4b17023SJohn Marino seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
720e4b17023SJohn Marino break;
721e4b17023SJohn Marino
722e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_CEXPI):
723e4b17023SJohn Marino seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
724e4b17023SJohn Marino break;
725e4b17023SJohn Marino
726e4b17023SJohn Marino default:;
727e4b17023SJohn Marino }
728e4b17023SJohn Marino }
729e4b17023SJohn Marino
730e4b17023SJohn Marino if (seen_cos + seen_sin + seen_cexpi <= 1)
731e4b17023SJohn Marino {
732e4b17023SJohn Marino VEC_free(gimple, heap, stmts);
733e4b17023SJohn Marino return false;
734e4b17023SJohn Marino }
735e4b17023SJohn Marino
736e4b17023SJohn Marino /* Simply insert cexpi at the beginning of top_bb but not earlier than
737e4b17023SJohn Marino the name def statement. */
738e4b17023SJohn Marino fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
739e4b17023SJohn Marino if (!fndecl)
740e4b17023SJohn Marino return false;
741e4b17023SJohn Marino res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
742e4b17023SJohn Marino stmt = gimple_build_call (fndecl, 1, name);
743e4b17023SJohn Marino res = make_ssa_name (res, stmt);
744e4b17023SJohn Marino gimple_call_set_lhs (stmt, res);
745e4b17023SJohn Marino
746e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (name);
747e4b17023SJohn Marino if (!SSA_NAME_IS_DEFAULT_DEF (name)
748e4b17023SJohn Marino && gimple_code (def_stmt) != GIMPLE_PHI
749e4b17023SJohn Marino && gimple_bb (def_stmt) == top_bb)
750e4b17023SJohn Marino {
751e4b17023SJohn Marino gsi = gsi_for_stmt (def_stmt);
752e4b17023SJohn Marino gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
753e4b17023SJohn Marino }
754e4b17023SJohn Marino else
755e4b17023SJohn Marino {
756e4b17023SJohn Marino gsi = gsi_after_labels (top_bb);
757e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
758e4b17023SJohn Marino }
759e4b17023SJohn Marino update_stmt (stmt);
760e4b17023SJohn Marino sincos_stats.inserted++;
761e4b17023SJohn Marino
762e4b17023SJohn Marino /* And adjust the recorded old call sites. */
763e4b17023SJohn Marino for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
764e4b17023SJohn Marino {
765e4b17023SJohn Marino tree rhs = NULL;
766e4b17023SJohn Marino fndecl = gimple_call_fndecl (use_stmt);
767e4b17023SJohn Marino
768e4b17023SJohn Marino switch (DECL_FUNCTION_CODE (fndecl))
769e4b17023SJohn Marino {
770e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_COS):
771e4b17023SJohn Marino rhs = fold_build1 (REALPART_EXPR, type, res);
772e4b17023SJohn Marino break;
773e4b17023SJohn Marino
774e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_SIN):
775e4b17023SJohn Marino rhs = fold_build1 (IMAGPART_EXPR, type, res);
776e4b17023SJohn Marino break;
777e4b17023SJohn Marino
778e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_CEXPI):
779e4b17023SJohn Marino rhs = res;
780e4b17023SJohn Marino break;
781e4b17023SJohn Marino
782e4b17023SJohn Marino default:;
783e4b17023SJohn Marino gcc_unreachable ();
784e4b17023SJohn Marino }
785e4b17023SJohn Marino
786e4b17023SJohn Marino /* Replace call with a copy. */
787e4b17023SJohn Marino stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
788e4b17023SJohn Marino
789e4b17023SJohn Marino gsi = gsi_for_stmt (use_stmt);
790e4b17023SJohn Marino gsi_replace (&gsi, stmt, true);
791e4b17023SJohn Marino if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
792e4b17023SJohn Marino cfg_changed = true;
793e4b17023SJohn Marino }
794e4b17023SJohn Marino
795e4b17023SJohn Marino VEC_free(gimple, heap, stmts);
796e4b17023SJohn Marino
797e4b17023SJohn Marino return cfg_changed;
798e4b17023SJohn Marino }
799e4b17023SJohn Marino
800e4b17023SJohn Marino /* To evaluate powi(x,n), the floating point value x raised to the
801e4b17023SJohn Marino constant integer exponent n, we use a hybrid algorithm that
802e4b17023SJohn Marino combines the "window method" with look-up tables. For an
803e4b17023SJohn Marino introduction to exponentiation algorithms and "addition chains",
804e4b17023SJohn Marino see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
805e4b17023SJohn Marino "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
806e4b17023SJohn Marino 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
807e4b17023SJohn Marino Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
808e4b17023SJohn Marino
809e4b17023SJohn Marino /* Provide a default value for POWI_MAX_MULTS, the maximum number of
810e4b17023SJohn Marino multiplications to inline before calling the system library's pow
811e4b17023SJohn Marino function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
812e4b17023SJohn Marino so this default never requires calling pow, powf or powl. */
813e4b17023SJohn Marino
814e4b17023SJohn Marino #ifndef POWI_MAX_MULTS
815e4b17023SJohn Marino #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
816e4b17023SJohn Marino #endif
817e4b17023SJohn Marino
818e4b17023SJohn Marino /* The size of the "optimal power tree" lookup table. All
819e4b17023SJohn Marino exponents less than this value are simply looked up in the
820e4b17023SJohn Marino powi_table below. This threshold is also used to size the
821e4b17023SJohn Marino cache of pseudo registers that hold intermediate results. */
822e4b17023SJohn Marino #define POWI_TABLE_SIZE 256
823e4b17023SJohn Marino
824e4b17023SJohn Marino /* The size, in bits of the window, used in the "window method"
825e4b17023SJohn Marino exponentiation algorithm. This is equivalent to a radix of
826e4b17023SJohn Marino (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
827e4b17023SJohn Marino #define POWI_WINDOW_SIZE 3
828e4b17023SJohn Marino
829e4b17023SJohn Marino /* The following table is an efficient representation of an
830e4b17023SJohn Marino "optimal power tree". For each value, i, the corresponding
831e4b17023SJohn Marino value, j, in the table states than an optimal evaluation
832e4b17023SJohn Marino sequence for calculating pow(x,i) can be found by evaluating
833e4b17023SJohn Marino pow(x,j)*pow(x,i-j). An optimal power tree for the first
834e4b17023SJohn Marino 100 integers is given in Knuth's "Seminumerical algorithms". */
835e4b17023SJohn Marino
836e4b17023SJohn Marino static const unsigned char powi_table[POWI_TABLE_SIZE] =
837e4b17023SJohn Marino {
838e4b17023SJohn Marino 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
839e4b17023SJohn Marino 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
840e4b17023SJohn Marino 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
841e4b17023SJohn Marino 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
842e4b17023SJohn Marino 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
843e4b17023SJohn Marino 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
844e4b17023SJohn Marino 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
845e4b17023SJohn Marino 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
846e4b17023SJohn Marino 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
847e4b17023SJohn Marino 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
848e4b17023SJohn Marino 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
849e4b17023SJohn Marino 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
850e4b17023SJohn Marino 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
851e4b17023SJohn Marino 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
852e4b17023SJohn Marino 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
853e4b17023SJohn Marino 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
854e4b17023SJohn Marino 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
855e4b17023SJohn Marino 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
856e4b17023SJohn Marino 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
857e4b17023SJohn Marino 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
858e4b17023SJohn Marino 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
859e4b17023SJohn Marino 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
860e4b17023SJohn Marino 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
861e4b17023SJohn Marino 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
862e4b17023SJohn Marino 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
863e4b17023SJohn Marino 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
864e4b17023SJohn Marino 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
865e4b17023SJohn Marino 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
866e4b17023SJohn Marino 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
867e4b17023SJohn Marino 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
868e4b17023SJohn Marino 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
869e4b17023SJohn Marino 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
870e4b17023SJohn Marino };
871e4b17023SJohn Marino
872e4b17023SJohn Marino
873e4b17023SJohn Marino /* Return the number of multiplications required to calculate
874e4b17023SJohn Marino powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
875e4b17023SJohn Marino subroutine of powi_cost. CACHE is an array indicating
876e4b17023SJohn Marino which exponents have already been calculated. */
877e4b17023SJohn Marino
878e4b17023SJohn Marino static int
powi_lookup_cost(unsigned HOST_WIDE_INT n,bool * cache)879e4b17023SJohn Marino powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
880e4b17023SJohn Marino {
881e4b17023SJohn Marino /* If we've already calculated this exponent, then this evaluation
882e4b17023SJohn Marino doesn't require any additional multiplications. */
883e4b17023SJohn Marino if (cache[n])
884e4b17023SJohn Marino return 0;
885e4b17023SJohn Marino
886e4b17023SJohn Marino cache[n] = true;
887e4b17023SJohn Marino return powi_lookup_cost (n - powi_table[n], cache)
888e4b17023SJohn Marino + powi_lookup_cost (powi_table[n], cache) + 1;
889e4b17023SJohn Marino }
890e4b17023SJohn Marino
891e4b17023SJohn Marino /* Return the number of multiplications required to calculate
892e4b17023SJohn Marino powi(x,n) for an arbitrary x, given the exponent N. This
893e4b17023SJohn Marino function needs to be kept in sync with powi_as_mults below. */
894e4b17023SJohn Marino
895e4b17023SJohn Marino static int
powi_cost(HOST_WIDE_INT n)896e4b17023SJohn Marino powi_cost (HOST_WIDE_INT n)
897e4b17023SJohn Marino {
898e4b17023SJohn Marino bool cache[POWI_TABLE_SIZE];
899e4b17023SJohn Marino unsigned HOST_WIDE_INT digit;
900e4b17023SJohn Marino unsigned HOST_WIDE_INT val;
901e4b17023SJohn Marino int result;
902e4b17023SJohn Marino
903e4b17023SJohn Marino if (n == 0)
904e4b17023SJohn Marino return 0;
905e4b17023SJohn Marino
906e4b17023SJohn Marino /* Ignore the reciprocal when calculating the cost. */
907e4b17023SJohn Marino val = (n < 0) ? -n : n;
908e4b17023SJohn Marino
909e4b17023SJohn Marino /* Initialize the exponent cache. */
910e4b17023SJohn Marino memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
911e4b17023SJohn Marino cache[1] = true;
912e4b17023SJohn Marino
913e4b17023SJohn Marino result = 0;
914e4b17023SJohn Marino
915e4b17023SJohn Marino while (val >= POWI_TABLE_SIZE)
916e4b17023SJohn Marino {
917e4b17023SJohn Marino if (val & 1)
918e4b17023SJohn Marino {
919e4b17023SJohn Marino digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
920e4b17023SJohn Marino result += powi_lookup_cost (digit, cache)
921e4b17023SJohn Marino + POWI_WINDOW_SIZE + 1;
922e4b17023SJohn Marino val >>= POWI_WINDOW_SIZE;
923e4b17023SJohn Marino }
924e4b17023SJohn Marino else
925e4b17023SJohn Marino {
926e4b17023SJohn Marino val >>= 1;
927e4b17023SJohn Marino result++;
928e4b17023SJohn Marino }
929e4b17023SJohn Marino }
930e4b17023SJohn Marino
931e4b17023SJohn Marino return result + powi_lookup_cost (val, cache);
932e4b17023SJohn Marino }
933e4b17023SJohn Marino
934e4b17023SJohn Marino /* Recursive subroutine of powi_as_mults. This function takes the
935e4b17023SJohn Marino array, CACHE, of already calculated exponents and an exponent N and
936e4b17023SJohn Marino returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
937e4b17023SJohn Marino
938e4b17023SJohn Marino static tree
powi_as_mults_1(gimple_stmt_iterator * gsi,location_t loc,tree type,HOST_WIDE_INT n,tree * cache,tree target)939e4b17023SJohn Marino powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
940e4b17023SJohn Marino HOST_WIDE_INT n, tree *cache, tree target)
941e4b17023SJohn Marino {
942e4b17023SJohn Marino tree op0, op1, ssa_target;
943e4b17023SJohn Marino unsigned HOST_WIDE_INT digit;
944e4b17023SJohn Marino gimple mult_stmt;
945e4b17023SJohn Marino
946e4b17023SJohn Marino if (n < POWI_TABLE_SIZE && cache[n])
947e4b17023SJohn Marino return cache[n];
948e4b17023SJohn Marino
949e4b17023SJohn Marino ssa_target = make_ssa_name (target, NULL);
950e4b17023SJohn Marino
951e4b17023SJohn Marino if (n < POWI_TABLE_SIZE)
952e4b17023SJohn Marino {
953e4b17023SJohn Marino cache[n] = ssa_target;
954e4b17023SJohn Marino op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache, target);
955e4b17023SJohn Marino op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
956e4b17023SJohn Marino }
957e4b17023SJohn Marino else if (n & 1)
958e4b17023SJohn Marino {
959e4b17023SJohn Marino digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
960e4b17023SJohn Marino op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
961e4b17023SJohn Marino op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
962e4b17023SJohn Marino }
963e4b17023SJohn Marino else
964e4b17023SJohn Marino {
965e4b17023SJohn Marino op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
966e4b17023SJohn Marino op1 = op0;
967e4b17023SJohn Marino }
968e4b17023SJohn Marino
969e4b17023SJohn Marino mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
970e4b17023SJohn Marino gimple_set_location (mult_stmt, loc);
971e4b17023SJohn Marino gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
972e4b17023SJohn Marino
973e4b17023SJohn Marino return ssa_target;
974e4b17023SJohn Marino }
975e4b17023SJohn Marino
976e4b17023SJohn Marino /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
977e4b17023SJohn Marino This function needs to be kept in sync with powi_cost above. */
978e4b17023SJohn Marino
979e4b17023SJohn Marino static tree
powi_as_mults(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)980e4b17023SJohn Marino powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
981e4b17023SJohn Marino tree arg0, HOST_WIDE_INT n)
982e4b17023SJohn Marino {
983e4b17023SJohn Marino tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
984e4b17023SJohn Marino gimple div_stmt;
985e4b17023SJohn Marino
986e4b17023SJohn Marino if (n == 0)
987e4b17023SJohn Marino return build_real (type, dconst1);
988e4b17023SJohn Marino
989e4b17023SJohn Marino memset (cache, 0, sizeof (cache));
990e4b17023SJohn Marino cache[1] = arg0;
991e4b17023SJohn Marino
992e4b17023SJohn Marino target = create_tmp_reg (type, "powmult");
993e4b17023SJohn Marino add_referenced_var (target);
994e4b17023SJohn Marino
995e4b17023SJohn Marino result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache, target);
996e4b17023SJohn Marino
997e4b17023SJohn Marino if (n >= 0)
998e4b17023SJohn Marino return result;
999e4b17023SJohn Marino
1000e4b17023SJohn Marino /* If the original exponent was negative, reciprocate the result. */
1001e4b17023SJohn Marino target = make_ssa_name (target, NULL);
1002e4b17023SJohn Marino div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
1003e4b17023SJohn Marino build_real (type, dconst1),
1004e4b17023SJohn Marino result);
1005e4b17023SJohn Marino gimple_set_location (div_stmt, loc);
1006e4b17023SJohn Marino gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1007e4b17023SJohn Marino
1008e4b17023SJohn Marino return target;
1009e4b17023SJohn Marino }
1010e4b17023SJohn Marino
1011e4b17023SJohn Marino /* ARG0 and N are the two arguments to a powi builtin in GSI with
1012e4b17023SJohn Marino location info LOC. If the arguments are appropriate, create an
1013e4b17023SJohn Marino equivalent sequence of statements prior to GSI using an optimal
1014e4b17023SJohn Marino number of multiplications, and return an expession holding the
1015e4b17023SJohn Marino result. */
1016e4b17023SJohn Marino
1017e4b17023SJohn Marino static tree
gimple_expand_builtin_powi(gimple_stmt_iterator * gsi,location_t loc,tree arg0,HOST_WIDE_INT n)1018e4b17023SJohn Marino gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1019e4b17023SJohn Marino tree arg0, HOST_WIDE_INT n)
1020e4b17023SJohn Marino {
1021e4b17023SJohn Marino /* Avoid largest negative number. */
1022e4b17023SJohn Marino if (n != -n
1023e4b17023SJohn Marino && ((n >= -1 && n <= 2)
1024e4b17023SJohn Marino || (optimize_function_for_speed_p (cfun)
1025e4b17023SJohn Marino && powi_cost (n) <= POWI_MAX_MULTS)))
1026e4b17023SJohn Marino return powi_as_mults (gsi, loc, arg0, n);
1027e4b17023SJohn Marino
1028e4b17023SJohn Marino return NULL_TREE;
1029e4b17023SJohn Marino }
1030e4b17023SJohn Marino
1031e4b17023SJohn Marino /* Build a gimple call statement that calls FN with argument ARG.
1032e4b17023SJohn Marino Set the lhs of the call statement to a fresh SSA name for
1033e4b17023SJohn Marino variable VAR. If VAR is NULL, first allocate it. Insert the
1034e4b17023SJohn Marino statement prior to GSI's current position, and return the fresh
1035e4b17023SJohn Marino SSA name. */
1036e4b17023SJohn Marino
1037e4b17023SJohn Marino static tree
build_and_insert_call(gimple_stmt_iterator * gsi,location_t loc,tree * var,tree fn,tree arg)1038e4b17023SJohn Marino build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1039e4b17023SJohn Marino tree *var, tree fn, tree arg)
1040e4b17023SJohn Marino {
1041e4b17023SJohn Marino gimple call_stmt;
1042e4b17023SJohn Marino tree ssa_target;
1043e4b17023SJohn Marino
1044e4b17023SJohn Marino if (!*var)
1045e4b17023SJohn Marino {
1046e4b17023SJohn Marino *var = create_tmp_reg (TREE_TYPE (arg), "powroot");
1047e4b17023SJohn Marino add_referenced_var (*var);
1048e4b17023SJohn Marino }
1049e4b17023SJohn Marino
1050e4b17023SJohn Marino call_stmt = gimple_build_call (fn, 1, arg);
1051e4b17023SJohn Marino ssa_target = make_ssa_name (*var, NULL);
1052e4b17023SJohn Marino gimple_set_lhs (call_stmt, ssa_target);
1053e4b17023SJohn Marino gimple_set_location (call_stmt, loc);
1054e4b17023SJohn Marino gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1055e4b17023SJohn Marino
1056e4b17023SJohn Marino return ssa_target;
1057e4b17023SJohn Marino }
1058e4b17023SJohn Marino
1059e4b17023SJohn Marino /* Build a gimple binary operation with the given CODE and arguments
1060e4b17023SJohn Marino ARG0, ARG1, assigning the result to a new SSA name for variable
1061e4b17023SJohn Marino TARGET. Insert the statement prior to GSI's current position, and
1062e4b17023SJohn Marino return the fresh SSA name.*/
1063e4b17023SJohn Marino
1064e4b17023SJohn Marino static tree
build_and_insert_binop(gimple_stmt_iterator * gsi,location_t loc,tree target,enum tree_code code,tree arg0,tree arg1)1065e4b17023SJohn Marino build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1066e4b17023SJohn Marino tree target, enum tree_code code, tree arg0, tree arg1)
1067e4b17023SJohn Marino {
1068e4b17023SJohn Marino tree result = make_ssa_name (target, NULL);
1069e4b17023SJohn Marino gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
1070e4b17023SJohn Marino gimple_set_location (stmt, loc);
1071e4b17023SJohn Marino gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1072e4b17023SJohn Marino return result;
1073e4b17023SJohn Marino }
1074e4b17023SJohn Marino
1075e4b17023SJohn Marino /* Build a gimple reference operation with the given CODE and argument
1076e4b17023SJohn Marino ARG, assigning the result to a new SSA name for variable TARGET.
1077e4b17023SJohn Marino Insert the statement prior to GSI's current position, and return
1078e4b17023SJohn Marino the fresh SSA name. */
1079e4b17023SJohn Marino
1080e4b17023SJohn Marino static inline tree
build_and_insert_ref(gimple_stmt_iterator * gsi,location_t loc,tree type,tree target,enum tree_code code,tree arg0)1081e4b17023SJohn Marino build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1082e4b17023SJohn Marino tree target, enum tree_code code, tree arg0)
1083e4b17023SJohn Marino {
1084e4b17023SJohn Marino tree result = make_ssa_name (target, NULL);
1085e4b17023SJohn Marino gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1086e4b17023SJohn Marino gimple_set_location (stmt, loc);
1087e4b17023SJohn Marino gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1088e4b17023SJohn Marino return result;
1089e4b17023SJohn Marino }
1090e4b17023SJohn Marino
1091e4b17023SJohn Marino /* Build a gimple assignment to cast VAL to TARGET. Insert the statement
1092e4b17023SJohn Marino prior to GSI's current position, and return the fresh SSA name. */
1093e4b17023SJohn Marino
1094e4b17023SJohn Marino static tree
build_and_insert_cast(gimple_stmt_iterator * gsi,location_t loc,tree target,tree val)1095e4b17023SJohn Marino build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1096e4b17023SJohn Marino tree target, tree val)
1097e4b17023SJohn Marino {
1098e4b17023SJohn Marino return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
1099e4b17023SJohn Marino }
1100e4b17023SJohn Marino
1101e4b17023SJohn Marino /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1102e4b17023SJohn Marino with location info LOC. If possible, create an equivalent and
1103e4b17023SJohn Marino less expensive sequence of statements prior to GSI, and return an
1104e4b17023SJohn Marino expession holding the result. */
1105e4b17023SJohn Marino
1106e4b17023SJohn Marino static tree
gimple_expand_builtin_pow(gimple_stmt_iterator * gsi,location_t loc,tree arg0,tree arg1)1107e4b17023SJohn Marino gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1108e4b17023SJohn Marino tree arg0, tree arg1)
1109e4b17023SJohn Marino {
1110e4b17023SJohn Marino REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
1111e4b17023SJohn Marino REAL_VALUE_TYPE c2, dconst3;
1112e4b17023SJohn Marino HOST_WIDE_INT n;
1113e4b17023SJohn Marino tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
1114e4b17023SJohn Marino tree target = NULL_TREE;
1115e4b17023SJohn Marino enum machine_mode mode;
1116*5ce9237cSJohn Marino bool hw_sqrt_exists, c_is_int, c2_is_int;
1117e4b17023SJohn Marino
1118e4b17023SJohn Marino /* If the exponent isn't a constant, there's nothing of interest
1119e4b17023SJohn Marino to be done. */
1120e4b17023SJohn Marino if (TREE_CODE (arg1) != REAL_CST)
1121e4b17023SJohn Marino return NULL_TREE;
1122e4b17023SJohn Marino
1123e4b17023SJohn Marino /* If the exponent is equivalent to an integer, expand to an optimal
1124e4b17023SJohn Marino multiplication sequence when profitable. */
1125e4b17023SJohn Marino c = TREE_REAL_CST (arg1);
1126e4b17023SJohn Marino n = real_to_integer (&c);
1127e4b17023SJohn Marino real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1128*5ce9237cSJohn Marino c_is_int = real_identical (&c, &cint);
1129e4b17023SJohn Marino
1130*5ce9237cSJohn Marino if (c_is_int
1131e4b17023SJohn Marino && ((n >= -1 && n <= 2)
1132e4b17023SJohn Marino || (flag_unsafe_math_optimizations
1133e4b17023SJohn Marino && optimize_insn_for_speed_p ()
1134e4b17023SJohn Marino && powi_cost (n) <= POWI_MAX_MULTS)))
1135e4b17023SJohn Marino return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1136e4b17023SJohn Marino
1137e4b17023SJohn Marino /* Attempt various optimizations using sqrt and cbrt. */
1138e4b17023SJohn Marino type = TREE_TYPE (arg0);
1139e4b17023SJohn Marino mode = TYPE_MODE (type);
1140e4b17023SJohn Marino sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1141e4b17023SJohn Marino
1142e4b17023SJohn Marino /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1143e4b17023SJohn Marino unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1144e4b17023SJohn Marino sqrt(-0) = -0. */
1145e4b17023SJohn Marino if (sqrtfn
1146e4b17023SJohn Marino && REAL_VALUES_EQUAL (c, dconsthalf)
1147e4b17023SJohn Marino && !HONOR_SIGNED_ZEROS (mode))
1148e4b17023SJohn Marino return build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1149e4b17023SJohn Marino
1150e4b17023SJohn Marino /* Optimize pow(x,0.25) = sqrt(sqrt(x)). Assume on most machines that
1151e4b17023SJohn Marino a builtin sqrt instruction is smaller than a call to pow with 0.25,
1152e4b17023SJohn Marino so do this optimization even if -Os. Don't do this optimization
1153e4b17023SJohn Marino if we don't have a hardware sqrt insn. */
1154e4b17023SJohn Marino dconst1_4 = dconst1;
1155e4b17023SJohn Marino SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1156e4b17023SJohn Marino hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1157e4b17023SJohn Marino
1158e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1159e4b17023SJohn Marino && sqrtfn
1160e4b17023SJohn Marino && REAL_VALUES_EQUAL (c, dconst1_4)
1161e4b17023SJohn Marino && hw_sqrt_exists)
1162e4b17023SJohn Marino {
1163e4b17023SJohn Marino /* sqrt(x) */
1164e4b17023SJohn Marino sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1165e4b17023SJohn Marino
1166e4b17023SJohn Marino /* sqrt(sqrt(x)) */
1167e4b17023SJohn Marino return build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
1168e4b17023SJohn Marino }
1169e4b17023SJohn Marino
1170e4b17023SJohn Marino /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
1171e4b17023SJohn Marino optimizing for space. Don't do this optimization if we don't have
1172e4b17023SJohn Marino a hardware sqrt insn. */
1173e4b17023SJohn Marino real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
1174e4b17023SJohn Marino SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
1175e4b17023SJohn Marino
1176e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1177e4b17023SJohn Marino && sqrtfn
1178e4b17023SJohn Marino && optimize_function_for_speed_p (cfun)
1179e4b17023SJohn Marino && REAL_VALUES_EQUAL (c, dconst3_4)
1180e4b17023SJohn Marino && hw_sqrt_exists)
1181e4b17023SJohn Marino {
1182e4b17023SJohn Marino /* sqrt(x) */
1183e4b17023SJohn Marino sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1184e4b17023SJohn Marino
1185e4b17023SJohn Marino /* sqrt(sqrt(x)) */
1186e4b17023SJohn Marino sqrt_sqrt = build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
1187e4b17023SJohn Marino
1188e4b17023SJohn Marino /* sqrt(x) * sqrt(sqrt(x)) */
1189e4b17023SJohn Marino return build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1190e4b17023SJohn Marino sqrt_arg0, sqrt_sqrt);
1191e4b17023SJohn Marino }
1192e4b17023SJohn Marino
1193e4b17023SJohn Marino /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1194e4b17023SJohn Marino optimizations since 1./3. is not exactly representable. If x
1195e4b17023SJohn Marino is negative and finite, the correct value of pow(x,1./3.) is
1196e4b17023SJohn Marino a NaN with the "invalid" exception raised, because the value
1197e4b17023SJohn Marino of 1./3. actually has an even denominator. The correct value
1198e4b17023SJohn Marino of cbrt(x) is a negative real value. */
1199e4b17023SJohn Marino cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1200e4b17023SJohn Marino dconst1_3 = real_value_truncate (mode, dconst_third ());
1201e4b17023SJohn Marino
1202e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1203e4b17023SJohn Marino && cbrtfn
1204e4b17023SJohn Marino && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1205e4b17023SJohn Marino && REAL_VALUES_EQUAL (c, dconst1_3))
1206e4b17023SJohn Marino return build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
1207e4b17023SJohn Marino
1208e4b17023SJohn Marino /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1209e4b17023SJohn Marino if we don't have a hardware sqrt insn. */
1210e4b17023SJohn Marino dconst1_6 = dconst1_3;
1211e4b17023SJohn Marino SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1212e4b17023SJohn Marino
1213e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1214e4b17023SJohn Marino && sqrtfn
1215e4b17023SJohn Marino && cbrtfn
1216e4b17023SJohn Marino && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1217e4b17023SJohn Marino && optimize_function_for_speed_p (cfun)
1218e4b17023SJohn Marino && hw_sqrt_exists
1219e4b17023SJohn Marino && REAL_VALUES_EQUAL (c, dconst1_6))
1220e4b17023SJohn Marino {
1221e4b17023SJohn Marino /* sqrt(x) */
1222e4b17023SJohn Marino sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1223e4b17023SJohn Marino
1224e4b17023SJohn Marino /* cbrt(sqrt(x)) */
1225e4b17023SJohn Marino return build_and_insert_call (gsi, loc, &target, cbrtfn, sqrt_arg0);
1226e4b17023SJohn Marino }
1227e4b17023SJohn Marino
1228*5ce9237cSJohn Marino /* Optimize pow(x,c), where n = 2c for some nonzero integer n
1229*5ce9237cSJohn Marino and c not an integer, into
1230e4b17023SJohn Marino
1231e4b17023SJohn Marino sqrt(x) * powi(x, n/2), n > 0;
1232e4b17023SJohn Marino 1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0.
1233e4b17023SJohn Marino
1234e4b17023SJohn Marino Do not calculate the powi factor when n/2 = 0. */
1235e4b17023SJohn Marino real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1236e4b17023SJohn Marino n = real_to_integer (&c2);
1237e4b17023SJohn Marino real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1238*5ce9237cSJohn Marino c2_is_int = real_identical (&c2, &cint);
1239e4b17023SJohn Marino
1240e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1241e4b17023SJohn Marino && sqrtfn
1242*5ce9237cSJohn Marino && c2_is_int
1243*5ce9237cSJohn Marino && !c_is_int
1244*5ce9237cSJohn Marino && optimize_function_for_speed_p (cfun))
1245e4b17023SJohn Marino {
1246e4b17023SJohn Marino tree powi_x_ndiv2 = NULL_TREE;
1247e4b17023SJohn Marino
1248e4b17023SJohn Marino /* Attempt to fold powi(arg0, abs(n/2)) into multiplies. If not
1249e4b17023SJohn Marino possible or profitable, give up. Skip the degenerate case when
1250e4b17023SJohn Marino n is 1 or -1, where the result is always 1. */
1251e4b17023SJohn Marino if (absu_hwi (n) != 1)
1252e4b17023SJohn Marino {
1253e4b17023SJohn Marino powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
1254e4b17023SJohn Marino abs_hwi (n / 2));
1255e4b17023SJohn Marino if (!powi_x_ndiv2)
1256e4b17023SJohn Marino return NULL_TREE;
1257e4b17023SJohn Marino }
1258e4b17023SJohn Marino
1259e4b17023SJohn Marino /* Calculate sqrt(x). When n is not 1 or -1, multiply it by the
1260e4b17023SJohn Marino result of the optimal multiply sequence just calculated. */
1261e4b17023SJohn Marino sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
1262e4b17023SJohn Marino
1263e4b17023SJohn Marino if (absu_hwi (n) == 1)
1264e4b17023SJohn Marino result = sqrt_arg0;
1265e4b17023SJohn Marino else
1266e4b17023SJohn Marino result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1267e4b17023SJohn Marino sqrt_arg0, powi_x_ndiv2);
1268e4b17023SJohn Marino
1269e4b17023SJohn Marino /* If n is negative, reciprocate the result. */
1270e4b17023SJohn Marino if (n < 0)
1271e4b17023SJohn Marino result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
1272e4b17023SJohn Marino build_real (type, dconst1), result);
1273e4b17023SJohn Marino return result;
1274e4b17023SJohn Marino }
1275e4b17023SJohn Marino
1276e4b17023SJohn Marino /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1277e4b17023SJohn Marino
1278e4b17023SJohn Marino powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1279e4b17023SJohn Marino 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1280e4b17023SJohn Marino
1281e4b17023SJohn Marino Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1282e4b17023SJohn Marino different from pow(x, 1./3.) due to rounding and behavior with
1283e4b17023SJohn Marino negative x, we need to constrain this transformation to unsafe
1284e4b17023SJohn Marino math and positive x or finite math. */
1285e4b17023SJohn Marino real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
1286e4b17023SJohn Marino real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1287e4b17023SJohn Marino real_round (&c2, mode, &c2);
1288e4b17023SJohn Marino n = real_to_integer (&c2);
1289e4b17023SJohn Marino real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1290e4b17023SJohn Marino real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1291e4b17023SJohn Marino real_convert (&c2, mode, &c2);
1292e4b17023SJohn Marino
1293e4b17023SJohn Marino if (flag_unsafe_math_optimizations
1294e4b17023SJohn Marino && cbrtfn
1295e4b17023SJohn Marino && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1296e4b17023SJohn Marino && real_identical (&c2, &c)
1297*5ce9237cSJohn Marino && !c2_is_int
1298e4b17023SJohn Marino && optimize_function_for_speed_p (cfun)
1299e4b17023SJohn Marino && powi_cost (n / 3) <= POWI_MAX_MULTS)
1300e4b17023SJohn Marino {
1301e4b17023SJohn Marino tree powi_x_ndiv3 = NULL_TREE;
1302e4b17023SJohn Marino
1303e4b17023SJohn Marino /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1304e4b17023SJohn Marino possible or profitable, give up. Skip the degenerate case when
1305e4b17023SJohn Marino abs(n) < 3, where the result is always 1. */
1306e4b17023SJohn Marino if (absu_hwi (n) >= 3)
1307e4b17023SJohn Marino {
1308e4b17023SJohn Marino powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1309e4b17023SJohn Marino abs_hwi (n / 3));
1310e4b17023SJohn Marino if (!powi_x_ndiv3)
1311e4b17023SJohn Marino return NULL_TREE;
1312e4b17023SJohn Marino }
1313e4b17023SJohn Marino
1314e4b17023SJohn Marino /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1315e4b17023SJohn Marino as that creates an unnecessary variable. Instead, just produce
1316e4b17023SJohn Marino either cbrt(x) or cbrt(x) * cbrt(x). */
1317e4b17023SJohn Marino cbrt_x = build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
1318e4b17023SJohn Marino
1319e4b17023SJohn Marino if (absu_hwi (n) % 3 == 1)
1320e4b17023SJohn Marino powi_cbrt_x = cbrt_x;
1321e4b17023SJohn Marino else
1322e4b17023SJohn Marino powi_cbrt_x = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1323e4b17023SJohn Marino cbrt_x, cbrt_x);
1324e4b17023SJohn Marino
1325e4b17023SJohn Marino /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1326e4b17023SJohn Marino if (absu_hwi (n) < 3)
1327e4b17023SJohn Marino result = powi_cbrt_x;
1328e4b17023SJohn Marino else
1329e4b17023SJohn Marino result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1330e4b17023SJohn Marino powi_x_ndiv3, powi_cbrt_x);
1331e4b17023SJohn Marino
1332e4b17023SJohn Marino /* If n is negative, reciprocate the result. */
1333e4b17023SJohn Marino if (n < 0)
1334e4b17023SJohn Marino result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
1335e4b17023SJohn Marino build_real (type, dconst1), result);
1336e4b17023SJohn Marino
1337e4b17023SJohn Marino return result;
1338e4b17023SJohn Marino }
1339e4b17023SJohn Marino
1340e4b17023SJohn Marino /* No optimizations succeeded. */
1341e4b17023SJohn Marino return NULL_TREE;
1342e4b17023SJohn Marino }
1343e4b17023SJohn Marino
1344e4b17023SJohn Marino /* ARG is the argument to a cabs builtin call in GSI with location info
1345e4b17023SJohn Marino LOC. Create a sequence of statements prior to GSI that calculates
1346e4b17023SJohn Marino sqrt(R*R + I*I), where R and I are the real and imaginary components
1347e4b17023SJohn Marino of ARG, respectively. Return an expression holding the result. */
1348e4b17023SJohn Marino
1349e4b17023SJohn Marino static tree
gimple_expand_builtin_cabs(gimple_stmt_iterator * gsi,location_t loc,tree arg)1350e4b17023SJohn Marino gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1351e4b17023SJohn Marino {
1352e4b17023SJohn Marino tree target, real_part, imag_part, addend1, addend2, sum, result;
1353e4b17023SJohn Marino tree type = TREE_TYPE (TREE_TYPE (arg));
1354e4b17023SJohn Marino tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1355e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (type);
1356e4b17023SJohn Marino
1357e4b17023SJohn Marino if (!flag_unsafe_math_optimizations
1358e4b17023SJohn Marino || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1359e4b17023SJohn Marino || !sqrtfn
1360e4b17023SJohn Marino || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1361e4b17023SJohn Marino return NULL_TREE;
1362e4b17023SJohn Marino
1363e4b17023SJohn Marino target = create_tmp_reg (type, "cabs");
1364e4b17023SJohn Marino add_referenced_var (target);
1365e4b17023SJohn Marino
1366e4b17023SJohn Marino real_part = build_and_insert_ref (gsi, loc, type, target,
1367e4b17023SJohn Marino REALPART_EXPR, arg);
1368e4b17023SJohn Marino addend1 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1369e4b17023SJohn Marino real_part, real_part);
1370e4b17023SJohn Marino imag_part = build_and_insert_ref (gsi, loc, type, target,
1371e4b17023SJohn Marino IMAGPART_EXPR, arg);
1372e4b17023SJohn Marino addend2 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
1373e4b17023SJohn Marino imag_part, imag_part);
1374e4b17023SJohn Marino sum = build_and_insert_binop (gsi, loc, target, PLUS_EXPR, addend1, addend2);
1375e4b17023SJohn Marino result = build_and_insert_call (gsi, loc, &target, sqrtfn, sum);
1376e4b17023SJohn Marino
1377e4b17023SJohn Marino return result;
1378e4b17023SJohn Marino }
1379e4b17023SJohn Marino
1380e4b17023SJohn Marino /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1381e4b17023SJohn Marino on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1382e4b17023SJohn Marino an optimal number of multiplies, when n is a constant. */
1383e4b17023SJohn Marino
1384e4b17023SJohn Marino static unsigned int
execute_cse_sincos(void)1385e4b17023SJohn Marino execute_cse_sincos (void)
1386e4b17023SJohn Marino {
1387e4b17023SJohn Marino basic_block bb;
1388e4b17023SJohn Marino bool cfg_changed = false;
1389e4b17023SJohn Marino
1390e4b17023SJohn Marino calculate_dominance_info (CDI_DOMINATORS);
1391e4b17023SJohn Marino memset (&sincos_stats, 0, sizeof (sincos_stats));
1392e4b17023SJohn Marino
1393e4b17023SJohn Marino FOR_EACH_BB (bb)
1394e4b17023SJohn Marino {
1395e4b17023SJohn Marino gimple_stmt_iterator gsi;
1396*5ce9237cSJohn Marino bool cleanup_eh = false;
1397e4b17023SJohn Marino
1398e4b17023SJohn Marino for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1399e4b17023SJohn Marino {
1400e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
1401e4b17023SJohn Marino tree fndecl;
1402e4b17023SJohn Marino
1403*5ce9237cSJohn Marino /* Only the last stmt in a bb could throw, no need to call
1404*5ce9237cSJohn Marino gimple_purge_dead_eh_edges if we change something in the middle
1405*5ce9237cSJohn Marino of a basic block. */
1406*5ce9237cSJohn Marino cleanup_eh = false;
1407*5ce9237cSJohn Marino
1408e4b17023SJohn Marino if (is_gimple_call (stmt)
1409e4b17023SJohn Marino && gimple_call_lhs (stmt)
1410e4b17023SJohn Marino && (fndecl = gimple_call_fndecl (stmt))
1411e4b17023SJohn Marino && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1412e4b17023SJohn Marino {
1413e4b17023SJohn Marino tree arg, arg0, arg1, result;
1414e4b17023SJohn Marino HOST_WIDE_INT n;
1415e4b17023SJohn Marino location_t loc;
1416e4b17023SJohn Marino
1417e4b17023SJohn Marino switch (DECL_FUNCTION_CODE (fndecl))
1418e4b17023SJohn Marino {
1419e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_COS):
1420e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_SIN):
1421e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_CEXPI):
1422e4b17023SJohn Marino /* Make sure we have either sincos or cexp. */
1423e4b17023SJohn Marino if (!TARGET_HAS_SINCOS && !TARGET_C99_FUNCTIONS)
1424e4b17023SJohn Marino break;
1425e4b17023SJohn Marino
1426e4b17023SJohn Marino arg = gimple_call_arg (stmt, 0);
1427e4b17023SJohn Marino if (TREE_CODE (arg) == SSA_NAME)
1428e4b17023SJohn Marino cfg_changed |= execute_cse_sincos_1 (arg);
1429e4b17023SJohn Marino break;
1430e4b17023SJohn Marino
1431e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_POW):
1432e4b17023SJohn Marino arg0 = gimple_call_arg (stmt, 0);
1433e4b17023SJohn Marino arg1 = gimple_call_arg (stmt, 1);
1434e4b17023SJohn Marino
1435e4b17023SJohn Marino loc = gimple_location (stmt);
1436e4b17023SJohn Marino result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1437e4b17023SJohn Marino
1438e4b17023SJohn Marino if (result)
1439e4b17023SJohn Marino {
1440e4b17023SJohn Marino tree lhs = gimple_get_lhs (stmt);
1441e4b17023SJohn Marino gimple new_stmt = gimple_build_assign (lhs, result);
1442e4b17023SJohn Marino gimple_set_location (new_stmt, loc);
1443e4b17023SJohn Marino unlink_stmt_vdef (stmt);
1444e4b17023SJohn Marino gsi_replace (&gsi, new_stmt, true);
1445*5ce9237cSJohn Marino cleanup_eh = true;
1446e4b17023SJohn Marino }
1447e4b17023SJohn Marino break;
1448e4b17023SJohn Marino
1449e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_POWI):
1450e4b17023SJohn Marino arg0 = gimple_call_arg (stmt, 0);
1451e4b17023SJohn Marino arg1 = gimple_call_arg (stmt, 1);
1452e4b17023SJohn Marino if (!host_integerp (arg1, 0))
1453e4b17023SJohn Marino break;
1454e4b17023SJohn Marino
1455e4b17023SJohn Marino n = TREE_INT_CST_LOW (arg1);
1456e4b17023SJohn Marino loc = gimple_location (stmt);
1457e4b17023SJohn Marino result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1458e4b17023SJohn Marino
1459e4b17023SJohn Marino if (result)
1460e4b17023SJohn Marino {
1461e4b17023SJohn Marino tree lhs = gimple_get_lhs (stmt);
1462e4b17023SJohn Marino gimple new_stmt = gimple_build_assign (lhs, result);
1463e4b17023SJohn Marino gimple_set_location (new_stmt, loc);
1464e4b17023SJohn Marino unlink_stmt_vdef (stmt);
1465e4b17023SJohn Marino gsi_replace (&gsi, new_stmt, true);
1466*5ce9237cSJohn Marino cleanup_eh = true;
1467e4b17023SJohn Marino }
1468e4b17023SJohn Marino break;
1469e4b17023SJohn Marino
1470e4b17023SJohn Marino CASE_FLT_FN (BUILT_IN_CABS):
1471e4b17023SJohn Marino arg0 = gimple_call_arg (stmt, 0);
1472e4b17023SJohn Marino loc = gimple_location (stmt);
1473e4b17023SJohn Marino result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1474e4b17023SJohn Marino
1475e4b17023SJohn Marino if (result)
1476e4b17023SJohn Marino {
1477e4b17023SJohn Marino tree lhs = gimple_get_lhs (stmt);
1478e4b17023SJohn Marino gimple new_stmt = gimple_build_assign (lhs, result);
1479e4b17023SJohn Marino gimple_set_location (new_stmt, loc);
1480e4b17023SJohn Marino unlink_stmt_vdef (stmt);
1481e4b17023SJohn Marino gsi_replace (&gsi, new_stmt, true);
1482*5ce9237cSJohn Marino cleanup_eh = true;
1483e4b17023SJohn Marino }
1484e4b17023SJohn Marino break;
1485e4b17023SJohn Marino
1486e4b17023SJohn Marino default:;
1487e4b17023SJohn Marino }
1488e4b17023SJohn Marino }
1489e4b17023SJohn Marino }
1490*5ce9237cSJohn Marino if (cleanup_eh)
1491*5ce9237cSJohn Marino cfg_changed |= gimple_purge_dead_eh_edges (bb);
1492e4b17023SJohn Marino }
1493e4b17023SJohn Marino
1494e4b17023SJohn Marino statistics_counter_event (cfun, "sincos statements inserted",
1495e4b17023SJohn Marino sincos_stats.inserted);
1496e4b17023SJohn Marino
1497e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
1498e4b17023SJohn Marino return cfg_changed ? TODO_cleanup_cfg : 0;
1499e4b17023SJohn Marino }
1500e4b17023SJohn Marino
1501e4b17023SJohn Marino static bool
gate_cse_sincos(void)1502e4b17023SJohn Marino gate_cse_sincos (void)
1503e4b17023SJohn Marino {
1504e4b17023SJohn Marino /* We no longer require either sincos or cexp, since powi expansion
1505e4b17023SJohn Marino piggybacks on this pass. */
1506e4b17023SJohn Marino return optimize;
1507e4b17023SJohn Marino }
1508e4b17023SJohn Marino
1509e4b17023SJohn Marino struct gimple_opt_pass pass_cse_sincos =
1510e4b17023SJohn Marino {
1511e4b17023SJohn Marino {
1512e4b17023SJohn Marino GIMPLE_PASS,
1513e4b17023SJohn Marino "sincos", /* name */
1514e4b17023SJohn Marino gate_cse_sincos, /* gate */
1515e4b17023SJohn Marino execute_cse_sincos, /* execute */
1516e4b17023SJohn Marino NULL, /* sub */
1517e4b17023SJohn Marino NULL, /* next */
1518e4b17023SJohn Marino 0, /* static_pass_number */
1519e4b17023SJohn Marino TV_NONE, /* tv_id */
1520e4b17023SJohn Marino PROP_ssa, /* properties_required */
1521e4b17023SJohn Marino 0, /* properties_provided */
1522e4b17023SJohn Marino 0, /* properties_destroyed */
1523e4b17023SJohn Marino 0, /* todo_flags_start */
1524e4b17023SJohn Marino TODO_update_ssa | TODO_verify_ssa
1525e4b17023SJohn Marino | TODO_verify_stmts /* todo_flags_finish */
1526e4b17023SJohn Marino }
1527e4b17023SJohn Marino };
1528e4b17023SJohn Marino
1529e4b17023SJohn Marino /* A symbolic number is used to detect byte permutation and selection
1530e4b17023SJohn Marino patterns. Therefore the field N contains an artificial number
1531e4b17023SJohn Marino consisting of byte size markers:
1532e4b17023SJohn Marino
1533e4b17023SJohn Marino 0 - byte has the value 0
1534e4b17023SJohn Marino 1..size - byte contains the content of the byte
1535e4b17023SJohn Marino number indexed with that value minus one */
1536e4b17023SJohn Marino
1537e4b17023SJohn Marino struct symbolic_number {
1538e4b17023SJohn Marino unsigned HOST_WIDEST_INT n;
1539e4b17023SJohn Marino int size;
1540e4b17023SJohn Marino };
1541e4b17023SJohn Marino
1542e4b17023SJohn Marino /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1543e4b17023SJohn Marino number N. Return false if the requested operation is not permitted
1544e4b17023SJohn Marino on a symbolic number. */
1545e4b17023SJohn Marino
1546e4b17023SJohn Marino static inline bool
do_shift_rotate(enum tree_code code,struct symbolic_number * n,int count)1547e4b17023SJohn Marino do_shift_rotate (enum tree_code code,
1548e4b17023SJohn Marino struct symbolic_number *n,
1549e4b17023SJohn Marino int count)
1550e4b17023SJohn Marino {
1551e4b17023SJohn Marino if (count % 8 != 0)
1552e4b17023SJohn Marino return false;
1553e4b17023SJohn Marino
1554e4b17023SJohn Marino /* Zero out the extra bits of N in order to avoid them being shifted
1555e4b17023SJohn Marino into the significant bits. */
1556e4b17023SJohn Marino if (n->size < (int)sizeof (HOST_WIDEST_INT))
1557e4b17023SJohn Marino n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1558e4b17023SJohn Marino
1559e4b17023SJohn Marino switch (code)
1560e4b17023SJohn Marino {
1561e4b17023SJohn Marino case LSHIFT_EXPR:
1562e4b17023SJohn Marino n->n <<= count;
1563e4b17023SJohn Marino break;
1564e4b17023SJohn Marino case RSHIFT_EXPR:
1565e4b17023SJohn Marino n->n >>= count;
1566e4b17023SJohn Marino break;
1567e4b17023SJohn Marino case LROTATE_EXPR:
1568e4b17023SJohn Marino n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
1569e4b17023SJohn Marino break;
1570e4b17023SJohn Marino case RROTATE_EXPR:
1571e4b17023SJohn Marino n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
1572e4b17023SJohn Marino break;
1573e4b17023SJohn Marino default:
1574e4b17023SJohn Marino return false;
1575e4b17023SJohn Marino }
1576e4b17023SJohn Marino /* Zero unused bits for size. */
1577e4b17023SJohn Marino if (n->size < (int)sizeof (HOST_WIDEST_INT))
1578e4b17023SJohn Marino n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1579e4b17023SJohn Marino return true;
1580e4b17023SJohn Marino }
1581e4b17023SJohn Marino
1582e4b17023SJohn Marino /* Perform sanity checking for the symbolic number N and the gimple
1583e4b17023SJohn Marino statement STMT. */
1584e4b17023SJohn Marino
1585e4b17023SJohn Marino static inline bool
verify_symbolic_number_p(struct symbolic_number * n,gimple stmt)1586e4b17023SJohn Marino verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
1587e4b17023SJohn Marino {
1588e4b17023SJohn Marino tree lhs_type;
1589e4b17023SJohn Marino
1590e4b17023SJohn Marino lhs_type = gimple_expr_type (stmt);
1591e4b17023SJohn Marino
1592e4b17023SJohn Marino if (TREE_CODE (lhs_type) != INTEGER_TYPE)
1593e4b17023SJohn Marino return false;
1594e4b17023SJohn Marino
1595e4b17023SJohn Marino if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
1596e4b17023SJohn Marino return false;
1597e4b17023SJohn Marino
1598e4b17023SJohn Marino return true;
1599e4b17023SJohn Marino }
1600e4b17023SJohn Marino
1601e4b17023SJohn Marino /* find_bswap_1 invokes itself recursively with N and tries to perform
1602e4b17023SJohn Marino the operation given by the rhs of STMT on the result. If the
1603e4b17023SJohn Marino operation could successfully be executed the function returns the
1604e4b17023SJohn Marino tree expression of the source operand and NULL otherwise. */
1605e4b17023SJohn Marino
1606e4b17023SJohn Marino static tree
find_bswap_1(gimple stmt,struct symbolic_number * n,int limit)1607e4b17023SJohn Marino find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
1608e4b17023SJohn Marino {
1609e4b17023SJohn Marino enum tree_code code;
1610e4b17023SJohn Marino tree rhs1, rhs2 = NULL;
1611e4b17023SJohn Marino gimple rhs1_stmt, rhs2_stmt;
1612e4b17023SJohn Marino tree source_expr1;
1613e4b17023SJohn Marino enum gimple_rhs_class rhs_class;
1614e4b17023SJohn Marino
1615e4b17023SJohn Marino if (!limit || !is_gimple_assign (stmt))
1616e4b17023SJohn Marino return NULL_TREE;
1617e4b17023SJohn Marino
1618e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (stmt);
1619e4b17023SJohn Marino
1620e4b17023SJohn Marino if (TREE_CODE (rhs1) != SSA_NAME)
1621e4b17023SJohn Marino return NULL_TREE;
1622e4b17023SJohn Marino
1623e4b17023SJohn Marino code = gimple_assign_rhs_code (stmt);
1624e4b17023SJohn Marino rhs_class = gimple_assign_rhs_class (stmt);
1625e4b17023SJohn Marino rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1626e4b17023SJohn Marino
1627e4b17023SJohn Marino if (rhs_class == GIMPLE_BINARY_RHS)
1628e4b17023SJohn Marino rhs2 = gimple_assign_rhs2 (stmt);
1629e4b17023SJohn Marino
1630e4b17023SJohn Marino /* Handle unary rhs and binary rhs with integer constants as second
1631e4b17023SJohn Marino operand. */
1632e4b17023SJohn Marino
1633e4b17023SJohn Marino if (rhs_class == GIMPLE_UNARY_RHS
1634e4b17023SJohn Marino || (rhs_class == GIMPLE_BINARY_RHS
1635e4b17023SJohn Marino && TREE_CODE (rhs2) == INTEGER_CST))
1636e4b17023SJohn Marino {
1637e4b17023SJohn Marino if (code != BIT_AND_EXPR
1638e4b17023SJohn Marino && code != LSHIFT_EXPR
1639e4b17023SJohn Marino && code != RSHIFT_EXPR
1640e4b17023SJohn Marino && code != LROTATE_EXPR
1641e4b17023SJohn Marino && code != RROTATE_EXPR
1642e4b17023SJohn Marino && code != NOP_EXPR
1643e4b17023SJohn Marino && code != CONVERT_EXPR)
1644e4b17023SJohn Marino return NULL_TREE;
1645e4b17023SJohn Marino
1646e4b17023SJohn Marino source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
1647e4b17023SJohn Marino
1648e4b17023SJohn Marino /* If find_bswap_1 returned NULL STMT is a leaf node and we have
1649e4b17023SJohn Marino to initialize the symbolic number. */
1650e4b17023SJohn Marino if (!source_expr1)
1651e4b17023SJohn Marino {
1652e4b17023SJohn Marino /* Set up the symbolic number N by setting each byte to a
1653e4b17023SJohn Marino value between 1 and the byte size of rhs1. The highest
1654e4b17023SJohn Marino order byte is set to n->size and the lowest order
1655e4b17023SJohn Marino byte to 1. */
1656e4b17023SJohn Marino n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
1657e4b17023SJohn Marino if (n->size % BITS_PER_UNIT != 0)
1658e4b17023SJohn Marino return NULL_TREE;
1659e4b17023SJohn Marino n->size /= BITS_PER_UNIT;
1660e4b17023SJohn Marino n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1661e4b17023SJohn Marino (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
1662e4b17023SJohn Marino
1663e4b17023SJohn Marino if (n->size < (int)sizeof (HOST_WIDEST_INT))
1664e4b17023SJohn Marino n->n &= ((unsigned HOST_WIDEST_INT)1 <<
1665e4b17023SJohn Marino (n->size * BITS_PER_UNIT)) - 1;
1666e4b17023SJohn Marino
1667e4b17023SJohn Marino source_expr1 = rhs1;
1668e4b17023SJohn Marino }
1669e4b17023SJohn Marino
1670e4b17023SJohn Marino switch (code)
1671e4b17023SJohn Marino {
1672e4b17023SJohn Marino case BIT_AND_EXPR:
1673e4b17023SJohn Marino {
1674e4b17023SJohn Marino int i;
1675e4b17023SJohn Marino unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
1676e4b17023SJohn Marino unsigned HOST_WIDEST_INT tmp = val;
1677e4b17023SJohn Marino
1678e4b17023SJohn Marino /* Only constants masking full bytes are allowed. */
1679e4b17023SJohn Marino for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
1680e4b17023SJohn Marino if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
1681e4b17023SJohn Marino return NULL_TREE;
1682e4b17023SJohn Marino
1683e4b17023SJohn Marino n->n &= val;
1684e4b17023SJohn Marino }
1685e4b17023SJohn Marino break;
1686e4b17023SJohn Marino case LSHIFT_EXPR:
1687e4b17023SJohn Marino case RSHIFT_EXPR:
1688e4b17023SJohn Marino case LROTATE_EXPR:
1689e4b17023SJohn Marino case RROTATE_EXPR:
1690e4b17023SJohn Marino if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
1691e4b17023SJohn Marino return NULL_TREE;
1692e4b17023SJohn Marino break;
1693e4b17023SJohn Marino CASE_CONVERT:
1694e4b17023SJohn Marino {
1695e4b17023SJohn Marino int type_size;
1696e4b17023SJohn Marino
1697e4b17023SJohn Marino type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1698e4b17023SJohn Marino if (type_size % BITS_PER_UNIT != 0)
1699e4b17023SJohn Marino return NULL_TREE;
1700e4b17023SJohn Marino
1701e4b17023SJohn Marino if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1702e4b17023SJohn Marino {
1703e4b17023SJohn Marino /* If STMT casts to a smaller type mask out the bits not
1704e4b17023SJohn Marino belonging to the target type. */
1705e4b17023SJohn Marino n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1706e4b17023SJohn Marino }
1707e4b17023SJohn Marino n->size = type_size / BITS_PER_UNIT;
1708e4b17023SJohn Marino }
1709e4b17023SJohn Marino break;
1710e4b17023SJohn Marino default:
1711e4b17023SJohn Marino return NULL_TREE;
1712e4b17023SJohn Marino };
1713e4b17023SJohn Marino return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1714e4b17023SJohn Marino }
1715e4b17023SJohn Marino
1716e4b17023SJohn Marino /* Handle binary rhs. */
1717e4b17023SJohn Marino
1718e4b17023SJohn Marino if (rhs_class == GIMPLE_BINARY_RHS)
1719e4b17023SJohn Marino {
1720e4b17023SJohn Marino struct symbolic_number n1, n2;
1721e4b17023SJohn Marino tree source_expr2;
1722e4b17023SJohn Marino
1723e4b17023SJohn Marino if (code != BIT_IOR_EXPR)
1724e4b17023SJohn Marino return NULL_TREE;
1725e4b17023SJohn Marino
1726e4b17023SJohn Marino if (TREE_CODE (rhs2) != SSA_NAME)
1727e4b17023SJohn Marino return NULL_TREE;
1728e4b17023SJohn Marino
1729e4b17023SJohn Marino rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1730e4b17023SJohn Marino
1731e4b17023SJohn Marino switch (code)
1732e4b17023SJohn Marino {
1733e4b17023SJohn Marino case BIT_IOR_EXPR:
1734e4b17023SJohn Marino source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1735e4b17023SJohn Marino
1736e4b17023SJohn Marino if (!source_expr1)
1737e4b17023SJohn Marino return NULL_TREE;
1738e4b17023SJohn Marino
1739e4b17023SJohn Marino source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1740e4b17023SJohn Marino
1741e4b17023SJohn Marino if (source_expr1 != source_expr2
1742e4b17023SJohn Marino || n1.size != n2.size)
1743e4b17023SJohn Marino return NULL_TREE;
1744e4b17023SJohn Marino
1745e4b17023SJohn Marino n->size = n1.size;
1746e4b17023SJohn Marino n->n = n1.n | n2.n;
1747e4b17023SJohn Marino
1748e4b17023SJohn Marino if (!verify_symbolic_number_p (n, stmt))
1749e4b17023SJohn Marino return NULL_TREE;
1750e4b17023SJohn Marino
1751e4b17023SJohn Marino break;
1752e4b17023SJohn Marino default:
1753e4b17023SJohn Marino return NULL_TREE;
1754e4b17023SJohn Marino }
1755e4b17023SJohn Marino return source_expr1;
1756e4b17023SJohn Marino }
1757e4b17023SJohn Marino return NULL_TREE;
1758e4b17023SJohn Marino }
1759e4b17023SJohn Marino
1760e4b17023SJohn Marino /* Check if STMT completes a bswap implementation consisting of ORs,
1761e4b17023SJohn Marino SHIFTs and ANDs. Return the source tree expression on which the
1762e4b17023SJohn Marino byte swap is performed and NULL if no bswap was found. */
1763e4b17023SJohn Marino
1764e4b17023SJohn Marino static tree
find_bswap(gimple stmt)1765e4b17023SJohn Marino find_bswap (gimple stmt)
1766e4b17023SJohn Marino {
1767e4b17023SJohn Marino /* The number which the find_bswap result should match in order to
1768e4b17023SJohn Marino have a full byte swap. The number is shifted to the left according
1769e4b17023SJohn Marino to the size of the symbolic number before using it. */
1770e4b17023SJohn Marino unsigned HOST_WIDEST_INT cmp =
1771e4b17023SJohn Marino sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1772e4b17023SJohn Marino (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1773e4b17023SJohn Marino
1774e4b17023SJohn Marino struct symbolic_number n;
1775e4b17023SJohn Marino tree source_expr;
1776e4b17023SJohn Marino int limit;
1777e4b17023SJohn Marino
1778e4b17023SJohn Marino /* The last parameter determines the depth search limit. It usually
1779e4b17023SJohn Marino correlates directly to the number of bytes to be touched. We
1780e4b17023SJohn Marino increase that number by three here in order to also
1781e4b17023SJohn Marino cover signed -> unsigned converions of the src operand as can be seen
1782e4b17023SJohn Marino in libgcc, and for initial shift/and operation of the src operand. */
1783e4b17023SJohn Marino limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
1784e4b17023SJohn Marino limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
1785e4b17023SJohn Marino source_expr = find_bswap_1 (stmt, &n, limit);
1786e4b17023SJohn Marino
1787e4b17023SJohn Marino if (!source_expr)
1788e4b17023SJohn Marino return NULL_TREE;
1789e4b17023SJohn Marino
1790e4b17023SJohn Marino /* Zero out the extra bits of N and CMP. */
1791e4b17023SJohn Marino if (n.size < (int)sizeof (HOST_WIDEST_INT))
1792e4b17023SJohn Marino {
1793e4b17023SJohn Marino unsigned HOST_WIDEST_INT mask =
1794e4b17023SJohn Marino ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1795e4b17023SJohn Marino
1796e4b17023SJohn Marino n.n &= mask;
1797e4b17023SJohn Marino cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1798e4b17023SJohn Marino }
1799e4b17023SJohn Marino
1800e4b17023SJohn Marino /* A complete byte swap should make the symbolic number to start
1801e4b17023SJohn Marino with the largest digit in the highest order byte. */
1802e4b17023SJohn Marino if (cmp != n.n)
1803e4b17023SJohn Marino return NULL_TREE;
1804e4b17023SJohn Marino
1805e4b17023SJohn Marino return source_expr;
1806e4b17023SJohn Marino }
1807e4b17023SJohn Marino
1808e4b17023SJohn Marino /* Find manual byte swap implementations and turn them into a bswap
1809e4b17023SJohn Marino builtin invokation. */
1810e4b17023SJohn Marino
1811e4b17023SJohn Marino static unsigned int
execute_optimize_bswap(void)1812e4b17023SJohn Marino execute_optimize_bswap (void)
1813e4b17023SJohn Marino {
1814e4b17023SJohn Marino basic_block bb;
1815e4b17023SJohn Marino bool bswap32_p, bswap64_p;
1816e4b17023SJohn Marino bool changed = false;
1817e4b17023SJohn Marino tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1818e4b17023SJohn Marino
1819e4b17023SJohn Marino if (BITS_PER_UNIT != 8)
1820e4b17023SJohn Marino return 0;
1821e4b17023SJohn Marino
1822e4b17023SJohn Marino if (sizeof (HOST_WIDEST_INT) < 8)
1823e4b17023SJohn Marino return 0;
1824e4b17023SJohn Marino
1825e4b17023SJohn Marino bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1826e4b17023SJohn Marino && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1827e4b17023SJohn Marino bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1828e4b17023SJohn Marino && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1829e4b17023SJohn Marino || (bswap32_p && word_mode == SImode)));
1830e4b17023SJohn Marino
1831e4b17023SJohn Marino if (!bswap32_p && !bswap64_p)
1832e4b17023SJohn Marino return 0;
1833e4b17023SJohn Marino
1834e4b17023SJohn Marino /* Determine the argument type of the builtins. The code later on
1835e4b17023SJohn Marino assumes that the return and argument type are the same. */
1836e4b17023SJohn Marino if (bswap32_p)
1837e4b17023SJohn Marino {
1838e4b17023SJohn Marino tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1839e4b17023SJohn Marino bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1840e4b17023SJohn Marino }
1841e4b17023SJohn Marino
1842e4b17023SJohn Marino if (bswap64_p)
1843e4b17023SJohn Marino {
1844e4b17023SJohn Marino tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1845e4b17023SJohn Marino bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1846e4b17023SJohn Marino }
1847e4b17023SJohn Marino
1848e4b17023SJohn Marino memset (&bswap_stats, 0, sizeof (bswap_stats));
1849e4b17023SJohn Marino
1850e4b17023SJohn Marino FOR_EACH_BB (bb)
1851e4b17023SJohn Marino {
1852e4b17023SJohn Marino gimple_stmt_iterator gsi;
1853e4b17023SJohn Marino
1854e4b17023SJohn Marino /* We do a reverse scan for bswap patterns to make sure we get the
1855e4b17023SJohn Marino widest match. As bswap pattern matching doesn't handle
1856e4b17023SJohn Marino previously inserted smaller bswap replacements as sub-
1857e4b17023SJohn Marino patterns, the wider variant wouldn't be detected. */
1858e4b17023SJohn Marino for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1859e4b17023SJohn Marino {
1860e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
1861e4b17023SJohn Marino tree bswap_src, bswap_type;
1862e4b17023SJohn Marino tree bswap_tmp;
1863e4b17023SJohn Marino tree fndecl = NULL_TREE;
1864e4b17023SJohn Marino int type_size;
1865e4b17023SJohn Marino gimple call;
1866e4b17023SJohn Marino
1867e4b17023SJohn Marino if (!is_gimple_assign (stmt)
1868e4b17023SJohn Marino || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1869e4b17023SJohn Marino continue;
1870e4b17023SJohn Marino
1871e4b17023SJohn Marino type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1872e4b17023SJohn Marino
1873e4b17023SJohn Marino switch (type_size)
1874e4b17023SJohn Marino {
1875e4b17023SJohn Marino case 32:
1876e4b17023SJohn Marino if (bswap32_p)
1877e4b17023SJohn Marino {
1878e4b17023SJohn Marino fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1879e4b17023SJohn Marino bswap_type = bswap32_type;
1880e4b17023SJohn Marino }
1881e4b17023SJohn Marino break;
1882e4b17023SJohn Marino case 64:
1883e4b17023SJohn Marino if (bswap64_p)
1884e4b17023SJohn Marino {
1885e4b17023SJohn Marino fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1886e4b17023SJohn Marino bswap_type = bswap64_type;
1887e4b17023SJohn Marino }
1888e4b17023SJohn Marino break;
1889e4b17023SJohn Marino default:
1890e4b17023SJohn Marino continue;
1891e4b17023SJohn Marino }
1892e4b17023SJohn Marino
1893e4b17023SJohn Marino if (!fndecl)
1894e4b17023SJohn Marino continue;
1895e4b17023SJohn Marino
1896e4b17023SJohn Marino bswap_src = find_bswap (stmt);
1897e4b17023SJohn Marino
1898e4b17023SJohn Marino if (!bswap_src)
1899e4b17023SJohn Marino continue;
1900e4b17023SJohn Marino
1901e4b17023SJohn Marino changed = true;
1902e4b17023SJohn Marino if (type_size == 32)
1903e4b17023SJohn Marino bswap_stats.found_32bit++;
1904e4b17023SJohn Marino else
1905e4b17023SJohn Marino bswap_stats.found_64bit++;
1906e4b17023SJohn Marino
1907e4b17023SJohn Marino bswap_tmp = bswap_src;
1908e4b17023SJohn Marino
1909e4b17023SJohn Marino /* Convert the src expression if necessary. */
1910e4b17023SJohn Marino if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1911e4b17023SJohn Marino {
1912e4b17023SJohn Marino gimple convert_stmt;
1913e4b17023SJohn Marino
1914e4b17023SJohn Marino bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1915e4b17023SJohn Marino add_referenced_var (bswap_tmp);
1916e4b17023SJohn Marino bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1917e4b17023SJohn Marino
1918e4b17023SJohn Marino convert_stmt = gimple_build_assign_with_ops (
1919e4b17023SJohn Marino CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1920e4b17023SJohn Marino gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1921e4b17023SJohn Marino }
1922e4b17023SJohn Marino
1923e4b17023SJohn Marino call = gimple_build_call (fndecl, 1, bswap_tmp);
1924e4b17023SJohn Marino
1925e4b17023SJohn Marino bswap_tmp = gimple_assign_lhs (stmt);
1926e4b17023SJohn Marino
1927e4b17023SJohn Marino /* Convert the result if necessary. */
1928e4b17023SJohn Marino if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1929e4b17023SJohn Marino {
1930e4b17023SJohn Marino gimple convert_stmt;
1931e4b17023SJohn Marino
1932e4b17023SJohn Marino bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1933e4b17023SJohn Marino add_referenced_var (bswap_tmp);
1934e4b17023SJohn Marino bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1935e4b17023SJohn Marino convert_stmt = gimple_build_assign_with_ops (
1936e4b17023SJohn Marino CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1937e4b17023SJohn Marino gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1938e4b17023SJohn Marino }
1939e4b17023SJohn Marino
1940e4b17023SJohn Marino gimple_call_set_lhs (call, bswap_tmp);
1941e4b17023SJohn Marino
1942e4b17023SJohn Marino if (dump_file)
1943e4b17023SJohn Marino {
1944e4b17023SJohn Marino fprintf (dump_file, "%d bit bswap implementation found at: ",
1945e4b17023SJohn Marino (int)type_size);
1946e4b17023SJohn Marino print_gimple_stmt (dump_file, stmt, 0, 0);
1947e4b17023SJohn Marino }
1948e4b17023SJohn Marino
1949e4b17023SJohn Marino gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1950e4b17023SJohn Marino gsi_remove (&gsi, true);
1951e4b17023SJohn Marino }
1952e4b17023SJohn Marino }
1953e4b17023SJohn Marino
1954e4b17023SJohn Marino statistics_counter_event (cfun, "32-bit bswap implementations found",
1955e4b17023SJohn Marino bswap_stats.found_32bit);
1956e4b17023SJohn Marino statistics_counter_event (cfun, "64-bit bswap implementations found",
1957e4b17023SJohn Marino bswap_stats.found_64bit);
1958e4b17023SJohn Marino
1959e4b17023SJohn Marino return (changed ? TODO_update_ssa | TODO_verify_ssa
1960e4b17023SJohn Marino | TODO_verify_stmts : 0);
1961e4b17023SJohn Marino }
1962e4b17023SJohn Marino
1963e4b17023SJohn Marino static bool
gate_optimize_bswap(void)1964e4b17023SJohn Marino gate_optimize_bswap (void)
1965e4b17023SJohn Marino {
1966e4b17023SJohn Marino return flag_expensive_optimizations && optimize;
1967e4b17023SJohn Marino }
1968e4b17023SJohn Marino
1969e4b17023SJohn Marino struct gimple_opt_pass pass_optimize_bswap =
1970e4b17023SJohn Marino {
1971e4b17023SJohn Marino {
1972e4b17023SJohn Marino GIMPLE_PASS,
1973e4b17023SJohn Marino "bswap", /* name */
1974e4b17023SJohn Marino gate_optimize_bswap, /* gate */
1975e4b17023SJohn Marino execute_optimize_bswap, /* execute */
1976e4b17023SJohn Marino NULL, /* sub */
1977e4b17023SJohn Marino NULL, /* next */
1978e4b17023SJohn Marino 0, /* static_pass_number */
1979e4b17023SJohn Marino TV_NONE, /* tv_id */
1980e4b17023SJohn Marino PROP_ssa, /* properties_required */
1981e4b17023SJohn Marino 0, /* properties_provided */
1982e4b17023SJohn Marino 0, /* properties_destroyed */
1983e4b17023SJohn Marino 0, /* todo_flags_start */
1984e4b17023SJohn Marino 0 /* todo_flags_finish */
1985e4b17023SJohn Marino }
1986e4b17023SJohn Marino };
1987e4b17023SJohn Marino
1988e4b17023SJohn Marino /* Return true if RHS is a suitable operand for a widening multiplication,
1989e4b17023SJohn Marino assuming a target type of TYPE.
1990e4b17023SJohn Marino There are two cases:
1991e4b17023SJohn Marino
1992e4b17023SJohn Marino - RHS makes some value at least twice as wide. Store that value
1993e4b17023SJohn Marino in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
1994e4b17023SJohn Marino
1995e4b17023SJohn Marino - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1996e4b17023SJohn Marino but leave *TYPE_OUT untouched. */
1997e4b17023SJohn Marino
1998e4b17023SJohn Marino static bool
is_widening_mult_rhs_p(tree type,tree rhs,tree * type_out,tree * new_rhs_out)1999e4b17023SJohn Marino is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2000e4b17023SJohn Marino tree *new_rhs_out)
2001e4b17023SJohn Marino {
2002e4b17023SJohn Marino gimple stmt;
2003e4b17023SJohn Marino tree type1, rhs1;
2004e4b17023SJohn Marino enum tree_code rhs_code;
2005e4b17023SJohn Marino
2006e4b17023SJohn Marino if (TREE_CODE (rhs) == SSA_NAME)
2007e4b17023SJohn Marino {
2008e4b17023SJohn Marino stmt = SSA_NAME_DEF_STMT (rhs);
2009e4b17023SJohn Marino if (is_gimple_assign (stmt))
2010e4b17023SJohn Marino {
2011e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (stmt);
2012e4b17023SJohn Marino if (TREE_CODE (type) == INTEGER_TYPE
2013e4b17023SJohn Marino ? !CONVERT_EXPR_CODE_P (rhs_code)
2014e4b17023SJohn Marino : rhs_code != FIXED_CONVERT_EXPR)
2015e4b17023SJohn Marino rhs1 = rhs;
2016e4b17023SJohn Marino else
2017e4b17023SJohn Marino {
2018e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (stmt);
2019e4b17023SJohn Marino
2020e4b17023SJohn Marino if (TREE_CODE (rhs1) == INTEGER_CST)
2021e4b17023SJohn Marino {
2022e4b17023SJohn Marino *new_rhs_out = rhs1;
2023e4b17023SJohn Marino *type_out = NULL;
2024e4b17023SJohn Marino return true;
2025e4b17023SJohn Marino }
2026e4b17023SJohn Marino }
2027e4b17023SJohn Marino }
2028e4b17023SJohn Marino else
2029e4b17023SJohn Marino rhs1 = rhs;
2030e4b17023SJohn Marino
2031e4b17023SJohn Marino type1 = TREE_TYPE (rhs1);
2032e4b17023SJohn Marino
2033e4b17023SJohn Marino if (TREE_CODE (type1) != TREE_CODE (type)
2034e4b17023SJohn Marino || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2035e4b17023SJohn Marino return false;
2036e4b17023SJohn Marino
2037e4b17023SJohn Marino *new_rhs_out = rhs1;
2038e4b17023SJohn Marino *type_out = type1;
2039e4b17023SJohn Marino return true;
2040e4b17023SJohn Marino }
2041e4b17023SJohn Marino
2042e4b17023SJohn Marino if (TREE_CODE (rhs) == INTEGER_CST)
2043e4b17023SJohn Marino {
2044e4b17023SJohn Marino *new_rhs_out = rhs;
2045e4b17023SJohn Marino *type_out = NULL;
2046e4b17023SJohn Marino return true;
2047e4b17023SJohn Marino }
2048e4b17023SJohn Marino
2049e4b17023SJohn Marino return false;
2050e4b17023SJohn Marino }
2051e4b17023SJohn Marino
2052e4b17023SJohn Marino /* Return true if STMT performs a widening multiplication, assuming the
2053e4b17023SJohn Marino output type is TYPE. If so, store the unwidened types of the operands
2054e4b17023SJohn Marino in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2055e4b17023SJohn Marino *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2056e4b17023SJohn Marino and *TYPE2_OUT would give the operands of the multiplication. */
2057e4b17023SJohn Marino
2058e4b17023SJohn Marino static bool
is_widening_mult_p(gimple stmt,tree * type1_out,tree * rhs1_out,tree * type2_out,tree * rhs2_out)2059e4b17023SJohn Marino is_widening_mult_p (gimple stmt,
2060e4b17023SJohn Marino tree *type1_out, tree *rhs1_out,
2061e4b17023SJohn Marino tree *type2_out, tree *rhs2_out)
2062e4b17023SJohn Marino {
2063e4b17023SJohn Marino tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2064e4b17023SJohn Marino
2065e4b17023SJohn Marino if (TREE_CODE (type) != INTEGER_TYPE
2066e4b17023SJohn Marino && TREE_CODE (type) != FIXED_POINT_TYPE)
2067e4b17023SJohn Marino return false;
2068e4b17023SJohn Marino
2069e4b17023SJohn Marino if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2070e4b17023SJohn Marino rhs1_out))
2071e4b17023SJohn Marino return false;
2072e4b17023SJohn Marino
2073e4b17023SJohn Marino if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2074e4b17023SJohn Marino rhs2_out))
2075e4b17023SJohn Marino return false;
2076e4b17023SJohn Marino
2077e4b17023SJohn Marino if (*type1_out == NULL)
2078e4b17023SJohn Marino {
2079e4b17023SJohn Marino if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2080e4b17023SJohn Marino return false;
2081e4b17023SJohn Marino *type1_out = *type2_out;
2082e4b17023SJohn Marino }
2083e4b17023SJohn Marino
2084e4b17023SJohn Marino if (*type2_out == NULL)
2085e4b17023SJohn Marino {
2086e4b17023SJohn Marino if (!int_fits_type_p (*rhs2_out, *type1_out))
2087e4b17023SJohn Marino return false;
2088e4b17023SJohn Marino *type2_out = *type1_out;
2089e4b17023SJohn Marino }
2090e4b17023SJohn Marino
2091e4b17023SJohn Marino /* Ensure that the larger of the two operands comes first. */
2092e4b17023SJohn Marino if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2093e4b17023SJohn Marino {
2094e4b17023SJohn Marino tree tmp;
2095e4b17023SJohn Marino tmp = *type1_out;
2096e4b17023SJohn Marino *type1_out = *type2_out;
2097e4b17023SJohn Marino *type2_out = tmp;
2098e4b17023SJohn Marino tmp = *rhs1_out;
2099e4b17023SJohn Marino *rhs1_out = *rhs2_out;
2100e4b17023SJohn Marino *rhs2_out = tmp;
2101e4b17023SJohn Marino }
2102e4b17023SJohn Marino
2103e4b17023SJohn Marino return true;
2104e4b17023SJohn Marino }
2105e4b17023SJohn Marino
2106e4b17023SJohn Marino /* Process a single gimple statement STMT, which has a MULT_EXPR as
2107e4b17023SJohn Marino its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2108e4b17023SJohn Marino value is true iff we converted the statement. */
2109e4b17023SJohn Marino
2110e4b17023SJohn Marino static bool
convert_mult_to_widen(gimple stmt,gimple_stmt_iterator * gsi)2111e4b17023SJohn Marino convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
2112e4b17023SJohn Marino {
2113e4b17023SJohn Marino tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
2114e4b17023SJohn Marino enum insn_code handler;
2115e4b17023SJohn Marino enum machine_mode to_mode, from_mode, actual_mode;
2116e4b17023SJohn Marino optab op;
2117e4b17023SJohn Marino int actual_precision;
2118e4b17023SJohn Marino location_t loc = gimple_location (stmt);
2119e4b17023SJohn Marino bool from_unsigned1, from_unsigned2;
2120e4b17023SJohn Marino
2121e4b17023SJohn Marino lhs = gimple_assign_lhs (stmt);
2122e4b17023SJohn Marino type = TREE_TYPE (lhs);
2123e4b17023SJohn Marino if (TREE_CODE (type) != INTEGER_TYPE)
2124e4b17023SJohn Marino return false;
2125e4b17023SJohn Marino
2126e4b17023SJohn Marino if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2127e4b17023SJohn Marino return false;
2128e4b17023SJohn Marino
2129e4b17023SJohn Marino to_mode = TYPE_MODE (type);
2130e4b17023SJohn Marino from_mode = TYPE_MODE (type1);
2131e4b17023SJohn Marino from_unsigned1 = TYPE_UNSIGNED (type1);
2132e4b17023SJohn Marino from_unsigned2 = TYPE_UNSIGNED (type2);
2133e4b17023SJohn Marino
2134e4b17023SJohn Marino if (from_unsigned1 && from_unsigned2)
2135e4b17023SJohn Marino op = umul_widen_optab;
2136e4b17023SJohn Marino else if (!from_unsigned1 && !from_unsigned2)
2137e4b17023SJohn Marino op = smul_widen_optab;
2138e4b17023SJohn Marino else
2139e4b17023SJohn Marino op = usmul_widen_optab;
2140e4b17023SJohn Marino
2141e4b17023SJohn Marino handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2142e4b17023SJohn Marino 0, &actual_mode);
2143e4b17023SJohn Marino
2144e4b17023SJohn Marino if (handler == CODE_FOR_nothing)
2145e4b17023SJohn Marino {
2146e4b17023SJohn Marino if (op != smul_widen_optab)
2147e4b17023SJohn Marino {
2148e4b17023SJohn Marino /* We can use a signed multiply with unsigned types as long as
2149e4b17023SJohn Marino there is a wider mode to use, or it is the smaller of the two
2150e4b17023SJohn Marino types that is unsigned. Note that type1 >= type2, always. */
2151e4b17023SJohn Marino if ((TYPE_UNSIGNED (type1)
2152e4b17023SJohn Marino && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2153e4b17023SJohn Marino || (TYPE_UNSIGNED (type2)
2154e4b17023SJohn Marino && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2155e4b17023SJohn Marino {
2156e4b17023SJohn Marino from_mode = GET_MODE_WIDER_MODE (from_mode);
2157e4b17023SJohn Marino if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2158e4b17023SJohn Marino return false;
2159e4b17023SJohn Marino }
2160e4b17023SJohn Marino
2161e4b17023SJohn Marino op = smul_widen_optab;
2162e4b17023SJohn Marino handler = find_widening_optab_handler_and_mode (op, to_mode,
2163e4b17023SJohn Marino from_mode, 0,
2164e4b17023SJohn Marino &actual_mode);
2165e4b17023SJohn Marino
2166e4b17023SJohn Marino if (handler == CODE_FOR_nothing)
2167e4b17023SJohn Marino return false;
2168e4b17023SJohn Marino
2169e4b17023SJohn Marino from_unsigned1 = from_unsigned2 = false;
2170e4b17023SJohn Marino }
2171e4b17023SJohn Marino else
2172e4b17023SJohn Marino return false;
2173e4b17023SJohn Marino }
2174e4b17023SJohn Marino
2175e4b17023SJohn Marino /* Ensure that the inputs to the handler are in the correct precison
2176e4b17023SJohn Marino for the opcode. This will be the full mode size. */
2177e4b17023SJohn Marino actual_precision = GET_MODE_PRECISION (actual_mode);
2178e4b17023SJohn Marino if (actual_precision != TYPE_PRECISION (type1)
2179e4b17023SJohn Marino || from_unsigned1 != TYPE_UNSIGNED (type1))
2180e4b17023SJohn Marino {
2181e4b17023SJohn Marino tmp = create_tmp_var (build_nonstandard_integer_type
2182e4b17023SJohn Marino (actual_precision, from_unsigned1),
2183e4b17023SJohn Marino NULL);
2184e4b17023SJohn Marino rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
2185e4b17023SJohn Marino }
2186e4b17023SJohn Marino if (actual_precision != TYPE_PRECISION (type2)
2187e4b17023SJohn Marino || from_unsigned2 != TYPE_UNSIGNED (type2))
2188e4b17023SJohn Marino {
2189e4b17023SJohn Marino /* Reuse the same type info, if possible. */
2190e4b17023SJohn Marino if (!tmp || from_unsigned1 != from_unsigned2)
2191e4b17023SJohn Marino tmp = create_tmp_var (build_nonstandard_integer_type
2192e4b17023SJohn Marino (actual_precision, from_unsigned2),
2193e4b17023SJohn Marino NULL);
2194e4b17023SJohn Marino rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
2195e4b17023SJohn Marino }
2196e4b17023SJohn Marino
2197e4b17023SJohn Marino /* Handle constants. */
2198e4b17023SJohn Marino if (TREE_CODE (rhs1) == INTEGER_CST)
2199e4b17023SJohn Marino rhs1 = fold_convert (type1, rhs1);
2200e4b17023SJohn Marino if (TREE_CODE (rhs2) == INTEGER_CST)
2201e4b17023SJohn Marino rhs2 = fold_convert (type2, rhs2);
2202e4b17023SJohn Marino
2203e4b17023SJohn Marino gimple_assign_set_rhs1 (stmt, rhs1);
2204e4b17023SJohn Marino gimple_assign_set_rhs2 (stmt, rhs2);
2205e4b17023SJohn Marino gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2206e4b17023SJohn Marino update_stmt (stmt);
2207e4b17023SJohn Marino widen_mul_stats.widen_mults_inserted++;
2208e4b17023SJohn Marino return true;
2209e4b17023SJohn Marino }
2210e4b17023SJohn Marino
2211e4b17023SJohn Marino /* Process a single gimple statement STMT, which is found at the
2212e4b17023SJohn Marino iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2213e4b17023SJohn Marino rhs (given by CODE), and try to convert it into a
2214e4b17023SJohn Marino WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2215e4b17023SJohn Marino is true iff we converted the statement. */
2216e4b17023SJohn Marino
2217e4b17023SJohn Marino static bool
convert_plusminus_to_widen(gimple_stmt_iterator * gsi,gimple stmt,enum tree_code code)2218e4b17023SJohn Marino convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
2219e4b17023SJohn Marino enum tree_code code)
2220e4b17023SJohn Marino {
2221e4b17023SJohn Marino gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2222e4b17023SJohn Marino gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
2223e4b17023SJohn Marino tree type, type1, type2, optype, tmp = NULL;
2224e4b17023SJohn Marino tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2225e4b17023SJohn Marino enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2226e4b17023SJohn Marino optab this_optab;
2227e4b17023SJohn Marino enum tree_code wmult_code;
2228e4b17023SJohn Marino enum insn_code handler;
2229e4b17023SJohn Marino enum machine_mode to_mode, from_mode, actual_mode;
2230e4b17023SJohn Marino location_t loc = gimple_location (stmt);
2231e4b17023SJohn Marino int actual_precision;
2232e4b17023SJohn Marino bool from_unsigned1, from_unsigned2;
2233e4b17023SJohn Marino
2234e4b17023SJohn Marino lhs = gimple_assign_lhs (stmt);
2235e4b17023SJohn Marino type = TREE_TYPE (lhs);
2236e4b17023SJohn Marino if (TREE_CODE (type) != INTEGER_TYPE
2237e4b17023SJohn Marino && TREE_CODE (type) != FIXED_POINT_TYPE)
2238e4b17023SJohn Marino return false;
2239e4b17023SJohn Marino
2240e4b17023SJohn Marino if (code == MINUS_EXPR)
2241e4b17023SJohn Marino wmult_code = WIDEN_MULT_MINUS_EXPR;
2242e4b17023SJohn Marino else
2243e4b17023SJohn Marino wmult_code = WIDEN_MULT_PLUS_EXPR;
2244e4b17023SJohn Marino
2245e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (stmt);
2246e4b17023SJohn Marino rhs2 = gimple_assign_rhs2 (stmt);
2247e4b17023SJohn Marino
2248e4b17023SJohn Marino if (TREE_CODE (rhs1) == SSA_NAME)
2249e4b17023SJohn Marino {
2250e4b17023SJohn Marino rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2251e4b17023SJohn Marino if (is_gimple_assign (rhs1_stmt))
2252e4b17023SJohn Marino rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2253e4b17023SJohn Marino }
2254e4b17023SJohn Marino
2255e4b17023SJohn Marino if (TREE_CODE (rhs2) == SSA_NAME)
2256e4b17023SJohn Marino {
2257e4b17023SJohn Marino rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2258e4b17023SJohn Marino if (is_gimple_assign (rhs2_stmt))
2259e4b17023SJohn Marino rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2260e4b17023SJohn Marino }
2261e4b17023SJohn Marino
2262e4b17023SJohn Marino /* Allow for one conversion statement between the multiply
2263e4b17023SJohn Marino and addition/subtraction statement. If there are more than
2264e4b17023SJohn Marino one conversions then we assume they would invalidate this
2265e4b17023SJohn Marino transformation. If that's not the case then they should have
2266e4b17023SJohn Marino been folded before now. */
2267e4b17023SJohn Marino if (CONVERT_EXPR_CODE_P (rhs1_code))
2268e4b17023SJohn Marino {
2269e4b17023SJohn Marino conv1_stmt = rhs1_stmt;
2270e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2271e4b17023SJohn Marino if (TREE_CODE (rhs1) == SSA_NAME)
2272e4b17023SJohn Marino {
2273e4b17023SJohn Marino rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2274e4b17023SJohn Marino if (is_gimple_assign (rhs1_stmt))
2275e4b17023SJohn Marino rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2276e4b17023SJohn Marino }
2277e4b17023SJohn Marino else
2278e4b17023SJohn Marino return false;
2279e4b17023SJohn Marino }
2280e4b17023SJohn Marino if (CONVERT_EXPR_CODE_P (rhs2_code))
2281e4b17023SJohn Marino {
2282e4b17023SJohn Marino conv2_stmt = rhs2_stmt;
2283e4b17023SJohn Marino rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2284e4b17023SJohn Marino if (TREE_CODE (rhs2) == SSA_NAME)
2285e4b17023SJohn Marino {
2286e4b17023SJohn Marino rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2287e4b17023SJohn Marino if (is_gimple_assign (rhs2_stmt))
2288e4b17023SJohn Marino rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2289e4b17023SJohn Marino }
2290e4b17023SJohn Marino else
2291e4b17023SJohn Marino return false;
2292e4b17023SJohn Marino }
2293e4b17023SJohn Marino
2294e4b17023SJohn Marino /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2295e4b17023SJohn Marino is_widening_mult_p, but we still need the rhs returns.
2296e4b17023SJohn Marino
2297e4b17023SJohn Marino It might also appear that it would be sufficient to use the existing
2298e4b17023SJohn Marino operands of the widening multiply, but that would limit the choice of
2299e4b17023SJohn Marino multiply-and-accumulate instructions. */
2300e4b17023SJohn Marino if (code == PLUS_EXPR
2301e4b17023SJohn Marino && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2302e4b17023SJohn Marino {
2303e4b17023SJohn Marino if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2304e4b17023SJohn Marino &type2, &mult_rhs2))
2305e4b17023SJohn Marino return false;
2306e4b17023SJohn Marino add_rhs = rhs2;
2307e4b17023SJohn Marino conv_stmt = conv1_stmt;
2308e4b17023SJohn Marino }
2309e4b17023SJohn Marino else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2310e4b17023SJohn Marino {
2311e4b17023SJohn Marino if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2312e4b17023SJohn Marino &type2, &mult_rhs2))
2313e4b17023SJohn Marino return false;
2314e4b17023SJohn Marino add_rhs = rhs1;
2315e4b17023SJohn Marino conv_stmt = conv2_stmt;
2316e4b17023SJohn Marino }
2317e4b17023SJohn Marino else
2318e4b17023SJohn Marino return false;
2319e4b17023SJohn Marino
2320e4b17023SJohn Marino to_mode = TYPE_MODE (type);
2321e4b17023SJohn Marino from_mode = TYPE_MODE (type1);
2322e4b17023SJohn Marino from_unsigned1 = TYPE_UNSIGNED (type1);
2323e4b17023SJohn Marino from_unsigned2 = TYPE_UNSIGNED (type2);
2324e4b17023SJohn Marino optype = type1;
2325e4b17023SJohn Marino
2326e4b17023SJohn Marino /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2327e4b17023SJohn Marino if (from_unsigned1 != from_unsigned2)
2328e4b17023SJohn Marino {
2329e4b17023SJohn Marino if (!INTEGRAL_TYPE_P (type))
2330e4b17023SJohn Marino return false;
2331e4b17023SJohn Marino /* We can use a signed multiply with unsigned types as long as
2332e4b17023SJohn Marino there is a wider mode to use, or it is the smaller of the two
2333e4b17023SJohn Marino types that is unsigned. Note that type1 >= type2, always. */
2334e4b17023SJohn Marino if ((from_unsigned1
2335e4b17023SJohn Marino && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2336e4b17023SJohn Marino || (from_unsigned2
2337e4b17023SJohn Marino && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2338e4b17023SJohn Marino {
2339e4b17023SJohn Marino from_mode = GET_MODE_WIDER_MODE (from_mode);
2340e4b17023SJohn Marino if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2341e4b17023SJohn Marino return false;
2342e4b17023SJohn Marino }
2343e4b17023SJohn Marino
2344e4b17023SJohn Marino from_unsigned1 = from_unsigned2 = false;
2345e4b17023SJohn Marino optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2346e4b17023SJohn Marino false);
2347e4b17023SJohn Marino }
2348e4b17023SJohn Marino
2349e4b17023SJohn Marino /* If there was a conversion between the multiply and addition
2350e4b17023SJohn Marino then we need to make sure it fits a multiply-and-accumulate.
2351e4b17023SJohn Marino The should be a single mode change which does not change the
2352e4b17023SJohn Marino value. */
2353e4b17023SJohn Marino if (conv_stmt)
2354e4b17023SJohn Marino {
2355e4b17023SJohn Marino /* We use the original, unmodified data types for this. */
2356e4b17023SJohn Marino tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2357e4b17023SJohn Marino tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2358e4b17023SJohn Marino int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2359e4b17023SJohn Marino bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2360e4b17023SJohn Marino
2361e4b17023SJohn Marino if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2362e4b17023SJohn Marino {
2363e4b17023SJohn Marino /* Conversion is a truncate. */
2364e4b17023SJohn Marino if (TYPE_PRECISION (to_type) < data_size)
2365e4b17023SJohn Marino return false;
2366e4b17023SJohn Marino }
2367e4b17023SJohn Marino else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2368e4b17023SJohn Marino {
2369e4b17023SJohn Marino /* Conversion is an extend. Check it's the right sort. */
2370e4b17023SJohn Marino if (TYPE_UNSIGNED (from_type) != is_unsigned
2371e4b17023SJohn Marino && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2372e4b17023SJohn Marino return false;
2373e4b17023SJohn Marino }
2374e4b17023SJohn Marino /* else convert is a no-op for our purposes. */
2375e4b17023SJohn Marino }
2376e4b17023SJohn Marino
2377e4b17023SJohn Marino /* Verify that the machine can perform a widening multiply
2378e4b17023SJohn Marino accumulate in this mode/signedness combination, otherwise
2379e4b17023SJohn Marino this transformation is likely to pessimize code. */
2380e4b17023SJohn Marino this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2381e4b17023SJohn Marino handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2382e4b17023SJohn Marino from_mode, 0, &actual_mode);
2383e4b17023SJohn Marino
2384e4b17023SJohn Marino if (handler == CODE_FOR_nothing)
2385e4b17023SJohn Marino return false;
2386e4b17023SJohn Marino
2387e4b17023SJohn Marino /* Ensure that the inputs to the handler are in the correct precison
2388e4b17023SJohn Marino for the opcode. This will be the full mode size. */
2389e4b17023SJohn Marino actual_precision = GET_MODE_PRECISION (actual_mode);
2390e4b17023SJohn Marino if (actual_precision != TYPE_PRECISION (type1)
2391e4b17023SJohn Marino || from_unsigned1 != TYPE_UNSIGNED (type1))
2392e4b17023SJohn Marino {
2393e4b17023SJohn Marino tmp = create_tmp_var (build_nonstandard_integer_type
2394e4b17023SJohn Marino (actual_precision, from_unsigned1),
2395e4b17023SJohn Marino NULL);
2396e4b17023SJohn Marino mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
2397e4b17023SJohn Marino }
2398e4b17023SJohn Marino if (actual_precision != TYPE_PRECISION (type2)
2399e4b17023SJohn Marino || from_unsigned2 != TYPE_UNSIGNED (type2))
2400e4b17023SJohn Marino {
2401e4b17023SJohn Marino if (!tmp || from_unsigned1 != from_unsigned2)
2402e4b17023SJohn Marino tmp = create_tmp_var (build_nonstandard_integer_type
2403e4b17023SJohn Marino (actual_precision, from_unsigned2),
2404e4b17023SJohn Marino NULL);
2405e4b17023SJohn Marino mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
2406e4b17023SJohn Marino }
2407e4b17023SJohn Marino
2408e4b17023SJohn Marino if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2409e4b17023SJohn Marino add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL),
2410e4b17023SJohn Marino add_rhs);
2411e4b17023SJohn Marino
2412e4b17023SJohn Marino /* Handle constants. */
2413e4b17023SJohn Marino if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2414e4b17023SJohn Marino mult_rhs1 = fold_convert (type1, mult_rhs1);
2415e4b17023SJohn Marino if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2416e4b17023SJohn Marino mult_rhs2 = fold_convert (type2, mult_rhs2);
2417e4b17023SJohn Marino
2418e4b17023SJohn Marino gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
2419e4b17023SJohn Marino add_rhs);
2420e4b17023SJohn Marino update_stmt (gsi_stmt (*gsi));
2421e4b17023SJohn Marino widen_mul_stats.maccs_inserted++;
2422e4b17023SJohn Marino return true;
2423e4b17023SJohn Marino }
2424e4b17023SJohn Marino
2425e4b17023SJohn Marino /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2426e4b17023SJohn Marino with uses in additions and subtractions to form fused multiply-add
2427e4b17023SJohn Marino operations. Returns true if successful and MUL_STMT should be removed. */
2428e4b17023SJohn Marino
2429e4b17023SJohn Marino static bool
convert_mult_to_fma(gimple mul_stmt,tree op1,tree op2)2430e4b17023SJohn Marino convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
2431e4b17023SJohn Marino {
2432e4b17023SJohn Marino tree mul_result = gimple_get_lhs (mul_stmt);
2433e4b17023SJohn Marino tree type = TREE_TYPE (mul_result);
2434e4b17023SJohn Marino gimple use_stmt, neguse_stmt, fma_stmt;
2435e4b17023SJohn Marino use_operand_p use_p;
2436e4b17023SJohn Marino imm_use_iterator imm_iter;
2437e4b17023SJohn Marino
2438e4b17023SJohn Marino if (FLOAT_TYPE_P (type)
2439e4b17023SJohn Marino && flag_fp_contract_mode == FP_CONTRACT_OFF)
2440e4b17023SJohn Marino return false;
2441e4b17023SJohn Marino
2442e4b17023SJohn Marino /* We don't want to do bitfield reduction ops. */
2443e4b17023SJohn Marino if (INTEGRAL_TYPE_P (type)
2444e4b17023SJohn Marino && (TYPE_PRECISION (type)
2445e4b17023SJohn Marino != GET_MODE_PRECISION (TYPE_MODE (type))))
2446e4b17023SJohn Marino return false;
2447e4b17023SJohn Marino
2448e4b17023SJohn Marino /* If the target doesn't support it, don't generate it. We assume that
2449e4b17023SJohn Marino if fma isn't available then fms, fnma or fnms are not either. */
2450e4b17023SJohn Marino if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2451e4b17023SJohn Marino return false;
2452e4b17023SJohn Marino
2453e4b17023SJohn Marino /* If the multiplication has zero uses, it is kept around probably because
2454e4b17023SJohn Marino of -fnon-call-exceptions. Don't optimize it away in that case,
2455e4b17023SJohn Marino it is DCE job. */
2456e4b17023SJohn Marino if (has_zero_uses (mul_result))
2457e4b17023SJohn Marino return false;
2458e4b17023SJohn Marino
2459e4b17023SJohn Marino /* Make sure that the multiplication statement becomes dead after
2460e4b17023SJohn Marino the transformation, thus that all uses are transformed to FMAs.
2461e4b17023SJohn Marino This means we assume that an FMA operation has the same cost
2462e4b17023SJohn Marino as an addition. */
2463e4b17023SJohn Marino FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2464e4b17023SJohn Marino {
2465e4b17023SJohn Marino enum tree_code use_code;
2466e4b17023SJohn Marino tree result = mul_result;
2467e4b17023SJohn Marino bool negate_p = false;
2468e4b17023SJohn Marino
2469e4b17023SJohn Marino use_stmt = USE_STMT (use_p);
2470e4b17023SJohn Marino
2471e4b17023SJohn Marino if (is_gimple_debug (use_stmt))
2472e4b17023SJohn Marino continue;
2473e4b17023SJohn Marino
2474e4b17023SJohn Marino /* For now restrict this operations to single basic blocks. In theory
2475e4b17023SJohn Marino we would want to support sinking the multiplication in
2476e4b17023SJohn Marino m = a*b;
2477e4b17023SJohn Marino if ()
2478e4b17023SJohn Marino ma = m + c;
2479e4b17023SJohn Marino else
2480e4b17023SJohn Marino d = m;
2481e4b17023SJohn Marino to form a fma in the then block and sink the multiplication to the
2482e4b17023SJohn Marino else block. */
2483e4b17023SJohn Marino if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2484e4b17023SJohn Marino return false;
2485e4b17023SJohn Marino
2486e4b17023SJohn Marino if (!is_gimple_assign (use_stmt))
2487e4b17023SJohn Marino return false;
2488e4b17023SJohn Marino
2489e4b17023SJohn Marino use_code = gimple_assign_rhs_code (use_stmt);
2490e4b17023SJohn Marino
2491e4b17023SJohn Marino /* A negate on the multiplication leads to FNMA. */
2492e4b17023SJohn Marino if (use_code == NEGATE_EXPR)
2493e4b17023SJohn Marino {
2494e4b17023SJohn Marino ssa_op_iter iter;
2495e4b17023SJohn Marino use_operand_p usep;
2496e4b17023SJohn Marino
2497e4b17023SJohn Marino result = gimple_assign_lhs (use_stmt);
2498e4b17023SJohn Marino
2499e4b17023SJohn Marino /* Make sure the negate statement becomes dead with this
2500e4b17023SJohn Marino single transformation. */
2501e4b17023SJohn Marino if (!single_imm_use (gimple_assign_lhs (use_stmt),
2502e4b17023SJohn Marino &use_p, &neguse_stmt))
2503e4b17023SJohn Marino return false;
2504e4b17023SJohn Marino
2505e4b17023SJohn Marino /* Make sure the multiplication isn't also used on that stmt. */
2506e4b17023SJohn Marino FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2507e4b17023SJohn Marino if (USE_FROM_PTR (usep) == mul_result)
2508e4b17023SJohn Marino return false;
2509e4b17023SJohn Marino
2510e4b17023SJohn Marino /* Re-validate. */
2511e4b17023SJohn Marino use_stmt = neguse_stmt;
2512e4b17023SJohn Marino if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2513e4b17023SJohn Marino return false;
2514e4b17023SJohn Marino if (!is_gimple_assign (use_stmt))
2515e4b17023SJohn Marino return false;
2516e4b17023SJohn Marino
2517e4b17023SJohn Marino use_code = gimple_assign_rhs_code (use_stmt);
2518e4b17023SJohn Marino negate_p = true;
2519e4b17023SJohn Marino }
2520e4b17023SJohn Marino
2521e4b17023SJohn Marino switch (use_code)
2522e4b17023SJohn Marino {
2523e4b17023SJohn Marino case MINUS_EXPR:
2524e4b17023SJohn Marino if (gimple_assign_rhs2 (use_stmt) == result)
2525e4b17023SJohn Marino negate_p = !negate_p;
2526e4b17023SJohn Marino break;
2527e4b17023SJohn Marino case PLUS_EXPR:
2528e4b17023SJohn Marino break;
2529e4b17023SJohn Marino default:
2530e4b17023SJohn Marino /* FMA can only be formed from PLUS and MINUS. */
2531e4b17023SJohn Marino return false;
2532e4b17023SJohn Marino }
2533e4b17023SJohn Marino
2534e4b17023SJohn Marino /* We can't handle a * b + a * b. */
2535e4b17023SJohn Marino if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2536e4b17023SJohn Marino return false;
2537e4b17023SJohn Marino
2538e4b17023SJohn Marino /* While it is possible to validate whether or not the exact form
2539e4b17023SJohn Marino that we've recognized is available in the backend, the assumption
2540e4b17023SJohn Marino is that the transformation is never a loss. For instance, suppose
2541e4b17023SJohn Marino the target only has the plain FMA pattern available. Consider
2542e4b17023SJohn Marino a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2543e4b17023SJohn Marino is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
2544e4b17023SJohn Marino still have 3 operations, but in the FMA form the two NEGs are
2545e4b17023SJohn Marino independant and could be run in parallel. */
2546e4b17023SJohn Marino }
2547e4b17023SJohn Marino
2548e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2549e4b17023SJohn Marino {
2550e4b17023SJohn Marino gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2551e4b17023SJohn Marino enum tree_code use_code;
2552e4b17023SJohn Marino tree addop, mulop1 = op1, result = mul_result;
2553e4b17023SJohn Marino bool negate_p = false;
2554e4b17023SJohn Marino
2555e4b17023SJohn Marino if (is_gimple_debug (use_stmt))
2556e4b17023SJohn Marino continue;
2557e4b17023SJohn Marino
2558e4b17023SJohn Marino use_code = gimple_assign_rhs_code (use_stmt);
2559e4b17023SJohn Marino if (use_code == NEGATE_EXPR)
2560e4b17023SJohn Marino {
2561e4b17023SJohn Marino result = gimple_assign_lhs (use_stmt);
2562e4b17023SJohn Marino single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2563e4b17023SJohn Marino gsi_remove (&gsi, true);
2564e4b17023SJohn Marino release_defs (use_stmt);
2565e4b17023SJohn Marino
2566e4b17023SJohn Marino use_stmt = neguse_stmt;
2567e4b17023SJohn Marino gsi = gsi_for_stmt (use_stmt);
2568e4b17023SJohn Marino use_code = gimple_assign_rhs_code (use_stmt);
2569e4b17023SJohn Marino negate_p = true;
2570e4b17023SJohn Marino }
2571e4b17023SJohn Marino
2572e4b17023SJohn Marino if (gimple_assign_rhs1 (use_stmt) == result)
2573e4b17023SJohn Marino {
2574e4b17023SJohn Marino addop = gimple_assign_rhs2 (use_stmt);
2575e4b17023SJohn Marino /* a * b - c -> a * b + (-c) */
2576e4b17023SJohn Marino if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2577e4b17023SJohn Marino addop = force_gimple_operand_gsi (&gsi,
2578e4b17023SJohn Marino build1 (NEGATE_EXPR,
2579e4b17023SJohn Marino type, addop),
2580e4b17023SJohn Marino true, NULL_TREE, true,
2581e4b17023SJohn Marino GSI_SAME_STMT);
2582e4b17023SJohn Marino }
2583e4b17023SJohn Marino else
2584e4b17023SJohn Marino {
2585e4b17023SJohn Marino addop = gimple_assign_rhs1 (use_stmt);
2586e4b17023SJohn Marino /* a - b * c -> (-b) * c + a */
2587e4b17023SJohn Marino if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2588e4b17023SJohn Marino negate_p = !negate_p;
2589e4b17023SJohn Marino }
2590e4b17023SJohn Marino
2591e4b17023SJohn Marino if (negate_p)
2592e4b17023SJohn Marino mulop1 = force_gimple_operand_gsi (&gsi,
2593e4b17023SJohn Marino build1 (NEGATE_EXPR,
2594e4b17023SJohn Marino type, mulop1),
2595e4b17023SJohn Marino true, NULL_TREE, true,
2596e4b17023SJohn Marino GSI_SAME_STMT);
2597e4b17023SJohn Marino
2598e4b17023SJohn Marino fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
2599e4b17023SJohn Marino gimple_assign_lhs (use_stmt),
2600e4b17023SJohn Marino mulop1, op2,
2601e4b17023SJohn Marino addop);
2602e4b17023SJohn Marino gsi_replace (&gsi, fma_stmt, true);
2603e4b17023SJohn Marino widen_mul_stats.fmas_inserted++;
2604e4b17023SJohn Marino }
2605e4b17023SJohn Marino
2606e4b17023SJohn Marino return true;
2607e4b17023SJohn Marino }
2608e4b17023SJohn Marino
2609e4b17023SJohn Marino /* Find integer multiplications where the operands are extended from
2610e4b17023SJohn Marino smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
2611e4b17023SJohn Marino where appropriate. */
2612e4b17023SJohn Marino
2613e4b17023SJohn Marino static unsigned int
execute_optimize_widening_mul(void)2614e4b17023SJohn Marino execute_optimize_widening_mul (void)
2615e4b17023SJohn Marino {
2616e4b17023SJohn Marino basic_block bb;
2617e4b17023SJohn Marino bool cfg_changed = false;
2618e4b17023SJohn Marino
2619e4b17023SJohn Marino memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
2620e4b17023SJohn Marino
2621e4b17023SJohn Marino FOR_EACH_BB (bb)
2622e4b17023SJohn Marino {
2623e4b17023SJohn Marino gimple_stmt_iterator gsi;
2624e4b17023SJohn Marino
2625e4b17023SJohn Marino for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
2626e4b17023SJohn Marino {
2627e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
2628e4b17023SJohn Marino enum tree_code code;
2629e4b17023SJohn Marino
2630e4b17023SJohn Marino if (is_gimple_assign (stmt))
2631e4b17023SJohn Marino {
2632e4b17023SJohn Marino code = gimple_assign_rhs_code (stmt);
2633e4b17023SJohn Marino switch (code)
2634e4b17023SJohn Marino {
2635e4b17023SJohn Marino case MULT_EXPR:
2636e4b17023SJohn Marino if (!convert_mult_to_widen (stmt, &gsi)
2637e4b17023SJohn Marino && convert_mult_to_fma (stmt,
2638e4b17023SJohn Marino gimple_assign_rhs1 (stmt),
2639e4b17023SJohn Marino gimple_assign_rhs2 (stmt)))
2640e4b17023SJohn Marino {
2641e4b17023SJohn Marino gsi_remove (&gsi, true);
2642e4b17023SJohn Marino release_defs (stmt);
2643e4b17023SJohn Marino continue;
2644e4b17023SJohn Marino }
2645e4b17023SJohn Marino break;
2646e4b17023SJohn Marino
2647e4b17023SJohn Marino case PLUS_EXPR:
2648e4b17023SJohn Marino case MINUS_EXPR:
2649e4b17023SJohn Marino convert_plusminus_to_widen (&gsi, stmt, code);
2650e4b17023SJohn Marino break;
2651e4b17023SJohn Marino
2652e4b17023SJohn Marino default:;
2653e4b17023SJohn Marino }
2654e4b17023SJohn Marino }
2655e4b17023SJohn Marino else if (is_gimple_call (stmt)
2656e4b17023SJohn Marino && gimple_call_lhs (stmt))
2657e4b17023SJohn Marino {
2658e4b17023SJohn Marino tree fndecl = gimple_call_fndecl (stmt);
2659e4b17023SJohn Marino if (fndecl
2660e4b17023SJohn Marino && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
2661e4b17023SJohn Marino {
2662e4b17023SJohn Marino switch (DECL_FUNCTION_CODE (fndecl))
2663e4b17023SJohn Marino {
2664e4b17023SJohn Marino case BUILT_IN_POWF:
2665e4b17023SJohn Marino case BUILT_IN_POW:
2666e4b17023SJohn Marino case BUILT_IN_POWL:
2667e4b17023SJohn Marino if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
2668e4b17023SJohn Marino && REAL_VALUES_EQUAL
2669e4b17023SJohn Marino (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
2670e4b17023SJohn Marino dconst2)
2671e4b17023SJohn Marino && convert_mult_to_fma (stmt,
2672e4b17023SJohn Marino gimple_call_arg (stmt, 0),
2673e4b17023SJohn Marino gimple_call_arg (stmt, 0)))
2674e4b17023SJohn Marino {
2675e4b17023SJohn Marino unlink_stmt_vdef (stmt);
2676e4b17023SJohn Marino gsi_remove (&gsi, true);
2677e4b17023SJohn Marino release_defs (stmt);
2678e4b17023SJohn Marino if (gimple_purge_dead_eh_edges (bb))
2679e4b17023SJohn Marino cfg_changed = true;
2680e4b17023SJohn Marino continue;
2681e4b17023SJohn Marino }
2682e4b17023SJohn Marino break;
2683e4b17023SJohn Marino
2684e4b17023SJohn Marino default:;
2685e4b17023SJohn Marino }
2686e4b17023SJohn Marino }
2687e4b17023SJohn Marino }
2688e4b17023SJohn Marino gsi_next (&gsi);
2689e4b17023SJohn Marino }
2690e4b17023SJohn Marino }
2691e4b17023SJohn Marino
2692e4b17023SJohn Marino statistics_counter_event (cfun, "widening multiplications inserted",
2693e4b17023SJohn Marino widen_mul_stats.widen_mults_inserted);
2694e4b17023SJohn Marino statistics_counter_event (cfun, "widening maccs inserted",
2695e4b17023SJohn Marino widen_mul_stats.maccs_inserted);
2696e4b17023SJohn Marino statistics_counter_event (cfun, "fused multiply-adds inserted",
2697e4b17023SJohn Marino widen_mul_stats.fmas_inserted);
2698e4b17023SJohn Marino
2699e4b17023SJohn Marino return cfg_changed ? TODO_cleanup_cfg : 0;
2700e4b17023SJohn Marino }
2701e4b17023SJohn Marino
2702e4b17023SJohn Marino static bool
gate_optimize_widening_mul(void)2703e4b17023SJohn Marino gate_optimize_widening_mul (void)
2704e4b17023SJohn Marino {
2705e4b17023SJohn Marino return flag_expensive_optimizations && optimize;
2706e4b17023SJohn Marino }
2707e4b17023SJohn Marino
2708e4b17023SJohn Marino struct gimple_opt_pass pass_optimize_widening_mul =
2709e4b17023SJohn Marino {
2710e4b17023SJohn Marino {
2711e4b17023SJohn Marino GIMPLE_PASS,
2712e4b17023SJohn Marino "widening_mul", /* name */
2713e4b17023SJohn Marino gate_optimize_widening_mul, /* gate */
2714e4b17023SJohn Marino execute_optimize_widening_mul, /* execute */
2715e4b17023SJohn Marino NULL, /* sub */
2716e4b17023SJohn Marino NULL, /* next */
2717e4b17023SJohn Marino 0, /* static_pass_number */
2718e4b17023SJohn Marino TV_NONE, /* tv_id */
2719e4b17023SJohn Marino PROP_ssa, /* properties_required */
2720e4b17023SJohn Marino 0, /* properties_provided */
2721e4b17023SJohn Marino 0, /* properties_destroyed */
2722e4b17023SJohn Marino 0, /* todo_flags_start */
2723e4b17023SJohn Marino TODO_verify_ssa
2724e4b17023SJohn Marino | TODO_verify_stmts
2725e4b17023SJohn Marino | TODO_update_ssa /* todo_flags_finish */
2726e4b17023SJohn Marino }
2727e4b17023SJohn Marino };
2728