xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-math-opts.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Global, SSA-based optimizations using mathematical identities.
2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3    Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Currently, the only mini-pass in this file tries to CSE reciprocal
22    operations.  These are common in sequences such as this one:
23 
24 	modulus = sqrt(x*x + y*y + z*z);
25 	x = x / modulus;
26 	y = y / modulus;
27 	z = z / modulus;
28 
29    that can be optimized to
30 
31 	modulus = sqrt(x*x + y*y + z*z);
32         rmodulus = 1.0 / modulus;
33 	x = x * rmodulus;
34 	y = y * rmodulus;
35 	z = z * rmodulus;
36 
37    We do this for loop invariant divisors, and with this pass whenever
38    we notice that a division has the same divisor multiple times.
39 
40    Of course, like in PRE, we don't insert a division if a dominator
41    already has one.  However, this cannot be done as an extension of
42    PRE for several reasons.
43 
44    First of all, with some experiments it was found out that the
45    transformation is not always useful if there are only two divisions
46    hy the same divisor.  This is probably because modern processors
47    can pipeline the divisions; on older, in-order processors it should
48    still be effective to optimize two divisions by the same number.
49    We make this a param, and it shall be called N in the remainder of
50    this comment.
51 
52    Second, if trapping math is active, we have less freedom on where
53    to insert divisions: we can only do so in basic blocks that already
54    contain one.  (If divisions don't trap, instead, we can insert
55    divisions elsewhere, which will be in blocks that are common dominators
56    of those that have the division).
57 
58    We really don't want to compute the reciprocal unless a division will
59    be found.  To do this, we won't insert the division in a basic block
60    that has less than N divisions *post-dominating* it.
61 
62    The algorithm constructs a subset of the dominator tree, holding the
63    blocks containing the divisions and the common dominators to them,
64    and walk it twice.  The first walk is in post-order, and it annotates
65    each block with the number of divisions that post-dominate it: this
66    gives information on where divisions can be inserted profitably.
67    The second walk is in pre-order, and it inserts divisions as explained
68    above, and replaces divisions by multiplications.
69 
70    In the best case, the cost of the pass is O(n_statements).  In the
71    worst-case, the cost is due to creating the dominator tree subset,
72    with a cost of O(n_basic_blocks ^ 2); however this can only happen
73    for n_statements / n_basic_blocks statements.  So, the amortized cost
74    of creating the dominator tree subset is O(n_basic_blocks) and the
75    worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 
77    More practically, the cost will be small because there are few
78    divisions, and they tend to be in the same basic block, so insert_bb
79    is called very few times.
80 
81    If we did this using domwalk.c, an efficient implementation would have
82    to work on all the variables in a single pass, because we could not
83    work on just a subset of the dominator tree, as we do now, and the
84    cost would also be something like O(n_statements * n_basic_blocks).
85    The data structures would be more complex in order to work on all the
86    variables in a single pass.  */
87 
88 #include "config.h"
89 #include "system.h"
90 #include "coretypes.h"
91 #include "tm.h"
92 #include "flags.h"
93 #include "tree.h"
94 #include "tree-flow.h"
95 #include "real.h"
96 #include "timevar.h"
97 #include "tree-pass.h"
98 #include "alloc-pool.h"
99 #include "basic-block.h"
100 #include "target.h"
101 #include "diagnostic.h"
102 #include "rtl.h"
103 #include "expr.h"
104 #include "optabs.h"
105 
106 /* This structure represents one basic block that either computes a
107    division, or is a common dominator for basic block that compute a
108    division.  */
109 struct occurrence {
110   /* The basic block represented by this structure.  */
111   basic_block bb;
112 
113   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
114      inserted in BB.  */
115   tree recip_def;
116 
117   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
118      was inserted in BB.  */
119   gimple recip_def_stmt;
120 
121   /* Pointer to a list of "struct occurrence"s for blocks dominated
122      by BB.  */
123   struct occurrence *children;
124 
125   /* Pointer to the next "struct occurrence"s in the list of blocks
126      sharing a common dominator.  */
127   struct occurrence *next;
128 
129   /* The number of divisions that are in BB before compute_merit.  The
130      number of divisions that are in BB or post-dominate it after
131      compute_merit.  */
132   int num_divisions;
133 
134   /* True if the basic block has a division, false if it is a common
135      dominator for basic blocks that do.  If it is false and trapping
136      math is active, BB is not a candidate for inserting a reciprocal.  */
137   bool bb_has_division;
138 };
139 
140 
141 /* The instance of "struct occurrence" representing the highest
142    interesting block in the dominator tree.  */
143 static struct occurrence *occ_head;
144 
145 /* Allocation pool for getting instances of "struct occurrence".  */
146 static alloc_pool occ_pool;
147 
148 
149 
150 /* Allocate and return a new struct occurrence for basic block BB, and
151    whose children list is headed by CHILDREN.  */
152 static struct occurrence *
153 occ_new (basic_block bb, struct occurrence *children)
154 {
155   struct occurrence *occ;
156 
157   bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
158   memset (occ, 0, sizeof (struct occurrence));
159 
160   occ->bb = bb;
161   occ->children = children;
162   return occ;
163 }
164 
165 
166 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
167    list of "struct occurrence"s, one per basic block, having IDOM as
168    their common dominator.
169 
170    We try to insert NEW_OCC as deep as possible in the tree, and we also
171    insert any other block that is a common dominator for BB and one
172    block already in the tree.  */
173 
174 static void
175 insert_bb (struct occurrence *new_occ, basic_block idom,
176 	   struct occurrence **p_head)
177 {
178   struct occurrence *occ, **p_occ;
179 
180   for (p_occ = p_head; (occ = *p_occ) != NULL; )
181     {
182       basic_block bb = new_occ->bb, occ_bb = occ->bb;
183       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
184       if (dom == bb)
185 	{
186 	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
187 	     from its list.  */
188 	  *p_occ = occ->next;
189 	  occ->next = new_occ->children;
190 	  new_occ->children = occ;
191 
192 	  /* Try the next block (it may as well be dominated by BB).  */
193 	}
194 
195       else if (dom == occ_bb)
196 	{
197 	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
198 	  insert_bb (new_occ, dom, &occ->children);
199 	  return;
200 	}
201 
202       else if (dom != idom)
203 	{
204 	  gcc_assert (!dom->aux);
205 
206 	  /* There is a dominator between IDOM and BB, add it and make
207 	     two children out of NEW_OCC and OCC.  First, remove OCC from
208 	     its list.	*/
209 	  *p_occ = occ->next;
210 	  new_occ->next = occ;
211 	  occ->next = NULL;
212 
213 	  /* None of the previous blocks has DOM as a dominator: if we tail
214 	     recursed, we would reexamine them uselessly. Just switch BB with
215 	     DOM, and go on looking for blocks dominated by DOM.  */
216           new_occ = occ_new (dom, new_occ);
217 	}
218 
219       else
220 	{
221 	  /* Nothing special, go on with the next element.  */
222 	  p_occ = &occ->next;
223 	}
224     }
225 
226   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
227   new_occ->next = *p_head;
228   *p_head = new_occ;
229 }
230 
231 /* Register that we found a division in BB.  */
232 
233 static inline void
234 register_division_in (basic_block bb)
235 {
236   struct occurrence *occ;
237 
238   occ = (struct occurrence *) bb->aux;
239   if (!occ)
240     {
241       occ = occ_new (bb, NULL);
242       insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
243     }
244 
245   occ->bb_has_division = true;
246   occ->num_divisions++;
247 }
248 
249 
250 /* Compute the number of divisions that postdominate each block in OCC and
251    its children.  */
252 
253 static void
254 compute_merit (struct occurrence *occ)
255 {
256   struct occurrence *occ_child;
257   basic_block dom = occ->bb;
258 
259   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
260     {
261       basic_block bb;
262       if (occ_child->children)
263         compute_merit (occ_child);
264 
265       if (flag_exceptions)
266 	bb = single_noncomplex_succ (dom);
267       else
268 	bb = dom;
269 
270       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
271         occ->num_divisions += occ_child->num_divisions;
272     }
273 }
274 
275 
276 /* Return whether USE_STMT is a floating-point division by DEF.  */
277 static inline bool
278 is_division_by (gimple use_stmt, tree def)
279 {
280   return is_gimple_assign (use_stmt)
281 	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
282 	 && gimple_assign_rhs2 (use_stmt) == def
283 	 /* Do not recognize x / x as valid division, as we are getting
284 	    confused later by replacing all immediate uses x in such
285 	    a stmt.  */
286 	 && gimple_assign_rhs1 (use_stmt) != def;
287 }
288 
289 /* Walk the subset of the dominator tree rooted at OCC, setting the
290    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
291    the given basic block.  The field may be left NULL, of course,
292    if it is not possible or profitable to do the optimization.
293 
294    DEF_BSI is an iterator pointing at the statement defining DEF.
295    If RECIP_DEF is set, a dominator already has a computation that can
296    be used.  */
297 
298 static void
299 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
300 		    tree def, tree recip_def, int threshold)
301 {
302   tree type;
303   gimple new_stmt;
304   gimple_stmt_iterator gsi;
305   struct occurrence *occ_child;
306 
307   if (!recip_def
308       && (occ->bb_has_division || !flag_trapping_math)
309       && occ->num_divisions >= threshold)
310     {
311       /* Make a variable with the replacement and substitute it.  */
312       type = TREE_TYPE (def);
313       recip_def = make_rename_temp (type, "reciptmp");
314       new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
315 					       build_one_cst (type), def);
316 
317       if (occ->bb_has_division)
318         {
319           /* Case 1: insert before an existing division.  */
320           gsi = gsi_after_labels (occ->bb);
321           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
322 	    gsi_next (&gsi);
323 
324           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
325         }
326       else if (def_gsi && occ->bb == def_gsi->bb)
327         {
328           /* Case 2: insert right after the definition.  Note that this will
329 	     never happen if the definition statement can throw, because in
330 	     that case the sole successor of the statement's basic block will
331 	     dominate all the uses as well.  */
332           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
333         }
334       else
335         {
336           /* Case 3: insert in a basic block not containing defs/uses.  */
337           gsi = gsi_after_labels (occ->bb);
338           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
339         }
340 
341       occ->recip_def_stmt = new_stmt;
342     }
343 
344   occ->recip_def = recip_def;
345   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
346     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
347 }
348 
349 
350 /* Replace the division at USE_P with a multiplication by the reciprocal, if
351    possible.  */
352 
353 static inline void
354 replace_reciprocal (use_operand_p use_p)
355 {
356   gimple use_stmt = USE_STMT (use_p);
357   basic_block bb = gimple_bb (use_stmt);
358   struct occurrence *occ = (struct occurrence *) bb->aux;
359 
360   if (optimize_bb_for_speed_p (bb)
361       && occ->recip_def && use_stmt != occ->recip_def_stmt)
362     {
363       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
364       SET_USE (use_p, occ->recip_def);
365       fold_stmt_inplace (use_stmt);
366       update_stmt (use_stmt);
367     }
368 }
369 
370 
371 /* Free OCC and return one more "struct occurrence" to be freed.  */
372 
373 static struct occurrence *
374 free_bb (struct occurrence *occ)
375 {
376   struct occurrence *child, *next;
377 
378   /* First get the two pointers hanging off OCC.  */
379   next = occ->next;
380   child = occ->children;
381   occ->bb->aux = NULL;
382   pool_free (occ_pool, occ);
383 
384   /* Now ensure that we don't recurse unless it is necessary.  */
385   if (!child)
386     return next;
387   else
388     {
389       while (next)
390 	next = free_bb (next);
391 
392       return child;
393     }
394 }
395 
396 
397 /* Look for floating-point divisions among DEF's uses, and try to
398    replace them by multiplications with the reciprocal.  Add
399    as many statements computing the reciprocal as needed.
400 
401    DEF must be a GIMPLE register of a floating-point type.  */
402 
403 static void
404 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
405 {
406   use_operand_p use_p;
407   imm_use_iterator use_iter;
408   struct occurrence *occ;
409   int count = 0, threshold;
410 
411   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
412 
413   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
414     {
415       gimple use_stmt = USE_STMT (use_p);
416       if (is_division_by (use_stmt, def))
417 	{
418 	  register_division_in (gimple_bb (use_stmt));
419 	  count++;
420 	}
421     }
422 
423   /* Do the expensive part only if we can hope to optimize something.  */
424   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
425   if (count >= threshold)
426     {
427       gimple use_stmt;
428       for (occ = occ_head; occ; occ = occ->next)
429 	{
430 	  compute_merit (occ);
431 	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
432 	}
433 
434       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
435 	{
436 	  if (is_division_by (use_stmt, def))
437 	    {
438 	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
439 		replace_reciprocal (use_p);
440 	    }
441 	}
442     }
443 
444   for (occ = occ_head; occ; )
445     occ = free_bb (occ);
446 
447   occ_head = NULL;
448 }
449 
450 static bool
451 gate_cse_reciprocals (void)
452 {
453   return optimize && flag_reciprocal_math;
454 }
455 
456 /* Go through all the floating-point SSA_NAMEs, and call
457    execute_cse_reciprocals_1 on each of them.  */
458 static unsigned int
459 execute_cse_reciprocals (void)
460 {
461   basic_block bb;
462   tree arg;
463 
464   occ_pool = create_alloc_pool ("dominators for recip",
465 				sizeof (struct occurrence),
466 				n_basic_blocks / 3 + 1);
467 
468   calculate_dominance_info (CDI_DOMINATORS);
469   calculate_dominance_info (CDI_POST_DOMINATORS);
470 
471 #ifdef ENABLE_CHECKING
472   FOR_EACH_BB (bb)
473     gcc_assert (!bb->aux);
474 #endif
475 
476   for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
477     if (gimple_default_def (cfun, arg)
478 	&& FLOAT_TYPE_P (TREE_TYPE (arg))
479 	&& is_gimple_reg (arg))
480       execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
481 
482   FOR_EACH_BB (bb)
483     {
484       gimple_stmt_iterator gsi;
485       gimple phi;
486       tree def;
487 
488       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
489 	{
490 	  phi = gsi_stmt (gsi);
491 	  def = PHI_RESULT (phi);
492 	  if (FLOAT_TYPE_P (TREE_TYPE (def))
493 	      && is_gimple_reg (def))
494 	    execute_cse_reciprocals_1 (NULL, def);
495 	}
496 
497       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
498         {
499 	  gimple stmt = gsi_stmt (gsi);
500 
501 	  if (gimple_has_lhs (stmt)
502 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
503 	      && FLOAT_TYPE_P (TREE_TYPE (def))
504 	      && TREE_CODE (def) == SSA_NAME)
505 	    execute_cse_reciprocals_1 (&gsi, def);
506 	}
507 
508       if (optimize_bb_for_size_p (bb))
509         continue;
510 
511       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
512       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
513         {
514 	  gimple stmt = gsi_stmt (gsi);
515 	  tree fndecl;
516 
517 	  if (is_gimple_assign (stmt)
518 	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
519 	    {
520 	      tree arg1 = gimple_assign_rhs2 (stmt);
521 	      gimple stmt1;
522 
523 	      if (TREE_CODE (arg1) != SSA_NAME)
524 		continue;
525 
526 	      stmt1 = SSA_NAME_DEF_STMT (arg1);
527 
528 	      if (is_gimple_call (stmt1)
529 		  && gimple_call_lhs (stmt1)
530 		  && (fndecl = gimple_call_fndecl (stmt1))
531 		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
532 		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
533 		{
534 		  enum built_in_function code;
535 		  bool md_code, fail;
536 		  imm_use_iterator ui;
537 		  use_operand_p use_p;
538 
539 		  code = DECL_FUNCTION_CODE (fndecl);
540 		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
541 
542 		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
543 		  if (!fndecl)
544 		    continue;
545 
546 		  /* Check that all uses of the SSA name are divisions,
547 		     otherwise replacing the defining statement will do
548 		     the wrong thing.  */
549 		  fail = false;
550 		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
551 		    {
552 		      gimple stmt2 = USE_STMT (use_p);
553 		      if (is_gimple_debug (stmt2))
554 			continue;
555 		      if (!is_gimple_assign (stmt2)
556 			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
557 			  || gimple_assign_rhs1 (stmt2) == arg1
558 			  || gimple_assign_rhs2 (stmt2) != arg1)
559 			{
560 			  fail = true;
561 			  break;
562 			}
563 		    }
564 		  if (fail)
565 		    continue;
566 
567 		  gimple_replace_lhs (stmt1, arg1);
568 		  gimple_call_set_fndecl (stmt1, fndecl);
569 		  update_stmt (stmt1);
570 
571 		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
572 		    {
573 		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
574 		      fold_stmt_inplace (stmt);
575 		      update_stmt (stmt);
576 		    }
577 		}
578 	    }
579 	}
580     }
581 
582   free_dominance_info (CDI_DOMINATORS);
583   free_dominance_info (CDI_POST_DOMINATORS);
584   free_alloc_pool (occ_pool);
585   return 0;
586 }
587 
588 struct gimple_opt_pass pass_cse_reciprocals =
589 {
590  {
591   GIMPLE_PASS,
592   "recip",				/* name */
593   gate_cse_reciprocals,			/* gate */
594   execute_cse_reciprocals,		/* execute */
595   NULL,					/* sub */
596   NULL,					/* next */
597   0,					/* static_pass_number */
598   TV_NONE,				/* tv_id */
599   PROP_ssa,				/* properties_required */
600   0,					/* properties_provided */
601   0,					/* properties_destroyed */
602   0,					/* todo_flags_start */
603   TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
604     | TODO_verify_stmts                /* todo_flags_finish */
605  }
606 };
607 
608 /* Records an occurrence at statement USE_STMT in the vector of trees
609    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
610    is not yet initialized.  Returns true if the occurrence was pushed on
611    the vector.  Adjusts *TOP_BB to be the basic block dominating all
612    statements in the vector.  */
613 
614 static bool
615 maybe_record_sincos (VEC(gimple, heap) **stmts,
616 		     basic_block *top_bb, gimple use_stmt)
617 {
618   basic_block use_bb = gimple_bb (use_stmt);
619   if (*top_bb
620       && (*top_bb == use_bb
621 	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
622     VEC_safe_push (gimple, heap, *stmts, use_stmt);
623   else if (!*top_bb
624 	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
625     {
626       VEC_safe_push (gimple, heap, *stmts, use_stmt);
627       *top_bb = use_bb;
628     }
629   else
630     return false;
631 
632   return true;
633 }
634 
635 /* Look for sin, cos and cexpi calls with the same argument NAME and
636    create a single call to cexpi CSEing the result in this case.
637    We first walk over all immediate uses of the argument collecting
638    statements that we can CSE in a vector and in a second pass replace
639    the statement rhs with a REALPART or IMAGPART expression on the
640    result of the cexpi call we insert before the use statement that
641    dominates all other candidates.  */
642 
643 static bool
644 execute_cse_sincos_1 (tree name)
645 {
646   gimple_stmt_iterator gsi;
647   imm_use_iterator use_iter;
648   tree fndecl, res, type;
649   gimple def_stmt, use_stmt, stmt;
650   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
651   VEC(gimple, heap) *stmts = NULL;
652   basic_block top_bb = NULL;
653   int i;
654   bool cfg_changed = false;
655 
656   type = TREE_TYPE (name);
657   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
658     {
659       if (gimple_code (use_stmt) != GIMPLE_CALL
660 	  || !gimple_call_lhs (use_stmt)
661 	  || !(fndecl = gimple_call_fndecl (use_stmt))
662 	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
663 	continue;
664 
665       switch (DECL_FUNCTION_CODE (fndecl))
666 	{
667 	CASE_FLT_FN (BUILT_IN_COS):
668 	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
669 	  break;
670 
671 	CASE_FLT_FN (BUILT_IN_SIN):
672 	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
673 	  break;
674 
675 	CASE_FLT_FN (BUILT_IN_CEXPI):
676 	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
677 	  break;
678 
679 	default:;
680 	}
681     }
682 
683   if (seen_cos + seen_sin + seen_cexpi <= 1)
684     {
685       VEC_free(gimple, heap, stmts);
686       return false;
687     }
688 
689   /* Simply insert cexpi at the beginning of top_bb but not earlier than
690      the name def statement.  */
691   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
692   if (!fndecl)
693     return false;
694   res = create_tmp_var (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
695   if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) == COMPLEX_TYPE
696       || TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) == VECTOR_TYPE)
697     DECL_GIMPLE_REG_P (res) = 1;
698   stmt = gimple_build_call (fndecl, 1, name);
699   res = make_ssa_name (res, stmt);
700   gimple_call_set_lhs (stmt, res);
701 
702   def_stmt = SSA_NAME_DEF_STMT (name);
703   if (!SSA_NAME_IS_DEFAULT_DEF (name)
704       && gimple_code (def_stmt) != GIMPLE_PHI
705       && gimple_bb (def_stmt) == top_bb)
706     {
707       gsi = gsi_for_stmt (def_stmt);
708       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
709     }
710   else
711     {
712       gsi = gsi_after_labels (top_bb);
713       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
714     }
715   update_stmt (stmt);
716 
717   /* And adjust the recorded old call sites.  */
718   for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
719     {
720       tree rhs = NULL;
721       fndecl = gimple_call_fndecl (use_stmt);
722 
723       switch (DECL_FUNCTION_CODE (fndecl))
724 	{
725 	CASE_FLT_FN (BUILT_IN_COS):
726 	  rhs = fold_build1 (REALPART_EXPR, type, res);
727 	  break;
728 
729 	CASE_FLT_FN (BUILT_IN_SIN):
730 	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
731 	  break;
732 
733 	CASE_FLT_FN (BUILT_IN_CEXPI):
734 	  rhs = res;
735 	  break;
736 
737 	default:;
738 	  gcc_unreachable ();
739 	}
740 
741 	/* Replace call with a copy.  */
742 	stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
743 
744 	gsi = gsi_for_stmt (use_stmt);
745 	gsi_replace (&gsi, stmt, true);
746 	if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
747 	  cfg_changed = true;
748     }
749 
750   VEC_free(gimple, heap, stmts);
751 
752   return cfg_changed;
753 }
754 
755 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
756    on the SSA_NAME argument of each of them.  */
757 
758 static unsigned int
759 execute_cse_sincos (void)
760 {
761   basic_block bb;
762   bool cfg_changed = false;
763 
764   calculate_dominance_info (CDI_DOMINATORS);
765 
766   FOR_EACH_BB (bb)
767     {
768       gimple_stmt_iterator gsi;
769 
770       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
771         {
772 	  gimple stmt = gsi_stmt (gsi);
773 	  tree fndecl;
774 
775 	  if (is_gimple_call (stmt)
776 	      && gimple_call_lhs (stmt)
777 	      && (fndecl = gimple_call_fndecl (stmt))
778 	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
779 	    {
780 	      tree arg;
781 
782 	      switch (DECL_FUNCTION_CODE (fndecl))
783 		{
784 		CASE_FLT_FN (BUILT_IN_COS):
785 		CASE_FLT_FN (BUILT_IN_SIN):
786 		CASE_FLT_FN (BUILT_IN_CEXPI):
787 		  arg = gimple_call_arg (stmt, 0);
788 		  if (TREE_CODE (arg) == SSA_NAME)
789 		    cfg_changed |= execute_cse_sincos_1 (arg);
790 		  break;
791 
792 		default:;
793 		}
794 	    }
795 	}
796     }
797 
798   free_dominance_info (CDI_DOMINATORS);
799   return cfg_changed ? TODO_cleanup_cfg : 0;
800 }
801 
802 static bool
803 gate_cse_sincos (void)
804 {
805   /* Make sure we have either sincos or cexp.  */
806   return (TARGET_HAS_SINCOS
807 	  || TARGET_C99_FUNCTIONS)
808 	 && optimize;
809 }
810 
811 struct gimple_opt_pass pass_cse_sincos =
812 {
813  {
814   GIMPLE_PASS,
815   "sincos",				/* name */
816   gate_cse_sincos,			/* gate */
817   execute_cse_sincos,			/* execute */
818   NULL,					/* sub */
819   NULL,					/* next */
820   0,					/* static_pass_number */
821   TV_NONE,				/* tv_id */
822   PROP_ssa,				/* properties_required */
823   0,					/* properties_provided */
824   0,					/* properties_destroyed */
825   0,					/* todo_flags_start */
826   TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
827     | TODO_verify_stmts                 /* todo_flags_finish */
828  }
829 };
830 
831 /* A symbolic number is used to detect byte permutation and selection
832    patterns.  Therefore the field N contains an artificial number
833    consisting of byte size markers:
834 
835    0    - byte has the value 0
836    1..size - byte contains the content of the byte
837    number indexed with that value minus one  */
838 
839 struct symbolic_number {
840   unsigned HOST_WIDEST_INT n;
841   int size;
842 };
843 
844 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
845    number N.  Return false if the requested operation is not permitted
846    on a symbolic number.  */
847 
848 static inline bool
849 do_shift_rotate (enum tree_code code,
850 		 struct symbolic_number *n,
851 		 int count)
852 {
853   if (count % 8 != 0)
854     return false;
855 
856   /* Zero out the extra bits of N in order to avoid them being shifted
857      into the significant bits.  */
858   if (n->size < (int)sizeof (HOST_WIDEST_INT))
859     n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
860 
861   switch (code)
862     {
863     case LSHIFT_EXPR:
864       n->n <<= count;
865       break;
866     case RSHIFT_EXPR:
867       n->n >>= count;
868       break;
869     case LROTATE_EXPR:
870       n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
871       break;
872     case RROTATE_EXPR:
873       n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
874       break;
875     default:
876       return false;
877     }
878   return true;
879 }
880 
881 /* Perform sanity checking for the symbolic number N and the gimple
882    statement STMT.  */
883 
884 static inline bool
885 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
886 {
887   tree lhs_type;
888 
889   lhs_type = gimple_expr_type (stmt);
890 
891   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
892     return false;
893 
894   if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
895     return false;
896 
897   return true;
898 }
899 
900 /* find_bswap_1 invokes itself recursively with N and tries to perform
901    the operation given by the rhs of STMT on the result.  If the
902    operation could successfully be executed the function returns the
903    tree expression of the source operand and NULL otherwise.  */
904 
905 static tree
906 find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
907 {
908   enum tree_code code;
909   tree rhs1, rhs2 = NULL;
910   gimple rhs1_stmt, rhs2_stmt;
911   tree source_expr1;
912   enum gimple_rhs_class rhs_class;
913 
914   if (!limit || !is_gimple_assign (stmt))
915     return NULL_TREE;
916 
917   rhs1 = gimple_assign_rhs1 (stmt);
918 
919   if (TREE_CODE (rhs1) != SSA_NAME)
920     return NULL_TREE;
921 
922   code = gimple_assign_rhs_code (stmt);
923   rhs_class = gimple_assign_rhs_class (stmt);
924   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
925 
926   if (rhs_class == GIMPLE_BINARY_RHS)
927     rhs2 = gimple_assign_rhs2 (stmt);
928 
929   /* Handle unary rhs and binary rhs with integer constants as second
930      operand.  */
931 
932   if (rhs_class == GIMPLE_UNARY_RHS
933       || (rhs_class == GIMPLE_BINARY_RHS
934 	  && TREE_CODE (rhs2) == INTEGER_CST))
935     {
936       if (code != BIT_AND_EXPR
937 	  && code != LSHIFT_EXPR
938 	  && code != RSHIFT_EXPR
939 	  && code != LROTATE_EXPR
940 	  && code != RROTATE_EXPR
941 	  && code != NOP_EXPR
942 	  && code != CONVERT_EXPR)
943 	return NULL_TREE;
944 
945       source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
946 
947       /* If find_bswap_1 returned NULL STMT is a leaf node and we have
948 	 to initialize the symbolic number.  */
949       if (!source_expr1)
950 	{
951 	  /* Set up the symbolic number N by setting each byte to a
952 	     value between 1 and the byte size of rhs1.  The highest
953 	     order byte is set to n->size and the lowest order
954 	     byte to 1.  */
955 	  n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
956 	  if (n->size % BITS_PER_UNIT != 0)
957 	    return NULL_TREE;
958 	  n->size /= BITS_PER_UNIT;
959 	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
960 		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
961 
962 	  if (n->size < (int)sizeof (HOST_WIDEST_INT))
963 	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
964 		     (n->size * BITS_PER_UNIT)) - 1;
965 
966 	  source_expr1 = rhs1;
967 	}
968 
969       switch (code)
970 	{
971 	case BIT_AND_EXPR:
972 	  {
973 	    int i;
974 	    unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
975 	    unsigned HOST_WIDEST_INT tmp = val;
976 
977 	    /* Only constants masking full bytes are allowed.  */
978 	    for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
979 	      if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
980 		return NULL_TREE;
981 
982 	    n->n &= val;
983 	  }
984 	  break;
985 	case LSHIFT_EXPR:
986 	case RSHIFT_EXPR:
987 	case LROTATE_EXPR:
988 	case RROTATE_EXPR:
989 	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
990 	    return NULL_TREE;
991 	  break;
992 	CASE_CONVERT:
993 	  {
994 	    int type_size;
995 
996 	    type_size = TYPE_PRECISION (gimple_expr_type (stmt));
997 	    if (type_size % BITS_PER_UNIT != 0)
998 	      return NULL_TREE;
999 
1000 	    if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1001 	      {
1002 		/* If STMT casts to a smaller type mask out the bits not
1003 		   belonging to the target type.  */
1004 		n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1005 	      }
1006 	    n->size = type_size / BITS_PER_UNIT;
1007 	  }
1008 	  break;
1009 	default:
1010 	  return NULL_TREE;
1011 	};
1012       return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1013     }
1014 
1015   /* Handle binary rhs.  */
1016 
1017   if (rhs_class == GIMPLE_BINARY_RHS)
1018     {
1019       struct symbolic_number n1, n2;
1020       tree source_expr2;
1021 
1022       if (code != BIT_IOR_EXPR)
1023 	return NULL_TREE;
1024 
1025       if (TREE_CODE (rhs2) != SSA_NAME)
1026 	return NULL_TREE;
1027 
1028       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1029 
1030       switch (code)
1031 	{
1032 	case BIT_IOR_EXPR:
1033 	  source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1034 
1035 	  if (!source_expr1)
1036 	    return NULL_TREE;
1037 
1038 	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1039 
1040 	  if (source_expr1 != source_expr2
1041 	      || n1.size != n2.size)
1042 	    return NULL_TREE;
1043 
1044 	  n->size = n1.size;
1045 	  n->n = n1.n | n2.n;
1046 
1047 	  if (!verify_symbolic_number_p (n, stmt))
1048 	    return NULL_TREE;
1049 
1050 	  break;
1051 	default:
1052 	  return NULL_TREE;
1053 	}
1054       return source_expr1;
1055     }
1056   return NULL_TREE;
1057 }
1058 
1059 /* Check if STMT completes a bswap implementation consisting of ORs,
1060    SHIFTs and ANDs.  Return the source tree expression on which the
1061    byte swap is performed and NULL if no bswap was found.  */
1062 
1063 static tree
1064 find_bswap (gimple stmt)
1065 {
1066 /* The number which the find_bswap result should match in order to
1067    have a full byte swap.  The number is shifted to the left according
1068    to the size of the symbolic number before using it.  */
1069   unsigned HOST_WIDEST_INT cmp =
1070     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1071     (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1072 
1073   struct symbolic_number n;
1074   tree source_expr;
1075 
1076   /* The last parameter determines the depth search limit.  It usually
1077      correlates directly to the number of bytes to be touched.  We
1078      increase that number by one here in order to also cover signed ->
1079      unsigned conversions of the src operand as can be seen in
1080      libgcc.  */
1081   source_expr =  find_bswap_1 (stmt, &n,
1082 			       TREE_INT_CST_LOW (
1083 				 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);
1084 
1085   if (!source_expr)
1086     return NULL_TREE;
1087 
1088   /* Zero out the extra bits of N and CMP.  */
1089   if (n.size < (int)sizeof (HOST_WIDEST_INT))
1090     {
1091       unsigned HOST_WIDEST_INT mask =
1092 	((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1093 
1094       n.n &= mask;
1095       cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1096     }
1097 
1098   /* A complete byte swap should make the symbolic number to start
1099      with the largest digit in the highest order byte.  */
1100   if (cmp != n.n)
1101     return NULL_TREE;
1102 
1103   return source_expr;
1104 }
1105 
1106 /* Find manual byte swap implementations and turn them into a bswap
1107    builtin invokation.  */
1108 
1109 static unsigned int
1110 execute_optimize_bswap (void)
1111 {
1112   basic_block bb;
1113   bool bswap32_p, bswap64_p;
1114   bool changed = false;
1115   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1116 
1117   if (BITS_PER_UNIT != 8)
1118     return 0;
1119 
1120   if (sizeof (HOST_WIDEST_INT) < 8)
1121     return 0;
1122 
1123   bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
1124 	       && optab_handler (bswap_optab, SImode)->insn_code !=
1125 	       CODE_FOR_nothing);
1126   bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
1127 	       && (optab_handler (bswap_optab, DImode)->insn_code !=
1128 		   CODE_FOR_nothing
1129 		   || (bswap32_p && word_mode == SImode)));
1130 
1131   if (!bswap32_p && !bswap64_p)
1132     return 0;
1133 
1134   /* Determine the argument type of the builtins.  The code later on
1135      assumes that the return and argument type are the same.  */
1136   if (bswap32_p)
1137     {
1138       tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
1139       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1140     }
1141 
1142   if (bswap64_p)
1143     {
1144       tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
1145       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1146     }
1147 
1148   FOR_EACH_BB (bb)
1149     {
1150       gimple_stmt_iterator gsi;
1151 
1152       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1153         {
1154 	  gimple stmt = gsi_stmt (gsi);
1155 	  tree bswap_src, bswap_type;
1156 	  tree bswap_tmp;
1157 	  tree fndecl = NULL_TREE;
1158 	  int type_size;
1159 	  gimple call;
1160 
1161 	  if (!is_gimple_assign (stmt)
1162 	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1163 	    continue;
1164 
1165 	  type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1166 
1167 	  switch (type_size)
1168 	    {
1169 	    case 32:
1170 	      if (bswap32_p)
1171 		{
1172 		  fndecl = built_in_decls[BUILT_IN_BSWAP32];
1173 		  bswap_type = bswap32_type;
1174 		}
1175 	      break;
1176 	    case 64:
1177 	      if (bswap64_p)
1178 		{
1179 		  fndecl = built_in_decls[BUILT_IN_BSWAP64];
1180 		  bswap_type = bswap64_type;
1181 		}
1182 	      break;
1183 	    default:
1184 	      continue;
1185 	    }
1186 
1187 	  if (!fndecl)
1188 	    continue;
1189 
1190 	  bswap_src = find_bswap (stmt);
1191 
1192 	  if (!bswap_src)
1193 	    continue;
1194 
1195 	  changed = true;
1196 
1197 	  bswap_tmp = bswap_src;
1198 
1199 	  /* Convert the src expression if necessary.  */
1200 	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1201 	    {
1202 	      gimple convert_stmt;
1203 
1204 	      bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1205 	      add_referenced_var (bswap_tmp);
1206 	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1207 
1208 	      convert_stmt = gimple_build_assign_with_ops (
1209 			       CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1210 	      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1211 	    }
1212 
1213 	  call = gimple_build_call (fndecl, 1, bswap_tmp);
1214 
1215 	  bswap_tmp = gimple_assign_lhs (stmt);
1216 
1217 	  /* Convert the result if necessary.  */
1218 	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1219 	    {
1220 	      gimple convert_stmt;
1221 
1222 	      bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1223 	      add_referenced_var (bswap_tmp);
1224 	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1225 	      convert_stmt = gimple_build_assign_with_ops (
1226 		               CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1227 	      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1228 	    }
1229 
1230 	  gimple_call_set_lhs (call, bswap_tmp);
1231 
1232 	  if (dump_file)
1233 	    {
1234 	      fprintf (dump_file, "%d bit bswap implementation found at: ",
1235 		       (int)type_size);
1236 	      print_gimple_stmt (dump_file, stmt, 0, 0);
1237 	    }
1238 
1239 	  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1240 	  gsi_remove (&gsi, true);
1241 	}
1242     }
1243 
1244   return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1245 	  | TODO_verify_stmts : 0);
1246 }
1247 
1248 static bool
1249 gate_optimize_bswap (void)
1250 {
1251   return flag_expensive_optimizations && optimize;
1252 }
1253 
1254 struct gimple_opt_pass pass_optimize_bswap =
1255 {
1256  {
1257   GIMPLE_PASS,
1258   "bswap",				/* name */
1259   gate_optimize_bswap,                  /* gate */
1260   execute_optimize_bswap,		/* execute */
1261   NULL,					/* sub */
1262   NULL,					/* next */
1263   0,					/* static_pass_number */
1264   TV_NONE,				/* tv_id */
1265   PROP_ssa,				/* properties_required */
1266   0,					/* properties_provided */
1267   0,					/* properties_destroyed */
1268   0,					/* todo_flags_start */
1269   0                                     /* todo_flags_finish */
1270  }
1271 };
1272