xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-tail-merge.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* Tail merging for gimple.
2    Copyright (C) 2011-2013 Free Software Foundation, Inc.
3    Contributed by Tom de Vries (tom@codesourcery.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Pass overview.
22 
23 
24    MOTIVATIONAL EXAMPLE
25 
26    gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27 
28    hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29    {
30      struct FILED.1638 * fpD.2605;
31      charD.1 fileNameD.2604[1000];
32      intD.0 D.3915;
33      const charD.1 * restrict outputFileName.0D.3914;
34 
35      # BLOCK 2 freq:10000
36      # PRED: ENTRY [100.0%]  (fallthru,exec)
37      # PT = nonlocal { D.3926 } (restr)
38      outputFileName.0D.3914_3
39        = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40      # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43      sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44      # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47      D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48      if (D.3915_4 == 0)
49        goto <bb 3>;
50      else
51        goto <bb 4>;
52      # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)
53 
54      # BLOCK 3 freq:1000
55      # PRED: 2 [10.0%]  (true,exec)
56      # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59      freeD.898 (ctxD.2601_5(D));
60      goto <bb 7>;
61      # SUCC: 7 [100.0%]  (fallthru,exec)
62 
63      # BLOCK 4 freq:9000
64      # PRED: 2 [90.0%]  (false,exec)
65      # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66      # PT = nonlocal escaped
67      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69      fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70      if (fpD.2605_8 == 0B)
71        goto <bb 5>;
72      else
73        goto <bb 6>;
74      # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)
75 
76      # BLOCK 5 freq:173
77      # PRED: 4 [1.9%]  (true,exec)
78      # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81      freeD.898 (ctxD.2601_5(D));
82      goto <bb 7>;
83      # SUCC: 7 [100.0%]  (fallthru,exec)
84 
85      # BLOCK 6 freq:8827
86      # PRED: 4 [98.1%]  (false,exec)
87      # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90      fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91      # SUCC: 7 [100.0%]  (fallthru,exec)
92 
93      # BLOCK 7 freq:10000
94      # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
95              6 [100.0%]  (fallthru,exec)
96      # PT = nonlocal null
97 
98      # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99      # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100                             .MEMD.3923_18(6)>
101      # VUSE <.MEMD.3923_11>
102      return ctxD.2601_1;
103      # SUCC: EXIT [100.0%]
104    }
105 
106    bb 3 and bb 5 can be merged.  The blocks have different predecessors, but the
107    same successors, and the same operations.
108 
109 
110    CONTEXT
111 
112    A technique called tail merging (or cross jumping) can fix the example
113    above.  For a block, we look for common code at the end (the tail) of the
114    predecessor blocks, and insert jumps from one block to the other.
115    The example is a special case for tail merging, in that 2 whole blocks
116    can be merged, rather than just the end parts of it.
117    We currently only focus on whole block merging, so in that sense
118    calling this pass tail merge is a bit of a misnomer.
119 
120    We distinguish 2 kinds of situations in which blocks can be merged:
121    - same operations, same predecessors.  The successor edges coming from one
122      block are redirected to come from the other block.
123    - same operations, same successors.  The predecessor edges entering one block
124      are redirected to enter the other block.  Note that this operation might
125      involve introducing phi operations.
126 
   For efficient implementation, we would like to value number the blocks, and
128    have a comparison operator that tells us whether the blocks are equal.
129    Besides being runtime efficient, block value numbering should also abstract
130    from irrelevant differences in order of operations, much like normal value
131    numbering abstracts from irrelevant order of operations.
132 
   For the first situation (same operations, same predecessors), normal value
134    numbering fits well.  We can calculate a block value number based on the
135    value numbers of the defs and vdefs.
136 
137    For the second situation (same operations, same successors), this approach
138    doesn't work so well.  We can illustrate this using the example.  The calls
139    to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140    remain different in value numbering, since they represent different memory
141    states.  So the resulting vdefs of the frees will be different in value
142    numbering, so the block value numbers will be different.
143 
144    The reason why we call the blocks equal is not because they define the same
145    values, but because uses in the blocks use (possibly different) defs in the
146    same way.  To be able to detect this efficiently, we need to do some kind of
147    reverse value numbering, meaning number the uses rather than the defs, and
148    calculate a block value number based on the value number of the uses.
149    Ideally, a block comparison operator will also indicate which phis are needed
150    to merge the blocks.
151 
152    For the moment, we don't do block value numbering, but we do insn-by-insn
153    matching, using scc value numbers to match operations with results, and
154    structural comparison otherwise, while ignoring vop mismatches.
155 
156 
157    IMPLEMENTATION
158 
159    1. The pass first determines all groups of blocks with the same successor
160       blocks.
161    2. Within each group, it tries to determine clusters of equal basic blocks.
162    3. The clusters are applied.
163    4. The same successor groups are updated.
164    5. This process is repeated from 2 onwards, until no more changes.
165 
166 
167    LIMITATIONS/TODO
168 
169    - block only
170    - handles only 'same operations, same successors'.
171      It handles same predecessors as a special subcase though.
172    - does not implement the reverse value numbering and block value numbering.
173    - improve memory allocation: use garbage collected memory, obstacks,
174      allocpools where appropriate.
   - no insertion of gimple_reg phis.  We only introduce vop-phis.
176    - handle blocks with gimple_reg phi_nodes.
177 
178 
179    SWITCHES
180 
181    - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
182 
183 #include "config.h"
184 #include "system.h"
185 #include "coretypes.h"
186 #include "tm.h"
187 #include "tree.h"
188 #include "tm_p.h"
189 #include "basic-block.h"
190 #include "flags.h"
191 #include "function.h"
192 #include "tree-flow.h"
193 #include "bitmap.h"
194 #include "tree-ssa-alias.h"
195 #include "params.h"
196 #include "hash-table.h"
197 #include "gimple-pretty-print.h"
198 #include "tree-ssa-sccvn.h"
199 #include "tree-dump.h"
200 
201 /* ??? This currently runs as part of tree-ssa-pre.  Why is this not
202    a stand-alone GIMPLE pass?  */
203 #include "tree-pass.h"
204 
205 /* Describes a group of bbs with the same successors.  The successor bbs are
206    cached in succs, and the successor edge flags are cached in succ_flags.
   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
208    it's marked in inverse.
209    Additionally, the hash value for the struct is cached in hashval, and
210    in_worklist indicates whether it's currently part of worklist.  */
211 
/* A group of basic blocks that all have the same set of successor bbs;
   element type of same_succ_htab.  */
struct same_succ_def
{
  /* The bbs that have the same successor bbs.  */
  bitmap bbs;
  /* The successor bbs.  */
  bitmap succs;
  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
     bb.  */
  bitmap inverse;
  /* The edge flags for each of the successor bbs.  */
  vec<int> succ_flags;
  /* Indicates whether the struct is currently in the worklist.  */
  bool in_worklist;
  /* The hash value of the struct, cached once by same_succ_hash.  */
  hashval_t hashval;

  /* hash_table support.  */
  typedef same_succ_def value_type;
  typedef same_succ_def compare_type;
  /* Returns the cached hash value of *E.  */
  static inline hashval_t hash (const value_type *);
  /* Returns nonzero if the two groups compare equal.  */
  static int equal (const value_type *, const compare_type *);
  /* Frees all memory owned by *E, including *E itself.  */
  static void remove (value_type *);
};
/* Pointer typedefs used throughout this pass.  */
typedef struct same_succ_def *same_succ;
typedef const struct same_succ_def *const_same_succ;
237 
238 /* hash routine for hash_table support, returns hashval of E.  */
239 
inline hashval_t
same_succ_def::hash (const value_type *e)
{
  /* The value was computed by same_succ_hash and cached in the struct.  */
  return e->hashval;
}
245 
246 /* A group of bbs where 1 bb from bbs can replace the other bbs.  */
247 
struct bb_cluster_def
{
  /* The bbs in the cluster.  */
  bitmap bbs;
  /* The preds of the bbs in the cluster.  */
  bitmap preds;
  /* Index in all_clusters vector; the entry there is NULLed out when the
     cluster is merged into another (see set_cluster).  */
  int index;
  /* The bb to replace the cluster with; maintained by update_rep_bb.  */
  basic_block rep_bb;
};
/* Pointer typedefs for cluster access.  */
typedef struct bb_cluster_def *bb_cluster;
typedef const struct bb_cluster_def *const_bb_cluster;
261 
262 /* Per bb-info.  */
263 
/* Per-bb information, hung off bb->aux and accessed via the BB_* macros.  */
struct aux_bb_info
{
  /* The number of non-debug statements in the bb.  */
  int size;
  /* The same_succ that this bb is a member of.  */
  same_succ bb_same_succ;
  /* The cluster that this bb is a member of.  */
  bb_cluster cluster;
  /* The vop state at the exit of a bb.  This is shortlived data, used to
     communicate data between update_block_by and update_vuses.  */
  tree vop_at_exit;
  /* The bb that either contains or is dominated by the dependencies of the
     bb.  */
  basic_block dep_bb;
};
279 
/* Macros to access the fields of struct aux_bb_info.  Each bb's aux field
   points to one such struct, allocated by init_worklist via
   alloc_aux_for_blocks.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
287 
288 /* Returns true if the only effect a statement STMT has, is to define locally
289    used SSA_NAMEs.  */
290 
291 static bool
292 stmt_local_def (gimple stmt)
293 {
294   basic_block bb, def_bb;
295   imm_use_iterator iter;
296   use_operand_p use_p;
297   tree val;
298   def_operand_p def_p;
299 
300   if (gimple_has_side_effects (stmt)
301       || gimple_vdef (stmt) != NULL_TREE
302       || gimple_vuse (stmt) != NULL_TREE)
303     return false;
304 
305   def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
306   if (def_p == NULL)
307     return false;
308 
309   val = DEF_FROM_PTR (def_p);
310   if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
311     return false;
312 
313   def_bb = gimple_bb (stmt);
314 
315   FOR_EACH_IMM_USE_FAST (use_p, iter, val)
316     {
317       if (is_gimple_debug (USE_STMT (use_p)))
318 	continue;
319       bb = gimple_bb (USE_STMT (use_p));
320       if (bb == def_bb)
321 	continue;
322 
323       if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
324 	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
325 	continue;
326 
327       return false;
328     }
329 
330   return true;
331 }
332 
333 /* Let GSI skip forwards over local defs.  */
334 
335 static void
336 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
337 {
338   gimple stmt;
339 
340   while (true)
341     {
342       if (gsi_end_p (*gsi))
343 	return;
344       stmt = gsi_stmt (*gsi);
345       if (!stmt_local_def (stmt))
346 	return;
347 	gsi_next_nondebug (gsi);
348     }
349 }
350 
351 /* VAL1 and VAL2 are either:
352    - uses in BB1 and BB2, or
353    - phi alternatives for BB1 and BB2.
354    Return true if the uses have the same gvn value.  */
355 
356 static bool
357 gvn_uses_equal (tree val1, tree val2)
358 {
359   gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
360 
361   if (val1 == val2)
362     return true;
363 
364   if (vn_valueize (val1) != vn_valueize (val2))
365     return false;
366 
367   return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
368 	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
369 }
370 
371 /* Prints E to FILE.  */
372 
373 static void
374 same_succ_print (FILE *file, const same_succ e)
375 {
376   unsigned int i;
377   bitmap_print (file, e->bbs, "bbs:", "\n");
378   bitmap_print (file, e->succs, "succs:", "\n");
379   bitmap_print (file, e->inverse, "inverse:", "\n");
380   fprintf (file, "flags:");
381   for (i = 0; i < e->succ_flags.length (); ++i)
382     fprintf (file, " %x", e->succ_flags[i]);
383   fprintf (file, "\n");
384 }
385 
/* hash_table traversal callback: prints same_succ *PE to FILE.  */
387 
388 inline int
389 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
390 {
391   const same_succ e = *pe;
392   same_succ_print (file, e);
393   return 1;
394 }
395 
396 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */
397 
398 static void
399 update_dep_bb (basic_block use_bb, tree val)
400 {
401   basic_block dep_bb;
402 
403   /* Not a dep.  */
404   if (TREE_CODE (val) != SSA_NAME)
405     return;
406 
407   /* Skip use of global def.  */
408   if (SSA_NAME_IS_DEFAULT_DEF (val))
409     return;
410 
411   /* Skip use of local def.  */
412   dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
413   if (dep_bb == use_bb)
414     return;
415 
416   if (BB_DEP_BB (use_bb) == NULL
417       || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
418     BB_DEP_BB (use_bb) = dep_bb;
419 }
420 
421 /* Update BB_DEP_BB, given the dependencies in STMT.  */
422 
423 static void
424 stmt_update_dep_bb (gimple stmt)
425 {
426   ssa_op_iter iter;
427   use_operand_p use;
428 
429   FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
430     update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
431 }
432 
433 /* Calculates hash value for same_succ VE.  */
434 
/* Calculates the hash value for same_succ E, based on the successor set,
   the statements of the first member bb, and the (masked) successor edge
   flags.  As side effects, caches the bb's non-debug non-local statement
   count in BB_SIZE and updates BB_DEP_BB for the bb's uses.  */

static hashval_t
same_succ_hash (const_same_succ e)
{
  hashval_t hashval = bitmap_hash (e->succs);
  int flags;
  unsigned int i;
  unsigned int first = bitmap_first_set_bit (e->bbs);
  basic_block bb = BASIC_BLOCK (first);
  int size = 0;
  gimple_stmt_iterator gsi;
  gimple stmt;
  tree arg;
  unsigned int s;
  bitmap_iterator bs;

  /* Hash the statements of the representative (first) bb, skipping debug
     statements and local defs, mirroring the walk in same_succ_def::equal.  */
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    {
      stmt = gsi_stmt (gsi);
      stmt_update_dep_bb (stmt);
      if (stmt_local_def (stmt))
	continue;
      size++;

      hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval);
      if (is_gimple_assign (stmt))
	hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt),
					    hashval);
      if (!is_gimple_call (stmt))
	continue;
      /* For calls, mix in the callee and the value-numbered arguments.  */
      if (gimple_call_internal_p (stmt))
	hashval = iterative_hash_hashval_t
	  ((hashval_t) gimple_call_internal_fn (stmt), hashval);
      else
	hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval);
      for (i = 0; i < gimple_call_num_args (stmt); i++)
	{
	  arg = gimple_call_arg (stmt, i);
	  arg = vn_valueize (arg);
	  hashval = iterative_hash_expr (arg, hashval);
	}
    }

  hashval = iterative_hash_hashval_t (size, hashval);
  /* Cache the statement count for the cheap size check in equal ().  */
  BB_SIZE (bb) = size;

  /* Mix in the successor edge flags, masking out the TRUE/FALSE bits so
     that inverse-condition groups can still hash equal.  */
  for (i = 0; i < e->succ_flags.length (); ++i)
    {
      flags = e->succ_flags[i];
      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      hashval = iterative_hash_hashval_t (flags, hashval);
    }

  /* Record dependencies on the non-virtual phi arguments BB feeds into
     its successors.  */
  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
    {
      int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx;
      for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  tree lhs = gimple_phi_result (phi);
	  tree val = gimple_phi_arg_def (phi, n);

	  if (virtual_operand_p (lhs))
	    continue;
	  update_dep_bb (bb, val);
	}
    }

  return hashval;
}
506 
507 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
508    are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
509    the other edge flags.  */
510 
511 static bool
512 inverse_flags (const_same_succ e1, const_same_succ e2)
513 {
514   int f1a, f1b, f2a, f2b;
515   int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
516 
517   if (e1->succ_flags.length () != 2)
518     return false;
519 
520   f1a = e1->succ_flags[0];
521   f1b = e1->succ_flags[1];
522   f2a = e2->succ_flags[0];
523   f2b = e2->succ_flags[1];
524 
525   if (f1a == f2a && f1b == f2b)
526     return false;
527 
528   return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
529 }
530 
531 /* Compares SAME_SUCCs E1 and E2.  */
532 
/* hash_table equality callback: returns nonzero if groups E1 and E2 have
   the same successors and their representative bbs contain matching
   statement sequences.  */

int
same_succ_def::equal (const value_type *e1, const compare_type *e2)
{
  unsigned int i, first1, first2;
  gimple_stmt_iterator gsi1, gsi2;
  gimple s1, s2;
  basic_block bb1, bb2;

  /* Cheap rejections first: cached hash, flag count, successor set.  */
  if (e1->hashval != e2->hashval)
    return 0;

  if (e1->succ_flags.length () != e2->succ_flags.length ())
    return 0;

  if (!bitmap_equal_p (e1->succs, e2->succs))
    return 0;

  /* Edge flags must match exactly, unless the EDGE_TRUE/FALSE_VALUE flags
     are inverted (the inverse case is recorded by the caller).  */
  if (!inverse_flags (e1, e2))
    {
      for (i = 0; i < e1->succ_flags.length (); ++i)
	if (e1->succ_flags[i] != e2->succ_flags[i])
	  return 0;
    }

  /* Compare the representative (first) bbs of the two groups.  */
  first1 = bitmap_first_set_bit (e1->bbs);
  first2 = bitmap_first_set_bit (e2->bbs);

  bb1 = BASIC_BLOCK (first1);
  bb2 = BASIC_BLOCK (first2);

  /* BB_SIZE was cached by same_succ_hash.  */
  if (BB_SIZE (bb1) != BB_SIZE (bb2))
    return 0;

  /* Walk both bbs in lock-step, skipping debug statements and local defs,
     requiring matching gimple codes (and call targets) throughout.  */
  gsi1 = gsi_start_nondebug_bb (bb1);
  gsi2 = gsi_start_nondebug_bb (bb2);
  gsi_advance_fw_nondebug_nonlocal (&gsi1);
  gsi_advance_fw_nondebug_nonlocal (&gsi2);
  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
    {
      s1 = gsi_stmt (gsi1);
      s2 = gsi_stmt (gsi2);
      if (gimple_code (s1) != gimple_code (s2))
	return 0;
      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
	return 0;
      gsi_next_nondebug (&gsi1);
      gsi_next_nondebug (&gsi2);
      gsi_advance_fw_nondebug_nonlocal (&gsi1);
      gsi_advance_fw_nondebug_nonlocal (&gsi2);
    }

  return 1;
}
586 
587 /* Alloc and init a new SAME_SUCC.  */
588 
589 static same_succ
590 same_succ_alloc (void)
591 {
592   same_succ same = XNEW (struct same_succ_def);
593 
594   same->bbs = BITMAP_ALLOC (NULL);
595   same->succs = BITMAP_ALLOC (NULL);
596   same->inverse = BITMAP_ALLOC (NULL);
597   same->succ_flags.create (10);
598   same->in_worklist = false;
599 
600   return same;
601 }
602 
603 /* Delete same_succ E.  */
604 
605 void
606 same_succ_def::remove (same_succ e)
607 {
608   BITMAP_FREE (e->bbs);
609   BITMAP_FREE (e->succs);
610   BITMAP_FREE (e->inverse);
611   e->succ_flags.release ();
612 
613   XDELETE (e);
614 }
615 
616 /* Reset same_succ SAME.  */
617 
618 static void
619 same_succ_reset (same_succ same)
620 {
621   bitmap_clear (same->bbs);
622   bitmap_clear (same->succs);
623   bitmap_clear (same->inverse);
624   same->succ_flags.truncate (0);
625 }
626 
/* Hash table of same_succ groups, keyed on the successor set, masked edge
   flags and the statements of one member bb.  */

static hash_table <same_succ_def> same_succ_htab;

/* Array that is used to store the edge flags for a successor, indexed by
   successor bb index (scratch space for find_same_succ_bb).  */

static int *same_succ_edge_flags;

/* Bitmap that is used to mark bbs that are recently deleted.  */

static bitmap deleted_bbs;

/* Bitmap that is used to mark predecessors of bbs that are
   deleted.  */

static bitmap deleted_bb_preds;
641 
642 /* Prints same_succ_htab to stderr.  */
643 
644 extern void debug_same_succ (void);
645 DEBUG_FUNCTION void
646 debug_same_succ ( void)
647 {
648   same_succ_htab.traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
649 }
650 
651 
/* Worklist of same_succ groups whose member bbs still need to be
   processed (filled by add_to_worklist).  */

static vec<same_succ> worklist;
655 
656 /* Prints worklist to FILE.  */
657 
658 static void
659 print_worklist (FILE *file)
660 {
661   unsigned int i;
662   for (i = 0; i < worklist.length (); ++i)
663     same_succ_print (file, worklist[i]);
664 }
665 
666 /* Adds SAME to worklist.  */
667 
668 static void
669 add_to_worklist (same_succ same)
670 {
671   if (same->in_worklist)
672     return;
673 
674   if (bitmap_count_bits (same->bbs) < 2)
675     return;
676 
677   same->in_worklist = true;
678   worklist.safe_push (same);
679 }
680 
681 /* Add BB to same_succ_htab.  */
682 
/* Add BB to same_succ_htab, using the scratch group *SAME_P.  If the hash
   table keeps the group, *SAME_P is set to NULL and the caller must
   allocate a fresh one; otherwise the group is reset for reuse.  */

static void
find_same_succ_bb (basic_block bb, same_succ *same_p)
{
  unsigned int j;
  bitmap_iterator bj;
  same_succ same = *same_p;
  same_succ *slot;
  edge_iterator ei;
  edge e;

  if (bb == NULL)
    return;
  bitmap_set_bit (same->bbs, bb->index);
  /* Record the successors, caching each edge's flags indexed by successor
     bb index so the flags get pushed in bitmap (ascending) order below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      int index = e->dest->index;
      bitmap_set_bit (same->succs, index);
      same_succ_edge_flags[index] = e->flags;
    }
  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
    same->succ_flags.safe_push (same_succ_edge_flags[j]);

  same->hashval = same_succ_hash (same);

  slot = same_succ_htab.find_slot_with_hash (same, same->hashval, INSERT);
  if (*slot == NULL)
    {
      /* First bb with these successors: the table takes ownership of SAME;
	 signal the caller via *SAME_P.  */
      *slot = same;
      BB_SAME_SUCC (bb) = same;
      add_to_worklist (same);
      *same_p = NULL;
    }
  else
    {
      /* Join the existing group, noting whether BB's condition is the
	 inverse of the group's; SAME is recycled.  */
      bitmap_set_bit ((*slot)->bbs, bb->index);
      BB_SAME_SUCC (bb) = *slot;
      add_to_worklist (*slot);
      if (inverse_flags (same, *slot))
	bitmap_set_bit ((*slot)->inverse, bb->index);
      same_succ_reset (same);
    }
}
725 
726 /* Find bbs with same successors.  */
727 
728 static void
729 find_same_succ (void)
730 {
731   same_succ same = same_succ_alloc ();
732   basic_block bb;
733 
734   FOR_EACH_BB (bb)
735     {
736       find_same_succ_bb (bb, &same);
737       if (same == NULL)
738 	same = same_succ_alloc ();
739     }
740 
741   same_succ_def::remove (same);
742 }
743 
744 /* Initializes worklist administration.  */
745 
/* Initializes worklist administration: per-bb aux data, the same_succ hash
   table, scratch structures, and the initial worklist contents.  */

static void
init_worklist (void)
{
  /* Attach a struct aux_bb_info to every bb; accessed via the BB_* macros,
     so this must precede find_same_succ.  */
  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  same_succ_htab.create (n_basic_blocks);
  same_succ_edge_flags = XCNEWVEC (int, last_basic_block);
  deleted_bbs = BITMAP_ALLOC (NULL);
  deleted_bb_preds = BITMAP_ALLOC (NULL);
  worklist.create (n_basic_blocks);
  find_same_succ ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "initial worklist:\n");
      print_worklist (dump_file);
    }
}
763 
764 /* Deletes worklist administration.  */
765 
/* Deletes worklist administration; mirror of init_worklist: releases the
   per-bb aux data, the hash table, the edge-flags scratch array, the
   deletion bitmaps and the worklist vector.  */

static void
delete_worklist (void)
{
  free_aux_for_blocks ();
  same_succ_htab.dispose ();
  XDELETEVEC (same_succ_edge_flags);
  same_succ_edge_flags = NULL;
  BITMAP_FREE (deleted_bbs);
  BITMAP_FREE (deleted_bb_preds);
  worklist.release ();
}
777 
778 /* Mark BB as deleted, and mark its predecessors.  */
779 
780 static void
781 mark_basic_block_deleted (basic_block bb)
782 {
783   edge e;
784   edge_iterator ei;
785 
786   bitmap_set_bit (deleted_bbs, bb->index);
787 
788   FOR_EACH_EDGE (e, ei, bb->preds)
789     bitmap_set_bit (deleted_bb_preds, e->src->index);
790 }
791 
792 /* Removes BB from its corresponding same_succ.  */
793 
794 static void
795 same_succ_flush_bb (basic_block bb)
796 {
797   same_succ same = BB_SAME_SUCC (bb);
798   BB_SAME_SUCC (bb) = NULL;
799   if (bitmap_single_bit_set_p (same->bbs))
800     same_succ_htab.remove_elt_with_hash (same, same->hashval);
801   else
802     bitmap_clear_bit (same->bbs, bb->index);
803 }
804 
805 /* Removes all bbs in BBS from their corresponding same_succ.  */
806 
807 static void
808 same_succ_flush_bbs (bitmap bbs)
809 {
810   unsigned int i;
811   bitmap_iterator bi;
812 
813   EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
814     same_succ_flush_bb (BASIC_BLOCK (i));
815 }
816 
817 /* Release the last vdef in BB, either normal or phi result.  */
818 
819 static void
820 release_last_vdef (basic_block bb)
821 {
822   gimple_stmt_iterator i;
823 
824   for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i))
825     {
826       gimple stmt = gsi_stmt (i);
827       if (gimple_vdef (stmt) == NULL_TREE)
828 	continue;
829 
830       mark_virtual_operand_for_renaming (gimple_vdef (stmt));
831       return;
832     }
833 
834   for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
835     {
836       gimple phi = gsi_stmt (i);
837       tree res = gimple_phi_result (phi);
838 
839       if (!virtual_operand_p (res))
840 	continue;
841 
842       mark_virtual_phi_result_for_renaming (phi);
843       return;
844     }
845 
846 }
847 
848 /* For deleted_bb_preds, find bbs with same successors.  */
849 
/* For the preds of deleted bbs, recompute their same_succ groups and
   refill the worklist accordingly.  */

static void
update_worklist (void)
{
  unsigned int i;
  bitmap_iterator bi;
  basic_block bb;
  same_succ same;

  /* Deleted bbs are no longer valid preds; drop them from the pred set
     and forget the deleted set.  */
  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  bitmap_clear (deleted_bbs);

  /* The entry block carries no same_succ info.  */
  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  same_succ_flush_bbs (deleted_bb_preds);

  /* Re-insert the affected bbs; their successor sets may have changed.  */
  same = same_succ_alloc ();
  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
    {
      bb = BASIC_BLOCK (i);
      gcc_assert (bb != NULL);
      find_same_succ_bb (bb, &same);
      /* The hash table consumed the scratch group; allocate a new one.  */
      if (same == NULL)
	same = same_succ_alloc ();
    }
  same_succ_def::remove (same);
  bitmap_clear (deleted_bb_preds);
}
876 
877 /* Prints cluster C to FILE.  */
878 
879 static void
880 print_cluster (FILE *file, bb_cluster c)
881 {
882   if (c == NULL)
883     return;
884   bitmap_print (file, c->bbs, "bbs:", "\n");
885   bitmap_print (file, c->preds, "preds:", "\n");
886 }
887 
888 /* Prints cluster C to stderr.  */
889 
extern void debug_cluster (bb_cluster);

/* Dumps cluster C to stderr; callable from the debugger.  */
DEBUG_FUNCTION void
debug_cluster (bb_cluster c)
{
  print_cluster (stderr, c);
}
896 
897 /* Update C->rep_bb, given that BB is added to the cluster.  */
898 
static void
update_rep_bb (bb_cluster c, basic_block bb)
{
  /* Initial: first member becomes the representative.  */
  if (c->rep_bb == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Current needs no deps, keep it.  */
  if (BB_DEP_BB (c->rep_bb) == NULL)
    return;

  /* Bb needs no deps, change rep_bb.  */
  if (BB_DEP_BB (bb) == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this, is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb with last deps earlier, is that it can
     potentially be used as replacement for more bbs.  */
  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
    c->rep_bb = bb;
}
930 
931 /* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */
932 
933 static void
934 add_bb_to_cluster (bb_cluster c, basic_block bb)
935 {
936   edge e;
937   edge_iterator ei;
938 
939   bitmap_set_bit (c->bbs, bb->index);
940 
941   FOR_EACH_EDGE (e, ei, bb->preds)
942     bitmap_set_bit (c->preds, e->src->index);
943 
944   update_rep_bb (c, bb);
945 }
946 
947 /* Allocate and init new cluster.  */
948 
949 static bb_cluster
950 new_cluster (void)
951 {
952   bb_cluster c;
953   c = XCNEW (struct bb_cluster_def);
954   c->bbs = BITMAP_ALLOC (NULL);
955   c->preds = BITMAP_ALLOC (NULL);
956   c->rep_bb = NULL;
957   return c;
958 }
959 
960 /* Delete clusters.  */
961 
962 static void
963 delete_cluster (bb_cluster c)
964 {
965   if (c == NULL)
966     return;
967   BITMAP_FREE (c->bbs);
968   BITMAP_FREE (c->preds);
969   XDELETE (c);
970 }
971 
972 
/* Array that contains all clusters, indexed by bb_cluster_def::index;
   entries are NULLed out when clusters are merged away (see set_cluster).  */

static vec<bb_cluster> all_clusters;
976 
977 /* Allocate all cluster vectors.  */
978 
static void
alloc_cluster_vectors (void)
{
  /* Reserve room for roughly one cluster per bb.  */
  all_clusters.create (n_basic_blocks);
}
984 
985 /* Reset all cluster vectors.  */
986 
987 static void
988 reset_cluster_vectors (void)
989 {
990   unsigned int i;
991   basic_block bb;
992   for (i = 0; i < all_clusters.length (); ++i)
993     delete_cluster (all_clusters[i]);
994   all_clusters.truncate (0);
995   FOR_EACH_BB (bb)
996     BB_CLUSTER (bb) = NULL;
997 }
998 
999 /* Delete all cluster vectors.  */
1000 
1001 static void
1002 delete_cluster_vectors (void)
1003 {
1004   unsigned int i;
1005   for (i = 0; i < all_clusters.length (); ++i)
1006     delete_cluster (all_clusters[i]);
1007   all_clusters.release ();
1008 }
1009 
1010 /* Merge cluster C2 into C1.  */
1011 
static void
merge_clusters (bb_cluster c1, bb_cluster c2)
{
  /* C1 absorbs C2's bbs and preds; disposing of C2 is left to the caller
     (see set_cluster).  */
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}
1018 
1019 /* Register equivalence of BB1 and BB2 (members of cluster C).  Store c in
1020    all_clusters, or merge c with existing cluster.  */
1021 
/* Register equivalence of BB1 and BB2: create a new cluster, extend an
   existing one, or merge the two bbs' clusters.  */

static void
set_cluster (basic_block bb1, basic_block bb2)
{
  basic_block merge_bb, other_bb;
  bb_cluster merge, old, c;

  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
    {
      /* Neither bb is in a cluster yet: start a new one.  */
      c = new_cluster ();
      add_bb_to_cluster (c, bb1);
      add_bb_to_cluster (c, bb2);
      BB_CLUSTER (bb1) = c;
      BB_CLUSTER (bb2) = c;
      c->index = all_clusters.length ();
      all_clusters.safe_push (c);
    }
  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
    {
      /* Exactly one bb is clustered: add the other one to that cluster.  */
      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
      merge = BB_CLUSTER (merge_bb);
      add_bb_to_cluster (merge, other_bb);
      BB_CLUSTER (other_bb) = merge;
    }
  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
    {
      /* Both are clustered, in different clusters: fold bb2's cluster into
	 bb1's, repoint its members, and NULL out its all_clusters entry.  */
      unsigned int i;
      bitmap_iterator bi;

      old = BB_CLUSTER (bb2);
      merge = BB_CLUSTER (bb1);
      merge_clusters (merge, old);
      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
	BB_CLUSTER (BASIC_BLOCK (i)) = merge;
      all_clusters[old->index] = NULL;
      update_rep_bb (merge, old->rep_bb);
      delete_cluster (old);
    }
  else
    /* Already in the same cluster; callers should not register that.  */
    gcc_unreachable ();
}
1063 
1064 /* Return true if gimple operands T1 and T2 have the same value.  */
1065 
1066 static bool
1067 gimple_operand_equal_value_p (tree t1, tree t2)
1068 {
1069   if (t1 == t2)
1070     return true;
1071 
1072   if (t1 == NULL_TREE
1073       || t2 == NULL_TREE)
1074     return false;
1075 
1076   if (operand_equal_p (t1, t2, 0))
1077     return true;
1078 
1079   return gvn_uses_equal (t1, t2);
1080 }
1081 
/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  Only calls, assignments and
   conditions are ever considered equal; all other codes return false.  */

static bool
gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool equal, inv_cond;
  enum tree_code code1, code2;

  /* Different statement kinds can never be equal.  */
  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      /* Calls must have the same target and the same number of
	 pairwise-equal arguments.  */
      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
	return false;
      if (!gimple_call_same_target_p (s1, s2))
        return false;

      /* Eventually, we'll significantly complicate the CFG by adding
	 back edges to properly model the effects of transaction restart.
	 For the bulk of optimization this does not matter, but what we
	 cannot recover from is tail merging blocks between two separate
	 transactions.  Avoid that by making commit not match.  */
      if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT))
	return false;

      /* Arguments are equal if structurally equal or value-number
	 equal.  */
      equal = true;
      for (i = 0; i < gimple_call_num_args (s1); ++i)
	{
	  t1 = gimple_call_arg (s1, i);
	  t2 = gimple_call_arg (s2, i);
	  if (operand_equal_p (t1, t2, 0))
	    continue;
	  if (gvn_uses_equal (t1, t2))
	    continue;
	  equal = false;
	  break;
	}
      if (!equal)
	return false;

      /* Compare the lhs, if any: both absent is fine, one absent is not.
	 SSA lhs are compared by value number, others structurally.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
	return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
	return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
	return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      /* Only assignments whose lhs are both SSA names or both non-SSA
	 are comparable; a mixed pair never matches.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
	  && TREE_CODE (lhs2) != SSA_NAME)
	return (operand_equal_p (lhs1, lhs2, 0)
		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
						 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
	       && TREE_CODE (lhs2) == SSA_NAME)
	return operand_equal_p (gimple_assign_rhs1 (s1),
				gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      /* Both operands of the condition must match (structurally or by
	 value number) ...  */
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!operand_equal_p (t1, t2, 0)
	  && !gvn_uses_equal (t1, t2))
	return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!operand_equal_p (t1, t2, 0)
	  && !gvn_uses_equal (t1, t2))
	return false;

      /* ... and the comparison codes must agree, after inverting one of
	 them when exactly one of the two bbs reaches the common successor
	 through the inverse edge.  */
      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
		  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
	{
	  bool honor_nans
	    = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1))));
	  code2 = invert_tree_comparison (code2, honor_nans);
	}
      return code1 == code2;

    default:
      return false;
    }
}
1182 
/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
   processed statements.  On return, GSI is either at the end of the bb or at
   the first (walking backwards) statement that is not a local def.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
				  bool *vuse_escaped)
{
  gimple stmt;
  tree lvuse;

  while (true)
    {
      /* Ran off the start of the bb.  */
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);

      /* Track the earliest vuse seen so far; walking backwards means the
	 last assignment to *VUSE wins.  */
      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
	{
	  *vuse = lvuse;
	  /* The statement both uses memory and produces an SSA def, so
	     the incoming vuse can influence a def.  */
	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
	    *vuse_escaped = true;
	}

      /* Stop at the first statement that is not a purely local def.  */
      if (!stmt_local_def (stmt))
	return;
      gsi_prev_nondebug (gsi);
    }
}
1213 
/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
   clusters them.  The comparison walks both blocks backwards in lockstep,
   skipping debug statements and local defs.  */

static void
find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  /* Position both iterators on the first comparable statement.  */
  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  /* Compare statement pairs until one block is exhausted or a pair
     differs.  */
  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple stmt1 = gsi_stmt (gsi1);
      gimple stmt2 = gsi_stmt (gsi2);

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
	return;

      // We cannot tail-merge the builtins that end transactions.
      // ??? The alternative being unsharing of BBs in the tm_init pass.
      if (flag_tm
	  && is_gimple_call (stmt1)
	  && (gimple_call_flags (stmt1) & ECF_TM_BUILTIN)
	  && is_tm_ending_fndecl (gimple_call_fndecl (stmt1)))
	return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  /* Both blocks must have been fully consumed, otherwise one is a strict
     prefix of the other.  */
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be changed.
     TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
	     bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}
1266 
1267 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1268    E2 are equal.  */
1269 
1270 static bool
1271 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1272 {
1273   int n1 = e1->dest_idx, n2 = e2->dest_idx;
1274   gimple_stmt_iterator gsi;
1275 
1276   for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1277     {
1278       gimple phi = gsi_stmt (gsi);
1279       tree lhs = gimple_phi_result (phi);
1280       tree val1 = gimple_phi_arg_def (phi, n1);
1281       tree val2 = gimple_phi_arg_def (phi, n2);
1282 
1283       if (virtual_operand_p (lhs))
1284 	continue;
1285 
1286       if (operand_equal_for_phi_arg_p (val1, val2))
1287         continue;
1288       if (gvn_uses_equal (val1, val2))
1289 	continue;
1290 
1291       return false;
1292     }
1293 
1294   return true;
1295 }
1296 
/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
   phi alternatives for BB1 and BB2 are equal.  Also rejects the pair when an
   edge into any common successor is complex.  */

static bool
same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
{
  unsigned int s;
  bitmap_iterator bs;
  edge e1, e2;
  basic_block succ;

  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
    {
      succ = BASIC_BLOCK (s);
      e1 = find_edge (bb1, succ);
      e2 = find_edge (bb2, succ);
      /* Complex edges cannot be redirected, so bail out.  */
      if (e1->flags & EDGE_COMPLEX
	  || e2->flags & EDGE_COMPLEX)
	return false;

      /* For all phis in bb, the phi alternatives for e1 and e2 need to have
	 the same value.  */
      if (!same_phi_alternatives_1 (succ, e1, e2))
	return false;
    }

  return true;
}
1325 
1326 /* Return true if BB has non-vop phis.  */
1327 
1328 static bool
1329 bb_has_non_vop_phi (basic_block bb)
1330 {
1331   gimple_seq phis = phi_nodes (bb);
1332   gimple phi;
1333 
1334   if (phis == NULL)
1335     return false;
1336 
1337   if (!gimple_seq_singleton_p (phis))
1338     return true;
1339 
1340   phi = gimple_seq_first_stmt (phis);
1341   return !virtual_operand_p (gimple_phi_result (phi));
1342 }
1343 
1344 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1345    invariant that uses in FROM are dominates by their defs.  */
1346 
1347 static bool
1348 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1349 {
1350   basic_block cd, dep_bb = BB_DEP_BB (to);
1351   edge_iterator ei;
1352   edge e;
1353   bitmap from_preds = BITMAP_ALLOC (NULL);
1354 
1355   if (dep_bb == NULL)
1356     return true;
1357 
1358   FOR_EACH_EDGE (e, ei, from->preds)
1359     bitmap_set_bit (from_preds, e->src->index);
1360   cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1361   BITMAP_FREE (from_preds);
1362 
1363   return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1364 }
1365 
1366 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1367    replacement bb) and vice versa maintains the invariant that uses in the
1368    replacement are dominates by their defs.  */
1369 
1370 static bool
1371 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1372 {
1373   if (BB_CLUSTER (bb1) != NULL)
1374     bb1 = BB_CLUSTER (bb1)->rep_bb;
1375 
1376   if (BB_CLUSTER (bb2) != NULL)
1377     bb2 = BB_CLUSTER (bb2)->rep_bb;
1378 
1379   return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1380 	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1381 }
1382 
/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  Each bb
   is compared against at most PARAM_MAX_TAIL_MERGE_COMPARISONS later bbs to
   bound the otherwise quadratic behaviour.  */

static void
find_clusters_1 (same_succ same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK (i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
	 phi-nodes in bb1 and bb2, with the same alternatives for the same
	 preds.  */
      if (bb_has_non_vop_phi (bb1))
	continue;

      nr_comparisons = 0;
      /* Only compare against bbs after BB1 in the bitmap, so each pair is
	 considered once.  */
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK (j);

	  if (bb_has_non_vop_phi (bb2))
	    continue;

	  /* Already known equivalent; nothing to do.  */
	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behaviour.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precise for allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  find_duplicate (same_succ, bb1, bb2);
        }
    }
}
1432 
1433 /* Find clusters of bbs which can be merged.  */
1434 
1435 static void
1436 find_clusters (void)
1437 {
1438   same_succ same;
1439 
1440   while (!worklist.is_empty ())
1441     {
1442       same = worklist.pop ();
1443       same->in_worklist = false;
1444       if (dump_file && (dump_flags & TDF_DETAILS))
1445 	{
1446 	  fprintf (dump_file, "processing worklist entry\n");
1447 	  same_succ_print (dump_file, same);
1448 	}
1449       find_clusters_1 (same);
1450     }
1451 }
1452 
1453 /* Returns the vop phi of BB, if any.  */
1454 
1455 static gimple
1456 vop_phi (basic_block bb)
1457 {
1458   gimple stmt;
1459   gimple_stmt_iterator gsi;
1460   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1461     {
1462       stmt = gsi_stmt (gsi);
1463       if (! virtual_operand_p (gimple_phi_result (stmt)))
1464 	continue;
1465       return stmt;
1466     }
1467   return NULL;
1468 }
1469 
/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.
   BB1's frequency and count are folded into BB2's.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  unsigned int i;
  gimple bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  Iterate from the back so
     removal of the redirected edge from bb1->preds does not disturb the
     remaining indices.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Fold bb1's profile information into bb2's, saturating the frequency.  */
  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}
1514 
/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;
1518 
/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  Each cluster's rep_bb survives; all other member
   bbs are redirected into it and deleted.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      /* Slots of clusters that were merged away are NULL (see
	 set_cluster).  */
      if (c == NULL)
	continue;

      /* The representative survives and may need debug-stmt updates.  */
      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);

      /* Remove the representative from the member set, then replace every
	 remaining member by it.  */
      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
	{
	  bb1 = BASIC_BLOCK (j);
	  /* Deleted bbs no longer need debug-stmt updates.  */
	  bitmap_clear_bit (update_bbs, bb1->index);

	  replace_block_by (bb1, bb2);
	  nr_bbs_removed++;
	}
    }

  return nr_bbs_removed;
}
1553 
/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  Non-bind debug stmts are left alone.  */

static void
update_debug_stmt (gimple stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbdef, bbuse;
  gimple def_stmt;
  tree name;

  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use_p);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      def_stmt = SSA_NAME_DEF_STMT (name);
      gcc_assert (def_stmt != NULL);

      /* The use is fine if the def has no bb, is in the same bb, or
	 dominates the use.  */
      bbdef = gimple_bb (def_stmt);
      if (bbdef == NULL || bbuse == bbdef
	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
	continue;

      /* Otherwise drop the bound value rather than keep a bogus one.  */
      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
    }
}
1587 
1588 /* Resets all debug statements that have uses that are not
1589    dominated by their defs.  */
1590 
1591 static void
1592 update_debug_stmts (void)
1593 {
1594   basic_block bb;
1595   bitmap_iterator bi;
1596   unsigned int i;
1597 
1598   EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1599     {
1600       gimple stmt;
1601       gimple_stmt_iterator gsi;
1602 
1603       bb = BASIC_BLOCK (i);
1604       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1605 	{
1606 	  stmt = gsi_stmt (gsi);
1607 	  if (!is_gimple_debug (stmt))
1608 	    continue;
1609 	  update_debug_stmt (stmt);
1610 	}
1611     }
1612 }
1613 
/* Runs tail merge optimization.  TODO is the caller's todo-flag set; flags
   for the required SSA/CFG verification and virtual-operand renaming are
   or'ed in when any bbs were removed.  Returns the updated todo flags.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  /* Nothing to do if the pass is disabled.  */
  if (!flag_tree_tail_merge || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  /* Iterate: each round finds clusters of duplicate bbs and merges them,
     which can expose new duplicates for the next round.  */
  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  /* Lazily allocate per-run state on the first iteration only.  */
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      /* Fixed point reached: no further merging possible.  */
      if (nr_bbs_removed == 0)
	break;

      /* apply_clusters changed the CFG; dominance info is stale.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab.collisions ());

  if (nr_bbs_removed_total > 0)
    {
      /* Merging may have left debug binds with uses not dominated by
	 their defs; reset those.  */
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow);
      mark_virtual_operands_for_renaming (cfun);
    }

  /* Free per-run state; cluster vectors and update_bbs exist only if the
     main loop was entered at least once.  */
  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}
1705