xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-tail-merge.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Tail merging for gimple.
2    Copyright (C) 2011-2017 Free Software Foundation, Inc.
3    Contributed by Tom de Vries (tom@codesourcery.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Pass overview.
22 
23 
24    MOTIVATIONAL EXAMPLE
25 
26    gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27 
28    hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29    {
30      struct FILED.1638 * fpD.2605;
31      charD.1 fileNameD.2604[1000];
32      intD.0 D.3915;
33      const charD.1 * restrict outputFileName.0D.3914;
34 
35      # BLOCK 2 freq:10000
36      # PRED: ENTRY [100.0%]  (fallthru,exec)
37      # PT = nonlocal { D.3926 } (restr)
38      outputFileName.0D.3914_3
39        = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40      # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43      sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44      # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47      D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48      if (D.3915_4 == 0)
49        goto <bb 3>;
50      else
51        goto <bb 4>;
52      # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)
53 
54      # BLOCK 3 freq:1000
55      # PRED: 2 [10.0%]  (true,exec)
56      # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59      freeD.898 (ctxD.2601_5(D));
60      goto <bb 7>;
61      # SUCC: 7 [100.0%]  (fallthru,exec)
62 
63      # BLOCK 4 freq:9000
64      # PRED: 2 [90.0%]  (false,exec)
65      # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66      # PT = nonlocal escaped
67      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69      fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70      if (fpD.2605_8 == 0B)
71        goto <bb 5>;
72      else
73        goto <bb 6>;
74      # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)
75 
76      # BLOCK 5 freq:173
77      # PRED: 4 [1.9%]  (true,exec)
78      # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81      freeD.898 (ctxD.2601_5(D));
82      goto <bb 7>;
83      # SUCC: 7 [100.0%]  (fallthru,exec)
84 
85      # BLOCK 6 freq:8827
86      # PRED: 4 [98.1%]  (false,exec)
87      # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90      fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91      # SUCC: 7 [100.0%]  (fallthru,exec)
92 
93      # BLOCK 7 freq:10000
94      # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
95 	     6 [100.0%]  (fallthru,exec)
96      # PT = nonlocal null
97 
98      # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99      # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 			    .MEMD.3923_18(6)>
101      # VUSE <.MEMD.3923_11>
102      return ctxD.2601_1;
103      # SUCC: EXIT [100.0%]
104    }
105 
106    bb 3 and bb 5 can be merged.  The blocks have different predecessors, but the
107    same successors, and the same operations.
108 
109 
110    CONTEXT
111 
112    A technique called tail merging (or cross jumping) can fix the example
113    above.  For a block, we look for common code at the end (the tail) of the
114    predecessor blocks, and insert jumps from one block to the other.
115    The example is a special case for tail merging, in that 2 whole blocks
116    can be merged, rather than just the end parts of it.
117    We currently only focus on whole block merging, so in that sense
118    calling this pass tail merge is a bit of a misnomer.
119 
120    We distinguish 2 kinds of situations in which blocks can be merged:
121    - same operations, same predecessors.  The successor edges coming from one
122      block are redirected to come from the other block.
123    - same operations, same successors.  The predecessor edges entering one block
124      are redirected to enter the other block.  Note that this operation might
125      involve introducing phi operations.
126 
127    For efficient implementation, we would like to value number the blocks, and
128    have a comparison operator that tells us whether the blocks are equal.
129    Besides being runtime efficient, block value numbering should also abstract
130    from irrelevant differences in order of operations, much like normal value
131    numbering abstracts from irrelevant order of operations.
132 
133    For the first situation (same operations, same predecessors), normal value
134    numbering fits well.  We can calculate a block value number based on the
135    value numbers of the defs and vdefs.
136 
137    For the second situation (same operations, same successors), this approach
138    doesn't work so well.  We can illustrate this using the example.  The calls
139    to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140    remain different in value numbering, since they represent different memory
141    states.  So the resulting vdefs of the frees will be different in value
142    numbering, so the block value numbers will be different.
143 
144    The reason why we call the blocks equal is not because they define the same
145    values, but because uses in the blocks use (possibly different) defs in the
146    same way.  To be able to detect this efficiently, we need to do some kind of
147    reverse value numbering, meaning number the uses rather than the defs, and
148    calculate a block value number based on the value number of the uses.
149    Ideally, a block comparison operator will also indicate which phis are needed
150    to merge the blocks.
151 
152    For the moment, we don't do block value numbering, but we do insn-by-insn
153    matching, using scc value numbers to match operations with results, and
154    structural comparison otherwise, while ignoring vop mismatches.
155 
156 
157    IMPLEMENTATION
158 
159    1. The pass first determines all groups of blocks with the same successor
160       blocks.
161    2. Within each group, it tries to determine clusters of equal basic blocks.
162    3. The clusters are applied.
163    4. The same successor groups are updated.
164    5. This process is repeated from 2 onwards, until no more changes.
165 
166 
167    LIMITATIONS/TODO
168 
169    - block only
170    - handles only 'same operations, same successors'.
171      It handles same predecessors as a special subcase though.
172    - does not implement the reverse value numbering and block value numbering.
173    - improve memory allocation: use garbage collected memory, obstacks,
174      allocpools where appropriate.
175    - no insertion of gimple_reg phis.  We only introduce vop-phis.
176    - handle blocks with gimple_reg phi_nodes.
177 
178 
179    PASS PLACEMENT
180    This 'pass' is not a stand-alone gimple pass, but runs as part of
181    pass_pre, in order to share the value numbering.
182 
183 
184    SWITCHES
185 
186    - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
187 
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "backend.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "cfghooks.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "fold-const.h"
198 #include "trans-mem.h"
199 #include "cfganal.h"
200 #include "cfgcleanup.h"
201 #include "gimple-iterator.h"
202 #include "tree-cfg.h"
203 #include "tree-into-ssa.h"
204 #include "params.h"
205 #include "tree-ssa-sccvn.h"
206 #include "cfgloop.h"
207 #include "tree-eh.h"
208 
209 /* Describes a group of bbs with the same successors.  The successor bbs are
210    cached in succs, and the successor edge flags are cached in succ_flags.
211    If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
212    it's marked in inverse.
213    Additionally, the hash value for the struct is cached in hashval, and
214    in_worklist indicates whether it's currently part of worklist.  */
215 
216 struct same_succ : pointer_hash <same_succ>
217 {
218   /* The bbs that have the same successor bbs.  */
219   bitmap bbs;
220   /* The successor bbs.  */
221   bitmap succs;
222   /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
223      bb.  */
224   bitmap inverse;
225   /* The edge flags for each of the successor bbs.  */
226   vec<int> succ_flags;
227   /* Indicates whether the struct is currently in the worklist.  */
228   bool in_worklist;
229   /* The hash value of the struct.  */
230   hashval_t hashval;
231 
232   /* hash_table support.  */
  /* hash returns the cached hashval; equal does the full structural
     comparison; remove frees the element and the bitmaps it owns.  */
233   static inline hashval_t hash (const same_succ *);
234   static int equal (const same_succ *, const same_succ *);
235   static void remove (same_succ *);
236 };
237 
238 /* hash routine for hash_table support, returns hashval of E.  */
239 
240 inline hashval_t
241 same_succ::hash (const same_succ *e)
242 {
  /* The value was computed by same_succ_hash and cached by
     find_same_succ_bb before E was entered into the table.  */
243   return e->hashval;
244 }
245 
246 /* A group of bbs where 1 bb from bbs can replace the other bbs.  */
247 
248 struct bb_cluster
249 {
250   /* The bbs in the cluster.  */
251   bitmap bbs;
252   /* The preds of the bbs in the cluster.  */
253   bitmap preds;
254   /* Index in all_clusters vector.  */
255   int index;
256   /* The bb to replace the cluster with.  */
  /* Chosen and maintained by update_rep_bb as members are added.  */
257   basic_block rep_bb;
258 };
259 
260 /* Per bb-info.  */
  /* Stored in basic_block::aux (allocated by alloc_aux_for_blocks in
     init_worklist) and accessed through the BB_* macros below.  */
261 
262 struct aux_bb_info
263 {
264   /* The number of non-debug statements in the bb.  */
  /* Local defs are not counted either; computed in same_succ_hash.  */
265   int size;
266   /* The same_succ that this bb is a member of.  */
267   same_succ *bb_same_succ;
268   /* The cluster that this bb is a member of.  */
269   bb_cluster *cluster;
270   /* The vop state at the exit of a bb.  This is shortlived data, used to
271      communicate data between update_block_by and update_vuses.  */
272   tree vop_at_exit;
273   /* The bb that either contains or is dominated by the dependencies of the
274      bb.  */
275   basic_block dep_bb;
276 };
277 
278 /* Macros to access the fields of struct aux_bb_info.  */
  /* Each macro expands to an lvalue, so they are used for both reads and
     writes.  BB->aux must point at an aux_bb_info (see init_worklist).  */
279 
280 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
281 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
282 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
283 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
284 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
285 
286 /* Returns true if the only effect a statement STMT has, is to define locally
287    used SSA_NAMEs.  */
288 
289 static bool
290 stmt_local_def (gimple *stmt)
291 {
292   basic_block bb, def_bb;
293   imm_use_iterator iter;
294   use_operand_p use_p;
295   tree val;
296   def_operand_p def_p;
297 
298   if (gimple_vdef (stmt) != NULL_TREE
299       || gimple_has_side_effects (stmt)
300       || gimple_could_trap_p_1 (stmt, false, false)
301       || gimple_vuse (stmt) != NULL_TREE
302       /* Copied from tree-ssa-ifcombine.c:bb_no_side_effects_p():
303 	 const calls don't match any of the above, yet they could
304 	 still have some side-effects - they could contain
305 	 gimple_could_trap_p statements, like floating point
306 	 exceptions or integer division by zero.  See PR70586.
307 	 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
308 	 should handle this.  */
309       || is_gimple_call (stmt))
310     return false;
311 
312   def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
313   if (def_p == NULL)
314     return false;
315 
316   val = DEF_FROM_PTR (def_p);
317   if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
318     return false;
319 
320   def_bb = gimple_bb (stmt);
321 
322   FOR_EACH_IMM_USE_FAST (use_p, iter, val)
323     {
324       if (is_gimple_debug (USE_STMT (use_p)))
325 	continue;
326       bb = gimple_bb (USE_STMT (use_p));
327       if (bb == def_bb)
328 	continue;
329 
330       if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
331 	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
332 	continue;
333 
334       return false;
335     }
336 
337   return true;
338 }
339 
340 /* Let GSI skip forwards over local defs.  */
341 
342 static void
343 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
344 {
345   gimple *stmt;
346 
347   while (true)
348     {
349       if (gsi_end_p (*gsi))
350 	return;
351       stmt = gsi_stmt (*gsi);
352       if (!stmt_local_def (stmt))
353 	return;
354       gsi_next_nondebug (gsi);
355     }
356 }
357 
358 /* VAL1 and VAL2 are either:
359    - uses in BB1 and BB2, or
360    - phi alternatives for BB1 and BB2.
361    Return true if the uses have the same gvn value.  */
362 
363 static bool
364 gvn_uses_equal (tree val1, tree val2)
365 {
366   gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
367 
368   if (val1 == val2)
369     return true;
370 
371   if (vn_valueize (val1) != vn_valueize (val2))
372     return false;
373 
374   return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
375 	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
376 }
377 
378 /* Prints E to FILE.  */
379 
380 static void
381 same_succ_print (FILE *file, const same_succ *e)
382 {
383   unsigned int i;
384   bitmap_print (file, e->bbs, "bbs:", "\n");
385   bitmap_print (file, e->succs, "succs:", "\n");
386   bitmap_print (file, e->inverse, "inverse:", "\n");
387   fprintf (file, "flags:");
388   for (i = 0; i < e->succ_flags.length (); ++i)
389     fprintf (file, " %x", e->succ_flags[i]);
390   fprintf (file, "\n");
391 }
392 
393 /* Prints same_succ VE to VFILE.  */
394 
395 inline int
396 ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
397 {
398   const same_succ *e = *pe;
399   same_succ_print (file, e);
400   return 1;
401 }
402 
403 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */
404 
405 static void
406 update_dep_bb (basic_block use_bb, tree val)
407 {
408   basic_block dep_bb;
409 
410   /* Not a dep.  */
411   if (TREE_CODE (val) != SSA_NAME)
412     return;
413 
414   /* Skip use of global def.  */
415   if (SSA_NAME_IS_DEFAULT_DEF (val))
416     return;
417 
418   /* Skip use of local def.  */
419   dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
420   if (dep_bb == use_bb)
421     return;
422 
423   if (BB_DEP_BB (use_bb) == NULL
424       || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
425     BB_DEP_BB (use_bb) = dep_bb;
426 }
427 
428 /* Update BB_DEP_BB, given the dependencies in STMT.  */
429 
430 static void
431 stmt_update_dep_bb (gimple *stmt)
432 {
433   ssa_op_iter iter;
434   use_operand_p use;
435 
436   FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
437     update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
438 }
439 
440 /* Calculates hash value for same_succ VE.  */
441 
442 static hashval_t
443 same_succ_hash (const same_succ *e)
444 {
445   inchash::hash hstate (bitmap_hash (e->succs));
446   int flags;
447   unsigned int i;
448   unsigned int first = bitmap_first_set_bit (e->bbs);
449   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
450   int size = 0;
451   gimple *stmt;
452   tree arg;
453   unsigned int s;
454   bitmap_iterator bs;
455 
456   for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
457        !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
458     {
459       stmt = gsi_stmt (gsi);
460       stmt_update_dep_bb (stmt);
461       if (stmt_local_def (stmt))
462 	continue;
463       size++;
464 
465       hstate.add_int (gimple_code (stmt));
466       if (is_gimple_assign (stmt))
467 	hstate.add_int (gimple_assign_rhs_code (stmt));
468       if (!is_gimple_call (stmt))
469 	continue;
470       if (gimple_call_internal_p (stmt))
471 	hstate.add_int (gimple_call_internal_fn (stmt));
472       else
473 	{
474 	  inchash::add_expr (gimple_call_fn (stmt), hstate);
475 	  if (gimple_call_chain (stmt))
476 	    inchash::add_expr (gimple_call_chain (stmt), hstate);
477 	}
478       for (i = 0; i < gimple_call_num_args (stmt); i++)
479 	{
480 	  arg = gimple_call_arg (stmt, i);
481 	  arg = vn_valueize (arg);
482 	  inchash::add_expr (arg, hstate);
483 	}
484     }
485 
486   hstate.add_int (size);
487   BB_SIZE (bb) = size;
488 
489   for (i = 0; i < e->succ_flags.length (); ++i)
490     {
491       flags = e->succ_flags[i];
492       flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
493       hstate.add_int (flags);
494     }
495 
496   EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
497     {
498       int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
499       for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
500 	   !gsi_end_p (gsi);
501 	   gsi_next (&gsi))
502 	{
503 	  gphi *phi = gsi.phi ();
504 	  tree lhs = gimple_phi_result (phi);
505 	  tree val = gimple_phi_arg_def (phi, n);
506 
507 	  if (virtual_operand_p (lhs))
508 	    continue;
509 	  update_dep_bb (bb, val);
510 	}
511     }
512 
513   return hstate.end ();
514 }
515 
516 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
517    are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
518    the other edge flags.  */
519 
520 static bool
521 inverse_flags (const same_succ *e1, const same_succ *e2)
522 {
523   int f1a, f1b, f2a, f2b;
524   int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
525 
526   if (e1->succ_flags.length () != 2)
527     return false;
528 
529   f1a = e1->succ_flags[0];
530   f1b = e1->succ_flags[1];
531   f2a = e2->succ_flags[0];
532   f2b = e2->succ_flags[1];
533 
534   if (f1a == f2a && f1b == f2b)
535     return false;
536 
537   return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
538 }
539 
540 /* Compares SAME_SUCCs E1 and E2.  */
541 
542 int
543 same_succ::equal (const same_succ *e1, const same_succ *e2)
544 {
545   unsigned int i, first1, first2;
546   gimple_stmt_iterator gsi1, gsi2;
547   gimple *s1, *s2;
548   basic_block bb1, bb2;
549 
550   if (e1 == e2)
551     return 1;
552 
553   if (e1->hashval != e2->hashval)
554     return 0;
555 
556   if (e1->succ_flags.length () != e2->succ_flags.length ())
557     return 0;
558 
559   if (!bitmap_equal_p (e1->succs, e2->succs))
560     return 0;
561 
562   if (!inverse_flags (e1, e2))
563     {
564       for (i = 0; i < e1->succ_flags.length (); ++i)
565 	if (e1->succ_flags[i] != e2->succ_flags[i])
566 	  return 0;
567     }
568 
569   first1 = bitmap_first_set_bit (e1->bbs);
570   first2 = bitmap_first_set_bit (e2->bbs);
571 
572   bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
573   bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
574 
575   if (BB_SIZE (bb1) != BB_SIZE (bb2))
576     return 0;
577 
578   gsi1 = gsi_start_nondebug_bb (bb1);
579   gsi2 = gsi_start_nondebug_bb (bb2);
580   gsi_advance_fw_nondebug_nonlocal (&gsi1);
581   gsi_advance_fw_nondebug_nonlocal (&gsi2);
582   while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
583     {
584       s1 = gsi_stmt (gsi1);
585       s2 = gsi_stmt (gsi2);
586       if (gimple_code (s1) != gimple_code (s2))
587 	return 0;
588       if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
589 	return 0;
590       gsi_next_nondebug (&gsi1);
591       gsi_next_nondebug (&gsi2);
592       gsi_advance_fw_nondebug_nonlocal (&gsi1);
593       gsi_advance_fw_nondebug_nonlocal (&gsi2);
594     }
595 
596   return 1;
597 }
598 
599 /* Alloc and init a new SAME_SUCC.  */
600 
601 static same_succ *
602 same_succ_alloc (void)
603 {
604   same_succ *same = XNEW (struct same_succ);
605 
606   same->bbs = BITMAP_ALLOC (NULL);
607   same->succs = BITMAP_ALLOC (NULL);
608   same->inverse = BITMAP_ALLOC (NULL);
609   same->succ_flags.create (10);
610   same->in_worklist = false;
611 
612   return same;
613 }
614 
615 /* Delete same_succ E.  */
616 
617 void
618 same_succ::remove (same_succ *e)
619 {
620   BITMAP_FREE (e->bbs);
621   BITMAP_FREE (e->succs);
622   BITMAP_FREE (e->inverse);
623   e->succ_flags.release ();
624 
625   XDELETE (e);
626 }
627 
628 /* Reset same_succ SAME.  */
629 
630 static void
631 same_succ_reset (same_succ *same)
632 {
633   bitmap_clear (same->bbs);
634   bitmap_clear (same->succs);
635   bitmap_clear (same->inverse);
636   same->succ_flags.truncate (0);
637 }
638 
/* Hash table of same_succ groups, keyed via same_succ::hash/equal.
   Allocated in init_worklist, freed in delete_worklist.  */
639 static hash_table<same_succ> *same_succ_htab;
640 
641 /* Array that is used to store the edge flags for a successor.  */
  /* Indexed by successor bb index; sized to last_basic_block_for_fn
     (see init_worklist).  */
642 
643 static int *same_succ_edge_flags;
644 
645 /* Bitmap that is used to mark bbs that are recently deleted.  */
646 
647 static bitmap deleted_bbs;
648 
649 /* Bitmap that is used to mark predecessors of bbs that are
650    deleted.  */
651 
652 static bitmap deleted_bb_preds;
653 
654 /* Prints same_succ_htab to stderr.  */
655 
656 extern void debug_same_succ (void);
657 DEBUG_FUNCTION void
658 debug_same_succ ( void)
659 {
660   same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
661 }
662 
663 
664 /* Worklist of same_succ groups to process; only groups with at least two
   member bbs are pushed (see add_to_worklist).  */
665 
666 static vec<same_succ *> worklist;
667 
668 /* Prints worklist to FILE.  */
669 
670 static void
671 print_worklist (FILE *file)
672 {
673   unsigned int i;
674   for (i = 0; i < worklist.length (); ++i)
675     same_succ_print (file, worklist[i]);
676 }
677 
678 /* Adds SAME to worklist.  */
679 
680 static void
681 add_to_worklist (same_succ *same)
682 {
683   if (same->in_worklist)
684     return;
685 
686   if (bitmap_count_bits (same->bbs) < 2)
687     return;
688 
689   same->in_worklist = true;
690   worklist.safe_push (same);
691 }
692 
693 /* Add BB to same_succ_htab.  */
694 
695 static void
696 find_same_succ_bb (basic_block bb, same_succ **same_p)
697 {
698   unsigned int j;
699   bitmap_iterator bj;
700   same_succ *same = *same_p;
701   same_succ **slot;
702   edge_iterator ei;
703   edge e;
704 
705   if (bb == NULL
706       /* Be conservative with loop structure.  It's not evident that this test
707 	 is sufficient.  Before tail-merge, we've just called
708 	 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
709 	 set, so there's no guarantee that the loop->latch value is still valid.
710 	 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
711 	 start of pre, we've kept that property intact throughout pre, and are
712 	 keeping it throughout tail-merge using this test.  */
713       || bb->loop_father->latch == bb)
714     return;
715   bitmap_set_bit (same->bbs, bb->index);
716   FOR_EACH_EDGE (e, ei, bb->succs)
717     {
718       int index = e->dest->index;
719       bitmap_set_bit (same->succs, index);
720       same_succ_edge_flags[index] = e->flags;
721     }
722   EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
723     same->succ_flags.safe_push (same_succ_edge_flags[j]);
724 
725   same->hashval = same_succ_hash (same);
726 
727   slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
728   if (*slot == NULL)
729     {
730       *slot = same;
731       BB_SAME_SUCC (bb) = same;
732       add_to_worklist (same);
733       *same_p = NULL;
734     }
735   else
736     {
737       bitmap_set_bit ((*slot)->bbs, bb->index);
738       BB_SAME_SUCC (bb) = *slot;
739       add_to_worklist (*slot);
740       if (inverse_flags (same, *slot))
741 	bitmap_set_bit ((*slot)->inverse, bb->index);
742       same_succ_reset (same);
743     }
744 }
745 
746 /* Find bbs with same successors.  */
747 
748 static void
749 find_same_succ (void)
750 {
751   same_succ *same = same_succ_alloc ();
752   basic_block bb;
753 
754   FOR_EACH_BB_FN (bb, cfun)
755     {
756       find_same_succ_bb (bb, &same);
757       if (same == NULL)
758 	same = same_succ_alloc ();
759     }
760 
761   same_succ::remove (same);
762 }
763 
764 /* Initializes worklist administration.  */
765 
766 static void
767 init_worklist (void)
768 {
769   alloc_aux_for_blocks (sizeof (struct aux_bb_info));
770   same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
771   same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
772   deleted_bbs = BITMAP_ALLOC (NULL);
773   deleted_bb_preds = BITMAP_ALLOC (NULL);
774   worklist.create (n_basic_blocks_for_fn (cfun));
775   find_same_succ ();
776 
777   if (dump_file && (dump_flags & TDF_DETAILS))
778     {
779       fprintf (dump_file, "initial worklist:\n");
780       print_worklist (dump_file);
781     }
782 }
783 
784 /* Deletes worklist administration.  */
785 
786 static void
787 delete_worklist (void)
788 {
789   free_aux_for_blocks ();
790   delete same_succ_htab;
791   same_succ_htab = NULL;
792   XDELETEVEC (same_succ_edge_flags);
793   same_succ_edge_flags = NULL;
794   BITMAP_FREE (deleted_bbs);
795   BITMAP_FREE (deleted_bb_preds);
796   worklist.release ();
797 }
798 
799 /* Mark BB as deleted, and mark its predecessors.  */
800 
801 static void
802 mark_basic_block_deleted (basic_block bb)
803 {
804   edge e;
805   edge_iterator ei;
806 
807   bitmap_set_bit (deleted_bbs, bb->index);
808 
809   FOR_EACH_EDGE (e, ei, bb->preds)
810     bitmap_set_bit (deleted_bb_preds, e->src->index);
811 }
812 
813 /* Removes BB from its corresponding same_succ.  */
814 
815 static void
816 same_succ_flush_bb (basic_block bb)
817 {
818   same_succ *same = BB_SAME_SUCC (bb);
819   if (! same)
820     return;
821 
822   BB_SAME_SUCC (bb) = NULL;
823   if (bitmap_single_bit_set_p (same->bbs))
824     same_succ_htab->remove_elt_with_hash (same, same->hashval);
825   else
826     bitmap_clear_bit (same->bbs, bb->index);
827 }
828 
829 /* Removes all bbs in BBS from their corresponding same_succ.  */
830 
831 static void
832 same_succ_flush_bbs (bitmap bbs)
833 {
834   unsigned int i;
835   bitmap_iterator bi;
836 
837   EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
838     same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
839 }
840 
841 /* Release the last vdef in BB, either normal or phi result.  */
842 
843 static void
844 release_last_vdef (basic_block bb)
845 {
846   for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
847        gsi_prev_nondebug (&i))
848     {
849       gimple *stmt = gsi_stmt (i);
850       if (gimple_vdef (stmt) == NULL_TREE)
851 	continue;
852 
853       mark_virtual_operand_for_renaming (gimple_vdef (stmt));
854       return;
855     }
856 
857   for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
858        gsi_next (&i))
859     {
860       gphi *phi = i.phi ();
861       tree res = gimple_phi_result (phi);
862 
863       if (!virtual_operand_p (res))
864 	continue;
865 
866       mark_virtual_phi_result_for_renaming (phi);
867       return;
868     }
869 }
870 
871 /* For deleted_bb_preds, find bbs with same successors.  */
872 
873 static void
874 update_worklist (void)
875 {
876   unsigned int i;
877   bitmap_iterator bi;
878   basic_block bb;
879   same_succ *same;
880 
881   bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
882   bitmap_clear (deleted_bbs);
883 
884   bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
885   same_succ_flush_bbs (deleted_bb_preds);
886 
887   same = same_succ_alloc ();
888   EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
889     {
890       bb = BASIC_BLOCK_FOR_FN (cfun, i);
891       gcc_assert (bb != NULL);
892       find_same_succ_bb (bb, &same);
893       if (same == NULL)
894 	same = same_succ_alloc ();
895     }
896   same_succ::remove (same);
897   bitmap_clear (deleted_bb_preds);
898 }
899 
900 /* Prints cluster C to FILE.  */
901 
902 static void
903 print_cluster (FILE *file, bb_cluster *c)
904 {
905   if (c == NULL)
906     return;
907   bitmap_print (file, c->bbs, "bbs:", "\n");
908   bitmap_print (file, c->preds, "preds:", "\n");
909 }
910 
911 /* Prints cluster C to stderr.  */
912 
913 extern void debug_cluster (bb_cluster *);
914 DEBUG_FUNCTION void
915 debug_cluster (bb_cluster *c)
916 {
917   print_cluster (stderr, c);
918 }
919 
920 /* Update C->rep_bb, given that BB is added to the cluster.  */
921 
922 static void
923 update_rep_bb (bb_cluster *c, basic_block bb)
924 {
925   /* Initial.  */
926   if (c->rep_bb == NULL)
927     {
928       c->rep_bb = bb;
929       return;
930     }
931 
932   /* Current needs no deps, keep it.  */
933   if (BB_DEP_BB (c->rep_bb) == NULL)
934     return;
935 
936   /* Bb needs no deps, change rep_bb.  */
937   if (BB_DEP_BB (bb) == NULL)
938     {
939       c->rep_bb = bb;
940       return;
941     }
942 
943   /* Bb needs last deps earlier than current, change rep_bb.  A potential
944      problem with this, is that the first deps might also be earlier, which
945      would mean we prefer longer lifetimes for the deps.  To be able to check
946      for this, we would have to trace BB_FIRST_DEP_BB as well, besides
947      BB_DEP_BB, which is really BB_LAST_DEP_BB.
948      The benefit of choosing the bb with last deps earlier, is that it can
949      potentially be used as replacement for more bbs.  */
950   if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
951     c->rep_bb = bb;
952 }
953 
954 /* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */
955 
956 static void
957 add_bb_to_cluster (bb_cluster *c, basic_block bb)
958 {
959   edge e;
960   edge_iterator ei;
961 
962   bitmap_set_bit (c->bbs, bb->index);
963 
964   FOR_EACH_EDGE (e, ei, bb->preds)
965     bitmap_set_bit (c->preds, e->src->index);
966 
967   update_rep_bb (c, bb);
968 }
969 
970 /* Allocate and init new cluster.  */
971 
972 static bb_cluster *
973 new_cluster (void)
974 {
975   bb_cluster *c;
976   c = XCNEW (bb_cluster);
977   c->bbs = BITMAP_ALLOC (NULL);
978   c->preds = BITMAP_ALLOC (NULL);
979   c->rep_bb = NULL;
980   return c;
981 }
982 
983 /* Delete clusters.  */
984 
985 static void
986 delete_cluster (bb_cluster *c)
987 {
988   if (c == NULL)
989     return;
990   BITMAP_FREE (c->bbs);
991   BITMAP_FREE (c->preds);
992   XDELETE (c);
993 }
994 
995 
996 /* Array that contains all clusters.  */
  /* Entries may become NULL when a cluster is merged into another
     (see set_cluster); delete_cluster tolerates NULL.  */
997 
998 static vec<bb_cluster *> all_clusters;
999 
1000 /* Allocate all cluster vectors.  */
1001 
1002 static void
1003 alloc_cluster_vectors (void)
1004 {
1005   all_clusters.create (n_basic_blocks_for_fn (cfun));
1006 }
1007 
1008 /* Reset all cluster vectors.  */
1009 
1010 static void
1011 reset_cluster_vectors (void)
1012 {
1013   unsigned int i;
1014   basic_block bb;
1015   for (i = 0; i < all_clusters.length (); ++i)
1016     delete_cluster (all_clusters[i]);
1017   all_clusters.truncate (0);
1018   FOR_EACH_BB_FN (bb, cfun)
1019     BB_CLUSTER (bb) = NULL;
1020 }
1021 
1022 /* Delete all cluster vectors.  */
1023 
1024 static void
1025 delete_cluster_vectors (void)
1026 {
1027   unsigned int i;
1028   for (i = 0; i < all_clusters.length (); ++i)
1029     delete_cluster (all_clusters[i]);
1030   all_clusters.release ();
1031 }
1032 
/* Merge cluster C2 into C1 by unioning the bb and pred sets.  C2 itself is
   left unchanged; the caller is responsible for freeing it (see
   set_cluster).  */

static void
merge_clusters (bb_cluster *c1, bb_cluster *c2)
{
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}
1041 
1042 /* Register equivalence of BB1 and BB2 (members of cluster C).  Store c in
1043    all_clusters, or merge c with existing cluster.  */
1044 
1045 static void
1046 set_cluster (basic_block bb1, basic_block bb2)
1047 {
1048   basic_block merge_bb, other_bb;
1049   bb_cluster *merge, *old, *c;
1050 
1051   if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1052     {
1053       c = new_cluster ();
1054       add_bb_to_cluster (c, bb1);
1055       add_bb_to_cluster (c, bb2);
1056       BB_CLUSTER (bb1) = c;
1057       BB_CLUSTER (bb2) = c;
1058       c->index = all_clusters.length ();
1059       all_clusters.safe_push (c);
1060     }
1061   else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1062     {
1063       merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1064       other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1065       merge = BB_CLUSTER (merge_bb);
1066       add_bb_to_cluster (merge, other_bb);
1067       BB_CLUSTER (other_bb) = merge;
1068     }
1069   else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1070     {
1071       unsigned int i;
1072       bitmap_iterator bi;
1073 
1074       old = BB_CLUSTER (bb2);
1075       merge = BB_CLUSTER (bb1);
1076       merge_clusters (merge, old);
1077       EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1078 	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1079       all_clusters[old->index] = NULL;
1080       update_rep_bb (merge, old->rep_bb);
1081       delete_cluster (old);
1082     }
1083   else
1084     gcc_unreachable ();
1085 }
1086 
1087 /* Return true if gimple operands T1 and T2 have the same value.  */
1088 
1089 static bool
1090 gimple_operand_equal_value_p (tree t1, tree t2)
1091 {
1092   if (t1 == t2)
1093     return true;
1094 
1095   if (t1 == NULL_TREE
1096       || t2 == NULL_TREE)
1097     return false;
1098 
1099   if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
1100     return true;
1101 
1102   return gvn_uses_equal (t1, t2);
1103 }
1104 
/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  Only calls, assigns and
   conditions are ever considered equal; everything else compares unequal.  */

static bool
gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      /* Calls are equal when they have the same target, value-equal static
	 chains, the same number of value-equal arguments, and matching
	 lhss.  */
      if (!gimple_call_same_target_p (s1, s2))
	return false;

      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
	return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
	{
	  t1 = gimple_call_arg (s1, i);
	  t2 = gimple_call_arg (s2, i);
	  if (!gimple_operand_equal_value_p (t1, t2))
	    return false;
	}

      /* Either both calls have no lhs, or both lhss are equal: SSA name
	 lhss are compared by value number, others structurally.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
	return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
	return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
	return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      /* Two non-SSA lhss must be structurally equal and have value-equal
	 rhs1; two SSA name lhss are equal when their rhs1 are structurally
	 equal.  An SSA lhs never matches a non-SSA lhs.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
	  && TREE_CODE (lhs2) != SSA_NAME)
	return (operand_equal_p (lhs1, lhs2, 0)
		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
						 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
	       && TREE_CODE (lhs2) == SSA_NAME)
	return operand_equal_p (gimple_assign_rhs1 (s1),
				gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      /* Conditions need value-equal operands ...  */
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      /* ... and matching comparison codes.  If exactly one of BB1/BB2 is
	 marked in SAME_SUCC->inverse, invert one code (respecting NaNs)
	 before comparing.  */
      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
		  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
	{
	  bool honor_nans = HONOR_NANS (t1);
	  code2 = invert_tree_comparison (code2, honor_nans);
	}
      return code1 == code2;

    default:
      /* Conservatively treat any other statement kind as unequal.  */
      return false;
    }
}
1193 
1194 /* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
1195    Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1196    processed statements.  */
1197 
1198 static void
1199 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1200 				  bool *vuse_escaped)
1201 {
1202   gimple *stmt;
1203   tree lvuse;
1204 
1205   while (true)
1206     {
1207       if (gsi_end_p (*gsi))
1208 	return;
1209       stmt = gsi_stmt (*gsi);
1210 
1211       lvuse = gimple_vuse (stmt);
1212       if (lvuse != NULL_TREE)
1213 	{
1214 	  *vuse = lvuse;
1215 	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1216 	    *vuse_escaped = true;
1217 	}
1218 
1219       if (!stmt_local_def (stmt))
1220 	return;
1221       gsi_prev_nondebug (gsi);
1222     }
1223 }
1224 
1225 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
1226    STMT2 are allowed to be merged.  */
1227 
1228 static bool
1229 merge_stmts_p (gimple *stmt1, gimple *stmt2)
1230 {
1231   /* What could be better than this here is to blacklist the bb
1232      containing the stmt, when encountering the stmt f.i. in
1233      same_succ_hash.  */
1234   if (is_tm_ending (stmt1))
1235     return false;
1236 
1237   /* Verify EH landing pads.  */
1238   if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
1239     return false;
1240 
1241   if (is_gimple_call (stmt1)
1242       && gimple_call_internal_p (stmt1))
1243     switch (gimple_call_internal_fn (stmt1))
1244       {
1245       case IFN_UBSAN_NULL:
1246       case IFN_UBSAN_BOUNDS:
1247       case IFN_UBSAN_VPTR:
1248       case IFN_UBSAN_CHECK_ADD:
1249       case IFN_UBSAN_CHECK_SUB:
1250       case IFN_UBSAN_CHECK_MUL:
1251       case IFN_UBSAN_OBJECT_SIZE:
1252       case IFN_ASAN_CHECK:
1253 	/* For these internal functions, gimple_location is an implicit
1254 	   parameter, which will be used explicitly after expansion.
1255 	   Merging these statements may cause confusing line numbers in
1256 	   sanitizer messages.  */
1257 	return gimple_location (stmt1) == gimple_location (stmt2);
1258       default:
1259 	break;
1260       }
1261 
1262   return true;
1263 }
1264 
/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
   clusters them.  */

static void
find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  /* Walk both blocks backwards in lock-step, comparing statement pairs,
     skipping debug stmts and local defs.  Bail out on the first pair that
     is not equal or not allowed to be merged.  */
  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple *stmt1 = gsi_stmt (gsi1);
      gimple *stmt2 = gsi_stmt (gsi2);

      /* Stop once both walks reach the leading labels; those are handled
	 separately below.  */
      if (gimple_code (stmt1) == GIMPLE_LABEL
	  && gimple_code (stmt2) == GIMPLE_LABEL)
	break;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
	return;

      if (!merge_stmts_p (stmt1, stmt2))
	return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  /* Skip over any remaining leading labels; nonlocal and forced labels
     prevent merging.  */
  while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi1);
    }
  while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi2);
    }
  /* The blocks are only duplicates if both have been matched completely.  */
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be changed.
     TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
	     bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}
1330 
1331 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1332    E2 are equal.  */
1333 
1334 static bool
1335 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1336 {
1337   int n1 = e1->dest_idx, n2 = e2->dest_idx;
1338   gphi_iterator gsi;
1339 
1340   for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1341     {
1342       gphi *phi = gsi.phi ();
1343       tree lhs = gimple_phi_result (phi);
1344       tree val1 = gimple_phi_arg_def (phi, n1);
1345       tree val2 = gimple_phi_arg_def (phi, n2);
1346 
1347       if (virtual_operand_p (lhs))
1348 	continue;
1349 
1350       if (operand_equal_for_phi_arg_p (val1, val2))
1351 	continue;
1352       if (gvn_uses_equal (val1, val2))
1353 	continue;
1354 
1355       return false;
1356     }
1357 
1358   return true;
1359 }
1360 
1361 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1362    phi alternatives for BB1 and BB2 are equal.  */
1363 
1364 static bool
1365 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
1366 {
1367   unsigned int s;
1368   bitmap_iterator bs;
1369   edge e1, e2;
1370   basic_block succ;
1371 
1372   EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1373     {
1374       succ = BASIC_BLOCK_FOR_FN (cfun, s);
1375       e1 = find_edge (bb1, succ);
1376       e2 = find_edge (bb2, succ);
1377       if (e1->flags & EDGE_COMPLEX
1378 	  || e2->flags & EDGE_COMPLEX)
1379 	return false;
1380 
1381       /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1382 	 the same value.  */
1383       if (!same_phi_alternatives_1 (succ, e1, e2))
1384 	return false;
1385     }
1386 
1387   return true;
1388 }
1389 
1390 /* Return true if BB has non-vop phis.  */
1391 
1392 static bool
1393 bb_has_non_vop_phi (basic_block bb)
1394 {
1395   gimple_seq phis = phi_nodes (bb);
1396   gimple *phi;
1397 
1398   if (phis == NULL)
1399     return false;
1400 
1401   if (!gimple_seq_singleton_p (phis))
1402     return true;
1403 
1404   phi = gimple_seq_first_stmt (phis);
1405   return !virtual_operand_p (gimple_phi_result (phi));
1406 }
1407 
1408 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1409    invariant that uses in FROM are dominates by their defs.  */
1410 
1411 static bool
1412 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1413 {
1414   basic_block cd, dep_bb = BB_DEP_BB (to);
1415   edge_iterator ei;
1416   edge e;
1417 
1418   if (dep_bb == NULL)
1419     return true;
1420 
1421   bitmap from_preds = BITMAP_ALLOC (NULL);
1422   FOR_EACH_EDGE (e, ei, from->preds)
1423     bitmap_set_bit (from_preds, e->src->index);
1424   cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1425   BITMAP_FREE (from_preds);
1426 
1427   return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1428 }
1429 
1430 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1431    replacement bb) and vice versa maintains the invariant that uses in the
1432    replacement are dominates by their defs.  */
1433 
1434 static bool
1435 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1436 {
1437   if (BB_CLUSTER (bb1) != NULL)
1438     bb1 = BB_CLUSTER (bb1)->rep_bb;
1439 
1440   if (BB_CLUSTER (bb2) != NULL)
1441     bb2 = BB_CLUSTER (bb2)->rep_bb;
1442 
1443   return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1444 	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1445 }
1446 
/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.
   Compares each member against later members, limiting the number of
   comparisons per bb via PARAM_MAX_TAIL_MERGE_COMPARISONS.  */

static void
find_clusters_1 (same_succ *same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
	 phi-nodes in bb1 and bb2, with the same alternatives for the same
	 preds.  */
      if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1)
	  || bb_has_abnormal_pred (bb1))
	continue;

      nr_comparisons = 0;
      /* Only compare against later members; earlier pairs were already
	 considered in previous outer iterations.  */
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

	  if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2)
	      || bb_has_abnormal_pred (bb2))
	    continue;

	  /* Skip pairs already known to be in the same cluster.  */
	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behavior.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precise for allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  find_duplicate (same_succ, bb1, bb2);
	}
    }
}
1498 
1499 /* Find clusters of bbs which can be merged.  */
1500 
1501 static void
1502 find_clusters (void)
1503 {
1504   same_succ *same;
1505 
1506   while (!worklist.is_empty ())
1507     {
1508       same = worklist.pop ();
1509       same->in_worklist = false;
1510       if (dump_file && (dump_flags & TDF_DETAILS))
1511 	{
1512 	  fprintf (dump_file, "processing worklist entry\n");
1513 	  same_succ_print (dump_file, same);
1514 	}
1515       find_clusters_1 (same);
1516     }
1517 }
1518 
1519 /* Returns the vop phi of BB, if any.  */
1520 
1521 static gphi *
1522 vop_phi (basic_block bb)
1523 {
1524   gphi *stmt;
1525   gphi_iterator gsi;
1526   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1527     {
1528       stmt = gsi.phi ();
1529       if (! virtual_operand_p (gimple_phi_result (stmt)))
1530 	continue;
1531       return stmt;
1532     }
1533   return NULL;
1534 }
1535 
/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.
   Also merges BB1's profile data into BB2 and moves BB1's user labels.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  edge e1, e2;
  edge_iterator ei;
  unsigned int i;
  gphi *bb2_phi;

  /* The vop phi of bb2, if any; each redirected edge needs a corresponding
     phi argument.  */
  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  Iterate downwards, as the
     redirection modifies bb1->preds.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Merge bb1's frequency and count into bb2, capping the frequency at
     BB_FREQ_MAX.  */
  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  gcov_type out_sum = 0;
  FOR_EACH_EDGE (e1, ei, bb1->succs)
    {
      e2 = find_edge (bb2, e1->dest);
      gcc_assert (e2);
      e2->count += e1->count;
      out_sum += e2->count;
    }
  /* Recompute the edge probabilities from the new merged edge count.
     Use the sum of the new merged edge counts computed above instead
     of bb2's merged count, in case there are profile count insanities
     making the bb count inconsistent with the edge weights.  */
  FOR_EACH_EDGE (e2, ei, bb2->succs)
    {
      e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
    }

  /* Move over any user labels from bb1 after the bb2 labels.  */
  gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
  if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
      while (!gsi_end_p (gsi1)
	     && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
	{
	  tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
	  /* find_duplicate rejected bbs with nonlocal or forced labels.  */
	  gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
	  if (DECL_ARTIFICIAL (label))
	    gsi_next (&gsi1);
	  else
	    gsi_move_before (&gsi1, &gsi2);
	}
    }

  /* Clear range info from all stmts in BB2 -- this transformation
     could make them out of date.  */
  reset_flow_sensitive_info_in_bb (bb2);

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}
1621 
/* Bbs for which update_debug_stmt needs to be called.  Maintained by
   apply_clusters: set for the surviving representative bbs, cleared for
   bbs that get removed.  */

static bitmap update_bbs;
1625 
1626 /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
1627    number of bbs removed.  */
1628 
1629 static int
1630 apply_clusters (void)
1631 {
1632   basic_block bb1, bb2;
1633   bb_cluster *c;
1634   unsigned int i, j;
1635   bitmap_iterator bj;
1636   int nr_bbs_removed = 0;
1637 
1638   for (i = 0; i < all_clusters.length (); ++i)
1639     {
1640       c = all_clusters[i];
1641       if (c == NULL)
1642 	continue;
1643 
1644       bb2 = c->rep_bb;
1645       bitmap_set_bit (update_bbs, bb2->index);
1646 
1647       bitmap_clear_bit (c->bbs, bb2->index);
1648       EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1649 	{
1650 	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1651 	  bitmap_clear_bit (update_bbs, bb1->index);
1652 
1653 	  replace_block_by (bb1, bb2);
1654 	  nr_bbs_removed++;
1655 	}
1656     }
1657 
1658   return nr_bbs_removed;
1659 }
1660 
1661 /* Resets debug statement STMT if it has uses that are not dominated by their
1662    defs.  */
1663 
1664 static void
1665 update_debug_stmt (gimple *stmt)
1666 {
1667   use_operand_p use_p;
1668   ssa_op_iter oi;
1669   basic_block bbuse;
1670 
1671   if (!gimple_debug_bind_p (stmt))
1672     return;
1673 
1674   bbuse = gimple_bb (stmt);
1675   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1676     {
1677       tree name = USE_FROM_PTR (use_p);
1678       gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1679       basic_block bbdef = gimple_bb (def_stmt);
1680       if (bbdef == NULL || bbuse == bbdef
1681 	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1682 	continue;
1683 
1684       gimple_debug_bind_reset_value (stmt);
1685       update_stmt (stmt);
1686       break;
1687     }
1688 }
1689 
1690 /* Resets all debug statements that have uses that are not
1691    dominated by their defs.  */
1692 
1693 static void
1694 update_debug_stmts (void)
1695 {
1696   basic_block bb;
1697   bitmap_iterator bi;
1698   unsigned int i;
1699 
1700   EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1701     {
1702       gimple *stmt;
1703       gimple_stmt_iterator gsi;
1704 
1705       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1706       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1707 	{
1708 	  stmt = gsi_stmt (gsi);
1709 	  if (!is_gimple_debug (stmt))
1710 	    continue;
1711 	  update_debug_stmt (stmt);
1712 	}
1713     }
1714 }
1715 
/* Runs tail merge optimization.  TODO is the caller's pending todo flags;
   they are returned unchanged, or 0 when the pass is disabled.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  /* Nothing to do when disabled, or with a zero iteration limit.  */
  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  /* Iterate: find clusters of duplicate bbs, merge them, and requeue the
     affected worklist entries, until nothing changes or the iteration
     limit is reached.  */
  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  /* Allocate per-run state lazily, on the first iteration only.  */
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      /* Removing blocks invalidated the dominance info.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      /* Debug stmts may now use defs that no longer dominate them; reset
	 those binds (see update_debug_stmts).  */
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}
1807