xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-ssa-tail-merge.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* Tail merging for gimple.
2    Copyright (C) 2011-2022 Free Software Foundation, Inc.
3    Contributed by Tom de Vries (tom@codesourcery.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Pass overview.
22 
23 
24    MOTIVATIONAL EXAMPLE
25 
26    gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27 
28    hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29    {
30      struct FILED.1638 * fpD.2605;
31      charD.1 fileNameD.2604[1000];
32      intD.0 D.3915;
33      const charD.1 * restrict outputFileName.0D.3914;
34 
35      # BLOCK 2 freq:10000
36      # PRED: ENTRY [100.0%]  (fallthru,exec)
37      # PT = nonlocal { D.3926 } (restr)
38      outputFileName.0D.3914_3
39        = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40      # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43      sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44      # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47      D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48      if (D.3915_4 == 0)
49        goto <bb 3>;
50      else
51        goto <bb 4>;
52      # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)
53 
54      # BLOCK 3 freq:1000
55      # PRED: 2 [10.0%]  (true,exec)
56      # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59      freeD.898 (ctxD.2601_5(D));
60      goto <bb 7>;
61      # SUCC: 7 [100.0%]  (fallthru,exec)
62 
63      # BLOCK 4 freq:9000
64      # PRED: 2 [90.0%]  (false,exec)
65      # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66      # PT = nonlocal escaped
67      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69      fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70      if (fpD.2605_8 == 0B)
71        goto <bb 5>;
72      else
73        goto <bb 6>;
74      # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)
75 
76      # BLOCK 5 freq:173
77      # PRED: 4 [1.9%]  (true,exec)
78      # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81      freeD.898 (ctxD.2601_5(D));
82      goto <bb 7>;
83      # SUCC: 7 [100.0%]  (fallthru,exec)
84 
85      # BLOCK 6 freq:8827
86      # PRED: 4 [98.1%]  (false,exec)
87      # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90      fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91      # SUCC: 7 [100.0%]  (fallthru,exec)
92 
93      # BLOCK 7 freq:10000
94      # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
95 	     6 [100.0%]  (fallthru,exec)
96      # PT = nonlocal null
97 
98      # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99      # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 			    .MEMD.3923_18(6)>
101      # VUSE <.MEMD.3923_11>
102      return ctxD.2601_1;
103      # SUCC: EXIT [100.0%]
104    }
105 
106    bb 3 and bb 5 can be merged.  The blocks have different predecessors, but the
107    same successors, and the same operations.
108 
109 
110    CONTEXT
111 
112    A technique called tail merging (or cross jumping) can fix the example
113    above.  For a block, we look for common code at the end (the tail) of the
114    predecessor blocks, and insert jumps from one block to the other.
115    The example is a special case for tail merging, in that 2 whole blocks
116    can be merged, rather than just the end parts of it.
117    We currently only focus on whole block merging, so in that sense
118    calling this pass tail merge is a bit of a misnomer.
119 
120    We distinguish 2 kinds of situations in which blocks can be merged:
121    - same operations, same predecessors.  The successor edges coming from one
122      block are redirected to come from the other block.
123    - same operations, same successors.  The predecessor edges entering one block
124      are redirected to enter the other block.  Note that this operation might
125      involve introducing phi operations.
126 
127    For efficient implementation, we would like to value numbers the blocks, and
128    have a comparison operator that tells us whether the blocks are equal.
129    Besides being runtime efficient, block value numbering should also abstract
130    from irrelevant differences in order of operations, much like normal value
131    numbering abstracts from irrelevant order of operations.
132 
133    For the first situation (same_operations, same predecessors), normal value
134    numbering fits well.  We can calculate a block value number based on the
135    value numbers of the defs and vdefs.
136 
137    For the second situation (same operations, same successors), this approach
138    doesn't work so well.  We can illustrate this using the example.  The calls
139    to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140    remain different in value numbering, since they represent different memory
141    states.  So the resulting vdefs of the frees will be different in value
142    numbering, so the block value numbers will be different.
143 
144    The reason why we call the blocks equal is not because they define the same
145    values, but because uses in the blocks use (possibly different) defs in the
146    same way.  To be able to detect this efficiently, we need to do some kind of
147    reverse value numbering, meaning number the uses rather than the defs, and
148    calculate a block value number based on the value number of the uses.
149    Ideally, a block comparison operator will also indicate which phis are needed
150    to merge the blocks.
151 
152    For the moment, we don't do block value numbering, but we do insn-by-insn
153    matching, using scc value numbers to match operations with results, and
154    structural comparison otherwise, while ignoring vop mismatches.
155 
156 
157    IMPLEMENTATION
158 
159    1. The pass first determines all groups of blocks with the same successor
160       blocks.
161    2. Within each group, it tries to determine clusters of equal basic blocks.
162    3. The clusters are applied.
163    4. The same successor groups are updated.
164    5. This process is repeated from 2 onwards, until no more changes.
165 
166 
167    LIMITATIONS/TODO
168 
169    - block only
170    - handles only 'same operations, same successors'.
171      It handles same predecessors as a special subcase though.
172    - does not implement the reverse value numbering and block value numbering.
173    - improve memory allocation: use garbage collected memory, obstacks,
174      allocpools where appropriate.
175    - no insertion of gimple_reg phis,  We only introduce vop-phis.
176    - handle blocks with gimple_reg phi_nodes.
177 
178 
179    PASS PLACEMENT
180    This 'pass' is not a stand-alone gimple pass, but runs as part of
181    pass_pre, in order to share the value numbering.
182 
183 
184    SWITCHES
185 
186    - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
187 
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "backend.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "cfghooks.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "fold-const.h"
198 #include "trans-mem.h"
199 #include "cfganal.h"
200 #include "cfgcleanup.h"
201 #include "gimple-iterator.h"
202 #include "tree-cfg.h"
203 #include "tree-into-ssa.h"
204 #include "tree-ssa-sccvn.h"
205 #include "cfgloop.h"
206 #include "tree-eh.h"
207 #include "tree-cfgcleanup.h"
208 
209 const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
210 
211 /* Describes a group of bbs with the same successors.  The successor bbs are
212    cached in succs, and the successor edge flags are cached in succ_flags.
213    If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
214    it's marked in inverse.
215    Additionally, the hash value for the struct is cached in hashval, and
216    in_worklist indicates whether it's currently part of worklist.  */
217 
218 struct same_succ : pointer_hash <same_succ>
219 {
220   /* The bbs that have the same successor bbs.  */
221   bitmap bbs;
222   /* The successor bbs.  */
223   bitmap succs;
224   /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
225      bb.  */
226   bitmap inverse;
227   /* The edge flags for each of the successor bbs.  */
228   vec<int> succ_flags;
229   /* Indicates whether the struct is currently in the worklist.  */
230   bool in_worklist;
231   /* The hash value of the struct.  */
232   hashval_t hashval;
233 
234   /* hash_table support.  */
235   static inline hashval_t hash (const same_succ *);
236   static int equal (const same_succ *, const same_succ *);
237   static void remove (same_succ *);
238 };
239 
240 /* hash routine for hash_table support, returns hashval of E.  */
241 
242 inline hashval_t
hash(const same_succ * e)243 same_succ::hash (const same_succ *e)
244 {
245   return e->hashval;
246 }
247 
248 /* A group of bbs where 1 bb from bbs can replace the other bbs.  */
249 
250 struct bb_cluster
251 {
252   /* The bbs in the cluster.  */
253   bitmap bbs;
254   /* The preds of the bbs in the cluster.  */
255   bitmap preds;
256   /* Index in all_clusters vector.  */
257   int index;
258   /* The bb to replace the cluster with.  */
259   basic_block rep_bb;
260 };
261 
262 /* Per bb-info.  */
263 
264 struct aux_bb_info
265 {
266   /* The number of non-debug statements in the bb.  */
267   int size;
268   /* The same_succ that this bb is a member of.  */
269   same_succ *bb_same_succ;
270   /* The cluster that this bb is a member of.  */
271   bb_cluster *cluster;
272   /* The vop state at the exit of a bb.  This is shortlived data, used to
273      communicate data between update_block_by and update_vuses.  */
274   tree vop_at_exit;
275   /* The bb that either contains or is dominated by the dependencies of the
276      bb.  */
277   basic_block dep_bb;
278 };
279 
280 /* Macros to access the fields of struct aux_bb_info.  */
281 
282 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
283 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
284 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
285 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
286 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
287 
288 /* Valueization helper querying the VN lattice.  */
289 
290 static tree
tail_merge_valueize(tree name)291 tail_merge_valueize (tree name)
292 {
293   if (TREE_CODE (name) == SSA_NAME
294       && has_VN_INFO (name))
295     {
296       tree tem = VN_INFO (name)->valnum;
297       if (tem != VN_TOP)
298 	return tem;
299     }
300   return name;
301 }
302 
303 /* Returns true if the only effect a statement STMT has, is to define locally
304    used SSA_NAMEs.  */
305 
306 static bool
stmt_local_def(gimple * stmt)307 stmt_local_def (gimple *stmt)
308 {
309   basic_block bb, def_bb;
310   imm_use_iterator iter;
311   use_operand_p use_p;
312   tree val;
313   def_operand_p def_p;
314 
315   if (gimple_vdef (stmt) != NULL_TREE
316       || gimple_has_side_effects (stmt)
317       || gimple_could_trap_p_1 (stmt, false, false)
318       || gimple_vuse (stmt) != NULL_TREE
319       /* Copied from tree-ssa-ifcombine.cc:bb_no_side_effects_p():
320 	 const calls don't match any of the above, yet they could
321 	 still have some side-effects - they could contain
322 	 gimple_could_trap_p statements, like floating point
323 	 exceptions or integer division by zero.  See PR70586.
324 	 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
325 	 should handle this.  */
326       || is_gimple_call (stmt))
327     return false;
328 
329   def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
330   if (def_p == NULL)
331     return false;
332 
333   val = DEF_FROM_PTR (def_p);
334   if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
335     return false;
336 
337   def_bb = gimple_bb (stmt);
338 
339   FOR_EACH_IMM_USE_FAST (use_p, iter, val)
340     {
341       if (is_gimple_debug (USE_STMT (use_p)))
342 	continue;
343       bb = gimple_bb (USE_STMT (use_p));
344       if (bb == def_bb)
345 	continue;
346 
347       if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
348 	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
349 	continue;
350 
351       return false;
352     }
353 
354   return true;
355 }
356 
357 /* Let GSI skip forwards over local defs.  */
358 
359 static void
gsi_advance_fw_nondebug_nonlocal(gimple_stmt_iterator * gsi)360 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
361 {
362   gimple *stmt;
363 
364   while (true)
365     {
366       if (gsi_end_p (*gsi))
367 	return;
368       stmt = gsi_stmt (*gsi);
369       if (!stmt_local_def (stmt))
370 	return;
371       gsi_next_nondebug (gsi);
372     }
373 }
374 
375 /* VAL1 and VAL2 are either:
376    - uses in BB1 and BB2, or
377    - phi alternatives for BB1 and BB2.
378    Return true if the uses have the same gvn value.  */
379 
380 static bool
gvn_uses_equal(tree val1,tree val2)381 gvn_uses_equal (tree val1, tree val2)
382 {
383   gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
384 
385   if (val1 == val2)
386     return true;
387 
388   if (tail_merge_valueize (val1) != tail_merge_valueize (val2))
389     return false;
390 
391   return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
392 	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
393 }
394 
395 /* Prints E to FILE.  */
396 
397 static void
same_succ_print(FILE * file,const same_succ * e)398 same_succ_print (FILE *file, const same_succ *e)
399 {
400   unsigned int i;
401   bitmap_print (file, e->bbs, "bbs:", "\n");
402   bitmap_print (file, e->succs, "succs:", "\n");
403   bitmap_print (file, e->inverse, "inverse:", "\n");
404   fprintf (file, "flags:");
405   for (i = 0; i < e->succ_flags.length (); ++i)
406     fprintf (file, " %x", e->succ_flags[i]);
407   fprintf (file, "\n");
408 }
409 
410 /* Prints same_succ VE to VFILE.  */
411 
412 inline int
ssa_same_succ_print_traverse(same_succ ** pe,FILE * file)413 ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
414 {
415   const same_succ *e = *pe;
416   same_succ_print (file, e);
417   return 1;
418 }
419 
420 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */
421 
422 static void
update_dep_bb(basic_block use_bb,tree val)423 update_dep_bb (basic_block use_bb, tree val)
424 {
425   basic_block dep_bb;
426 
427   /* Not a dep.  */
428   if (TREE_CODE (val) != SSA_NAME)
429     return;
430 
431   /* Skip use of global def.  */
432   if (SSA_NAME_IS_DEFAULT_DEF (val))
433     return;
434 
435   /* Skip use of local def.  */
436   dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
437   if (dep_bb == use_bb)
438     return;
439 
440   if (BB_DEP_BB (use_bb) == NULL
441       || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
442     BB_DEP_BB (use_bb) = dep_bb;
443 }
444 
445 /* Update BB_DEP_BB, given the dependencies in STMT.  */
446 
447 static void
stmt_update_dep_bb(gimple * stmt)448 stmt_update_dep_bb (gimple *stmt)
449 {
450   ssa_op_iter iter;
451   use_operand_p use;
452 
453   FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
454     update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
455 }
456 
457 /* Calculates hash value for same_succ VE.  */
458 
459 static hashval_t
same_succ_hash(const same_succ * e)460 same_succ_hash (const same_succ *e)
461 {
462   inchash::hash hstate (bitmap_hash (e->succs));
463   int flags;
464   unsigned int i;
465   unsigned int first = bitmap_first_set_bit (e->bbs);
466   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
467   int size = 0;
468   gimple *stmt;
469   tree arg;
470   unsigned int s;
471   bitmap_iterator bs;
472 
473   for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
474        !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
475     {
476       stmt = gsi_stmt (gsi);
477       stmt_update_dep_bb (stmt);
478       if (stmt_local_def (stmt))
479 	continue;
480       size++;
481 
482       hstate.add_int (gimple_code (stmt));
483       if (is_gimple_assign (stmt))
484 	hstate.add_int (gimple_assign_rhs_code (stmt));
485       if (!is_gimple_call (stmt))
486 	continue;
487       if (gimple_call_internal_p (stmt))
488 	hstate.add_int (gimple_call_internal_fn (stmt));
489       else
490 	{
491 	  inchash::add_expr (gimple_call_fn (stmt), hstate);
492 	  if (gimple_call_chain (stmt))
493 	    inchash::add_expr (gimple_call_chain (stmt), hstate);
494 	}
495       for (i = 0; i < gimple_call_num_args (stmt); i++)
496 	{
497 	  arg = gimple_call_arg (stmt, i);
498 	  arg = tail_merge_valueize (arg);
499 	  inchash::add_expr (arg, hstate);
500 	}
501     }
502 
503   hstate.add_int (size);
504   BB_SIZE (bb) = size;
505 
506   hstate.add_int (bb->loop_father->num);
507 
508   for (i = 0; i < e->succ_flags.length (); ++i)
509     {
510       flags = e->succ_flags[i];
511       flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
512       hstate.add_int (flags);
513     }
514 
515   EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
516     {
517       int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
518       for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
519 	   !gsi_end_p (gsi);
520 	   gsi_next (&gsi))
521 	{
522 	  gphi *phi = gsi.phi ();
523 	  tree lhs = gimple_phi_result (phi);
524 	  tree val = gimple_phi_arg_def (phi, n);
525 
526 	  if (virtual_operand_p (lhs))
527 	    continue;
528 	  update_dep_bb (bb, val);
529 	}
530     }
531 
532   return hstate.end ();
533 }
534 
535 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
536    are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
537    the other edge flags.  */
538 
539 static bool
inverse_flags(const same_succ * e1,const same_succ * e2)540 inverse_flags (const same_succ *e1, const same_succ *e2)
541 {
542   int f1a, f1b, f2a, f2b;
543   int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
544 
545   if (e1->succ_flags.length () != 2)
546     return false;
547 
548   f1a = e1->succ_flags[0];
549   f1b = e1->succ_flags[1];
550   f2a = e2->succ_flags[0];
551   f2b = e2->succ_flags[1];
552 
553   if (f1a == f2a && f1b == f2b)
554     return false;
555 
556   return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
557 }
558 
559 /* Compares SAME_SUCCs E1 and E2.  */
560 
561 int
equal(const same_succ * e1,const same_succ * e2)562 same_succ::equal (const same_succ *e1, const same_succ *e2)
563 {
564   unsigned int i, first1, first2;
565   gimple_stmt_iterator gsi1, gsi2;
566   gimple *s1, *s2;
567   basic_block bb1, bb2;
568 
569   if (e1 == e2)
570     return 1;
571 
572   if (e1->hashval != e2->hashval)
573     return 0;
574 
575   if (e1->succ_flags.length () != e2->succ_flags.length ())
576     return 0;
577 
578   if (!bitmap_equal_p (e1->succs, e2->succs))
579     return 0;
580 
581   if (!inverse_flags (e1, e2))
582     {
583       for (i = 0; i < e1->succ_flags.length (); ++i)
584 	if (e1->succ_flags[i] != e2->succ_flags[i])
585 	  return 0;
586     }
587 
588   first1 = bitmap_first_set_bit (e1->bbs);
589   first2 = bitmap_first_set_bit (e2->bbs);
590 
591   bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
592   bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
593 
594   if (BB_SIZE (bb1) != BB_SIZE (bb2))
595     return 0;
596 
597   if (bb1->loop_father != bb2->loop_father)
598     return 0;
599 
600   gsi1 = gsi_start_nondebug_bb (bb1);
601   gsi2 = gsi_start_nondebug_bb (bb2);
602   gsi_advance_fw_nondebug_nonlocal (&gsi1);
603   gsi_advance_fw_nondebug_nonlocal (&gsi2);
604   while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
605     {
606       s1 = gsi_stmt (gsi1);
607       s2 = gsi_stmt (gsi2);
608       if (gimple_code (s1) != gimple_code (s2))
609 	return 0;
610       if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
611 	return 0;
612       gsi_next_nondebug (&gsi1);
613       gsi_next_nondebug (&gsi2);
614       gsi_advance_fw_nondebug_nonlocal (&gsi1);
615       gsi_advance_fw_nondebug_nonlocal (&gsi2);
616     }
617 
618   return 1;
619 }
620 
621 /* Alloc and init a new SAME_SUCC.  */
622 
623 static same_succ *
same_succ_alloc(void)624 same_succ_alloc (void)
625 {
626   same_succ *same = XNEW (struct same_succ);
627 
628   same->bbs = BITMAP_ALLOC (NULL);
629   same->succs = BITMAP_ALLOC (NULL);
630   same->inverse = BITMAP_ALLOC (NULL);
631   same->succ_flags.create (10);
632   same->in_worklist = false;
633 
634   return same;
635 }
636 
637 /* Delete same_succ E.  */
638 
639 void
remove(same_succ * e)640 same_succ::remove (same_succ *e)
641 {
642   BITMAP_FREE (e->bbs);
643   BITMAP_FREE (e->succs);
644   BITMAP_FREE (e->inverse);
645   e->succ_flags.release ();
646 
647   XDELETE (e);
648 }
649 
650 /* Reset same_succ SAME.  */
651 
652 static void
same_succ_reset(same_succ * same)653 same_succ_reset (same_succ *same)
654 {
655   bitmap_clear (same->bbs);
656   bitmap_clear (same->succs);
657   bitmap_clear (same->inverse);
658   same->succ_flags.truncate (0);
659 }
660 
661 static hash_table<same_succ> *same_succ_htab;
662 
663 /* Array that is used to store the edge flags for a successor.  */
664 
665 static int *same_succ_edge_flags;
666 
667 /* Bitmap that is used to mark bbs that are recently deleted.  */
668 
669 static bitmap deleted_bbs;
670 
671 /* Bitmap that is used to mark predecessors of bbs that are
672    deleted.  */
673 
674 static bitmap deleted_bb_preds;
675 
676 /* Prints same_succ_htab to stderr.  */
677 
678 extern void debug_same_succ (void);
679 DEBUG_FUNCTION void
debug_same_succ(void)680 debug_same_succ ( void)
681 {
682   same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
683 }
684 
685 
686 /* Vector of bbs to process.  */
687 
688 static vec<same_succ *> worklist;
689 
690 /* Prints worklist to FILE.  */
691 
692 static void
print_worklist(FILE * file)693 print_worklist (FILE *file)
694 {
695   unsigned int i;
696   for (i = 0; i < worklist.length (); ++i)
697     same_succ_print (file, worklist[i]);
698 }
699 
700 /* Adds SAME to worklist.  */
701 
702 static void
add_to_worklist(same_succ * same)703 add_to_worklist (same_succ *same)
704 {
705   if (same->in_worklist)
706     return;
707 
708   if (bitmap_count_bits (same->bbs) < 2)
709     return;
710 
711   same->in_worklist = true;
712   worklist.safe_push (same);
713 }
714 
715 /* Add BB to same_succ_htab.  */
716 
717 static void
find_same_succ_bb(basic_block bb,same_succ ** same_p)718 find_same_succ_bb (basic_block bb, same_succ **same_p)
719 {
720   unsigned int j;
721   bitmap_iterator bj;
722   same_succ *same = *same_p;
723   same_succ **slot;
724   edge_iterator ei;
725   edge e;
726 
727   if (bb == NULL)
728     return;
729   bitmap_set_bit (same->bbs, bb->index);
730   FOR_EACH_EDGE (e, ei, bb->succs)
731     {
732       int index = e->dest->index;
733       bitmap_set_bit (same->succs, index);
734       same_succ_edge_flags[index] = (e->flags & ~ignore_edge_flags);
735     }
736   EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
737     same->succ_flags.safe_push (same_succ_edge_flags[j]);
738 
739   same->hashval = same_succ_hash (same);
740 
741   slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
742   if (*slot == NULL)
743     {
744       *slot = same;
745       BB_SAME_SUCC (bb) = same;
746       add_to_worklist (same);
747       *same_p = NULL;
748     }
749   else
750     {
751       bitmap_set_bit ((*slot)->bbs, bb->index);
752       BB_SAME_SUCC (bb) = *slot;
753       add_to_worklist (*slot);
754       if (inverse_flags (same, *slot))
755 	bitmap_set_bit ((*slot)->inverse, bb->index);
756       same_succ_reset (same);
757     }
758 }
759 
760 /* Find bbs with same successors.  */
761 
762 static void
find_same_succ(void)763 find_same_succ (void)
764 {
765   same_succ *same = same_succ_alloc ();
766   basic_block bb;
767 
768   FOR_EACH_BB_FN (bb, cfun)
769     {
770       find_same_succ_bb (bb, &same);
771       if (same == NULL)
772 	same = same_succ_alloc ();
773     }
774 
775   same_succ::remove (same);
776 }
777 
778 /* Initializes worklist administration.  */
779 
780 static void
init_worklist(void)781 init_worklist (void)
782 {
783   alloc_aux_for_blocks (sizeof (struct aux_bb_info));
784   same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
785   same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
786   deleted_bbs = BITMAP_ALLOC (NULL);
787   deleted_bb_preds = BITMAP_ALLOC (NULL);
788   worklist.create (n_basic_blocks_for_fn (cfun));
789   find_same_succ ();
790 
791   if (dump_file && (dump_flags & TDF_DETAILS))
792     {
793       fprintf (dump_file, "initial worklist:\n");
794       print_worklist (dump_file);
795     }
796 }
797 
798 /* Deletes worklist administration.  */
799 
800 static void
delete_worklist(void)801 delete_worklist (void)
802 {
803   free_aux_for_blocks ();
804   delete same_succ_htab;
805   same_succ_htab = NULL;
806   XDELETEVEC (same_succ_edge_flags);
807   same_succ_edge_flags = NULL;
808   BITMAP_FREE (deleted_bbs);
809   BITMAP_FREE (deleted_bb_preds);
810   worklist.release ();
811 }
812 
813 /* Mark BB as deleted, and mark its predecessors.  */
814 
815 static void
mark_basic_block_deleted(basic_block bb)816 mark_basic_block_deleted (basic_block bb)
817 {
818   edge e;
819   edge_iterator ei;
820 
821   bitmap_set_bit (deleted_bbs, bb->index);
822 
823   FOR_EACH_EDGE (e, ei, bb->preds)
824     bitmap_set_bit (deleted_bb_preds, e->src->index);
825 }
826 
827 /* Removes BB from its corresponding same_succ.  */
828 
829 static void
same_succ_flush_bb(basic_block bb)830 same_succ_flush_bb (basic_block bb)
831 {
832   same_succ *same = BB_SAME_SUCC (bb);
833   if (! same)
834     return;
835 
836   BB_SAME_SUCC (bb) = NULL;
837   if (bitmap_single_bit_set_p (same->bbs))
838     same_succ_htab->remove_elt_with_hash (same, same->hashval);
839   else
840     bitmap_clear_bit (same->bbs, bb->index);
841 }
842 
843 /* Removes all bbs in BBS from their corresponding same_succ.  */
844 
845 static void
same_succ_flush_bbs(bitmap bbs)846 same_succ_flush_bbs (bitmap bbs)
847 {
848   unsigned int i;
849   bitmap_iterator bi;
850 
851   EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
852     same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
853 }
854 
855 /* Release the last vdef in BB, either normal or phi result.  */
856 
857 static void
release_last_vdef(basic_block bb)858 release_last_vdef (basic_block bb)
859 {
860   for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
861        gsi_prev_nondebug (&i))
862     {
863       gimple *stmt = gsi_stmt (i);
864       if (gimple_vdef (stmt) == NULL_TREE)
865 	continue;
866 
867       mark_virtual_operand_for_renaming (gimple_vdef (stmt));
868       return;
869     }
870 
871   for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
872        gsi_next (&i))
873     {
874       gphi *phi = i.phi ();
875       tree res = gimple_phi_result (phi);
876 
877       if (!virtual_operand_p (res))
878 	continue;
879 
880       mark_virtual_phi_result_for_renaming (phi);
881       return;
882     }
883 }
884 
885 /* For deleted_bb_preds, find bbs with same successors.  */
886 
887 static void
update_worklist(void)888 update_worklist (void)
889 {
890   unsigned int i;
891   bitmap_iterator bi;
892   basic_block bb;
893   same_succ *same;
894 
895   bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
896   bitmap_clear (deleted_bbs);
897 
898   bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
899   same_succ_flush_bbs (deleted_bb_preds);
900 
901   same = same_succ_alloc ();
902   EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
903     {
904       bb = BASIC_BLOCK_FOR_FN (cfun, i);
905       gcc_assert (bb != NULL);
906       find_same_succ_bb (bb, &same);
907       if (same == NULL)
908 	same = same_succ_alloc ();
909     }
910   same_succ::remove (same);
911   bitmap_clear (deleted_bb_preds);
912 }
913 
914 /* Prints cluster C to FILE.  */
915 
916 static void
print_cluster(FILE * file,bb_cluster * c)917 print_cluster (FILE *file, bb_cluster *c)
918 {
919   if (c == NULL)
920     return;
921   bitmap_print (file, c->bbs, "bbs:", "\n");
922   bitmap_print (file, c->preds, "preds:", "\n");
923 }
924 
925 /* Prints cluster C to stderr.  */
926 
927 extern void debug_cluster (bb_cluster *);
928 DEBUG_FUNCTION void
debug_cluster(bb_cluster * c)929 debug_cluster (bb_cluster *c)
930 {
931   print_cluster (stderr, c);
932 }
933 
934 /* Update C->rep_bb, given that BB is added to the cluster.  */
935 
936 static void
update_rep_bb(bb_cluster * c,basic_block bb)937 update_rep_bb (bb_cluster *c, basic_block bb)
938 {
939   /* Initial.  */
940   if (c->rep_bb == NULL)
941     {
942       c->rep_bb = bb;
943       return;
944     }
945 
946   /* Current needs no deps, keep it.  */
947   if (BB_DEP_BB (c->rep_bb) == NULL)
948     return;
949 
950   /* Bb needs no deps, change rep_bb.  */
951   if (BB_DEP_BB (bb) == NULL)
952     {
953       c->rep_bb = bb;
954       return;
955     }
956 
957   /* Bb needs last deps earlier than current, change rep_bb.  A potential
958      problem with this, is that the first deps might also be earlier, which
959      would mean we prefer longer lifetimes for the deps.  To be able to check
960      for this, we would have to trace BB_FIRST_DEP_BB as well, besides
961      BB_DEP_BB, which is really BB_LAST_DEP_BB.
962      The benefit of choosing the bb with last deps earlier, is that it can
963      potentially be used as replacement for more bbs.  */
964   if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
965     c->rep_bb = bb;
966 }
967 
968 /* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */
969 
970 static void
add_bb_to_cluster(bb_cluster * c,basic_block bb)971 add_bb_to_cluster (bb_cluster *c, basic_block bb)
972 {
973   edge e;
974   edge_iterator ei;
975 
976   bitmap_set_bit (c->bbs, bb->index);
977 
978   FOR_EACH_EDGE (e, ei, bb->preds)
979     bitmap_set_bit (c->preds, e->src->index);
980 
981   update_rep_bb (c, bb);
982 }
983 
984 /* Allocate and init new cluster.  */
985 
986 static bb_cluster *
new_cluster(void)987 new_cluster (void)
988 {
989   bb_cluster *c;
990   c = XCNEW (bb_cluster);
991   c->bbs = BITMAP_ALLOC (NULL);
992   c->preds = BITMAP_ALLOC (NULL);
993   c->rep_bb = NULL;
994   return c;
995 }
996 
997 /* Delete clusters.  */
998 
999 static void
delete_cluster(bb_cluster * c)1000 delete_cluster (bb_cluster *c)
1001 {
1002   if (c == NULL)
1003     return;
1004   BITMAP_FREE (c->bbs);
1005   BITMAP_FREE (c->preds);
1006   XDELETE (c);
1007 }
1008 
1009 
1010 /* Array that contains all clusters.  */
1011 
1012 static vec<bb_cluster *> all_clusters;
1013 
1014 /* Allocate all cluster vectors.  */
1015 
1016 static void
alloc_cluster_vectors(void)1017 alloc_cluster_vectors (void)
1018 {
1019   all_clusters.create (n_basic_blocks_for_fn (cfun));
1020 }
1021 
1022 /* Reset all cluster vectors.  */
1023 
1024 static void
reset_cluster_vectors(void)1025 reset_cluster_vectors (void)
1026 {
1027   unsigned int i;
1028   basic_block bb;
1029   for (i = 0; i < all_clusters.length (); ++i)
1030     delete_cluster (all_clusters[i]);
1031   all_clusters.truncate (0);
1032   FOR_EACH_BB_FN (bb, cfun)
1033     BB_CLUSTER (bb) = NULL;
1034 }
1035 
1036 /* Delete all cluster vectors.  */
1037 
1038 static void
delete_cluster_vectors(void)1039 delete_cluster_vectors (void)
1040 {
1041   unsigned int i;
1042   for (i = 0; i < all_clusters.length (); ++i)
1043     delete_cluster (all_clusters[i]);
1044   all_clusters.release ();
1045 }
1046 
1047 /* Merge cluster C2 into C1.  */
1048 
1049 static void
merge_clusters(bb_cluster * c1,bb_cluster * c2)1050 merge_clusters (bb_cluster *c1, bb_cluster *c2)
1051 {
1052   bitmap_ior_into (c1->bbs, c2->bbs);
1053   bitmap_ior_into (c1->preds, c2->preds);
1054 }
1055 
1056 /* Register equivalence of BB1 and BB2 (members of cluster C).  Store c in
1057    all_clusters, or merge c with existing cluster.  */
1058 
1059 static void
set_cluster(basic_block bb1,basic_block bb2)1060 set_cluster (basic_block bb1, basic_block bb2)
1061 {
1062   basic_block merge_bb, other_bb;
1063   bb_cluster *merge, *old, *c;
1064 
1065   if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1066     {
1067       c = new_cluster ();
1068       add_bb_to_cluster (c, bb1);
1069       add_bb_to_cluster (c, bb2);
1070       BB_CLUSTER (bb1) = c;
1071       BB_CLUSTER (bb2) = c;
1072       c->index = all_clusters.length ();
1073       all_clusters.safe_push (c);
1074     }
1075   else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1076     {
1077       merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1078       other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1079       merge = BB_CLUSTER (merge_bb);
1080       add_bb_to_cluster (merge, other_bb);
1081       BB_CLUSTER (other_bb) = merge;
1082     }
1083   else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1084     {
1085       unsigned int i;
1086       bitmap_iterator bi;
1087 
1088       old = BB_CLUSTER (bb2);
1089       merge = BB_CLUSTER (bb1);
1090       merge_clusters (merge, old);
1091       EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1092 	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1093       all_clusters[old->index] = NULL;
1094       update_rep_bb (merge, old->rep_bb);
1095       delete_cluster (old);
1096     }
1097   else
1098     gcc_unreachable ();
1099 }
1100 
1101 /* Return true if gimple operands T1 and T2 have the same value.  */
1102 
1103 static bool
gimple_operand_equal_value_p(tree t1,tree t2)1104 gimple_operand_equal_value_p (tree t1, tree t2)
1105 {
1106   if (t1 == t2)
1107     return true;
1108 
1109   if (t1 == NULL_TREE
1110       || t2 == NULL_TREE)
1111     return false;
1112 
1113   if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
1114     return true;
1115 
1116   return gvn_uses_equal (t1, t2);
1117 }
1118 
1119 /* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
1120    gimple_bb (s2) are members of SAME_SUCC.  */
1121 
1122 static bool
gimple_equal_p(same_succ * same_succ,gimple * s1,gimple * s2)1123 gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
1124 {
1125   unsigned int i;
1126   tree lhs1, lhs2;
1127   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1128   tree t1, t2;
1129   bool inv_cond;
1130   enum tree_code code1, code2;
1131 
1132   if (gimple_code (s1) != gimple_code (s2))
1133     return false;
1134 
1135   switch (gimple_code (s1))
1136     {
1137     case GIMPLE_CALL:
1138       if (!gimple_call_same_target_p (s1, s2))
1139 	return false;
1140 
1141       t1 = gimple_call_chain (s1);
1142       t2 = gimple_call_chain (s2);
1143       if (!gimple_operand_equal_value_p (t1, t2))
1144 	return false;
1145 
1146       if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1147 	return false;
1148 
1149       for (i = 0; i < gimple_call_num_args (s1); ++i)
1150 	{
1151 	  t1 = gimple_call_arg (s1, i);
1152 	  t2 = gimple_call_arg (s2, i);
1153 	  if (!gimple_operand_equal_value_p (t1, t2))
1154 	    return false;
1155 	}
1156 
1157       lhs1 = gimple_get_lhs (s1);
1158       lhs2 = gimple_get_lhs (s2);
1159       if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1160 	return true;
1161       if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1162 	return false;
1163       if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1164 	return tail_merge_valueize (lhs1) == tail_merge_valueize (lhs2);
1165       return operand_equal_p (lhs1, lhs2, 0);
1166 
1167     case GIMPLE_ASSIGN:
1168       if (gimple_assign_rhs_code (s1) != gimple_assign_rhs_code (s2))
1169 	return false;
1170 
1171       lhs1 = gimple_get_lhs (s1);
1172       lhs2 = gimple_get_lhs (s2);
1173       if (TREE_CODE (lhs1) != SSA_NAME
1174 	  && TREE_CODE (lhs2) != SSA_NAME)
1175 	return (operand_equal_p (lhs1, lhs2, 0)
1176 		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1177 						 gimple_assign_rhs1 (s2)));
1178 
1179       if (TREE_CODE (lhs1) != SSA_NAME
1180 	  || TREE_CODE (lhs2) != SSA_NAME)
1181 	return false;
1182 
1183       gcc_checking_assert (gimple_num_args (s1) == gimple_num_args (s2));
1184       for (i = 0; i < gimple_num_args (s1); ++i)
1185 	{
1186 	  t1 = gimple_arg (s1, i);
1187 	  t2 = gimple_arg (s2, i);
1188 	  if (!gimple_operand_equal_value_p (t1, t2))
1189 	    return false;
1190 	}
1191       return true;
1192 
1193     case GIMPLE_COND:
1194       t1 = gimple_cond_lhs (s1);
1195       t2 = gimple_cond_lhs (s2);
1196       if (!gimple_operand_equal_value_p (t1, t2))
1197 	return false;
1198 
1199       t1 = gimple_cond_rhs (s1);
1200       t2 = gimple_cond_rhs (s2);
1201       if (!gimple_operand_equal_value_p (t1, t2))
1202 	return false;
1203 
1204       code1 = gimple_cond_code (s1);
1205       code2 = gimple_cond_code (s2);
1206       inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1207 		  != bitmap_bit_p (same_succ->inverse, bb2->index));
1208       if (inv_cond)
1209 	{
1210 	  bool honor_nans = HONOR_NANS (t1);
1211 	  code2 = invert_tree_comparison (code2, honor_nans);
1212 	}
1213       return code1 == code2;
1214 
1215     default:
1216       return false;
1217     }
1218 }
1219 
1220 /* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
1221    Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1222    processed statements.  */
1223 
1224 static void
gsi_advance_bw_nondebug_nonlocal(gimple_stmt_iterator * gsi,tree * vuse,bool * vuse_escaped)1225 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1226 				  bool *vuse_escaped)
1227 {
1228   gimple *stmt;
1229   tree lvuse;
1230 
1231   while (true)
1232     {
1233       if (gsi_end_p (*gsi))
1234 	return;
1235       stmt = gsi_stmt (*gsi);
1236 
1237       lvuse = gimple_vuse (stmt);
1238       if (lvuse != NULL_TREE)
1239 	{
1240 	  *vuse = lvuse;
1241 	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1242 	    *vuse_escaped = true;
1243 	}
1244 
1245       if (!stmt_local_def (stmt))
1246 	return;
1247       gsi_prev_nondebug (gsi);
1248     }
1249 }
1250 
1251 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
1252    STMT2 are allowed to be merged.  */
1253 
1254 static bool
merge_stmts_p(gimple * stmt1,gimple * stmt2)1255 merge_stmts_p (gimple *stmt1, gimple *stmt2)
1256 {
1257   /* What could be better than this here is to blacklist the bb
1258      containing the stmt, when encountering the stmt f.i. in
1259      same_succ_hash.  */
1260   if (is_tm_ending (stmt1))
1261     return false;
1262 
1263   /* Verify EH landing pads.  */
1264   if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
1265     return false;
1266 
1267   if (is_gimple_call (stmt1)
1268       && gimple_call_internal_p (stmt1))
1269     switch (gimple_call_internal_fn (stmt1))
1270       {
1271       case IFN_UBSAN_NULL:
1272       case IFN_UBSAN_BOUNDS:
1273       case IFN_UBSAN_VPTR:
1274       case IFN_UBSAN_CHECK_ADD:
1275       case IFN_UBSAN_CHECK_SUB:
1276       case IFN_UBSAN_CHECK_MUL:
1277       case IFN_UBSAN_OBJECT_SIZE:
1278       case IFN_UBSAN_PTR:
1279       case IFN_ASAN_CHECK:
1280 	/* For these internal functions, gimple_location is an implicit
1281 	   parameter, which will be used explicitly after expansion.
1282 	   Merging these statements may cause confusing line numbers in
1283 	   sanitizer messages.  */
1284 	return gimple_location (stmt1) == gimple_location (stmt2);
1285       default:
1286 	break;
1287       }
1288 
1289   return true;
1290 }
1291 
1292 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
1293    clusters them.  */
1294 
1295 static void
find_duplicate(same_succ * same_succ,basic_block bb1,basic_block bb2)1296 find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
1297 {
1298   gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1299   gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1300   tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1301   bool vuse_escaped = false;
1302 
1303   gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1304   gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1305 
1306   while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1307     {
1308       gimple *stmt1 = gsi_stmt (gsi1);
1309       gimple *stmt2 = gsi_stmt (gsi2);
1310 
1311       if (gimple_code (stmt1) == GIMPLE_LABEL
1312 	  && gimple_code (stmt2) == GIMPLE_LABEL)
1313 	break;
1314 
1315       if (!gimple_equal_p (same_succ, stmt1, stmt2))
1316 	return;
1317 
1318       if (!merge_stmts_p (stmt1, stmt2))
1319 	return;
1320 
1321       gsi_prev_nondebug (&gsi1);
1322       gsi_prev_nondebug (&gsi2);
1323       gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1324       gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1325     }
1326 
1327   while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1328     {
1329       tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
1330       if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1331 	return;
1332       gsi_prev (&gsi1);
1333     }
1334   while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
1335     {
1336       tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
1337       if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1338 	return;
1339       gsi_prev (&gsi2);
1340     }
1341   if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1342     return;
1343 
1344   /* If the incoming vuses are not the same, and the vuse escaped into an
1345      SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1346      which potentially means the semantics of one of the blocks will be changed.
1347      TODO: make this check more precise.  */
1348   if (vuse_escaped && vuse1 != vuse2)
1349     return;
1350 
1351   if (dump_file)
1352     fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1353 	     bb1->index, bb2->index);
1354 
1355   set_cluster (bb1, bb2);
1356 }
1357 
1358 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1359    E2 are equal.  */
1360 
1361 static bool
same_phi_alternatives_1(basic_block dest,edge e1,edge e2)1362 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1363 {
1364   int n1 = e1->dest_idx, n2 = e2->dest_idx;
1365   gphi_iterator gsi;
1366 
1367   for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1368     {
1369       gphi *phi = gsi.phi ();
1370       tree lhs = gimple_phi_result (phi);
1371       tree val1 = gimple_phi_arg_def (phi, n1);
1372       tree val2 = gimple_phi_arg_def (phi, n2);
1373 
1374       if (virtual_operand_p (lhs))
1375 	continue;
1376 
1377       if (operand_equal_for_phi_arg_p (val1, val2))
1378 	continue;
1379       if (gvn_uses_equal (val1, val2))
1380 	continue;
1381 
1382       return false;
1383     }
1384 
1385   return true;
1386 }
1387 
1388 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1389    phi alternatives for BB1 and BB2 are equal.  */
1390 
1391 static bool
same_phi_alternatives(same_succ * same_succ,basic_block bb1,basic_block bb2)1392 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
1393 {
1394   unsigned int s;
1395   bitmap_iterator bs;
1396   edge e1, e2;
1397   basic_block succ;
1398 
1399   EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1400     {
1401       succ = BASIC_BLOCK_FOR_FN (cfun, s);
1402       e1 = find_edge (bb1, succ);
1403       e2 = find_edge (bb2, succ);
1404       if (e1->flags & EDGE_COMPLEX
1405 	  || e2->flags & EDGE_COMPLEX)
1406 	return false;
1407 
1408       /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1409 	 the same value.  */
1410       if (!same_phi_alternatives_1 (succ, e1, e2))
1411 	return false;
1412     }
1413 
1414   return true;
1415 }
1416 
1417 /* Return true if BB has non-vop phis.  */
1418 
1419 static bool
bb_has_non_vop_phi(basic_block bb)1420 bb_has_non_vop_phi (basic_block bb)
1421 {
1422   gimple_seq phis = phi_nodes (bb);
1423   gimple *phi;
1424 
1425   if (phis == NULL)
1426     return false;
1427 
1428   if (!gimple_seq_singleton_p (phis))
1429     return true;
1430 
1431   phi = gimple_seq_first_stmt (phis);
1432   return !virtual_operand_p (gimple_phi_result (phi));
1433 }
1434 
1435 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1436    invariant that uses in FROM are dominates by their defs.  */
1437 
1438 static bool
deps_ok_for_redirect_from_bb_to_bb(basic_block from,basic_block to)1439 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1440 {
1441   basic_block cd, dep_bb = BB_DEP_BB (to);
1442   edge_iterator ei;
1443   edge e;
1444 
1445   if (dep_bb == NULL)
1446     return true;
1447 
1448   bitmap from_preds = BITMAP_ALLOC (NULL);
1449   FOR_EACH_EDGE (e, ei, from->preds)
1450     bitmap_set_bit (from_preds, e->src->index);
1451   cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1452   BITMAP_FREE (from_preds);
1453 
1454   return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1455 }
1456 
1457 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1458    replacement bb) and vice versa maintains the invariant that uses in the
1459    replacement are dominates by their defs.  */
1460 
1461 static bool
deps_ok_for_redirect(basic_block bb1,basic_block bb2)1462 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1463 {
1464   if (BB_CLUSTER (bb1) != NULL)
1465     bb1 = BB_CLUSTER (bb1)->rep_bb;
1466 
1467   if (BB_CLUSTER (bb2) != NULL)
1468     bb2 = BB_CLUSTER (bb2)->rep_bb;
1469 
1470   return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1471 	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1472 }
1473 
1474 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */
1475 
1476 static void
find_clusters_1(same_succ * same_succ)1477 find_clusters_1 (same_succ *same_succ)
1478 {
1479   basic_block bb1, bb2;
1480   unsigned int i, j;
1481   bitmap_iterator bi, bj;
1482   int nr_comparisons;
1483   int max_comparisons = param_max_tail_merge_comparisons;
1484 
1485   EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1486     {
1487       bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
1488 
1489       /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
1490 	 phi-nodes in bb1 and bb2, with the same alternatives for the same
1491 	 preds.  */
1492       if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1)
1493 	  || bb_has_abnormal_pred (bb1))
1494 	continue;
1495 
1496       nr_comparisons = 0;
1497       EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1498 	{
1499 	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1500 
1501 	  if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2)
1502 	      || bb_has_abnormal_pred (bb2))
1503 	    continue;
1504 
1505 	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1506 	    continue;
1507 
1508 	  /* Limit quadratic behavior.  */
1509 	  nr_comparisons++;
1510 	  if (nr_comparisons > max_comparisons)
1511 	    break;
1512 
1513 	  /* This is a conservative dependency check.  We could test more
1514 	     precise for allowed replacement direction.  */
1515 	  if (!deps_ok_for_redirect (bb1, bb2))
1516 	    continue;
1517 
1518 	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1519 	    continue;
1520 
1521 	  find_duplicate (same_succ, bb1, bb2);
1522 	}
1523     }
1524 }
1525 
1526 /* Find clusters of bbs which can be merged.  */
1527 
1528 static void
find_clusters(void)1529 find_clusters (void)
1530 {
1531   same_succ *same;
1532 
1533   while (!worklist.is_empty ())
1534     {
1535       same = worklist.pop ();
1536       same->in_worklist = false;
1537       if (dump_file && (dump_flags & TDF_DETAILS))
1538 	{
1539 	  fprintf (dump_file, "processing worklist entry\n");
1540 	  same_succ_print (dump_file, same);
1541 	}
1542       find_clusters_1 (same);
1543     }
1544 }
1545 
1546 /* Returns the vop phi of BB, if any.  */
1547 
1548 static gphi *
vop_phi(basic_block bb)1549 vop_phi (basic_block bb)
1550 {
1551   gphi *stmt;
1552   gphi_iterator gsi;
1553   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1554     {
1555       stmt = gsi.phi ();
1556       if (! virtual_operand_p (gimple_phi_result (stmt)))
1557 	continue;
1558       return stmt;
1559     }
1560   return NULL;
1561 }
1562 
1563 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.  */
1564 
1565 static void
replace_block_by(basic_block bb1,basic_block bb2)1566 replace_block_by (basic_block bb1, basic_block bb2)
1567 {
1568   edge pred_edge;
1569   unsigned int i;
1570   gphi *bb2_phi;
1571 
1572   bb2_phi = vop_phi (bb2);
1573 
1574   /* Mark the basic block as deleted.  */
1575   mark_basic_block_deleted (bb1);
1576 
1577   /* Redirect the incoming edges of bb1 to bb2.  */
1578   for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1579     {
1580       pred_edge = EDGE_PRED (bb1, i - 1);
1581       pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1582       gcc_assert (pred_edge != NULL);
1583 
1584       if (bb2_phi == NULL)
1585 	continue;
1586 
1587       /* The phi might have run out of capacity when the redirect added an
1588 	 argument, which means it could have been replaced.  Refresh it.  */
1589       bb2_phi = vop_phi (bb2);
1590 
1591       add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1592 		   pred_edge, UNKNOWN_LOCATION);
1593     }
1594 
1595 
1596   /* Merge the outgoing edge counts from bb1 onto bb2.  */
1597   edge e1, e2;
1598   edge_iterator ei;
1599 
1600   if (bb2->count.initialized_p ())
1601     FOR_EACH_EDGE (e1, ei, bb1->succs)
1602       {
1603         e2 = find_edge (bb2, e1->dest);
1604         gcc_assert (e2);
1605 
1606 	/* If probabilities are same, we are done.
1607 	   If counts are nonzero we can distribute accordingly. In remaining
1608 	   cases just avreage the values and hope for the best.  */
1609 	e2->probability = e1->probability.combine_with_count
1610 	                     (bb1->count, e2->probability, bb2->count);
1611       }
1612   bb2->count += bb1->count;
1613 
1614   /* Move over any user labels from bb1 after the bb2 labels.  */
1615   gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
1616   if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1617     {
1618       gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
1619       while (!gsi_end_p (gsi1)
1620 	     && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1621 	{
1622 	  tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
1623 	  gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
1624 	  if (DECL_ARTIFICIAL (label))
1625 	    gsi_next (&gsi1);
1626 	  else
1627 	    gsi_move_before (&gsi1, &gsi2);
1628 	}
1629     }
1630 
1631   /* Clear range info from all stmts in BB2 -- this transformation
1632      could make them out of date.  */
1633   reset_flow_sensitive_info_in_bb (bb2);
1634 
1635   /* Do updates that use bb1, before deleting bb1.  */
1636   release_last_vdef (bb1);
1637   same_succ_flush_bb (bb1);
1638 
1639   delete_basic_block (bb1);
1640 }
1641 
1642 /* Bbs for which update_debug_stmt need to be called.  */
1643 
1644 static bitmap update_bbs;
1645 
1646 /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
1647    number of bbs removed.  */
1648 
1649 static int
apply_clusters(void)1650 apply_clusters (void)
1651 {
1652   basic_block bb1, bb2;
1653   bb_cluster *c;
1654   unsigned int i, j;
1655   bitmap_iterator bj;
1656   int nr_bbs_removed = 0;
1657 
1658   for (i = 0; i < all_clusters.length (); ++i)
1659     {
1660       c = all_clusters[i];
1661       if (c == NULL)
1662 	continue;
1663 
1664       bb2 = c->rep_bb;
1665       bitmap_set_bit (update_bbs, bb2->index);
1666 
1667       bitmap_clear_bit (c->bbs, bb2->index);
1668       EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1669 	{
1670 	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1671 	  bitmap_clear_bit (update_bbs, bb1->index);
1672 
1673 	  replace_block_by (bb1, bb2);
1674 	  nr_bbs_removed++;
1675 	}
1676     }
1677 
1678   return nr_bbs_removed;
1679 }
1680 
1681 /* Resets debug statement STMT if it has uses that are not dominated by their
1682    defs.  */
1683 
1684 static void
update_debug_stmt(gimple * stmt)1685 update_debug_stmt (gimple *stmt)
1686 {
1687   use_operand_p use_p;
1688   ssa_op_iter oi;
1689   basic_block bbuse;
1690 
1691   if (!gimple_debug_bind_p (stmt))
1692     return;
1693 
1694   bbuse = gimple_bb (stmt);
1695   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1696     {
1697       tree name = USE_FROM_PTR (use_p);
1698       gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1699       basic_block bbdef = gimple_bb (def_stmt);
1700       if (bbdef == NULL || bbuse == bbdef
1701 	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1702 	continue;
1703 
1704       gimple_debug_bind_reset_value (stmt);
1705       update_stmt (stmt);
1706       break;
1707     }
1708 }
1709 
1710 /* Resets all debug statements that have uses that are not
1711    dominated by their defs.  */
1712 
1713 static void
update_debug_stmts(void)1714 update_debug_stmts (void)
1715 {
1716   basic_block bb;
1717   bitmap_iterator bi;
1718   unsigned int i;
1719 
1720   EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1721     {
1722       gimple *stmt;
1723       gimple_stmt_iterator gsi;
1724 
1725       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1726       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1727 	{
1728 	  stmt = gsi_stmt (gsi);
1729 	  if (!is_gimple_debug (stmt))
1730 	    continue;
1731 	  update_debug_stmt (stmt);
1732 	}
1733     }
1734 }
1735 
1736 /* Runs tail merge optimization.  */
1737 
1738 unsigned int
tail_merge_optimize(bool need_crit_edge_split)1739 tail_merge_optimize (bool need_crit_edge_split)
1740 {
1741   int nr_bbs_removed_total = 0;
1742   int nr_bbs_removed;
1743   bool loop_entered = false;
1744   int iteration_nr = 0;
1745   int max_iterations = param_max_tail_merge_iterations;
1746 
1747   if (!flag_tree_tail_merge
1748       || max_iterations == 0)
1749     return 0;
1750 
1751   timevar_push (TV_TREE_TAIL_MERGE);
1752 
1753   /* Re-split critical edges when PRE did a CFG cleanup.  */
1754   if (need_crit_edge_split)
1755     split_edges_for_insertion ();
1756 
1757   if (!dom_info_available_p (CDI_DOMINATORS))
1758     {
1759       /* PRE can leave us with unreachable blocks, remove them now.  */
1760       delete_unreachable_blocks ();
1761       calculate_dominance_info (CDI_DOMINATORS);
1762     }
1763   init_worklist ();
1764 
1765   while (!worklist.is_empty ())
1766     {
1767       if (!loop_entered)
1768 	{
1769 	  loop_entered = true;
1770 	  alloc_cluster_vectors ();
1771 	  update_bbs = BITMAP_ALLOC (NULL);
1772 	}
1773       else
1774 	reset_cluster_vectors ();
1775 
1776       iteration_nr++;
1777       if (dump_file && (dump_flags & TDF_DETAILS))
1778 	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1779 
1780       find_clusters ();
1781       gcc_assert (worklist.is_empty ());
1782       if (all_clusters.is_empty ())
1783 	break;
1784 
1785       nr_bbs_removed = apply_clusters ();
1786       nr_bbs_removed_total += nr_bbs_removed;
1787       if (nr_bbs_removed == 0)
1788 	break;
1789 
1790       free_dominance_info (CDI_DOMINATORS);
1791 
1792       if (iteration_nr == max_iterations)
1793 	break;
1794 
1795       calculate_dominance_info (CDI_DOMINATORS);
1796       update_worklist ();
1797     }
1798 
1799   if (dump_file && (dump_flags & TDF_DETAILS))
1800     fprintf (dump_file, "htab collision / search: %f\n",
1801 	     same_succ_htab->collisions ());
1802 
1803   if (nr_bbs_removed_total > 0)
1804     {
1805       if (MAY_HAVE_DEBUG_BIND_STMTS)
1806 	{
1807 	  calculate_dominance_info (CDI_DOMINATORS);
1808 	  update_debug_stmts ();
1809 	}
1810 
1811       if (dump_file && (dump_flags & TDF_DETAILS))
1812 	{
1813 	  fprintf (dump_file, "Before TODOs.\n");
1814 	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
1815 	}
1816 
1817       mark_virtual_operands_for_renaming (cfun);
1818     }
1819 
1820   delete_worklist ();
1821   if (loop_entered)
1822     {
1823       delete_cluster_vectors ();
1824       BITMAP_FREE (update_bbs);
1825     }
1826 
1827   timevar_pop (TV_TREE_TAIL_MERGE);
1828 
1829   return 0;
1830 }
1831