xref: /dflybsd-src/contrib/gcc-8.0/gcc/tree-ssa-threadupdate.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
138fd1498Szrj /* Thread edges through blocks and update the control flow and SSA graphs.
238fd1498Szrj    Copyright (C) 2004-2018 Free Software Foundation, Inc.
338fd1498Szrj 
438fd1498Szrj This file is part of GCC.
538fd1498Szrj 
638fd1498Szrj GCC is free software; you can redistribute it and/or modify
738fd1498Szrj it under the terms of the GNU General Public License as published by
838fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
938fd1498Szrj any later version.
1038fd1498Szrj 
1138fd1498Szrj GCC is distributed in the hope that it will be useful,
1238fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
1338fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1438fd1498Szrj GNU General Public License for more details.
1538fd1498Szrj 
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3.  If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>.  */
1938fd1498Szrj 
2038fd1498Szrj #include "config.h"
2138fd1498Szrj #include "system.h"
2238fd1498Szrj #include "coretypes.h"
2338fd1498Szrj #include "backend.h"
2438fd1498Szrj #include "tree.h"
2538fd1498Szrj #include "gimple.h"
2638fd1498Szrj #include "cfghooks.h"
2738fd1498Szrj #include "tree-pass.h"
2838fd1498Szrj #include "ssa.h"
2938fd1498Szrj #include "fold-const.h"
3038fd1498Szrj #include "cfganal.h"
3138fd1498Szrj #include "gimple-iterator.h"
3238fd1498Szrj #include "tree-ssa.h"
3338fd1498Szrj #include "tree-ssa-threadupdate.h"
3438fd1498Szrj #include "cfgloop.h"
3538fd1498Szrj #include "dbgcnt.h"
3638fd1498Szrj #include "tree-cfg.h"
3738fd1498Szrj #include "tree-vectorizer.h"
3838fd1498Szrj 
3938fd1498Szrj /* Given a block B, update the CFG and SSA graph to reflect redirecting
4038fd1498Szrj    one or more in-edges to B to instead reach the destination of an
4138fd1498Szrj    out-edge from B while preserving any side effects in B.
4238fd1498Szrj 
4338fd1498Szrj    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
4438fd1498Szrj    side effects of executing B.
4538fd1498Szrj 
4638fd1498Szrj      1. Make a copy of B (including its outgoing edges and statements).  Call
4738fd1498Szrj 	the copy B'.  Note B' has no incoming edges or PHIs at this time.
4838fd1498Szrj 
4938fd1498Szrj      2. Remove the control statement at the end of B' and all outgoing edges
5038fd1498Szrj 	except B'->C.
5138fd1498Szrj 
5238fd1498Szrj      3. Add a new argument to each PHI in C with the same value as the existing
5338fd1498Szrj 	argument associated with edge B->C.  Associate the new PHI arguments
5438fd1498Szrj 	with the edge B'->C.
5538fd1498Szrj 
5638fd1498Szrj      4. For each PHI in B, find or create a PHI in B' with an identical
5738fd1498Szrj 	PHI_RESULT.  Add an argument to the PHI in B' which has the same
5838fd1498Szrj 	value as the PHI in B associated with the edge A->B.  Associate
5938fd1498Szrj 	the new argument in the PHI in B' with the edge A->B.
6038fd1498Szrj 
6138fd1498Szrj      5. Change the edge A->B to A->B'.
6238fd1498Szrj 
6338fd1498Szrj 	5a. This automatically deletes any PHI arguments associated with the
6438fd1498Szrj 	    edge A->B in B.
6538fd1498Szrj 
6638fd1498Szrj 	5b. This automatically associates each new argument added in step 4
6738fd1498Szrj 	    with the edge A->B'.
6838fd1498Szrj 
6938fd1498Szrj      6. Repeat for other incoming edges into B.
7038fd1498Szrj 
7138fd1498Szrj      7. Put the duplicated resources in B and all the B' blocks into SSA form.
7238fd1498Szrj 
7338fd1498Szrj    Note that block duplication can be minimized by first collecting the
7438fd1498Szrj    set of unique destination blocks that the incoming edges should
7538fd1498Szrj    be threaded to.
7638fd1498Szrj 
7738fd1498Szrj    We reduce the number of edges and statements we create by not copying all
7838fd1498Szrj    the outgoing edges and the control statement in step #1.  We instead create
7938fd1498Szrj    a template block without the outgoing edges and duplicate the template.
8038fd1498Szrj 
8138fd1498Szrj    Another case this code handles is threading through a "joiner" block.  In
8238fd1498Szrj    this case, we do not know the destination of the joiner block, but one
8338fd1498Szrj    of the outgoing edges from the joiner block leads to a threadable path.  This
8438fd1498Szrj    case largely works as outlined above, except the duplicate of the joiner
8538fd1498Szrj    block still contains a full set of outgoing edges and its control statement.
8638fd1498Szrj    We just redirect one of its outgoing edges to our jump threading path.  */
8738fd1498Szrj 
8838fd1498Szrj 
8938fd1498Szrj /* Steps #5 and #6 of the above algorithm are best implemented by walking
9038fd1498Szrj    all the incoming edges which thread to the same destination edge at
9138fd1498Szrj    the same time.  That avoids lots of table lookups to get information
9238fd1498Szrj    for the destination edge.
9338fd1498Szrj 
9438fd1498Szrj    To realize that implementation we create a list of incoming edges
9538fd1498Szrj    which thread to the same outgoing edge.  Thus to implement steps
9638fd1498Szrj    #5 and #6 we traverse our hash table of outgoing edge information.
9738fd1498Szrj    For each entry we walk the list of incoming edges which thread to
9838fd1498Szrj    the current outgoing edge.  */
9938fd1498Szrj 
10038fd1498Szrj struct el
10138fd1498Szrj {
10238fd1498Szrj   edge e;
10338fd1498Szrj   struct el *next;
10438fd1498Szrj };
10538fd1498Szrj 
10638fd1498Szrj /* Main data structure recording information regarding B's duplicate
10738fd1498Szrj    blocks.  */
10838fd1498Szrj 
10938fd1498Szrj /* We need to efficiently record the unique thread destinations of this
11038fd1498Szrj    block and specific information associated with those destinations.  We
11138fd1498Szrj    may have many incoming edges threaded to the same outgoing edge.  This
11238fd1498Szrj    can be naturally implemented with a hash table.  */
11338fd1498Szrj 
11438fd1498Szrj struct redirection_data : free_ptr_hash<redirection_data>
11538fd1498Szrj {
11638fd1498Szrj   /* We support wiring up two block duplicates in a jump threading path.
11738fd1498Szrj 
11838fd1498Szrj      One is a normal block copy where we remove the control statement
11938fd1498Szrj      and wire up its single remaining outgoing edge to the thread path.
12038fd1498Szrj 
12138fd1498Szrj      The other is a joiner block where we leave the control statement
12238fd1498Szrj      in place, but wire one of the outgoing edges to a thread path.
12338fd1498Szrj 
12438fd1498Szrj      In theory we could have multiple block duplicates in a jump
12538fd1498Szrj      threading path, but I haven't tried that.
12638fd1498Szrj 
12738fd1498Szrj      The duplicate blocks appear in this array in the same order in
12838fd1498Szrj      which they appear in the jump thread path.  */
12938fd1498Szrj   basic_block dup_blocks[2];
13038fd1498Szrj 
13138fd1498Szrj   /* The jump threading path.  */
13238fd1498Szrj   vec<jump_thread_edge *> *path;
13338fd1498Szrj 
13438fd1498Szrj   /* A list of incoming edges which we want to thread to the
13538fd1498Szrj      same path.  */
13638fd1498Szrj   struct el *incoming_edges;
13738fd1498Szrj 
13838fd1498Szrj   /* hash_table support.  */
13938fd1498Szrj   static inline hashval_t hash (const redirection_data *);
14038fd1498Szrj   static inline int equal (const redirection_data *, const redirection_data *);
14138fd1498Szrj };
14238fd1498Szrj 
14338fd1498Szrj /* Dump a jump threading path, including annotations about each
14438fd1498Szrj    edge in the path.  */
14538fd1498Szrj 
14638fd1498Szrj static void
dump_jump_thread_path(FILE * dump_file,vec<jump_thread_edge * > path,bool registering)14738fd1498Szrj dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
14838fd1498Szrj 		       bool registering)
14938fd1498Szrj {
15038fd1498Szrj   fprintf (dump_file,
15138fd1498Szrj 	   "  %s%s jump thread: (%d, %d) incoming edge; ",
15238fd1498Szrj 	   (registering ? "Registering" : "Cancelling"),
15338fd1498Szrj 	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
15438fd1498Szrj 	   path[0]->e->src->index, path[0]->e->dest->index);
15538fd1498Szrj 
15638fd1498Szrj   for (unsigned int i = 1; i < path.length (); i++)
15738fd1498Szrj     {
15838fd1498Szrj       /* We can get paths with a NULL edge when the final destination
15938fd1498Szrj 	 of a jump thread turns out to be a constant address.  We dump
16038fd1498Szrj 	 those paths when debugging, so we have to be prepared for that
16138fd1498Szrj 	 possibility here.  */
16238fd1498Szrj       if (path[i]->e == NULL)
16338fd1498Szrj 	continue;
16438fd1498Szrj 
16538fd1498Szrj       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
16638fd1498Szrj 	fprintf (dump_file, " (%d, %d) joiner; ",
16738fd1498Szrj 		 path[i]->e->src->index, path[i]->e->dest->index);
16838fd1498Szrj       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
16938fd1498Szrj        fprintf (dump_file, " (%d, %d) normal;",
17038fd1498Szrj 		 path[i]->e->src->index, path[i]->e->dest->index);
17138fd1498Szrj       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
17238fd1498Szrj        fprintf (dump_file, " (%d, %d) nocopy;",
17338fd1498Szrj 		 path[i]->e->src->index, path[i]->e->dest->index);
17438fd1498Szrj       if (path[0]->type == EDGE_FSM_THREAD)
17538fd1498Szrj 	fprintf (dump_file, " (%d, %d) ",
17638fd1498Szrj 		 path[i]->e->src->index, path[i]->e->dest->index);
17738fd1498Szrj     }
17838fd1498Szrj   fputc ('\n', dump_file);
17938fd1498Szrj }
18038fd1498Szrj 
18138fd1498Szrj /* Simple hashing function.  For any given incoming edge E, we're going
18238fd1498Szrj    to be most concerned with the final destination of its jump thread
18338fd1498Szrj    path.  So hash on the block index of the final edge in the path.  */
18438fd1498Szrj 
18538fd1498Szrj inline hashval_t
hash(const redirection_data * p)18638fd1498Szrj redirection_data::hash (const redirection_data *p)
18738fd1498Szrj {
18838fd1498Szrj   vec<jump_thread_edge *> *path = p->path;
18938fd1498Szrj   return path->last ()->e->dest->index;
19038fd1498Szrj }
19138fd1498Szrj 
19238fd1498Szrj /* Given two hash table entries, return true if they have the same
19338fd1498Szrj    jump threading path.  */
19438fd1498Szrj inline int
equal(const redirection_data * p1,const redirection_data * p2)19538fd1498Szrj redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
19638fd1498Szrj {
19738fd1498Szrj   vec<jump_thread_edge *> *path1 = p1->path;
19838fd1498Szrj   vec<jump_thread_edge *> *path2 = p2->path;
19938fd1498Szrj 
20038fd1498Szrj   if (path1->length () != path2->length ())
20138fd1498Szrj     return false;
20238fd1498Szrj 
20338fd1498Szrj   for (unsigned int i = 1; i < path1->length (); i++)
20438fd1498Szrj     {
20538fd1498Szrj       if ((*path1)[i]->type != (*path2)[i]->type
20638fd1498Szrj 	  || (*path1)[i]->e != (*path2)[i]->e)
20738fd1498Szrj 	return false;
20838fd1498Szrj     }
20938fd1498Szrj 
21038fd1498Szrj   return true;
21138fd1498Szrj }
21238fd1498Szrj 
21338fd1498Szrj /* Rather than search all the edges in jump thread paths each time
21438fd1498Szrj    DOM is able to simply if control statement, we build a hash table
21538fd1498Szrj    with the deleted edges.  We only care about the address of the edge,
21638fd1498Szrj    not its contents.  */
21738fd1498Szrj struct removed_edges : nofree_ptr_hash<edge_def>
21838fd1498Szrj {
hashremoved_edges21938fd1498Szrj   static hashval_t hash (edge e) { return htab_hash_pointer (e); }
equalremoved_edges22038fd1498Szrj   static bool equal (edge e1, edge e2) { return e1 == e2; }
22138fd1498Szrj };
22238fd1498Szrj 
22338fd1498Szrj static hash_table<removed_edges> *removed_edges;
22438fd1498Szrj 
22538fd1498Szrj /* Data structure of information to pass to hash table traversal routines.  */
22638fd1498Szrj struct ssa_local_info_t
22738fd1498Szrj {
22838fd1498Szrj   /* The current block we are working on.  */
22938fd1498Szrj   basic_block bb;
23038fd1498Szrj 
23138fd1498Szrj   /* We only create a template block for the first duplicated block in a
23238fd1498Szrj      jump threading path as we may need many duplicates of that block.
23338fd1498Szrj 
23438fd1498Szrj      The second duplicate block in a path is specific to that path.  Creating
23538fd1498Szrj      and sharing a template for that block is considerably more difficult.  */
23638fd1498Szrj   basic_block template_block;
23738fd1498Szrj 
23838fd1498Szrj   /* Blocks duplicated for the thread.  */
23938fd1498Szrj   bitmap duplicate_blocks;
24038fd1498Szrj 
24138fd1498Szrj   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
24238fd1498Szrj   bool jumps_threaded;
24338fd1498Szrj 
24438fd1498Szrj   /* When we have multiple paths through a joiner which reach different
24538fd1498Szrj      final destinations, then we may need to correct for potential
24638fd1498Szrj      profile insanities.  */
24738fd1498Szrj   bool need_profile_correction;
24838fd1498Szrj };
24938fd1498Szrj 
25038fd1498Szrj /* Passes which use the jump threading code register jump threading
25138fd1498Szrj    opportunities as they are discovered.  We keep the registered
25238fd1498Szrj    jump threading opportunities in this vector as edge pairs
25338fd1498Szrj    (original_edge, target_edge).  */
25438fd1498Szrj static vec<vec<jump_thread_edge *> *> paths;
25538fd1498Szrj 
25638fd1498Szrj /* When we start updating the CFG for threading, data necessary for jump
25738fd1498Szrj    threading is attached to the AUX field for the incoming edge.  Use these
25838fd1498Szrj    macros to access the underlying structure attached to the AUX field.  */
25938fd1498Szrj #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
26038fd1498Szrj 
26138fd1498Szrj /* Jump threading statistics.  */
26238fd1498Szrj 
26338fd1498Szrj struct thread_stats_d
26438fd1498Szrj {
26538fd1498Szrj   unsigned long num_threaded_edges;
26638fd1498Szrj };
26738fd1498Szrj 
26838fd1498Szrj struct thread_stats_d thread_stats;
26938fd1498Szrj 
27038fd1498Szrj 
27138fd1498Szrj /* Remove the last statement in block BB if it is a control statement
27238fd1498Szrj    Also remove all outgoing edges except the edge which reaches DEST_BB.
27338fd1498Szrj    If DEST_BB is NULL, then remove all outgoing edges.  */
27438fd1498Szrj 
27538fd1498Szrj void
remove_ctrl_stmt_and_useless_edges(basic_block bb,basic_block dest_bb)27638fd1498Szrj remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
27738fd1498Szrj {
27838fd1498Szrj   gimple_stmt_iterator gsi;
27938fd1498Szrj   edge e;
28038fd1498Szrj   edge_iterator ei;
28138fd1498Szrj 
28238fd1498Szrj   gsi = gsi_last_bb (bb);
28338fd1498Szrj 
28438fd1498Szrj   /* If the duplicate ends with a control statement, then remove it.
28538fd1498Szrj 
28638fd1498Szrj      Note that if we are duplicating the template block rather than the
28738fd1498Szrj      original basic block, then the duplicate might not have any real
28838fd1498Szrj      statements in it.  */
28938fd1498Szrj   if (!gsi_end_p (gsi)
29038fd1498Szrj       && gsi_stmt (gsi)
29138fd1498Szrj       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
29238fd1498Szrj 	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
29338fd1498Szrj 	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
29438fd1498Szrj     gsi_remove (&gsi, true);
29538fd1498Szrj 
29638fd1498Szrj   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
29738fd1498Szrj     {
29838fd1498Szrj       if (e->dest != dest_bb)
29938fd1498Szrj 	{
30038fd1498Szrj 	  free_dom_edge_info (e);
30138fd1498Szrj 	  remove_edge (e);
30238fd1498Szrj 	}
30338fd1498Szrj       else
30438fd1498Szrj 	{
30538fd1498Szrj 	  e->probability = profile_probability::always ();
30638fd1498Szrj 	  ei_next (&ei);
30738fd1498Szrj 	}
30838fd1498Szrj     }
30938fd1498Szrj 
31038fd1498Szrj   /* If the remaining edge is a loop exit, there must have
31138fd1498Szrj      a removed edge that was not a loop exit.
31238fd1498Szrj 
31338fd1498Szrj      In that case BB and possibly other blocks were previously
31438fd1498Szrj      in the loop, but are now outside the loop.  Thus, we need
31538fd1498Szrj      to update the loop structures.  */
31638fd1498Szrj   if (single_succ_p (bb)
31738fd1498Szrj       && loop_outer (bb->loop_father)
31838fd1498Szrj       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
31938fd1498Szrj     loops_state_set (LOOPS_NEED_FIXUP);
32038fd1498Szrj }
32138fd1498Szrj 
32238fd1498Szrj /* Create a duplicate of BB.  Record the duplicate block in an array
32338fd1498Szrj    indexed by COUNT stored in RD.  */
32438fd1498Szrj 
32538fd1498Szrj static void
create_block_for_threading(basic_block bb,struct redirection_data * rd,unsigned int count,bitmap * duplicate_blocks)32638fd1498Szrj create_block_for_threading (basic_block bb,
32738fd1498Szrj 			    struct redirection_data *rd,
32838fd1498Szrj 			    unsigned int count,
32938fd1498Szrj 			    bitmap *duplicate_blocks)
33038fd1498Szrj {
33138fd1498Szrj   edge_iterator ei;
33238fd1498Szrj   edge e;
33338fd1498Szrj 
33438fd1498Szrj   /* We can use the generic block duplication code and simply remove
33538fd1498Szrj      the stuff we do not need.  */
33638fd1498Szrj   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
33738fd1498Szrj 
33838fd1498Szrj   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
33938fd1498Szrj     e->aux = NULL;
34038fd1498Szrj 
34138fd1498Szrj   /* Zero out the profile, since the block is unreachable for now.  */
34238fd1498Szrj   rd->dup_blocks[count]->count = profile_count::uninitialized ();
34338fd1498Szrj   if (duplicate_blocks)
34438fd1498Szrj     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
34538fd1498Szrj }
34638fd1498Szrj 
34738fd1498Szrj /* Main data structure to hold information for duplicates of BB.  */
34838fd1498Szrj 
34938fd1498Szrj static hash_table<redirection_data> *redirection_data;
35038fd1498Szrj 
35138fd1498Szrj /* Given an outgoing edge E lookup and return its entry in our hash table.
35238fd1498Szrj 
35338fd1498Szrj    If INSERT is true, then we insert the entry into the hash table if
35438fd1498Szrj    it is not already present.  INCOMING_EDGE is added to the list of incoming
35538fd1498Szrj    edges associated with E in the hash table.  */
35638fd1498Szrj 
35738fd1498Szrj static struct redirection_data *
lookup_redirection_data(edge e,enum insert_option insert)35838fd1498Szrj lookup_redirection_data (edge e, enum insert_option insert)
35938fd1498Szrj {
36038fd1498Szrj   struct redirection_data **slot;
36138fd1498Szrj   struct redirection_data *elt;
36238fd1498Szrj   vec<jump_thread_edge *> *path = THREAD_PATH (e);
36338fd1498Szrj 
36438fd1498Szrj   /* Build a hash table element so we can see if E is already
36538fd1498Szrj      in the table.  */
36638fd1498Szrj   elt = XNEW (struct redirection_data);
36738fd1498Szrj   elt->path = path;
36838fd1498Szrj   elt->dup_blocks[0] = NULL;
36938fd1498Szrj   elt->dup_blocks[1] = NULL;
37038fd1498Szrj   elt->incoming_edges = NULL;
37138fd1498Szrj 
37238fd1498Szrj   slot = redirection_data->find_slot (elt, insert);
37338fd1498Szrj 
37438fd1498Szrj   /* This will only happen if INSERT is false and the entry is not
37538fd1498Szrj      in the hash table.  */
37638fd1498Szrj   if (slot == NULL)
37738fd1498Szrj     {
37838fd1498Szrj       free (elt);
37938fd1498Szrj       return NULL;
38038fd1498Szrj     }
38138fd1498Szrj 
38238fd1498Szrj   /* This will only happen if E was not in the hash table and
38338fd1498Szrj      INSERT is true.  */
38438fd1498Szrj   if (*slot == NULL)
38538fd1498Szrj     {
38638fd1498Szrj       *slot = elt;
38738fd1498Szrj       elt->incoming_edges = XNEW (struct el);
38838fd1498Szrj       elt->incoming_edges->e = e;
38938fd1498Szrj       elt->incoming_edges->next = NULL;
39038fd1498Szrj       return elt;
39138fd1498Szrj     }
39238fd1498Szrj   /* E was in the hash table.  */
39338fd1498Szrj   else
39438fd1498Szrj     {
39538fd1498Szrj       /* Free ELT as we do not need it anymore, we will extract the
39638fd1498Szrj 	 relevant entry from the hash table itself.  */
39738fd1498Szrj       free (elt);
39838fd1498Szrj 
39938fd1498Szrj       /* Get the entry stored in the hash table.  */
40038fd1498Szrj       elt = *slot;
40138fd1498Szrj 
40238fd1498Szrj       /* If insertion was requested, then we need to add INCOMING_EDGE
40338fd1498Szrj 	 to the list of incoming edges associated with E.  */
40438fd1498Szrj       if (insert)
40538fd1498Szrj 	{
40638fd1498Szrj 	  struct el *el = XNEW (struct el);
40738fd1498Szrj 	  el->next = elt->incoming_edges;
40838fd1498Szrj 	  el->e = e;
40938fd1498Szrj 	  elt->incoming_edges = el;
41038fd1498Szrj 	}
41138fd1498Szrj 
41238fd1498Szrj       return elt;
41338fd1498Szrj     }
41438fd1498Szrj }
41538fd1498Szrj 
41638fd1498Szrj /* Similar to copy_phi_args, except that the PHI arg exists, it just
41738fd1498Szrj    does not have a value associated with it.  */
41838fd1498Szrj 
41938fd1498Szrj static void
copy_phi_arg_into_existing_phi(edge src_e,edge tgt_e)42038fd1498Szrj copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
42138fd1498Szrj {
42238fd1498Szrj   int src_idx = src_e->dest_idx;
42338fd1498Szrj   int tgt_idx = tgt_e->dest_idx;
42438fd1498Szrj 
42538fd1498Szrj   /* Iterate over each PHI in e->dest.  */
42638fd1498Szrj   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
42738fd1498Szrj 			   gsi2 = gsi_start_phis (tgt_e->dest);
42838fd1498Szrj        !gsi_end_p (gsi);
42938fd1498Szrj        gsi_next (&gsi), gsi_next (&gsi2))
43038fd1498Szrj     {
43138fd1498Szrj       gphi *src_phi = gsi.phi ();
43238fd1498Szrj       gphi *dest_phi = gsi2.phi ();
43338fd1498Szrj       tree val = gimple_phi_arg_def (src_phi, src_idx);
43438fd1498Szrj       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
43538fd1498Szrj 
43638fd1498Szrj       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
43738fd1498Szrj       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
43838fd1498Szrj     }
43938fd1498Szrj }
44038fd1498Szrj 
44138fd1498Szrj /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
44238fd1498Szrj    to see if it has constant value in a flow sensitive manner.  Set
44338fd1498Szrj    LOCUS to location of the constant phi arg and return the value.
44438fd1498Szrj    Return DEF directly if either PATH or idx is ZERO.  */
44538fd1498Szrj 
44638fd1498Szrj static tree
get_value_locus_in_path(tree def,vec<jump_thread_edge * > * path,basic_block bb,int idx,source_location * locus)44738fd1498Szrj get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
44838fd1498Szrj 			 basic_block bb, int idx, source_location *locus)
44938fd1498Szrj {
45038fd1498Szrj   tree arg;
45138fd1498Szrj   gphi *def_phi;
45238fd1498Szrj   basic_block def_bb;
45338fd1498Szrj 
45438fd1498Szrj   if (path == NULL || idx == 0)
45538fd1498Szrj     return def;
45638fd1498Szrj 
45738fd1498Szrj   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
45838fd1498Szrj   if (!def_phi)
45938fd1498Szrj     return def;
46038fd1498Szrj 
46138fd1498Szrj   def_bb = gimple_bb (def_phi);
46238fd1498Szrj   /* Don't propagate loop invariants into deeper loops.  */
46338fd1498Szrj   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
46438fd1498Szrj     return def;
46538fd1498Szrj 
46638fd1498Szrj   /* Backtrack jump threading path from IDX to see if def has constant
46738fd1498Szrj      value.  */
46838fd1498Szrj   for (int j = idx - 1; j >= 0; j--)
46938fd1498Szrj     {
47038fd1498Szrj       edge e = (*path)[j]->e;
47138fd1498Szrj       if (e->dest == def_bb)
47238fd1498Szrj 	{
47338fd1498Szrj 	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
47438fd1498Szrj 	  if (is_gimple_min_invariant (arg))
47538fd1498Szrj 	    {
47638fd1498Szrj 	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
47738fd1498Szrj 	      return arg;
47838fd1498Szrj 	    }
47938fd1498Szrj 	  break;
48038fd1498Szrj 	}
48138fd1498Szrj     }
48238fd1498Szrj 
48338fd1498Szrj   return def;
48438fd1498Szrj }
48538fd1498Szrj 
48638fd1498Szrj /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
48738fd1498Szrj    Try to backtrack jump threading PATH from node IDX to see if the arg
48838fd1498Szrj    has constant value, copy constant value instead of argument itself
48938fd1498Szrj    if yes.  */
49038fd1498Szrj 
49138fd1498Szrj static void
copy_phi_args(basic_block bb,edge src_e,edge tgt_e,vec<jump_thread_edge * > * path,int idx)49238fd1498Szrj copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
49338fd1498Szrj 	       vec<jump_thread_edge *> *path, int idx)
49438fd1498Szrj {
49538fd1498Szrj   gphi_iterator gsi;
49638fd1498Szrj   int src_indx = src_e->dest_idx;
49738fd1498Szrj 
49838fd1498Szrj   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
49938fd1498Szrj     {
50038fd1498Szrj       gphi *phi = gsi.phi ();
50138fd1498Szrj       tree def = gimple_phi_arg_def (phi, src_indx);
50238fd1498Szrj       source_location locus = gimple_phi_arg_location (phi, src_indx);
50338fd1498Szrj 
50438fd1498Szrj       if (TREE_CODE (def) == SSA_NAME
50538fd1498Szrj 	  && !virtual_operand_p (gimple_phi_result (phi)))
50638fd1498Szrj 	def = get_value_locus_in_path (def, path, bb, idx, &locus);
50738fd1498Szrj 
50838fd1498Szrj       add_phi_arg (phi, def, tgt_e, locus);
50938fd1498Szrj     }
51038fd1498Szrj }
51138fd1498Szrj 
51238fd1498Szrj /* We have recently made a copy of ORIG_BB, including its outgoing
51338fd1498Szrj    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
51438fd1498Szrj    ORIG_BB has a new argument associated with edge from NEW_BB to the
51538fd1498Szrj    successor.  Initialize the PHI argument so that it is equal to the PHI
51638fd1498Szrj    argument associated with the edge from ORIG_BB to the successor.
51738fd1498Szrj    PATH and IDX are used to check if the new PHI argument has constant
51838fd1498Szrj    value in a flow sensitive manner.  */
51938fd1498Szrj 
52038fd1498Szrj static void
update_destination_phis(basic_block orig_bb,basic_block new_bb,vec<jump_thread_edge * > * path,int idx)52138fd1498Szrj update_destination_phis (basic_block orig_bb, basic_block new_bb,
52238fd1498Szrj 			 vec<jump_thread_edge *> *path, int idx)
52338fd1498Szrj {
52438fd1498Szrj   edge_iterator ei;
52538fd1498Szrj   edge e;
52638fd1498Szrj 
52738fd1498Szrj   FOR_EACH_EDGE (e, ei, orig_bb->succs)
52838fd1498Szrj     {
52938fd1498Szrj       edge e2 = find_edge (new_bb, e->dest);
53038fd1498Szrj       copy_phi_args (e->dest, e, e2, path, idx);
53138fd1498Szrj     }
53238fd1498Szrj }
53338fd1498Szrj 
53438fd1498Szrj /* Given a duplicate block and its single destination (both stored
53538fd1498Szrj    in RD).  Create an edge between the duplicate and its single
53638fd1498Szrj    destination.
53738fd1498Szrj 
53838fd1498Szrj    Add an additional argument to any PHI nodes at the single
53938fd1498Szrj    destination.  IDX is the start node in jump threading path
54038fd1498Szrj    we start to check to see if the new PHI argument has constant
54138fd1498Szrj    value along the jump threading path.  */
54238fd1498Szrj 
54338fd1498Szrj static void
create_edge_and_update_destination_phis(struct redirection_data * rd,basic_block bb,int idx)54438fd1498Szrj create_edge_and_update_destination_phis (struct redirection_data *rd,
54538fd1498Szrj 					 basic_block bb, int idx)
54638fd1498Szrj {
54738fd1498Szrj   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
54838fd1498Szrj 
54938fd1498Szrj   rescan_loop_exit (e, true, false);
55038fd1498Szrj 
55138fd1498Szrj   /* We used to copy the thread path here.  That was added in 2007
55238fd1498Szrj      and dutifully updated through the representation changes in 2013.
55338fd1498Szrj 
55438fd1498Szrj      In 2013 we added code to thread from an interior node through
55538fd1498Szrj      the backedge to another interior node.  That runs after the code
55638fd1498Szrj      to thread through loop headers from outside the loop.
55738fd1498Szrj 
55838fd1498Szrj      The latter may delete edges in the CFG, including those
55938fd1498Szrj      which appeared in the jump threading path we copied here.  Thus
56038fd1498Szrj      we'd end up using a dangling pointer.
56138fd1498Szrj 
56238fd1498Szrj      After reviewing the 2007/2011 code, I can't see how anything
56338fd1498Szrj      depended on copying the AUX field and clearly copying the jump
56438fd1498Szrj      threading path is problematical due to embedded edge pointers.
56538fd1498Szrj      It has been removed.  */
56638fd1498Szrj   e->aux = NULL;
56738fd1498Szrj 
56838fd1498Szrj   /* If there are any PHI nodes at the destination of the outgoing edge
56938fd1498Szrj      from the duplicate block, then we will need to add a new argument
57038fd1498Szrj      to them.  The argument should have the same value as the argument
57138fd1498Szrj      associated with the outgoing edge stored in RD.  */
57238fd1498Szrj   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
57338fd1498Szrj }
57438fd1498Szrj 
57538fd1498Szrj /* Look through PATH beginning at START and return TRUE if there are
57638fd1498Szrj    any additional blocks that need to be duplicated.  Otherwise,
57738fd1498Szrj    return FALSE.  */
57838fd1498Szrj static bool
any_remaining_duplicated_blocks(vec<jump_thread_edge * > * path,unsigned int start)57938fd1498Szrj any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
58038fd1498Szrj 				 unsigned int start)
58138fd1498Szrj {
58238fd1498Szrj   for (unsigned int i = start + 1; i < path->length (); i++)
58338fd1498Szrj     {
58438fd1498Szrj       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
58538fd1498Szrj 	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
58638fd1498Szrj 	return true;
58738fd1498Szrj     }
58838fd1498Szrj   return false;
58938fd1498Szrj }
59038fd1498Szrj 
59138fd1498Szrj 
59238fd1498Szrj /* Compute the amount of profile count coming into the jump threading
59338fd1498Szrj    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
59438fd1498Szrj    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
59538fd1498Szrj    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
59638fd1498Szrj    identify blocks duplicated for jump threading, which have duplicated
59738fd1498Szrj    edges that need to be ignored in the analysis.  Return true if path contains
59838fd1498Szrj    a joiner, false otherwise.
59938fd1498Szrj 
60038fd1498Szrj    In the non-joiner case, this is straightforward - all the counts
60138fd1498Szrj    flowing into the jump threading path should flow through the duplicated
60238fd1498Szrj    block and out of the duplicated path.
60338fd1498Szrj 
60438fd1498Szrj    In the joiner case, it is very tricky.  Some of the counts flowing into
60538fd1498Szrj    the original path go offpath at the joiner.  The problem is that while
60638fd1498Szrj    we know how much total count goes off-path in the original control flow,
60738fd1498Szrj    we don't know how many of the counts corresponding to just the jump
60838fd1498Szrj    threading path go offpath at the joiner.
60938fd1498Szrj 
61038fd1498Szrj    For example, assume we have the following control flow and identified
61138fd1498Szrj    jump threading paths:
61238fd1498Szrj 
61338fd1498Szrj 		A     B     C
61438fd1498Szrj 		 \    |    /
61538fd1498Szrj 	       Ea \   |Eb / Ec
61638fd1498Szrj 		   \  |  /
61738fd1498Szrj 		    v v v
61838fd1498Szrj 		      J       <-- Joiner
61938fd1498Szrj 		     / \
62038fd1498Szrj 		Eoff/   \Eon
62138fd1498Szrj 		   /     \
62238fd1498Szrj 		  v       v
62338fd1498Szrj 		Soff     Son  <--- Normal
62438fd1498Szrj 			 /\
62538fd1498Szrj 		      Ed/  \ Ee
62638fd1498Szrj 		       /    \
62738fd1498Szrj 		      v     v
62838fd1498Szrj 		      D      E
62938fd1498Szrj 
63038fd1498Szrj 	    Jump threading paths: A -> J -> Son -> D (path 1)
63138fd1498Szrj 				  C -> J -> Son -> E (path 2)
63238fd1498Szrj 
63338fd1498Szrj    Note that the control flow could be more complicated:
63438fd1498Szrj    - Each jump threading path may have more than one incoming edge.  I.e. A and
63538fd1498Szrj    Ea could represent multiple incoming blocks/edges that are included in
63638fd1498Szrj    path 1.
63738fd1498Szrj    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
63838fd1498Szrj    before or after the "normal" copy block).  These are not duplicated onto
63938fd1498Szrj    the jump threading path, as they are single-successor.
64038fd1498Szrj    - Any of the blocks along the path may have other incoming edges that
64138fd1498Szrj    are not part of any jump threading path, but add profile counts along
64238fd1498Szrj    the path.
64338fd1498Szrj 
64438fd1498Szrj    In the above example, after all jump threading is complete, we will
64538fd1498Szrj    end up with the following control flow:
64638fd1498Szrj 
64738fd1498Szrj 		A	   B	       C
64838fd1498Szrj 		|	   |	       |
64938fd1498Szrj 	      Ea|	   |Eb	       |Ec
65038fd1498Szrj 		|	   |	       |
65138fd1498Szrj 		v	   v	       v
65238fd1498Szrj 	       Ja	   J	      Jc
65338fd1498Szrj 	       / \	  / \Eon'     / \
65438fd1498Szrj 	  Eona/   \   ---/---\--------   \Eonc
65538fd1498Szrj 	     /     \ /  /     \		  \
65638fd1498Szrj 	    v       v  v       v	  v
65738fd1498Szrj 	   Sona     Soff      Son	Sonc
65838fd1498Szrj 	     \		       /\	  /
65938fd1498Szrj 	      \___________    /  \  _____/
66038fd1498Szrj 			  \  /    \/
66138fd1498Szrj 			   vv      v
66238fd1498Szrj 			    D      E
66338fd1498Szrj 
66438fd1498Szrj    The main issue to notice here is that when we are processing path 1
66538fd1498Szrj    (A->J->Son->D) we need to figure out the outgoing edge weights to
66638fd1498Szrj    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
66738fd1498Szrj    sum of the incoming weights to D remain Ed.  The problem with simply
66838fd1498Szrj    assuming that Ja (and Jc when processing path 2) has the same outgoing
66938fd1498Szrj    probabilities to its successors as the original block J, is that after
67038fd1498Szrj    all paths are processed and other edges/counts removed (e.g. none
67138fd1498Szrj    of Ec will reach D after processing path 2), we may end up with not
67238fd1498Szrj    enough count flowing along duplicated edge Sona->D.
67338fd1498Szrj 
67438fd1498Szrj    Therefore, in the case of a joiner, we keep track of all counts
67538fd1498Szrj    coming in along the current path, as well as from predecessors not
67638fd1498Szrj    on any jump threading path (Eb in the above example).  While we
67738fd1498Szrj    first assume that the duplicated Eona for Ja->Sona has the same
67838fd1498Szrj    probability as the original, we later compensate for other jump
67938fd1498Szrj    threading paths that may eliminate edges.  We do that by keep track
68038fd1498Szrj    of all counts coming into the original path that are not in a jump
68138fd1498Szrj    thread (Eb in the above example, but as noted earlier, there could
68238fd1498Szrj    be other predecessors incoming to the path at various points, such
68338fd1498Szrj    as at Son).  Call this cumulative non-path count coming into the path
68438fd1498Szrj    before D as Enonpath.  We then ensure that the count from Sona->D is as at
68538fd1498Szrj    least as big as (Ed - Enonpath), but no bigger than the minimum
68638fd1498Szrj    weight along the jump threading path.  The probabilities of both the
68738fd1498Szrj    original and duplicated joiner block J and Ja will be adjusted
68838fd1498Szrj    accordingly after the updates.  */
68938fd1498Szrj 
69038fd1498Szrj static bool
compute_path_counts(struct redirection_data * rd,ssa_local_info_t * local_info,profile_count * path_in_count_ptr,profile_count * path_out_count_ptr)69138fd1498Szrj compute_path_counts (struct redirection_data *rd,
69238fd1498Szrj 		     ssa_local_info_t *local_info,
69338fd1498Szrj 		     profile_count *path_in_count_ptr,
69438fd1498Szrj 		     profile_count *path_out_count_ptr)
69538fd1498Szrj {
69638fd1498Szrj   edge e = rd->incoming_edges->e;
69738fd1498Szrj   vec<jump_thread_edge *> *path = THREAD_PATH (e);
69838fd1498Szrj   edge elast = path->last ()->e;
69938fd1498Szrj   profile_count nonpath_count = profile_count::zero ();
70038fd1498Szrj   bool has_joiner = false;
70138fd1498Szrj   profile_count path_in_count = profile_count::zero ();
70238fd1498Szrj 
70338fd1498Szrj   /* Start by accumulating incoming edge counts to the path's first bb
70438fd1498Szrj      into a couple buckets:
70538fd1498Szrj 	path_in_count: total count of incoming edges that flow into the
70638fd1498Szrj 		  current path.
70738fd1498Szrj 	nonpath_count: total count of incoming edges that are not
70838fd1498Szrj 		  flowing along *any* path.  These are the counts
70938fd1498Szrj 		  that will still flow along the original path after
71038fd1498Szrj 		  all path duplication is done by potentially multiple
71138fd1498Szrj 		  calls to this routine.
71238fd1498Szrj      (any other incoming edge counts are for a different jump threading
71338fd1498Szrj      path that will be handled by a later call to this routine.)
71438fd1498Szrj      To make this easier, start by recording all incoming edges that flow into
71538fd1498Szrj      the current path in a bitmap.  We could add up the path's incoming edge
71638fd1498Szrj      counts here, but we still need to walk all the first bb's incoming edges
71738fd1498Szrj      below to add up the counts of the other edges not included in this jump
71838fd1498Szrj      threading path.  */
71938fd1498Szrj   struct el *next, *el;
72038fd1498Szrj   auto_bitmap in_edge_srcs;
72138fd1498Szrj   for (el = rd->incoming_edges; el; el = next)
72238fd1498Szrj     {
72338fd1498Szrj       next = el->next;
72438fd1498Szrj       bitmap_set_bit (in_edge_srcs, el->e->src->index);
72538fd1498Szrj     }
72638fd1498Szrj   edge ein;
72738fd1498Szrj   edge_iterator ei;
72838fd1498Szrj   FOR_EACH_EDGE (ein, ei, e->dest->preds)
72938fd1498Szrj     {
73038fd1498Szrj       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
73138fd1498Szrj       /* Simply check the incoming edge src against the set captured above.  */
73238fd1498Szrj       if (ein_path
73338fd1498Szrj 	  && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
73438fd1498Szrj 	{
73538fd1498Szrj 	  /* It is necessary but not sufficient that the last path edges
73638fd1498Szrj 	     are identical.  There may be different paths that share the
73738fd1498Szrj 	     same last path edge in the case where the last edge has a nocopy
73838fd1498Szrj 	     source block.  */
73938fd1498Szrj 	  gcc_assert (ein_path->last ()->e == elast);
74038fd1498Szrj 	  path_in_count += ein->count ();
74138fd1498Szrj 	}
74238fd1498Szrj       else if (!ein_path)
74338fd1498Szrj 	{
74438fd1498Szrj 	  /* Keep track of the incoming edges that are not on any jump-threading
74538fd1498Szrj 	     path.  These counts will still flow out of original path after all
74638fd1498Szrj 	     jump threading is complete.  */
74738fd1498Szrj 	    nonpath_count += ein->count ();
74838fd1498Szrj 	}
74938fd1498Szrj     }
75038fd1498Szrj 
75138fd1498Szrj   /* Now compute the fraction of the total count coming into the first
75238fd1498Szrj      path bb that is from the current threading path.  */
75338fd1498Szrj   profile_count total_count = e->dest->count;
75438fd1498Szrj   /* Handle incoming profile insanities.  */
75538fd1498Szrj   if (total_count < path_in_count)
75638fd1498Szrj     path_in_count = total_count;
75738fd1498Szrj   profile_probability onpath_scale = path_in_count.probability_in (total_count);
75838fd1498Szrj 
75938fd1498Szrj   /* Walk the entire path to do some more computation in order to estimate
76038fd1498Szrj      how much of the path_in_count will flow out of the duplicated threading
76138fd1498Szrj      path.  In the non-joiner case this is straightforward (it should be
76238fd1498Szrj      the same as path_in_count, although we will handle incoming profile
76338fd1498Szrj      insanities by setting it equal to the minimum count along the path).
76438fd1498Szrj 
76538fd1498Szrj      In the joiner case, we need to estimate how much of the path_in_count
76638fd1498Szrj      will stay on the threading path after the joiner's conditional branch.
76738fd1498Szrj      We don't really know for sure how much of the counts
76838fd1498Szrj      associated with this path go to each successor of the joiner, but we'll
76938fd1498Szrj      estimate based on the fraction of the total count coming into the path
77038fd1498Szrj      bb was from the threading paths (computed above in onpath_scale).
77138fd1498Szrj      Afterwards, we will need to do some fixup to account for other threading
77238fd1498Szrj      paths and possible profile insanities.
77338fd1498Szrj 
77438fd1498Szrj      In order to estimate the joiner case's counts we also need to update
77538fd1498Szrj      nonpath_count with any additional counts coming into the path.  Other
77638fd1498Szrj      blocks along the path may have additional predecessors from outside
77738fd1498Szrj      the path.  */
77838fd1498Szrj   profile_count path_out_count = path_in_count;
77938fd1498Szrj   profile_count min_path_count = path_in_count;
78038fd1498Szrj   for (unsigned int i = 1; i < path->length (); i++)
78138fd1498Szrj     {
78238fd1498Szrj       edge epath = (*path)[i]->e;
78338fd1498Szrj       profile_count cur_count = epath->count ();
78438fd1498Szrj       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
78538fd1498Szrj 	{
78638fd1498Szrj 	  has_joiner = true;
78738fd1498Szrj 	  cur_count = cur_count.apply_probability (onpath_scale);
78838fd1498Szrj 	}
78938fd1498Szrj       /* In the joiner case we need to update nonpath_count for any edges
79038fd1498Szrj 	 coming into the path that will contribute to the count flowing
79138fd1498Szrj 	 into the path successor.  */
79238fd1498Szrj       if (has_joiner && epath != elast)
79338fd1498Szrj 	{
79438fd1498Szrj 	  /* Look for other incoming edges after joiner.  */
79538fd1498Szrj 	  FOR_EACH_EDGE (ein, ei, epath->dest->preds)
79638fd1498Szrj 	    {
79738fd1498Szrj 	      if (ein != epath
79838fd1498Szrj 		  /* Ignore in edges from blocks we have duplicated for a
79938fd1498Szrj 		     threading path, which have duplicated edge counts until
80038fd1498Szrj 		     they are redirected by an invocation of this routine.  */
80138fd1498Szrj 		  && !bitmap_bit_p (local_info->duplicate_blocks,
80238fd1498Szrj 				    ein->src->index))
80338fd1498Szrj 		nonpath_count += ein->count ();
80438fd1498Szrj 	    }
80538fd1498Szrj 	}
80638fd1498Szrj       if (cur_count < path_out_count)
80738fd1498Szrj 	path_out_count = cur_count;
80838fd1498Szrj       if (epath->count () < min_path_count)
80938fd1498Szrj 	min_path_count = epath->count ();
81038fd1498Szrj     }
81138fd1498Szrj 
81238fd1498Szrj   /* We computed path_out_count above assuming that this path targeted
81338fd1498Szrj      the joiner's on-path successor with the same likelihood as it
81438fd1498Szrj      reached the joiner.  However, other thread paths through the joiner
81538fd1498Szrj      may take a different path through the normal copy source block
81638fd1498Szrj      (i.e. they have a different elast), meaning that they do not
81738fd1498Szrj      contribute any counts to this path's elast.  As a result, it may
81838fd1498Szrj      turn out that this path must have more count flowing to the on-path
81938fd1498Szrj      successor of the joiner.  Essentially, all of this path's elast
82038fd1498Szrj      count must be contributed by this path and any nonpath counts
82138fd1498Szrj      (since any path through the joiner with a different elast will not
82238fd1498Szrj      include a copy of this elast in its duplicated path).
82338fd1498Szrj      So ensure that this path's path_out_count is at least the
82438fd1498Szrj      difference between elast->count () and nonpath_count.  Otherwise the edge
82538fd1498Szrj      counts after threading will not be sane.  */
82638fd1498Szrj   if (local_info->need_profile_correction
82738fd1498Szrj       && has_joiner && path_out_count < elast->count () - nonpath_count)
82838fd1498Szrj     {
82938fd1498Szrj       path_out_count = elast->count () - nonpath_count;
83038fd1498Szrj       /* But neither can we go above the minimum count along the path
83138fd1498Szrj 	 we are duplicating.  This can be an issue due to profile
83238fd1498Szrj 	 insanities coming in to this pass.  */
83338fd1498Szrj       if (path_out_count > min_path_count)
83438fd1498Szrj 	path_out_count = min_path_count;
83538fd1498Szrj     }
83638fd1498Szrj 
83738fd1498Szrj   *path_in_count_ptr = path_in_count;
83838fd1498Szrj   *path_out_count_ptr = path_out_count;
83938fd1498Szrj   return has_joiner;
84038fd1498Szrj }
84138fd1498Szrj 
84238fd1498Szrj 
84338fd1498Szrj /* Update the counts and frequencies for both an original path
84438fd1498Szrj    edge EPATH and its duplicate EDUP.  The duplicate source block
84538fd1498Szrj    will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
84638fd1498Szrj    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
84738fd1498Szrj static void
update_profile(edge epath,edge edup,profile_count path_in_count,profile_count path_out_count)84838fd1498Szrj update_profile (edge epath, edge edup, profile_count path_in_count,
84938fd1498Szrj 		profile_count path_out_count)
85038fd1498Szrj {
85138fd1498Szrj 
85238fd1498Szrj   /* First update the duplicated block's count.  */
85338fd1498Szrj   if (edup)
85438fd1498Szrj     {
85538fd1498Szrj       basic_block dup_block = edup->src;
85638fd1498Szrj 
85738fd1498Szrj       /* Edup's count is reduced by path_out_count.  We need to redistribute
85838fd1498Szrj          probabilities to the remaining edges.  */
85938fd1498Szrj 
86038fd1498Szrj       edge esucc;
86138fd1498Szrj       edge_iterator ei;
86238fd1498Szrj       profile_probability edup_prob
86338fd1498Szrj 	 = path_out_count.probability_in (path_in_count);
86438fd1498Szrj 
86538fd1498Szrj       /* Either scale up or down the remaining edges.
86638fd1498Szrj 	 probabilities are always in range <0,1> and thus we can't do
86738fd1498Szrj 	 both by same loop.  */
86838fd1498Szrj       if (edup->probability > edup_prob)
86938fd1498Szrj 	{
87038fd1498Szrj 	   profile_probability rev_scale
87138fd1498Szrj 	     = (profile_probability::always () - edup->probability)
87238fd1498Szrj 	       / (profile_probability::always () - edup_prob);
87338fd1498Szrj 	   FOR_EACH_EDGE (esucc, ei, dup_block->succs)
87438fd1498Szrj 	     if (esucc != edup)
87538fd1498Szrj 	       esucc->probability /= rev_scale;
87638fd1498Szrj 	}
87738fd1498Szrj       else if (edup->probability < edup_prob)
87838fd1498Szrj 	{
87938fd1498Szrj 	   profile_probability scale
88038fd1498Szrj 	     = (profile_probability::always () - edup_prob)
88138fd1498Szrj 	       / (profile_probability::always () - edup->probability);
88238fd1498Szrj 	  FOR_EACH_EDGE (esucc, ei, dup_block->succs)
88338fd1498Szrj 	    if (esucc != edup)
88438fd1498Szrj 	      esucc->probability *= scale;
88538fd1498Szrj 	}
88638fd1498Szrj       if (edup_prob.initialized_p ())
88738fd1498Szrj         edup->probability = edup_prob;
88838fd1498Szrj 
88938fd1498Szrj       gcc_assert (!dup_block->count.initialized_p ());
89038fd1498Szrj       dup_block->count = path_in_count;
89138fd1498Szrj     }
89238fd1498Szrj 
89338fd1498Szrj   if (path_in_count == profile_count::zero ())
89438fd1498Szrj     return;
89538fd1498Szrj 
89638fd1498Szrj   profile_count final_count = epath->count () - path_out_count;
89738fd1498Szrj 
89838fd1498Szrj   /* Now update the original block's count in the
89938fd1498Szrj      opposite manner - remove the counts/freq that will flow
90038fd1498Szrj      into the duplicated block.  Handle underflow due to precision/
90138fd1498Szrj      rounding issues.  */
90238fd1498Szrj   epath->src->count -= path_in_count;
90338fd1498Szrj 
90438fd1498Szrj   /* Next update this path edge's original and duplicated counts.  We know
90538fd1498Szrj      that the duplicated path will have path_out_count flowing
90638fd1498Szrj      out of it (in the joiner case this is the count along the duplicated path
90738fd1498Szrj      out of the duplicated joiner).  This count can then be removed from the
90838fd1498Szrj      original path edge.  */
90938fd1498Szrj 
91038fd1498Szrj   edge esucc;
91138fd1498Szrj   edge_iterator ei;
91238fd1498Szrj   profile_probability epath_prob = final_count.probability_in (epath->src->count);
91338fd1498Szrj 
91438fd1498Szrj   if (epath->probability > epath_prob)
91538fd1498Szrj     {
91638fd1498Szrj        profile_probability rev_scale
91738fd1498Szrj 	 = (profile_probability::always () - epath->probability)
91838fd1498Szrj 	   / (profile_probability::always () - epath_prob);
91938fd1498Szrj        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
92038fd1498Szrj 	 if (esucc != epath)
92138fd1498Szrj 	   esucc->probability /= rev_scale;
92238fd1498Szrj     }
92338fd1498Szrj   else if (epath->probability < epath_prob)
92438fd1498Szrj     {
92538fd1498Szrj        profile_probability scale
92638fd1498Szrj 	 = (profile_probability::always () - epath_prob)
92738fd1498Szrj 	   / (profile_probability::always () - epath->probability);
92838fd1498Szrj       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
92938fd1498Szrj 	if (esucc != epath)
93038fd1498Szrj 	  esucc->probability *= scale;
93138fd1498Szrj     }
93238fd1498Szrj   if (epath_prob.initialized_p ())
93338fd1498Szrj     epath->probability = epath_prob;
93438fd1498Szrj }
93538fd1498Szrj 
93638fd1498Szrj /* Wire up the outgoing edges from the duplicate blocks and
93738fd1498Szrj    update any PHIs as needed.  Also update the profile counts
93838fd1498Szrj    on the original and duplicate blocks and edges.  */
93938fd1498Szrj void
ssa_fix_duplicate_block_edges(struct redirection_data * rd,ssa_local_info_t * local_info)94038fd1498Szrj ssa_fix_duplicate_block_edges (struct redirection_data *rd,
94138fd1498Szrj 			       ssa_local_info_t *local_info)
94238fd1498Szrj {
94338fd1498Szrj   bool multi_incomings = (rd->incoming_edges->next != NULL);
94438fd1498Szrj   edge e = rd->incoming_edges->e;
94538fd1498Szrj   vec<jump_thread_edge *> *path = THREAD_PATH (e);
94638fd1498Szrj   edge elast = path->last ()->e;
94738fd1498Szrj   profile_count path_in_count = profile_count::zero ();
94838fd1498Szrj   profile_count path_out_count = profile_count::zero ();
94938fd1498Szrj 
95038fd1498Szrj   /* First determine how much profile count to move from original
95138fd1498Szrj      path to the duplicate path.  This is tricky in the presence of
95238fd1498Szrj      a joiner (see comments for compute_path_counts), where some portion
95338fd1498Szrj      of the path's counts will flow off-path from the joiner.  In the
95438fd1498Szrj      non-joiner case the path_in_count and path_out_count should be the
95538fd1498Szrj      same.  */
95638fd1498Szrj   bool has_joiner = compute_path_counts (rd, local_info,
95738fd1498Szrj 					 &path_in_count, &path_out_count);
95838fd1498Szrj 
95938fd1498Szrj   for (unsigned int count = 0, i = 1; i < path->length (); i++)
96038fd1498Szrj     {
96138fd1498Szrj       edge epath = (*path)[i]->e;
96238fd1498Szrj 
96338fd1498Szrj       /* If we were threading through an joiner block, then we want
96438fd1498Szrj 	 to keep its control statement and redirect an outgoing edge.
96538fd1498Szrj 	 Else we want to remove the control statement & edges, then create
96638fd1498Szrj 	 a new outgoing edge.  In both cases we may need to update PHIs.  */
96738fd1498Szrj       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
96838fd1498Szrj 	{
96938fd1498Szrj 	  edge victim;
97038fd1498Szrj 	  edge e2;
97138fd1498Szrj 
97238fd1498Szrj 	  gcc_assert (has_joiner);
97338fd1498Szrj 
97438fd1498Szrj 	  /* This updates the PHIs at the destination of the duplicate
97538fd1498Szrj 	     block.  Pass 0 instead of i if we are threading a path which
97638fd1498Szrj 	     has multiple incoming edges.  */
97738fd1498Szrj 	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
97838fd1498Szrj 				   path, multi_incomings ? 0 : i);
97938fd1498Szrj 
98038fd1498Szrj 	  /* Find the edge from the duplicate block to the block we're
98138fd1498Szrj 	     threading through.  That's the edge we want to redirect.  */
98238fd1498Szrj 	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
98338fd1498Szrj 
98438fd1498Szrj 	  /* If there are no remaining blocks on the path to duplicate,
98538fd1498Szrj 	     then redirect VICTIM to the final destination of the jump
98638fd1498Szrj 	     threading path.  */
98738fd1498Szrj 	  if (!any_remaining_duplicated_blocks (path, i))
98838fd1498Szrj 	    {
98938fd1498Szrj 	      e2 = redirect_edge_and_branch (victim, elast->dest);
99038fd1498Szrj 	      /* If we redirected the edge, then we need to copy PHI arguments
99138fd1498Szrj 		 at the target.  If the edge already existed (e2 != victim
99238fd1498Szrj 		 case), then the PHIs in the target already have the correct
99338fd1498Szrj 		 arguments.  */
99438fd1498Szrj 	      if (e2 == victim)
99538fd1498Szrj 		copy_phi_args (e2->dest, elast, e2,
99638fd1498Szrj 			       path, multi_incomings ? 0 : i);
99738fd1498Szrj 	    }
99838fd1498Szrj 	  else
99938fd1498Szrj 	    {
100038fd1498Szrj 	      /* Redirect VICTIM to the next duplicated block in the path.  */
100138fd1498Szrj 	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
100238fd1498Szrj 
100338fd1498Szrj 	      /* We need to update the PHIs in the next duplicated block.  We
100438fd1498Szrj 		 want the new PHI args to have the same value as they had
100538fd1498Szrj 		 in the source of the next duplicate block.
100638fd1498Szrj 
100738fd1498Szrj 		 Thus, we need to know which edge we traversed into the
100838fd1498Szrj 		 source of the duplicate.  Furthermore, we may have
100938fd1498Szrj 		 traversed many edges to reach the source of the duplicate.
101038fd1498Szrj 
101138fd1498Szrj 		 Walk through the path starting at element I until we
101238fd1498Szrj 		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
101338fd1498Szrj 		 the edge from the prior element.  */
101438fd1498Szrj 	      for (unsigned int j = i + 1; j < path->length (); j++)
101538fd1498Szrj 		{
101638fd1498Szrj 		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
101738fd1498Szrj 		    {
101838fd1498Szrj 		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
101938fd1498Szrj 		      break;
102038fd1498Szrj 		    }
102138fd1498Szrj 		}
102238fd1498Szrj 	    }
102338fd1498Szrj 
102438fd1498Szrj 	  /* Update the counts of both the original block
102538fd1498Szrj 	     and path edge, and the duplicates.  The path duplicate's
102638fd1498Szrj 	     incoming count are the totals for all edges
102738fd1498Szrj 	     incoming to this jump threading path computed earlier.
102838fd1498Szrj 	     And we know that the duplicated path will have path_out_count
102938fd1498Szrj 	     flowing out of it (i.e. along the duplicated path out of the
103038fd1498Szrj 	     duplicated joiner).  */
103138fd1498Szrj 	  update_profile (epath, e2, path_in_count, path_out_count);
103238fd1498Szrj 	}
103338fd1498Szrj       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
103438fd1498Szrj 	{
103538fd1498Szrj 	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
103638fd1498Szrj 	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
103738fd1498Szrj 						   multi_incomings ? 0 : i);
103838fd1498Szrj 	  if (count == 1)
103938fd1498Szrj 	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;
104038fd1498Szrj 
104138fd1498Szrj 	  /* Update the counts of both the original block
104238fd1498Szrj 	     and path edge, and the duplicates.  Since we are now after
104338fd1498Szrj 	     any joiner that may have existed on the path, the count
104438fd1498Szrj 	     flowing along the duplicated threaded path is path_out_count.
104538fd1498Szrj 	     If we didn't have a joiner, then cur_path_freq was the sum
104638fd1498Szrj 	     of the total frequencies along all incoming edges to the
104738fd1498Szrj 	     thread path (path_in_freq).  If we had a joiner, it would have
104838fd1498Szrj 	     been updated at the end of that handling to the edge frequency
104938fd1498Szrj 	     along the duplicated joiner path edge.  */
105038fd1498Szrj 	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
105138fd1498Szrj 			  path_out_count, path_out_count);
105238fd1498Szrj 	}
105338fd1498Szrj       else
105438fd1498Szrj 	{
105538fd1498Szrj 	  /* No copy case.  In this case we don't have an equivalent block
105638fd1498Szrj 	     on the duplicated thread path to update, but we do need
105738fd1498Szrj 	     to remove the portion of the counts/freqs that were moved
105838fd1498Szrj 	     to the duplicated path from the counts/freqs flowing through
105938fd1498Szrj 	     this block on the original path.  Since all the no-copy edges
106038fd1498Szrj 	     are after any joiner, the removed count is the same as
106138fd1498Szrj 	     path_out_count.
106238fd1498Szrj 
106338fd1498Szrj 	     If we didn't have a joiner, then cur_path_freq was the sum
106438fd1498Szrj 	     of the total frequencies along all incoming edges to the
106538fd1498Szrj 	     thread path (path_in_freq).  If we had a joiner, it would have
106638fd1498Szrj 	     been updated at the end of that handling to the edge frequency
106738fd1498Szrj 	     along the duplicated joiner path edge.  */
106838fd1498Szrj 	   update_profile (epath, NULL, path_out_count, path_out_count);
106938fd1498Szrj 	}
107038fd1498Szrj 
107138fd1498Szrj       /* Increment the index into the duplicated path when we processed
107238fd1498Szrj 	 a duplicated block.  */
107338fd1498Szrj       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
107438fd1498Szrj 	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
107538fd1498Szrj 	{
107638fd1498Szrj 	  count++;
107738fd1498Szrj 	}
107838fd1498Szrj     }
107938fd1498Szrj }
108038fd1498Szrj 
108138fd1498Szrj /* Hash table traversal callback routine to create duplicate blocks.  */
108238fd1498Szrj 
108338fd1498Szrj int
ssa_create_duplicates(struct redirection_data ** slot,ssa_local_info_t * local_info)108438fd1498Szrj ssa_create_duplicates (struct redirection_data **slot,
108538fd1498Szrj 		       ssa_local_info_t *local_info)
108638fd1498Szrj {
108738fd1498Szrj   struct redirection_data *rd = *slot;
108838fd1498Szrj 
108938fd1498Szrj   /* The second duplicated block in a jump threading path is specific
109038fd1498Szrj      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
109138fd1498Szrj 
109238fd1498Szrj      Each time we're called, we have to look through the path and see
109338fd1498Szrj      if a second block needs to be duplicated.
109438fd1498Szrj 
109538fd1498Szrj      Note the search starts with the third edge on the path.  The first
109638fd1498Szrj      edge is the incoming edge, the second edge always has its source
109738fd1498Szrj      duplicated.  Thus we start our search with the third edge.  */
109838fd1498Szrj   vec<jump_thread_edge *> *path = rd->path;
109938fd1498Szrj   for (unsigned int i = 2; i < path->length (); i++)
110038fd1498Szrj     {
110138fd1498Szrj       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
110238fd1498Szrj 	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
110338fd1498Szrj 	{
110438fd1498Szrj 	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
110538fd1498Szrj 				      &local_info->duplicate_blocks);
110638fd1498Szrj 	  break;
110738fd1498Szrj 	}
110838fd1498Szrj     }
110938fd1498Szrj 
111038fd1498Szrj   /* Create a template block if we have not done so already.  Otherwise
111138fd1498Szrj      use the template to create a new block.  */
111238fd1498Szrj   if (local_info->template_block == NULL)
111338fd1498Szrj     {
111438fd1498Szrj       create_block_for_threading ((*path)[1]->e->src, rd, 0,
111538fd1498Szrj 				  &local_info->duplicate_blocks);
111638fd1498Szrj       local_info->template_block = rd->dup_blocks[0];
111738fd1498Szrj 
111838fd1498Szrj       /* We do not create any outgoing edges for the template.  We will
111938fd1498Szrj 	 take care of that in a later traversal.  That way we do not
112038fd1498Szrj 	 create edges that are going to just be deleted.  */
112138fd1498Szrj     }
112238fd1498Szrj   else
112338fd1498Szrj     {
112438fd1498Szrj       create_block_for_threading (local_info->template_block, rd, 0,
112538fd1498Szrj 				  &local_info->duplicate_blocks);
112638fd1498Szrj 
112738fd1498Szrj       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
112838fd1498Szrj 	 block.   */
112938fd1498Szrj       ssa_fix_duplicate_block_edges (rd, local_info);
113038fd1498Szrj     }
113138fd1498Szrj 
113238fd1498Szrj   /* Keep walking the hash table.  */
113338fd1498Szrj   return 1;
113438fd1498Szrj }
113538fd1498Szrj 
113638fd1498Szrj /* We did not create any outgoing edges for the template block during
113738fd1498Szrj    block creation.  This hash table traversal callback creates the
113838fd1498Szrj    outgoing edge for the template block.  */
113938fd1498Szrj 
114038fd1498Szrj inline int
ssa_fixup_template_block(struct redirection_data ** slot,ssa_local_info_t * local_info)114138fd1498Szrj ssa_fixup_template_block (struct redirection_data **slot,
114238fd1498Szrj 			  ssa_local_info_t *local_info)
114338fd1498Szrj {
114438fd1498Szrj   struct redirection_data *rd = *slot;
114538fd1498Szrj 
114638fd1498Szrj   /* If this is the template block halt the traversal after updating
114738fd1498Szrj      it appropriately.
114838fd1498Szrj 
114938fd1498Szrj      If we were threading through an joiner block, then we want
115038fd1498Szrj      to keep its control statement and redirect an outgoing edge.
115138fd1498Szrj      Else we want to remove the control statement & edges, then create
115238fd1498Szrj      a new outgoing edge.  In both cases we may need to update PHIs.  */
115338fd1498Szrj   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
115438fd1498Szrj     {
115538fd1498Szrj       ssa_fix_duplicate_block_edges (rd, local_info);
115638fd1498Szrj       return 0;
115738fd1498Szrj     }
115838fd1498Szrj 
115938fd1498Szrj   return 1;
116038fd1498Szrj }
116138fd1498Szrj 
116238fd1498Szrj /* Hash table traversal callback to redirect each incoming edge
116338fd1498Szrj    associated with this hash table element to its new destination.  */
116438fd1498Szrj 
116538fd1498Szrj int
ssa_redirect_edges(struct redirection_data ** slot,ssa_local_info_t * local_info)116638fd1498Szrj ssa_redirect_edges (struct redirection_data **slot,
116738fd1498Szrj 		    ssa_local_info_t *local_info)
116838fd1498Szrj {
116938fd1498Szrj   struct redirection_data *rd = *slot;
117038fd1498Szrj   struct el *next, *el;
117138fd1498Szrj 
117238fd1498Szrj   /* Walk over all the incoming edges associated with this hash table
117338fd1498Szrj      entry.  */
117438fd1498Szrj   for (el = rd->incoming_edges; el; el = next)
117538fd1498Szrj     {
117638fd1498Szrj       edge e = el->e;
117738fd1498Szrj       vec<jump_thread_edge *> *path = THREAD_PATH (e);
117838fd1498Szrj 
117938fd1498Szrj       /* Go ahead and free this element from the list.  Doing this now
118038fd1498Szrj 	 avoids the need for another list walk when we destroy the hash
118138fd1498Szrj 	 table.  */
118238fd1498Szrj       next = el->next;
118338fd1498Szrj       free (el);
118438fd1498Szrj 
118538fd1498Szrj       thread_stats.num_threaded_edges++;
118638fd1498Szrj 
118738fd1498Szrj       if (rd->dup_blocks[0])
118838fd1498Szrj 	{
118938fd1498Szrj 	  edge e2;
119038fd1498Szrj 
119138fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
119238fd1498Szrj 	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
119338fd1498Szrj 		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);
119438fd1498Szrj 
119538fd1498Szrj 	  /* Redirect the incoming edge (possibly to the joiner block) to the
119638fd1498Szrj 	     appropriate duplicate block.  */
119738fd1498Szrj 	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
119838fd1498Szrj 	  gcc_assert (e == e2);
119938fd1498Szrj 	  flush_pending_stmts (e2);
120038fd1498Szrj 	}
120138fd1498Szrj 
120238fd1498Szrj       /* Go ahead and clear E->aux.  It's not needed anymore and failure
120338fd1498Szrj 	 to clear it will cause all kinds of unpleasant problems later.  */
120438fd1498Szrj       delete_jump_thread_path (path);
120538fd1498Szrj       e->aux = NULL;
120638fd1498Szrj 
120738fd1498Szrj     }
120838fd1498Szrj 
120938fd1498Szrj   /* Indicate that we actually threaded one or more jumps.  */
121038fd1498Szrj   if (rd->incoming_edges)
121138fd1498Szrj     local_info->jumps_threaded = true;
121238fd1498Szrj 
121338fd1498Szrj   return 1;
121438fd1498Szrj }
121538fd1498Szrj 
121638fd1498Szrj /* Return true if this block has no executable statements other than
121738fd1498Szrj    a simple ctrl flow instruction.  When the number of outgoing edges
121838fd1498Szrj    is one, this is equivalent to a "forwarder" block.  */
121938fd1498Szrj 
122038fd1498Szrj static bool
redirection_block_p(basic_block bb)122138fd1498Szrj redirection_block_p (basic_block bb)
122238fd1498Szrj {
122338fd1498Szrj   gimple_stmt_iterator gsi;
122438fd1498Szrj 
122538fd1498Szrj   /* Advance to the first executable statement.  */
122638fd1498Szrj   gsi = gsi_start_bb (bb);
122738fd1498Szrj   while (!gsi_end_p (gsi)
122838fd1498Szrj 	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
122938fd1498Szrj 	     || is_gimple_debug (gsi_stmt (gsi))
123038fd1498Szrj 	     || gimple_nop_p (gsi_stmt (gsi))
123138fd1498Szrj 	     || gimple_clobber_p (gsi_stmt (gsi))))
123238fd1498Szrj     gsi_next (&gsi);
123338fd1498Szrj 
123438fd1498Szrj   /* Check if this is an empty block.  */
123538fd1498Szrj   if (gsi_end_p (gsi))
123638fd1498Szrj     return true;
123738fd1498Szrj 
123838fd1498Szrj   /* Test that we've reached the terminating control statement.  */
123938fd1498Szrj   return gsi_stmt (gsi)
124038fd1498Szrj 	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
124138fd1498Szrj 	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
124238fd1498Szrj 	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
124338fd1498Szrj }
124438fd1498Szrj 
124538fd1498Szrj /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
124638fd1498Szrj    is reached via one or more specific incoming edges, we know which
124738fd1498Szrj    outgoing edge from BB will be traversed.
124838fd1498Szrj 
124938fd1498Szrj    We want to redirect those incoming edges to the target of the
125038fd1498Szrj    appropriate outgoing edge.  Doing so avoids a conditional branch
125138fd1498Szrj    and may expose new optimization opportunities.  Note that we have
125238fd1498Szrj    to update dominator tree and SSA graph after such changes.
125338fd1498Szrj 
125438fd1498Szrj    The key to keeping the SSA graph update manageable is to duplicate
125538fd1498Szrj    the side effects occurring in BB so that those side effects still
125638fd1498Szrj    occur on the paths which bypass BB after redirecting edges.
125738fd1498Szrj 
125838fd1498Szrj    We accomplish this by creating duplicates of BB and arranging for
125938fd1498Szrj    the duplicates to unconditionally pass control to one specific
126038fd1498Szrj    successor of BB.  We then revector the incoming edges into BB to
126138fd1498Szrj    the appropriate duplicate of BB.
126238fd1498Szrj 
126338fd1498Szrj    If NOLOOP_ONLY is true, we only perform the threading as long as it
126438fd1498Szrj    does not affect the structure of the loops in a nontrivial way.
126538fd1498Szrj 
126638fd1498Szrj    If JOINERS is true, then thread through joiner blocks as well.  */
126738fd1498Szrj 
126838fd1498Szrj static bool
thread_block_1(basic_block bb,bool noloop_only,bool joiners)126938fd1498Szrj thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
127038fd1498Szrj {
127138fd1498Szrj   /* E is an incoming edge into BB that we may or may not want to
127238fd1498Szrj      redirect to a duplicate of BB.  */
127338fd1498Szrj   edge e, e2;
127438fd1498Szrj   edge_iterator ei;
127538fd1498Szrj   ssa_local_info_t local_info;
127638fd1498Szrj 
127738fd1498Szrj   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
127838fd1498Szrj   local_info.need_profile_correction = false;
127938fd1498Szrj 
128038fd1498Szrj   /* To avoid scanning a linear array for the element we need we instead
128138fd1498Szrj      use a hash table.  For normal code there should be no noticeable
128238fd1498Szrj      difference.  However, if we have a block with a large number of
128338fd1498Szrj      incoming and outgoing edges such linear searches can get expensive.  */
128438fd1498Szrj   redirection_data
128538fd1498Szrj     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
128638fd1498Szrj 
128738fd1498Szrj   /* Record each unique threaded destination into a hash table for
128838fd1498Szrj      efficient lookups.  */
128938fd1498Szrj   edge last = NULL;
129038fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->preds)
129138fd1498Szrj     {
129238fd1498Szrj       if (e->aux == NULL)
129338fd1498Szrj 	continue;
129438fd1498Szrj 
129538fd1498Szrj       vec<jump_thread_edge *> *path = THREAD_PATH (e);
129638fd1498Szrj 
129738fd1498Szrj       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
129838fd1498Szrj 	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
129938fd1498Szrj 	continue;
130038fd1498Szrj 
130138fd1498Szrj       e2 = path->last ()->e;
130238fd1498Szrj       if (!e2 || noloop_only)
130338fd1498Szrj 	{
130438fd1498Szrj 	  /* If NOLOOP_ONLY is true, we only allow threading through the
130538fd1498Szrj 	     header of a loop to exit edges.  */
130638fd1498Szrj 
130738fd1498Szrj 	  /* One case occurs when there was loop header buried in a jump
130838fd1498Szrj 	     threading path that crosses loop boundaries.  We do not try
130938fd1498Szrj 	     and thread this elsewhere, so just cancel the jump threading
131038fd1498Szrj 	     request by clearing the AUX field now.  */
131138fd1498Szrj 	  if (bb->loop_father != e2->src->loop_father
1312*58e805e6Szrj 	      && (!loop_exit_edge_p (e2->src->loop_father, e2)
1313*58e805e6Szrj 		  || flow_loop_nested_p (bb->loop_father,
1314*58e805e6Szrj 					 e2->dest->loop_father)))
131538fd1498Szrj 	    {
131638fd1498Szrj 	      /* Since this case is not handled by our special code
131738fd1498Szrj 		 to thread through a loop header, we must explicitly
131838fd1498Szrj 		 cancel the threading request here.  */
131938fd1498Szrj 	      delete_jump_thread_path (path);
132038fd1498Szrj 	      e->aux = NULL;
132138fd1498Szrj 	      continue;
132238fd1498Szrj 	    }
132338fd1498Szrj 
132438fd1498Szrj 	  /* Another case occurs when trying to thread through our
132538fd1498Szrj 	     own loop header, possibly from inside the loop.  We will
132638fd1498Szrj 	     thread these later.  */
132738fd1498Szrj 	  unsigned int i;
132838fd1498Szrj 	  for (i = 1; i < path->length (); i++)
132938fd1498Szrj 	    {
133038fd1498Szrj 	      if ((*path)[i]->e->src == bb->loop_father->header
133138fd1498Szrj 		  && (!loop_exit_edge_p (bb->loop_father, e2)
133238fd1498Szrj 		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
133338fd1498Szrj 		break;
133438fd1498Szrj 	    }
133538fd1498Szrj 
133638fd1498Szrj 	  if (i != path->length ())
133738fd1498Szrj 	    continue;
133838fd1498Szrj 
133938fd1498Szrj 	  /* Loop parallelization can be confused by the result of
134038fd1498Szrj 	     threading through the loop exit test back into the loop.
134138fd1498Szrj 	     However, theading those jumps seems to help other codes.
134238fd1498Szrj 
134338fd1498Szrj 	     I have been unable to find anything related to the shape of
134438fd1498Szrj 	     the CFG, the contents of the affected blocks, etc which would
134538fd1498Szrj 	     allow a more sensible test than what we're using below which
134638fd1498Szrj 	     merely avoids the optimization when parallelizing loops.  */
134738fd1498Szrj 	  if (flag_tree_parallelize_loops > 1)
134838fd1498Szrj 	    {
134938fd1498Szrj 	      for (i = 1; i < path->length (); i++)
135038fd1498Szrj 	        if (bb->loop_father == e2->src->loop_father
135138fd1498Szrj 		    && loop_exits_from_bb_p (bb->loop_father,
135238fd1498Szrj 					     (*path)[i]->e->src)
135338fd1498Szrj 		    && !loop_exit_edge_p (bb->loop_father, e2))
135438fd1498Szrj 		  break;
135538fd1498Szrj 
135638fd1498Szrj 	      if (i != path->length ())
135738fd1498Szrj 		{
135838fd1498Szrj 		  delete_jump_thread_path (path);
135938fd1498Szrj 		  e->aux = NULL;
136038fd1498Szrj 		  continue;
136138fd1498Szrj 		}
136238fd1498Szrj 	    }
136338fd1498Szrj 	}
136438fd1498Szrj 
136538fd1498Szrj       /* Insert the outgoing edge into the hash table if it is not
136638fd1498Szrj 	 already in the hash table.  */
136738fd1498Szrj       lookup_redirection_data (e, INSERT);
136838fd1498Szrj 
136938fd1498Szrj       /* When we have thread paths through a common joiner with different
137038fd1498Szrj 	 final destinations, then we may need corrections to deal with
137138fd1498Szrj 	 profile insanities.  See the big comment before compute_path_counts.  */
137238fd1498Szrj       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
137338fd1498Szrj 	{
137438fd1498Szrj 	  if (!last)
137538fd1498Szrj 	    last = e2;
137638fd1498Szrj 	  else if (e2 != last)
137738fd1498Szrj 	    local_info.need_profile_correction = true;
137838fd1498Szrj 	}
137938fd1498Szrj     }
138038fd1498Szrj 
138138fd1498Szrj   /* We do not update dominance info.  */
138238fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
138338fd1498Szrj 
138438fd1498Szrj   /* We know we only thread through the loop header to loop exits.
138538fd1498Szrj      Let the basic block duplication hook know we are not creating
138638fd1498Szrj      a multiple entry loop.  */
138738fd1498Szrj   if (noloop_only
138838fd1498Szrj       && bb == bb->loop_father->header)
138938fd1498Szrj     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
139038fd1498Szrj 
139138fd1498Szrj   /* Now create duplicates of BB.
139238fd1498Szrj 
139338fd1498Szrj      Note that for a block with a high outgoing degree we can waste
139438fd1498Szrj      a lot of time and memory creating and destroying useless edges.
139538fd1498Szrj 
139638fd1498Szrj      So we first duplicate BB and remove the control structure at the
139738fd1498Szrj      tail of the duplicate as well as all outgoing edges from the
139838fd1498Szrj      duplicate.  We then use that duplicate block as a template for
139938fd1498Szrj      the rest of the duplicates.  */
140038fd1498Szrj   local_info.template_block = NULL;
140138fd1498Szrj   local_info.bb = bb;
140238fd1498Szrj   local_info.jumps_threaded = false;
140338fd1498Szrj   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
140438fd1498Szrj 			    (&local_info);
140538fd1498Szrj 
140638fd1498Szrj   /* The template does not have an outgoing edge.  Create that outgoing
140738fd1498Szrj      edge and update PHI nodes as the edge's target as necessary.
140838fd1498Szrj 
140938fd1498Szrj      We do this after creating all the duplicates to avoid creating
141038fd1498Szrj      unnecessary edges.  */
141138fd1498Szrj   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
141238fd1498Szrj 			    (&local_info);
141338fd1498Szrj 
141438fd1498Szrj   /* The hash table traversals above created the duplicate blocks (and the
141538fd1498Szrj      statements within the duplicate blocks).  This loop creates PHI nodes for
141638fd1498Szrj      the duplicated blocks and redirects the incoming edges into BB to reach
141738fd1498Szrj      the duplicates of BB.  */
141838fd1498Szrj   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
141938fd1498Szrj 			    (&local_info);
142038fd1498Szrj 
142138fd1498Szrj   /* Done with this block.  Clear REDIRECTION_DATA.  */
142238fd1498Szrj   delete redirection_data;
142338fd1498Szrj   redirection_data = NULL;
142438fd1498Szrj 
142538fd1498Szrj   if (noloop_only
142638fd1498Szrj       && bb == bb->loop_father->header)
142738fd1498Szrj     set_loop_copy (bb->loop_father, NULL);
142838fd1498Szrj 
142938fd1498Szrj   BITMAP_FREE (local_info.duplicate_blocks);
143038fd1498Szrj   local_info.duplicate_blocks = NULL;
143138fd1498Szrj 
143238fd1498Szrj   /* Indicate to our caller whether or not any jumps were threaded.  */
143338fd1498Szrj   return local_info.jumps_threaded;
143438fd1498Szrj }
143538fd1498Szrj 
143638fd1498Szrj /* Wrapper for thread_block_1 so that we can first handle jump
143738fd1498Szrj    thread paths which do not involve copying joiner blocks, then
143838fd1498Szrj    handle jump thread paths which have joiner blocks.
143938fd1498Szrj 
144038fd1498Szrj    By doing things this way we can be as aggressive as possible and
144138fd1498Szrj    not worry that copying a joiner block will create a jump threading
144238fd1498Szrj    opportunity.  */
144338fd1498Szrj 
144438fd1498Szrj static bool
thread_block(basic_block bb,bool noloop_only)144538fd1498Szrj thread_block (basic_block bb, bool noloop_only)
144638fd1498Szrj {
144738fd1498Szrj   bool retval;
144838fd1498Szrj   retval = thread_block_1 (bb, noloop_only, false);
144938fd1498Szrj   retval |= thread_block_1 (bb, noloop_only, true);
145038fd1498Szrj   return retval;
145138fd1498Szrj }
145238fd1498Szrj 
145338fd1498Szrj /* Callback for dfs_enumerate_from.  Returns true if BB is different
145438fd1498Szrj    from STOP and DBDS_CE_STOP.  */
145538fd1498Szrj 
145638fd1498Szrj static basic_block dbds_ce_stop;
145738fd1498Szrj static bool
dbds_continue_enumeration_p(const_basic_block bb,const void * stop)145838fd1498Szrj dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
145938fd1498Szrj {
146038fd1498Szrj   return (bb != (const_basic_block) stop
146138fd1498Szrj 	  && bb != dbds_ce_stop);
146238fd1498Szrj }
146338fd1498Szrj 
146438fd1498Szrj /* Evaluates the dominance relationship of latch of the LOOP and BB, and
146538fd1498Szrj    returns the state.  */
146638fd1498Szrj 
146738fd1498Szrj enum bb_dom_status
determine_bb_domination_status(struct loop * loop,basic_block bb)146838fd1498Szrj determine_bb_domination_status (struct loop *loop, basic_block bb)
146938fd1498Szrj {
147038fd1498Szrj   basic_block *bblocks;
147138fd1498Szrj   unsigned nblocks, i;
147238fd1498Szrj   bool bb_reachable = false;
147338fd1498Szrj   edge_iterator ei;
147438fd1498Szrj   edge e;
147538fd1498Szrj 
147638fd1498Szrj   /* This function assumes BB is a successor of LOOP->header.
147738fd1498Szrj      If that is not the case return DOMST_NONDOMINATING which
147838fd1498Szrj      is always safe.  */
147938fd1498Szrj     {
148038fd1498Szrj       bool ok = false;
148138fd1498Szrj 
148238fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
148338fd1498Szrj 	{
148438fd1498Szrj      	  if (e->src == loop->header)
148538fd1498Szrj 	    {
148638fd1498Szrj 	      ok = true;
148738fd1498Szrj 	      break;
148838fd1498Szrj 	    }
148938fd1498Szrj 	}
149038fd1498Szrj 
149138fd1498Szrj       if (!ok)
149238fd1498Szrj 	return DOMST_NONDOMINATING;
149338fd1498Szrj     }
149438fd1498Szrj 
149538fd1498Szrj   if (bb == loop->latch)
149638fd1498Szrj     return DOMST_DOMINATING;
149738fd1498Szrj 
149838fd1498Szrj   /* Check that BB dominates LOOP->latch, and that it is back-reachable
149938fd1498Szrj      from it.  */
150038fd1498Szrj 
150138fd1498Szrj   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
150238fd1498Szrj   dbds_ce_stop = loop->header;
150338fd1498Szrj   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
150438fd1498Szrj 				bblocks, loop->num_nodes, bb);
150538fd1498Szrj   for (i = 0; i < nblocks; i++)
150638fd1498Szrj     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
150738fd1498Szrj       {
150838fd1498Szrj 	if (e->src == loop->header)
150938fd1498Szrj 	  {
151038fd1498Szrj 	    free (bblocks);
151138fd1498Szrj 	    return DOMST_NONDOMINATING;
151238fd1498Szrj 	  }
151338fd1498Szrj 	if (e->src == bb)
151438fd1498Szrj 	  bb_reachable = true;
151538fd1498Szrj       }
151638fd1498Szrj 
151738fd1498Szrj   free (bblocks);
151838fd1498Szrj   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
151938fd1498Szrj }
152038fd1498Szrj 
152138fd1498Szrj /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
152238fd1498Szrj    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
152338fd1498Szrj    to the inside of the loop.  */
152438fd1498Szrj 
152538fd1498Szrj static bool
thread_through_loop_header(struct loop * loop,bool may_peel_loop_headers)152638fd1498Szrj thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
152738fd1498Szrj {
152838fd1498Szrj   basic_block header = loop->header;
152938fd1498Szrj   edge e, tgt_edge, latch = loop_latch_edge (loop);
153038fd1498Szrj   edge_iterator ei;
153138fd1498Szrj   basic_block tgt_bb, atgt_bb;
153238fd1498Szrj   enum bb_dom_status domst;
153338fd1498Szrj 
153438fd1498Szrj   /* We have already threaded through headers to exits, so all the threading
153538fd1498Szrj      requests now are to the inside of the loop.  We need to avoid creating
153638fd1498Szrj      irreducible regions (i.e., loops with more than one entry block), and
153738fd1498Szrj      also loop with several latch edges, or new subloops of the loop (although
153838fd1498Szrj      there are cases where it might be appropriate, it is difficult to decide,
153938fd1498Szrj      and doing it wrongly may confuse other optimizers).
154038fd1498Szrj 
154138fd1498Szrj      We could handle more general cases here.  However, the intention is to
154238fd1498Szrj      preserve some information about the loop, which is impossible if its
154338fd1498Szrj      structure changes significantly, in a way that is not well understood.
154438fd1498Szrj      Thus we only handle few important special cases, in which also updating
154538fd1498Szrj      of the loop-carried information should be feasible:
154638fd1498Szrj 
154738fd1498Szrj      1) Propagation of latch edge to a block that dominates the latch block
154838fd1498Szrj 	of a loop.  This aims to handle the following idiom:
154938fd1498Szrj 
155038fd1498Szrj 	first = 1;
155138fd1498Szrj 	while (1)
155238fd1498Szrj 	  {
155338fd1498Szrj 	    if (first)
155438fd1498Szrj 	      initialize;
155538fd1498Szrj 	    first = 0;
155638fd1498Szrj 	    body;
155738fd1498Szrj 	  }
155838fd1498Szrj 
155938fd1498Szrj 	After threading the latch edge, this becomes
156038fd1498Szrj 
156138fd1498Szrj 	first = 1;
156238fd1498Szrj 	if (first)
156338fd1498Szrj 	  initialize;
156438fd1498Szrj 	while (1)
156538fd1498Szrj 	  {
156638fd1498Szrj 	    first = 0;
156738fd1498Szrj 	    body;
156838fd1498Szrj 	  }
156938fd1498Szrj 
157038fd1498Szrj 	The original header of the loop is moved out of it, and we may thread
157138fd1498Szrj 	the remaining edges through it without further constraints.
157238fd1498Szrj 
157338fd1498Szrj      2) All entry edges are propagated to a single basic block that dominates
157438fd1498Szrj 	the latch block of the loop.  This aims to handle the following idiom
157538fd1498Szrj 	(normally created for "for" loops):
157638fd1498Szrj 
157738fd1498Szrj 	i = 0;
157838fd1498Szrj 	while (1)
157938fd1498Szrj 	  {
158038fd1498Szrj 	    if (i >= 100)
158138fd1498Szrj 	      break;
158238fd1498Szrj 	    body;
158338fd1498Szrj 	    i++;
158438fd1498Szrj 	  }
158538fd1498Szrj 
158638fd1498Szrj 	This becomes
158738fd1498Szrj 
158838fd1498Szrj 	i = 0;
158938fd1498Szrj 	while (1)
159038fd1498Szrj 	  {
159138fd1498Szrj 	    body;
159238fd1498Szrj 	    i++;
159338fd1498Szrj 	    if (i >= 100)
159438fd1498Szrj 	      break;
159538fd1498Szrj 	  }
159638fd1498Szrj      */
159738fd1498Szrj 
159838fd1498Szrj   /* Threading through the header won't improve the code if the header has just
159938fd1498Szrj      one successor.  */
160038fd1498Szrj   if (single_succ_p (header))
160138fd1498Szrj     goto fail;
160238fd1498Szrj 
160338fd1498Szrj   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
160438fd1498Szrj     goto fail;
160538fd1498Szrj   else
160638fd1498Szrj     {
160738fd1498Szrj       tgt_bb = NULL;
160838fd1498Szrj       tgt_edge = NULL;
160938fd1498Szrj       FOR_EACH_EDGE (e, ei, header->preds)
161038fd1498Szrj 	{
161138fd1498Szrj 	  if (!e->aux)
161238fd1498Szrj 	    {
161338fd1498Szrj 	      if (e == latch)
161438fd1498Szrj 		continue;
161538fd1498Szrj 
161638fd1498Szrj 	      /* If latch is not threaded, and there is a header
161738fd1498Szrj 		 edge that is not threaded, we would create loop
161838fd1498Szrj 		 with multiple entries.  */
161938fd1498Szrj 	      goto fail;
162038fd1498Szrj 	    }
162138fd1498Szrj 
162238fd1498Szrj 	  vec<jump_thread_edge *> *path = THREAD_PATH (e);
162338fd1498Szrj 
162438fd1498Szrj 	  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
162538fd1498Szrj 	    goto fail;
162638fd1498Szrj 	  tgt_edge = (*path)[1]->e;
162738fd1498Szrj 	  atgt_bb = tgt_edge->dest;
162838fd1498Szrj 	  if (!tgt_bb)
162938fd1498Szrj 	    tgt_bb = atgt_bb;
163038fd1498Szrj 	  /* Two targets of threading would make us create loop
163138fd1498Szrj 	     with multiple entries.  */
163238fd1498Szrj 	  else if (tgt_bb != atgt_bb)
163338fd1498Szrj 	    goto fail;
163438fd1498Szrj 	}
163538fd1498Szrj 
163638fd1498Szrj       if (!tgt_bb)
163738fd1498Szrj 	{
163838fd1498Szrj 	  /* There are no threading requests.  */
163938fd1498Szrj 	  return false;
164038fd1498Szrj 	}
164138fd1498Szrj 
164238fd1498Szrj       /* Redirecting to empty loop latch is useless.  */
164338fd1498Szrj       if (tgt_bb == loop->latch
164438fd1498Szrj 	  && empty_block_p (loop->latch))
164538fd1498Szrj 	goto fail;
164638fd1498Szrj     }
164738fd1498Szrj 
164838fd1498Szrj   /* The target block must dominate the loop latch, otherwise we would be
164938fd1498Szrj      creating a subloop.  */
165038fd1498Szrj   domst = determine_bb_domination_status (loop, tgt_bb);
165138fd1498Szrj   if (domst == DOMST_NONDOMINATING)
165238fd1498Szrj     goto fail;
165338fd1498Szrj   if (domst == DOMST_LOOP_BROKEN)
165438fd1498Szrj     {
165538fd1498Szrj       /* If the loop ceased to exist, mark it as such, and thread through its
165638fd1498Szrj 	 original header.  */
165738fd1498Szrj       mark_loop_for_removal (loop);
165838fd1498Szrj       return thread_block (header, false);
165938fd1498Szrj     }
166038fd1498Szrj 
166138fd1498Szrj   if (tgt_bb->loop_father->header == tgt_bb)
166238fd1498Szrj     {
166338fd1498Szrj       /* If the target of the threading is a header of a subloop, we need
166438fd1498Szrj 	 to create a preheader for it, so that the headers of the two loops
166538fd1498Szrj 	 do not merge.  */
166638fd1498Szrj       if (EDGE_COUNT (tgt_bb->preds) > 2)
166738fd1498Szrj 	{
166838fd1498Szrj 	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
166938fd1498Szrj 	  gcc_assert (tgt_bb != NULL);
167038fd1498Szrj 	}
167138fd1498Szrj       else
167238fd1498Szrj 	tgt_bb = split_edge (tgt_edge);
167338fd1498Szrj     }
167438fd1498Szrj 
167538fd1498Szrj   basic_block new_preheader;
167638fd1498Szrj 
167738fd1498Szrj   /* Now consider the case entry edges are redirected to the new entry
167838fd1498Szrj      block.  Remember one entry edge, so that we can find the new
167938fd1498Szrj      preheader (its destination after threading).  */
168038fd1498Szrj   FOR_EACH_EDGE (e, ei, header->preds)
168138fd1498Szrj     {
168238fd1498Szrj       if (e->aux)
168338fd1498Szrj 	break;
168438fd1498Szrj     }
168538fd1498Szrj 
168638fd1498Szrj   /* The duplicate of the header is the new preheader of the loop.  Ensure
168738fd1498Szrj      that it is placed correctly in the loop hierarchy.  */
168838fd1498Szrj   set_loop_copy (loop, loop_outer (loop));
168938fd1498Szrj 
169038fd1498Szrj   thread_block (header, false);
169138fd1498Szrj   set_loop_copy (loop, NULL);
169238fd1498Szrj   new_preheader = e->dest;
169338fd1498Szrj 
169438fd1498Szrj   /* Create the new latch block.  This is always necessary, as the latch
169538fd1498Szrj      must have only a single successor, but the original header had at
169638fd1498Szrj      least two successors.  */
169738fd1498Szrj   loop->latch = NULL;
169838fd1498Szrj   mfb_kj_edge = single_succ_edge (new_preheader);
169938fd1498Szrj   loop->header = mfb_kj_edge->dest;
170038fd1498Szrj   latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
170138fd1498Szrj   loop->header = latch->dest;
170238fd1498Szrj   loop->latch = latch->src;
170338fd1498Szrj   return true;
170438fd1498Szrj 
170538fd1498Szrj fail:
170638fd1498Szrj   /* We failed to thread anything.  Cancel the requests.  */
170738fd1498Szrj   FOR_EACH_EDGE (e, ei, header->preds)
170838fd1498Szrj     {
170938fd1498Szrj       vec<jump_thread_edge *> *path = THREAD_PATH (e);
171038fd1498Szrj 
171138fd1498Szrj       if (path)
171238fd1498Szrj 	{
171338fd1498Szrj 	  delete_jump_thread_path (path);
171438fd1498Szrj 	  e->aux = NULL;
171538fd1498Szrj 	}
171638fd1498Szrj     }
171738fd1498Szrj   return false;
171838fd1498Szrj }
171938fd1498Szrj 
172038fd1498Szrj /* E1 and E2 are edges into the same basic block.  Return TRUE if the
172138fd1498Szrj    PHI arguments associated with those edges are equal or there are no
172238fd1498Szrj    PHI arguments, otherwise return FALSE.  */
172338fd1498Szrj 
172438fd1498Szrj static bool
phi_args_equal_on_edges(edge e1,edge e2)172538fd1498Szrj phi_args_equal_on_edges (edge e1, edge e2)
172638fd1498Szrj {
172738fd1498Szrj   gphi_iterator gsi;
172838fd1498Szrj   int indx1 = e1->dest_idx;
172938fd1498Szrj   int indx2 = e2->dest_idx;
173038fd1498Szrj 
173138fd1498Szrj   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
173238fd1498Szrj     {
173338fd1498Szrj       gphi *phi = gsi.phi ();
173438fd1498Szrj 
173538fd1498Szrj       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
173638fd1498Szrj 			    gimple_phi_arg_def (phi, indx2), 0))
173738fd1498Szrj 	return false;
173838fd1498Szrj     }
173938fd1498Szrj   return true;
174038fd1498Szrj }
174138fd1498Szrj 
174238fd1498Szrj /* Return the number of non-debug statements and non-virtual PHIs in a
174338fd1498Szrj    block.  */
174438fd1498Szrj 
174538fd1498Szrj static unsigned int
count_stmts_and_phis_in_block(basic_block bb)174638fd1498Szrj count_stmts_and_phis_in_block (basic_block bb)
174738fd1498Szrj {
174838fd1498Szrj   unsigned int num_stmts = 0;
174938fd1498Szrj 
175038fd1498Szrj   gphi_iterator gpi;
175138fd1498Szrj   for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
175238fd1498Szrj     if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
175338fd1498Szrj       num_stmts++;
175438fd1498Szrj 
175538fd1498Szrj   gimple_stmt_iterator gsi;
175638fd1498Szrj   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
175738fd1498Szrj     {
175838fd1498Szrj       gimple *stmt = gsi_stmt (gsi);
175938fd1498Szrj       if (!is_gimple_debug (stmt))
176038fd1498Szrj         num_stmts++;
176138fd1498Szrj     }
176238fd1498Szrj 
176338fd1498Szrj   return num_stmts;
176438fd1498Szrj }
176538fd1498Szrj 
176638fd1498Szrj 
176738fd1498Szrj /* Walk through the registered jump threads and convert them into a
176838fd1498Szrj    form convenient for this pass.
176938fd1498Szrj 
177038fd1498Szrj    Any block which has incoming edges threaded to outgoing edges
177138fd1498Szrj    will have its entry in THREADED_BLOCK set.
177238fd1498Szrj 
177338fd1498Szrj    Any threaded edge will have its new outgoing edge stored in the
177438fd1498Szrj    original edge's AUX field.
177538fd1498Szrj 
177638fd1498Szrj    This form avoids the need to walk all the edges in the CFG to
177738fd1498Szrj    discover blocks which need processing and avoids unnecessary
177838fd1498Szrj    hash table lookups to map from threaded edge to new target.  */
177938fd1498Szrj 
178038fd1498Szrj static void
mark_threaded_blocks(bitmap threaded_blocks)178138fd1498Szrj mark_threaded_blocks (bitmap threaded_blocks)
178238fd1498Szrj {
178338fd1498Szrj   unsigned int i;
178438fd1498Szrj   bitmap_iterator bi;
178538fd1498Szrj   auto_bitmap tmp;
178638fd1498Szrj   basic_block bb;
178738fd1498Szrj   edge e;
178838fd1498Szrj   edge_iterator ei;
178938fd1498Szrj 
179038fd1498Szrj   /* It is possible to have jump threads in which one is a subpath
179138fd1498Szrj      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
179238fd1498Szrj      block and (B, C), (C, D) where no joiner block exists.
179338fd1498Szrj 
179438fd1498Szrj      When this occurs ignore the jump thread request with the joiner
179538fd1498Szrj      block.  It's totally subsumed by the simpler jump thread request.
179638fd1498Szrj 
179738fd1498Szrj      This results in less block copying, simpler CFGs.  More importantly,
179838fd1498Szrj      when we duplicate the joiner block, B, in this case we will create
179938fd1498Szrj      a new threading opportunity that we wouldn't be able to optimize
180038fd1498Szrj      until the next jump threading iteration.
180138fd1498Szrj 
180238fd1498Szrj      So first convert the jump thread requests which do not require a
180338fd1498Szrj      joiner block.  */
180438fd1498Szrj   for (i = 0; i < paths.length (); i++)
180538fd1498Szrj     {
180638fd1498Szrj       vec<jump_thread_edge *> *path = paths[i];
180738fd1498Szrj 
180838fd1498Szrj       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
180938fd1498Szrj 	{
181038fd1498Szrj 	  edge e = (*path)[0]->e;
181138fd1498Szrj 	  e->aux = (void *)path;
181238fd1498Szrj 	  bitmap_set_bit (tmp, e->dest->index);
181338fd1498Szrj 	}
181438fd1498Szrj     }
181538fd1498Szrj 
181638fd1498Szrj   /* Now iterate again, converting cases where we want to thread
181738fd1498Szrj      through a joiner block, but only if no other edge on the path
181838fd1498Szrj      already has a jump thread attached to it.  We do this in two passes,
181938fd1498Szrj      to avoid situations where the order in the paths vec can hide overlapping
182038fd1498Szrj      threads (the path is recorded on the incoming edge, so we would miss
182138fd1498Szrj      cases where the second path starts at a downstream edge on the same
182238fd1498Szrj      path).  First record all joiner paths, deleting any in the unexpected
182338fd1498Szrj      case where there is already a path for that incoming edge.  */
182438fd1498Szrj   for (i = 0; i < paths.length ();)
182538fd1498Szrj     {
182638fd1498Szrj       vec<jump_thread_edge *> *path = paths[i];
182738fd1498Szrj 
182838fd1498Szrj       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
182938fd1498Szrj 	{
183038fd1498Szrj 	  /* Attach the path to the starting edge if none is yet recorded.  */
183138fd1498Szrj 	  if ((*path)[0]->e->aux == NULL)
183238fd1498Szrj 	    {
183338fd1498Szrj 	      (*path)[0]->e->aux = path;
183438fd1498Szrj 	      i++;
183538fd1498Szrj 	    }
183638fd1498Szrj 	  else
183738fd1498Szrj 	    {
183838fd1498Szrj 	      paths.unordered_remove (i);
183938fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
184038fd1498Szrj 		dump_jump_thread_path (dump_file, *path, false);
184138fd1498Szrj 	      delete_jump_thread_path (path);
184238fd1498Szrj 	    }
184338fd1498Szrj 	}
184438fd1498Szrj       else
184538fd1498Szrj 	{
184638fd1498Szrj 	  i++;
184738fd1498Szrj 	}
184838fd1498Szrj     }
184938fd1498Szrj 
185038fd1498Szrj   /* Second, look for paths that have any other jump thread attached to
185138fd1498Szrj      them, and either finish converting them or cancel them.  */
185238fd1498Szrj   for (i = 0; i < paths.length ();)
185338fd1498Szrj     {
185438fd1498Szrj       vec<jump_thread_edge *> *path = paths[i];
185538fd1498Szrj       edge e = (*path)[0]->e;
185638fd1498Szrj 
185738fd1498Szrj       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
185838fd1498Szrj 	{
185938fd1498Szrj 	  unsigned int j;
186038fd1498Szrj 	  for (j = 1; j < path->length (); j++)
186138fd1498Szrj 	    if ((*path)[j]->e->aux != NULL)
186238fd1498Szrj 	      break;
186338fd1498Szrj 
186438fd1498Szrj 	  /* If we iterated through the entire path without exiting the loop,
186538fd1498Szrj 	     then we are good to go, record it.  */
186638fd1498Szrj 	  if (j == path->length ())
186738fd1498Szrj 	    {
186838fd1498Szrj 	      bitmap_set_bit (tmp, e->dest->index);
186938fd1498Szrj 	      i++;
187038fd1498Szrj 	    }
187138fd1498Szrj 	  else
187238fd1498Szrj 	    {
187338fd1498Szrj 	      e->aux = NULL;
187438fd1498Szrj 	      paths.unordered_remove (i);
187538fd1498Szrj 	      if (dump_file && (dump_flags & TDF_DETAILS))
187638fd1498Szrj 		dump_jump_thread_path (dump_file, *path, false);
187738fd1498Szrj 	      delete_jump_thread_path (path);
187838fd1498Szrj 	    }
187938fd1498Szrj 	}
188038fd1498Szrj       else
188138fd1498Szrj 	{
188238fd1498Szrj 	  i++;
188338fd1498Szrj 	}
188438fd1498Szrj     }
188538fd1498Szrj 
188638fd1498Szrj   /* When optimizing for size, prune all thread paths where statement
188738fd1498Szrj      duplication is necessary.
188838fd1498Szrj 
188938fd1498Szrj      We walk the jump thread path looking for copied blocks.  There's
189038fd1498Szrj      two types of copied blocks.
189138fd1498Szrj 
189238fd1498Szrj        EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
189338fd1498Szrj        cancel the jump threading request when optimizing for size.
189438fd1498Szrj 
189538fd1498Szrj        EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
189638fd1498Szrj        will be killed by threading.  If threading does not kill all of
189738fd1498Szrj        its statements, then we should cancel the jump threading request
189838fd1498Szrj        when optimizing for size.  */
189938fd1498Szrj   if (optimize_function_for_size_p (cfun))
190038fd1498Szrj     {
190138fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
190238fd1498Szrj 	{
190338fd1498Szrj 	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
190438fd1498Szrj 	    if (e->aux)
190538fd1498Szrj 	      {
190638fd1498Szrj 		vec<jump_thread_edge *> *path = THREAD_PATH (e);
190738fd1498Szrj 
190838fd1498Szrj 		unsigned int j;
190938fd1498Szrj 		for (j = 1; j < path->length (); j++)
191038fd1498Szrj 		  {
191138fd1498Szrj 		    bb = (*path)[j]->e->src;
191238fd1498Szrj 		    if (redirection_block_p (bb))
191338fd1498Szrj 		      ;
191438fd1498Szrj 		    else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
191538fd1498Szrj 			     || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
191638fd1498Szrj 			         && (count_stmts_and_phis_in_block (bb)
191738fd1498Szrj 				     != estimate_threading_killed_stmts (bb))))
191838fd1498Szrj 		      break;
191938fd1498Szrj 		  }
192038fd1498Szrj 
192138fd1498Szrj 		if (j != path->length ())
192238fd1498Szrj 		  {
192338fd1498Szrj 		    if (dump_file && (dump_flags & TDF_DETAILS))
192438fd1498Szrj 		      dump_jump_thread_path (dump_file, *path, 0);
192538fd1498Szrj 		    delete_jump_thread_path (path);
192638fd1498Szrj 		    e->aux = NULL;
192738fd1498Szrj 		  }
192838fd1498Szrj 		else
192938fd1498Szrj 		  bitmap_set_bit (threaded_blocks, i);
193038fd1498Szrj 	      }
193138fd1498Szrj 	}
193238fd1498Szrj     }
193338fd1498Szrj   else
193438fd1498Szrj     bitmap_copy (threaded_blocks, tmp);
193538fd1498Szrj 
193638fd1498Szrj   /* If we have a joiner block (J) which has two successors S1 and S2 and
193738fd1498Szrj      we are threading though S1 and the final destination of the thread
193838fd1498Szrj      is S2, then we must verify that any PHI nodes in S2 have the same
193938fd1498Szrj      PHI arguments for the edge J->S2 and J->S1->...->S2.
194038fd1498Szrj 
194138fd1498Szrj      We used to detect this prior to registering the jump thread, but
194238fd1498Szrj      that prohibits propagation of edge equivalences into non-dominated
194338fd1498Szrj      PHI nodes as the equivalency test might occur before propagation.
194438fd1498Szrj 
194538fd1498Szrj      This must also occur after we truncate any jump threading paths
194638fd1498Szrj      as this scenario may only show up after truncation.
194738fd1498Szrj 
194838fd1498Szrj      This works for now, but will need improvement as part of the FSA
194938fd1498Szrj      optimization.
195038fd1498Szrj 
195138fd1498Szrj      Note since we've moved the thread request data to the edges,
195238fd1498Szrj      we have to iterate on those rather than the threaded_edges vector.  */
195338fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
195438fd1498Szrj     {
195538fd1498Szrj       bb = BASIC_BLOCK_FOR_FN (cfun, i);
195638fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
195738fd1498Szrj 	{
195838fd1498Szrj 	  if (e->aux)
195938fd1498Szrj 	    {
196038fd1498Szrj 	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
196138fd1498Szrj 	      bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
196238fd1498Szrj 
196338fd1498Szrj 	      if (have_joiner)
196438fd1498Szrj 		{
196538fd1498Szrj 		  basic_block joiner = e->dest;
196638fd1498Szrj 		  edge final_edge = path->last ()->e;
196738fd1498Szrj 		  basic_block final_dest = final_edge->dest;
196838fd1498Szrj 		  edge e2 = find_edge (joiner, final_dest);
196938fd1498Szrj 
197038fd1498Szrj 		  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
197138fd1498Szrj 		    {
197238fd1498Szrj 		      delete_jump_thread_path (path);
197338fd1498Szrj 		      e->aux = NULL;
197438fd1498Szrj 		    }
197538fd1498Szrj 		}
197638fd1498Szrj 	    }
197738fd1498Szrj 	}
197838fd1498Szrj     }
197938fd1498Szrj 
198038fd1498Szrj   /* Look for jump threading paths which cross multiple loop headers.
198138fd1498Szrj 
198238fd1498Szrj      The code to thread through loop headers will change the CFG in ways
198338fd1498Szrj      that invalidate the cached loop iteration information.  So we must
198438fd1498Szrj      detect that case and wipe the cached information.  */
198538fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
198638fd1498Szrj     {
198738fd1498Szrj       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
198838fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
198938fd1498Szrj 	{
199038fd1498Szrj 	  if (e->aux)
199138fd1498Szrj 	    {
199238fd1498Szrj 	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
199338fd1498Szrj 
199438fd1498Szrj 	      for (unsigned int i = 0, crossed_headers = 0;
199538fd1498Szrj 		   i < path->length ();
199638fd1498Szrj 		   i++)
199738fd1498Szrj 		{
199838fd1498Szrj 		  basic_block dest = (*path)[i]->e->dest;
199938fd1498Szrj 		  basic_block src = (*path)[i]->e->src;
200038fd1498Szrj 		  /* If we enter a loop.  */
200138fd1498Szrj 		  if (flow_loop_nested_p (src->loop_father, dest->loop_father))
200238fd1498Szrj 		    ++crossed_headers;
200338fd1498Szrj 		  /* If we step from a block outside an irreducible region
200438fd1498Szrj 		     to a block inside an irreducible region, then we have
200538fd1498Szrj 		     crossed into a loop.  */
200638fd1498Szrj 		  else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
200738fd1498Szrj 			   && (dest->flags & BB_IRREDUCIBLE_LOOP))
200838fd1498Szrj 		      ++crossed_headers;
200938fd1498Szrj 		  if (crossed_headers > 1)
201038fd1498Szrj 		    {
201138fd1498Szrj 		      vect_free_loop_info_assumptions
201238fd1498Szrj 			((*path)[path->length () - 1]->e->dest->loop_father);
201338fd1498Szrj 		      break;
201438fd1498Szrj 		    }
201538fd1498Szrj 		}
201638fd1498Szrj 	    }
201738fd1498Szrj 	}
201838fd1498Szrj     }
201938fd1498Szrj }
202038fd1498Szrj 
202138fd1498Szrj 
202238fd1498Szrj /* Verify that the REGION is a valid jump thread.  A jump thread is a special
202338fd1498Szrj    case of SEME Single Entry Multiple Exits region in which all nodes in the
202438fd1498Szrj    REGION have exactly one incoming edge.  The only exception is the first block
202538fd1498Szrj    that may not have been connected to the rest of the cfg yet.  */
202638fd1498Szrj 
202738fd1498Szrj DEBUG_FUNCTION void
verify_jump_thread(basic_block * region,unsigned n_region)202838fd1498Szrj verify_jump_thread (basic_block *region, unsigned n_region)
202938fd1498Szrj {
203038fd1498Szrj   for (unsigned i = 0; i < n_region; i++)
203138fd1498Szrj     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
203238fd1498Szrj }
203338fd1498Szrj 
203438fd1498Szrj /* Return true when BB is one of the first N items in BBS.  */
203538fd1498Szrj 
203638fd1498Szrj static inline bool
bb_in_bbs(basic_block bb,basic_block * bbs,int n)203738fd1498Szrj bb_in_bbs (basic_block bb, basic_block *bbs, int n)
203838fd1498Szrj {
203938fd1498Szrj   for (int i = 0; i < n; i++)
204038fd1498Szrj     if (bb == bbs[i])
204138fd1498Szrj       return true;
204238fd1498Szrj 
204338fd1498Szrj   return false;
204438fd1498Szrj }
204538fd1498Szrj 
204638fd1498Szrj /* Duplicates a jump-thread path of N_REGION basic blocks.
204738fd1498Szrj    The ENTRY edge is redirected to the duplicate of the region.
204838fd1498Szrj 
204938fd1498Szrj    Remove the last conditional statement in the last basic block in the REGION,
205038fd1498Szrj    and create a single fallthru edge pointing to the same destination as the
205138fd1498Szrj    EXIT edge.
205238fd1498Szrj 
205338fd1498Szrj    Returns false if it is unable to copy the region, true otherwise.  */
205438fd1498Szrj 
205538fd1498Szrj static bool
duplicate_thread_path(edge entry,edge exit,basic_block * region,unsigned n_region)205638fd1498Szrj duplicate_thread_path (edge entry, edge exit, basic_block *region,
205738fd1498Szrj 		       unsigned n_region)
205838fd1498Szrj {
205938fd1498Szrj   unsigned i;
206038fd1498Szrj   struct loop *loop = entry->dest->loop_father;
206138fd1498Szrj   edge exit_copy;
206238fd1498Szrj   edge redirected;
206338fd1498Szrj   profile_count curr_count;
206438fd1498Szrj 
206538fd1498Szrj   if (!can_copy_bbs_p (region, n_region))
206638fd1498Szrj     return false;
206738fd1498Szrj 
206838fd1498Szrj   /* Some sanity checking.  Note that we do not check for all possible
206938fd1498Szrj      missuses of the functions.  I.e. if you ask to copy something weird,
207038fd1498Szrj      it will work, but the state of structures probably will not be
207138fd1498Szrj      correct.  */
207238fd1498Szrj   for (i = 0; i < n_region; i++)
207338fd1498Szrj     {
207438fd1498Szrj       /* We do not handle subloops, i.e. all the blocks must belong to the
207538fd1498Szrj 	 same loop.  */
207638fd1498Szrj       if (region[i]->loop_father != loop)
207738fd1498Szrj 	return false;
207838fd1498Szrj     }
207938fd1498Szrj 
208038fd1498Szrj   initialize_original_copy_tables ();
208138fd1498Szrj 
208238fd1498Szrj   set_loop_copy (loop, loop);
208338fd1498Szrj 
208438fd1498Szrj   basic_block *region_copy = XNEWVEC (basic_block, n_region);
208538fd1498Szrj   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
208638fd1498Szrj 	    split_edge_bb_loc (entry), false);
208738fd1498Szrj 
208838fd1498Szrj   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
208938fd1498Szrj      following code ensures that all the edges exiting the jump-thread path are
209038fd1498Szrj      redirected back to the original code: these edges are exceptions
209138fd1498Szrj      invalidating the property that is propagated by executing all the blocks of
209238fd1498Szrj      the jump-thread path in order.  */
209338fd1498Szrj 
209438fd1498Szrj   curr_count = entry->count ();
209538fd1498Szrj 
209638fd1498Szrj   for (i = 0; i < n_region; i++)
209738fd1498Szrj     {
209838fd1498Szrj       edge e;
209938fd1498Szrj       edge_iterator ei;
210038fd1498Szrj       basic_block bb = region_copy[i];
210138fd1498Szrj 
210238fd1498Szrj       /* Watch inconsistent profile.  */
210338fd1498Szrj       if (curr_count > region[i]->count)
210438fd1498Szrj 	curr_count = region[i]->count;
210538fd1498Szrj       /* Scale current BB.  */
210638fd1498Szrj       if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
210738fd1498Szrj 	{
210838fd1498Szrj 	  /* In the middle of the path we only scale the frequencies.
210938fd1498Szrj 	     In last BB we need to update probabilities of outgoing edges
211038fd1498Szrj 	     because we know which one is taken at the threaded path.  */
211138fd1498Szrj 	  if (i + 1 != n_region)
211238fd1498Szrj 	    scale_bbs_frequencies_profile_count (region + i, 1,
211338fd1498Szrj 					         region[i]->count - curr_count,
211438fd1498Szrj 					         region[i]->count);
211538fd1498Szrj 	  else
211638fd1498Szrj 	    update_bb_profile_for_threading (region[i],
211738fd1498Szrj 					     curr_count,
211838fd1498Szrj 					     exit);
211938fd1498Szrj 	  scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
212038fd1498Szrj 					       region_copy[i]->count);
212138fd1498Szrj 	}
212238fd1498Szrj 
212338fd1498Szrj       if (single_succ_p (bb))
212438fd1498Szrj 	{
212538fd1498Szrj 	  /* Make sure the successor is the next node in the path.  */
212638fd1498Szrj 	  gcc_assert (i + 1 == n_region
212738fd1498Szrj 		      || region_copy[i + 1] == single_succ_edge (bb)->dest);
212838fd1498Szrj 	  if (i + 1 != n_region)
212938fd1498Szrj 	    {
213038fd1498Szrj 	      curr_count = single_succ_edge (bb)->count ();
213138fd1498Szrj 	    }
213238fd1498Szrj 	  continue;
213338fd1498Szrj 	}
213438fd1498Szrj 
213538fd1498Szrj       /* Special case the last block on the path: make sure that it does not
213638fd1498Szrj 	 jump back on the copied path, including back to itself.  */
213738fd1498Szrj       if (i + 1 == n_region)
213838fd1498Szrj 	{
213938fd1498Szrj 	  FOR_EACH_EDGE (e, ei, bb->succs)
214038fd1498Szrj 	    if (bb_in_bbs (e->dest, region_copy, n_region))
214138fd1498Szrj 	      {
214238fd1498Szrj 		basic_block orig = get_bb_original (e->dest);
214338fd1498Szrj 		if (orig)
214438fd1498Szrj 		  redirect_edge_and_branch_force (e, orig);
214538fd1498Szrj 	      }
214638fd1498Szrj 	  continue;
214738fd1498Szrj 	}
214838fd1498Szrj 
214938fd1498Szrj       /* Redirect all other edges jumping to non-adjacent blocks back to the
215038fd1498Szrj 	 original code.  */
215138fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
215238fd1498Szrj 	if (region_copy[i + 1] != e->dest)
215338fd1498Szrj 	  {
215438fd1498Szrj 	    basic_block orig = get_bb_original (e->dest);
215538fd1498Szrj 	    if (orig)
215638fd1498Szrj 	      redirect_edge_and_branch_force (e, orig);
215738fd1498Szrj 	  }
215838fd1498Szrj 	else
215938fd1498Szrj 	  {
216038fd1498Szrj 	    curr_count = e->count ();
216138fd1498Szrj 	  }
216238fd1498Szrj     }
216338fd1498Szrj 
216438fd1498Szrj 
216538fd1498Szrj   if (flag_checking)
216638fd1498Szrj     verify_jump_thread (region_copy, n_region);
216738fd1498Szrj 
216838fd1498Szrj   /* Remove the last branch in the jump thread path.  */
216938fd1498Szrj   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
217038fd1498Szrj 
217138fd1498Szrj   /* And fixup the flags on the single remaining edge.  */
217238fd1498Szrj   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
217338fd1498Szrj   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
217438fd1498Szrj   fix_e->flags |= EDGE_FALLTHRU;
217538fd1498Szrj 
217638fd1498Szrj   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
217738fd1498Szrj 
217838fd1498Szrj   if (e)
217938fd1498Szrj     {
218038fd1498Szrj       rescan_loop_exit (e, true, false);
218138fd1498Szrj       e->probability = profile_probability::always ();
218238fd1498Szrj     }
218338fd1498Szrj 
218438fd1498Szrj   /* Redirect the entry and add the phi node arguments.  */
218538fd1498Szrj   if (entry->dest == loop->header)
218638fd1498Szrj     mark_loop_for_removal (loop);
218738fd1498Szrj   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
218838fd1498Szrj   gcc_assert (redirected != NULL);
218938fd1498Szrj   flush_pending_stmts (entry);
219038fd1498Szrj 
219138fd1498Szrj   /* Add the other PHI node arguments.  */
219238fd1498Szrj   add_phi_args_after_copy (region_copy, n_region, NULL);
219338fd1498Szrj 
219438fd1498Szrj   free (region_copy);
219538fd1498Szrj 
219638fd1498Szrj   free_original_copy_tables ();
219738fd1498Szrj   return true;
219838fd1498Szrj }
219938fd1498Szrj 
220038fd1498Szrj /* Return true when PATH is a valid jump-thread path.  */
220138fd1498Szrj 
220238fd1498Szrj static bool
valid_jump_thread_path(vec<jump_thread_edge * > * path)220338fd1498Szrj valid_jump_thread_path (vec<jump_thread_edge *> *path)
220438fd1498Szrj {
220538fd1498Szrj   unsigned len = path->length ();
220638fd1498Szrj 
220738fd1498Szrj   /* Check that the path is connected.  */
220838fd1498Szrj   for (unsigned int j = 0; j < len - 1; j++)
220938fd1498Szrj     {
221038fd1498Szrj       edge e = (*path)[j]->e;
221138fd1498Szrj       if (e->dest != (*path)[j+1]->e->src)
221238fd1498Szrj 	return false;
221338fd1498Szrj     }
221438fd1498Szrj   return true;
221538fd1498Szrj }
221638fd1498Szrj 
221738fd1498Szrj /* Remove any queued jump threads that include edge E.
221838fd1498Szrj 
221938fd1498Szrj    We don't actually remove them here, just record the edges into ax
222038fd1498Szrj    hash table.  That way we can do the search once per iteration of
222138fd1498Szrj    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
222238fd1498Szrj 
222338fd1498Szrj void
remove_jump_threads_including(edge_def * e)222438fd1498Szrj remove_jump_threads_including (edge_def *e)
222538fd1498Szrj {
222638fd1498Szrj   if (!paths.exists ())
222738fd1498Szrj     return;
222838fd1498Szrj 
222938fd1498Szrj   if (!removed_edges)
223038fd1498Szrj     removed_edges = new hash_table<struct removed_edges> (17);
223138fd1498Szrj 
223238fd1498Szrj   edge *slot = removed_edges->find_slot (e, INSERT);
223338fd1498Szrj   *slot = e;
223438fd1498Szrj }
223538fd1498Szrj 
223638fd1498Szrj /* Walk through all blocks and thread incoming edges to the appropriate
223738fd1498Szrj    outgoing edge for each edge pair recorded in THREADED_EDGES.
223838fd1498Szrj 
223938fd1498Szrj    It is the caller's responsibility to fix the dominance information
224038fd1498Szrj    and rewrite duplicated SSA_NAMEs back into SSA form.
224138fd1498Szrj 
224238fd1498Szrj    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
224338fd1498Szrj    loop headers if it does not simplify the loop.
224438fd1498Szrj 
224538fd1498Szrj    Returns true if one or more edges were threaded, false otherwise.  */
224638fd1498Szrj 
224738fd1498Szrj bool
thread_through_all_blocks(bool may_peel_loop_headers)224838fd1498Szrj thread_through_all_blocks (bool may_peel_loop_headers)
224938fd1498Szrj {
225038fd1498Szrj   bool retval = false;
225138fd1498Szrj   unsigned int i;
225238fd1498Szrj   struct loop *loop;
225338fd1498Szrj   auto_bitmap threaded_blocks;
225438fd1498Szrj 
225538fd1498Szrj   if (!paths.exists ())
225638fd1498Szrj     {
225738fd1498Szrj       retval = false;
225838fd1498Szrj       goto out;
225938fd1498Szrj     }
226038fd1498Szrj 
226138fd1498Szrj   memset (&thread_stats, 0, sizeof (thread_stats));
226238fd1498Szrj 
226338fd1498Szrj   /* Remove any paths that referenced removed edges.  */
226438fd1498Szrj   if (removed_edges)
226538fd1498Szrj     for (i = 0; i < paths.length (); )
226638fd1498Szrj       {
226738fd1498Szrj 	unsigned int j;
226838fd1498Szrj 	vec<jump_thread_edge *> *path = paths[i];
226938fd1498Szrj 
227038fd1498Szrj 	for (j = 0; j < path->length (); j++)
227138fd1498Szrj 	  {
227238fd1498Szrj 	    edge e = (*path)[j]->e;
227338fd1498Szrj 	    if (removed_edges->find_slot (e, NO_INSERT))
227438fd1498Szrj 	      break;
227538fd1498Szrj 	  }
227638fd1498Szrj 
227738fd1498Szrj 	if (j != path->length ())
227838fd1498Szrj 	  {
227938fd1498Szrj 	    delete_jump_thread_path (path);
228038fd1498Szrj 	    paths.unordered_remove (i);
228138fd1498Szrj 	    continue;
228238fd1498Szrj 	  }
228338fd1498Szrj 	i++;
228438fd1498Szrj       }
228538fd1498Szrj 
228638fd1498Szrj   /* Jump-thread all FSM threads before other jump-threads.  */
228738fd1498Szrj   for (i = 0; i < paths.length ();)
228838fd1498Szrj     {
228938fd1498Szrj       vec<jump_thread_edge *> *path = paths[i];
229038fd1498Szrj       edge entry = (*path)[0]->e;
229138fd1498Szrj 
229238fd1498Szrj       /* Only code-generate FSM jump-threads in this loop.  */
229338fd1498Szrj       if ((*path)[0]->type != EDGE_FSM_THREAD)
229438fd1498Szrj 	{
229538fd1498Szrj 	  i++;
229638fd1498Szrj 	  continue;
229738fd1498Szrj 	}
229838fd1498Szrj 
229938fd1498Szrj       /* Do not jump-thread twice from the same block.  */
230038fd1498Szrj       if (bitmap_bit_p (threaded_blocks, entry->src->index)
230138fd1498Szrj 	  /* We may not want to realize this jump thread path
230238fd1498Szrj 	     for various reasons.  So check it first.  */
230338fd1498Szrj 	  || !valid_jump_thread_path (path))
230438fd1498Szrj 	{
230538fd1498Szrj 	  /* Remove invalid FSM jump-thread paths.  */
230638fd1498Szrj 	  delete_jump_thread_path (path);
230738fd1498Szrj 	  paths.unordered_remove (i);
230838fd1498Szrj 	  continue;
230938fd1498Szrj 	}
231038fd1498Szrj 
231138fd1498Szrj       unsigned len = path->length ();
231238fd1498Szrj       edge exit = (*path)[len - 1]->e;
231338fd1498Szrj       basic_block *region = XNEWVEC (basic_block, len - 1);
231438fd1498Szrj 
231538fd1498Szrj       for (unsigned int j = 0; j < len - 1; j++)
231638fd1498Szrj 	region[j] = (*path)[j]->e->dest;
231738fd1498Szrj 
231838fd1498Szrj       if (duplicate_thread_path (entry, exit, region, len - 1))
231938fd1498Szrj 	{
232038fd1498Szrj 	  /* We do not update dominance info.  */
232138fd1498Szrj 	  free_dominance_info (CDI_DOMINATORS);
232238fd1498Szrj 	  bitmap_set_bit (threaded_blocks, entry->src->index);
232338fd1498Szrj 	  retval = true;
232438fd1498Szrj 	  thread_stats.num_threaded_edges++;
232538fd1498Szrj 	}
232638fd1498Szrj 
232738fd1498Szrj       delete_jump_thread_path (path);
232838fd1498Szrj       paths.unordered_remove (i);
232938fd1498Szrj       free (region);
233038fd1498Szrj     }
233138fd1498Szrj 
233238fd1498Szrj   /* Remove from PATHS all the jump-threads starting with an edge already
233338fd1498Szrj      jump-threaded.  */
233438fd1498Szrj   for (i = 0; i < paths.length ();)
233538fd1498Szrj     {
233638fd1498Szrj       vec<jump_thread_edge *> *path = paths[i];
233738fd1498Szrj       edge entry = (*path)[0]->e;
233838fd1498Szrj 
233938fd1498Szrj       /* Do not jump-thread twice from the same block.  */
234038fd1498Szrj       if (bitmap_bit_p (threaded_blocks, entry->src->index))
234138fd1498Szrj 	{
234238fd1498Szrj 	  delete_jump_thread_path (path);
234338fd1498Szrj 	  paths.unordered_remove (i);
234438fd1498Szrj 	}
234538fd1498Szrj       else
234638fd1498Szrj 	i++;
234738fd1498Szrj     }
234838fd1498Szrj 
234938fd1498Szrj   bitmap_clear (threaded_blocks);
235038fd1498Szrj 
235138fd1498Szrj   mark_threaded_blocks (threaded_blocks);
235238fd1498Szrj 
235338fd1498Szrj   initialize_original_copy_tables ();
235438fd1498Szrj 
235538fd1498Szrj   /* The order in which we process jump threads can be important.
235638fd1498Szrj 
235738fd1498Szrj      Consider if we have two jump threading paths A and B.  If the
235838fd1498Szrj      target edge of A is the starting edge of B and we thread path A
235938fd1498Szrj      first, then we create an additional incoming edge into B->dest that
236038fd1498Szrj      we can not discover as a jump threading path on this iteration.
236138fd1498Szrj 
236238fd1498Szrj      If we instead thread B first, then the edge into B->dest will have
236338fd1498Szrj      already been redirected before we process path A and path A will
236438fd1498Szrj      natually, with no further work, target the redirected path for B.
236538fd1498Szrj 
236638fd1498Szrj      An post-order is sufficient here.  Compute the ordering first, then
236738fd1498Szrj      process the blocks.  */
236838fd1498Szrj   if (!bitmap_empty_p (threaded_blocks))
236938fd1498Szrj     {
237038fd1498Szrj       int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
237138fd1498Szrj       unsigned int postorder_num = post_order_compute (postorder, false, false);
237238fd1498Szrj       for (unsigned int i = 0; i < postorder_num; i++)
237338fd1498Szrj 	{
237438fd1498Szrj 	  unsigned int indx = postorder[i];
237538fd1498Szrj 	  if (bitmap_bit_p (threaded_blocks, indx))
237638fd1498Szrj 	    {
237738fd1498Szrj 	      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
237838fd1498Szrj 	      retval |= thread_block (bb, true);
237938fd1498Szrj 	    }
238038fd1498Szrj 	}
238138fd1498Szrj       free (postorder);
238238fd1498Szrj     }
238338fd1498Szrj 
238438fd1498Szrj   /* Then perform the threading through loop headers.  We start with the
238538fd1498Szrj      innermost loop, so that the changes in cfg we perform won't affect
238638fd1498Szrj      further threading.  */
238738fd1498Szrj   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
238838fd1498Szrj     {
238938fd1498Szrj       if (!loop->header
239038fd1498Szrj 	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
239138fd1498Szrj 	continue;
239238fd1498Szrj 
239338fd1498Szrj       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
239438fd1498Szrj     }
239538fd1498Szrj 
239638fd1498Szrj   /* All jump threading paths should have been resolved at this
239738fd1498Szrj      point.  Verify that is the case.  */
239838fd1498Szrj   basic_block bb;
239938fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
240038fd1498Szrj     {
240138fd1498Szrj       edge_iterator ei;
240238fd1498Szrj       edge e;
240338fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
240438fd1498Szrj 	gcc_assert (e->aux == NULL);
240538fd1498Szrj     }
240638fd1498Szrj 
240738fd1498Szrj   statistics_counter_event (cfun, "Jumps threaded",
240838fd1498Szrj 			    thread_stats.num_threaded_edges);
240938fd1498Szrj 
241038fd1498Szrj   free_original_copy_tables ();
241138fd1498Szrj 
241238fd1498Szrj   paths.release ();
241338fd1498Szrj 
241438fd1498Szrj   if (retval)
241538fd1498Szrj     loops_state_set (LOOPS_NEED_FIXUP);
241638fd1498Szrj 
241738fd1498Szrj  out:
241838fd1498Szrj   delete removed_edges;
241938fd1498Szrj   removed_edges = NULL;
242038fd1498Szrj   return retval;
242138fd1498Szrj }
242238fd1498Szrj 
242338fd1498Szrj /* Delete the jump threading path PATH.  We have to explicitly delete
242438fd1498Szrj    each entry in the vector, then the container.  */
242538fd1498Szrj 
242638fd1498Szrj void
delete_jump_thread_path(vec<jump_thread_edge * > * path)242738fd1498Szrj delete_jump_thread_path (vec<jump_thread_edge *> *path)
242838fd1498Szrj {
242938fd1498Szrj   for (unsigned int i = 0; i < path->length (); i++)
243038fd1498Szrj     delete (*path)[i];
243138fd1498Szrj   path->release();
243238fd1498Szrj   delete path;
243338fd1498Szrj }
243438fd1498Szrj 
243538fd1498Szrj /* Register a jump threading opportunity.  We queue up all the jump
243638fd1498Szrj    threading opportunities discovered by a pass and update the CFG
243738fd1498Szrj    and SSA form all at once.
243838fd1498Szrj 
243938fd1498Szrj    E is the edge we can thread, E2 is the new target edge, i.e., we
244038fd1498Szrj    are effectively recording that E->dest can be changed to E2->dest
244138fd1498Szrj    after fixing the SSA graph.  */
244238fd1498Szrj 
244338fd1498Szrj void
register_jump_thread(vec<jump_thread_edge * > * path)244438fd1498Szrj register_jump_thread (vec<jump_thread_edge *> *path)
244538fd1498Szrj {
244638fd1498Szrj   if (!dbg_cnt (registered_jump_thread))
244738fd1498Szrj     {
244838fd1498Szrj       delete_jump_thread_path (path);
244938fd1498Szrj       return;
245038fd1498Szrj     }
245138fd1498Szrj 
245238fd1498Szrj   /* First make sure there are no NULL outgoing edges on the jump threading
245338fd1498Szrj      path.  That can happen for jumping to a constant address.  */
245438fd1498Szrj   for (unsigned int i = 0; i < path->length (); i++)
245538fd1498Szrj     {
245638fd1498Szrj       if ((*path)[i]->e == NULL)
245738fd1498Szrj 	{
245838fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
245938fd1498Szrj 	    {
246038fd1498Szrj 	      fprintf (dump_file,
246138fd1498Szrj 		       "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
246238fd1498Szrj 	      dump_jump_thread_path (dump_file, *path, false);
246338fd1498Szrj 	    }
246438fd1498Szrj 
246538fd1498Szrj 	  delete_jump_thread_path (path);
246638fd1498Szrj 	  return;
246738fd1498Szrj 	}
246838fd1498Szrj 
246938fd1498Szrj       /* Only the FSM threader is allowed to thread across
247038fd1498Szrj 	 backedges in the CFG.  */
247138fd1498Szrj       if (flag_checking
247238fd1498Szrj 	  && (*path)[0]->type != EDGE_FSM_THREAD)
247338fd1498Szrj 	gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0);
247438fd1498Szrj     }
247538fd1498Szrj 
247638fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
247738fd1498Szrj     dump_jump_thread_path (dump_file, *path, true);
247838fd1498Szrj 
247938fd1498Szrj   if (!paths.exists ())
248038fd1498Szrj     paths.create (5);
248138fd1498Szrj 
248238fd1498Szrj   paths.safe_push (path);
248338fd1498Szrj }
248438fd1498Szrj 
248538fd1498Szrj /* Return how many uses of T there are within BB, as long as there
248638fd1498Szrj    aren't any uses outside BB.  If there are any uses outside BB,
248738fd1498Szrj    return -1 if there's at most one use within BB, or -2 if there is
248838fd1498Szrj    more than one use within BB.  */
248938fd1498Szrj 
249038fd1498Szrj static int
uses_in_bb(tree t,basic_block bb)249138fd1498Szrj uses_in_bb (tree t, basic_block bb)
249238fd1498Szrj {
249338fd1498Szrj   int uses = 0;
249438fd1498Szrj   bool outside_bb = false;
249538fd1498Szrj 
249638fd1498Szrj   imm_use_iterator iter;
249738fd1498Szrj   use_operand_p use_p;
249838fd1498Szrj   FOR_EACH_IMM_USE_FAST (use_p, iter, t)
249938fd1498Szrj     {
250038fd1498Szrj       if (is_gimple_debug (USE_STMT (use_p)))
250138fd1498Szrj 	continue;
250238fd1498Szrj 
250338fd1498Szrj       if (gimple_bb (USE_STMT (use_p)) != bb)
250438fd1498Szrj 	outside_bb = true;
250538fd1498Szrj       else
250638fd1498Szrj 	uses++;
250738fd1498Szrj 
250838fd1498Szrj       if (outside_bb && uses > 1)
250938fd1498Szrj 	return -2;
251038fd1498Szrj     }
251138fd1498Szrj 
251238fd1498Szrj   if (outside_bb)
251338fd1498Szrj     return -1;
251438fd1498Szrj 
251538fd1498Szrj   return uses;
251638fd1498Szrj }
251738fd1498Szrj 
251838fd1498Szrj /* Starting from the final control flow stmt in BB, assuming it will
251938fd1498Szrj    be removed, follow uses in to-be-removed stmts back to their defs
252038fd1498Szrj    and count how many defs are to become dead and be removed as
252138fd1498Szrj    well.  */
252238fd1498Szrj 
252338fd1498Szrj unsigned int
estimate_threading_killed_stmts(basic_block bb)252438fd1498Szrj estimate_threading_killed_stmts (basic_block bb)
252538fd1498Szrj {
252638fd1498Szrj   int killed_stmts = 0;
252738fd1498Szrj   hash_map<tree, int> ssa_remaining_uses;
252838fd1498Szrj   auto_vec<gimple *, 4> dead_worklist;
252938fd1498Szrj 
253038fd1498Szrj   /* If the block has only two predecessors, threading will turn phi
253138fd1498Szrj      dsts into either src, so count them as dead stmts.  */
253238fd1498Szrj   bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
253338fd1498Szrj 
253438fd1498Szrj   if (drop_all_phis)
253538fd1498Szrj     for (gphi_iterator gsi = gsi_start_phis (bb);
253638fd1498Szrj 	 !gsi_end_p (gsi); gsi_next (&gsi))
253738fd1498Szrj       {
253838fd1498Szrj 	gphi *phi = gsi.phi ();
253938fd1498Szrj 	tree dst = gimple_phi_result (phi);
254038fd1498Szrj 
254138fd1498Szrj 	/* We don't count virtual PHIs as stmts in
254238fd1498Szrj 	   record_temporary_equivalences_from_phis.  */
254338fd1498Szrj 	if (virtual_operand_p (dst))
254438fd1498Szrj 	  continue;
254538fd1498Szrj 
254638fd1498Szrj 	killed_stmts++;
254738fd1498Szrj       }
254838fd1498Szrj 
254938fd1498Szrj   if (gsi_end_p (gsi_last_bb (bb)))
255038fd1498Szrj     return killed_stmts;
255138fd1498Szrj 
255238fd1498Szrj   gimple *stmt = gsi_stmt (gsi_last_bb (bb));
255338fd1498Szrj   if (gimple_code (stmt) != GIMPLE_COND
255438fd1498Szrj       && gimple_code (stmt) != GIMPLE_GOTO
255538fd1498Szrj       && gimple_code (stmt) != GIMPLE_SWITCH)
255638fd1498Szrj     return killed_stmts;
255738fd1498Szrj 
255838fd1498Szrj   /* The control statement is always dead.  */
255938fd1498Szrj   killed_stmts++;
256038fd1498Szrj   dead_worklist.quick_push (stmt);
256138fd1498Szrj   while (!dead_worklist.is_empty ())
256238fd1498Szrj     {
256338fd1498Szrj       stmt = dead_worklist.pop ();
256438fd1498Szrj 
256538fd1498Szrj       ssa_op_iter iter;
256638fd1498Szrj       use_operand_p use_p;
256738fd1498Szrj       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
256838fd1498Szrj 	{
256938fd1498Szrj 	  tree t = USE_FROM_PTR (use_p);
257038fd1498Szrj 	  gimple *def = SSA_NAME_DEF_STMT (t);
257138fd1498Szrj 
257238fd1498Szrj 	  if (gimple_bb (def) == bb
257338fd1498Szrj 	      && (gimple_code (def) != GIMPLE_PHI
257438fd1498Szrj 		  || !drop_all_phis)
257538fd1498Szrj 	      && !gimple_has_side_effects (def))
257638fd1498Szrj 	    {
257738fd1498Szrj 	      int *usesp = ssa_remaining_uses.get (t);
257838fd1498Szrj 	      int uses;
257938fd1498Szrj 
258038fd1498Szrj 	      if (usesp)
258138fd1498Szrj 		uses = *usesp;
258238fd1498Szrj 	      else
258338fd1498Szrj 		uses = uses_in_bb (t, bb);
258438fd1498Szrj 
258538fd1498Szrj 	      gcc_assert (uses);
258638fd1498Szrj 
258738fd1498Szrj 	      /* Don't bother recording the expected use count if we
258838fd1498Szrj 		 won't find any further uses within BB.  */
258938fd1498Szrj 	      if (!usesp && (uses < -1 || uses > 1))
259038fd1498Szrj 		{
259138fd1498Szrj 		  usesp = &ssa_remaining_uses.get_or_insert (t);
259238fd1498Szrj 		  *usesp = uses;
259338fd1498Szrj 		}
259438fd1498Szrj 
259538fd1498Szrj 	      if (uses < 0)
259638fd1498Szrj 		continue;
259738fd1498Szrj 
259838fd1498Szrj 	      --uses;
259938fd1498Szrj 	      if (usesp)
260038fd1498Szrj 		*usesp = uses;
260138fd1498Szrj 
260238fd1498Szrj 	      if (!uses)
260338fd1498Szrj 		{
260438fd1498Szrj 		  killed_stmts++;
260538fd1498Szrj 		  if (usesp)
260638fd1498Szrj 		    ssa_remaining_uses.remove (t);
260738fd1498Szrj 		  if (gimple_code (def) != GIMPLE_PHI)
260838fd1498Szrj 		    dead_worklist.safe_push (def);
260938fd1498Szrj 		}
261038fd1498Szrj 	    }
261138fd1498Szrj 	}
261238fd1498Szrj     }
261338fd1498Szrj 
261438fd1498Szrj   if (dump_file)
261538fd1498Szrj     fprintf (dump_file, "threading bb %i kills %i stmts\n",
261638fd1498Szrj 	     bb->index, killed_stmts);
261738fd1498Szrj 
261838fd1498Szrj   return killed_stmts;
261938fd1498Szrj }
2620