/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "cfgloop.h"
#include "dbgcnt.h"
#include "tree-cfg.h"
#include "tree-vectorizer.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
	the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
	except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
	argument associated with edge B->C.  Associate the new PHI arguments
	with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
	PHI_RESULT.  Add an argument to the PHI in B' which has the same
	value as the PHI in B associated with the edge A->B.  Associate
	the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

	5a. This automatically deletes any PHI arguments associated with the
	    edge A->B in B.

	5b. This automatically associates each new argument added in step 4
	    with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1.  We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block.  In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path.  This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path.  */
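
/* As a small illustrative sketch (block names hypothetical, not part of
   the implementation): suppose A->B is threadable through B to C because
   B's condition is known to be true along A->B.

       before                       after steps 1-5

	A   D                        A      D
	 \ /                         |      |
	  B   if (x) C; else E       B'     B   if (x) C; else E
	 / \                         |     / \
	C   E                        C    C   E

   B' is the copy of B that preserves B's side effects; its control
   statement and the edge to E were removed (step 2), so A now reaches
   C unconditionally while D still flows through the original B.  */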


/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};

/* Dump a jump threading path, including annotations about each
   edge in the path.  */

static void
dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
		       bool registering)
{
  fprintf (dump_file,
	   "  %s%s jump thread: (%d, %d) incoming edge; ",
	   (registering ? "Registering" : "Cancelling"),
	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
	   path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	fprintf (dump_file, " (%d, %d) joiner; ",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) normal;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
	fprintf (dump_file, " (%d, %d) nocopy;",
		 path[i]->e->src->index, path[i]->e->dest->index);
      if (path[0]->type == EDGE_FSM_THREAD)
	fprintf (dump_file, " (%d, %d) ",
		 path[i]->e->src->index, path[i]->e->dest->index);
    }
  fputc ('\n', dump_file);
}

/* Simple hashing function.  For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path.  So hash on the block index of the final edge in the path.  */

inline hashval_t
redirection_data::hash (const redirection_data *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path.  */
inline int
redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
	  || (*path1)[i]->e != (*path2)[i]->e)
	return false;
    }

  return true;
}
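
/* A brief worked note on the hash/equal pairing (block names
   hypothetical): paths A->J->S->D and C->J->S'->D both hash to D's
   block index above, since hash () only looks at the final edge's
   destination.  equal () then compares the stored edge vectors element
   by element (skipping element 0, the incoming edge), so the two paths
   land in distinct hash table entries and each gets its own duplicate
   blocks, while paths differing only in their incoming edge share one
   entry.  */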

/* Rather than search all the edges in jump thread paths each time
   DOM is able to simplify a control statement, we build a hash table
   with the deleted edges.  We only care about the address of the edge,
   not its contents.  */
struct removed_edges : nofree_ptr_hash<edge_def>
{
  static hashval_t hash (edge e) { return htab_hash_pointer (e); }
  static bool equal (edge e1, edge e2) { return e1 == e2; }
};

static hash_table<removed_edges> *removed_edges;
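
/* A minimal usage sketch under the hash_table API used elsewhere in
   this file (the allocation size and call sites here are hypothetical):

     removed_edges = new hash_table<struct removed_edges> (17);
     ...
     *removed_edges->find_slot (e, INSERT) = e;    // record deleted E
     ...
     if (removed_edges->find_slot (e, NO_INSERT))  // was E deleted?
       ...
*/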

/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* Blocks duplicated for the thread.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static vec<vec<jump_thread_edge *> *> paths;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)

/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;


/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	{
	  free_dom_edge_info (e);
	  remove_edge (e);
	}
      else
	{
	  e->probability = profile_probability::always ();
	  ei_next (&ei);
	}
    }

  /* If the remaining edge is a loop exit, there must have been a removed
     edge that was not a loop exit.

     In that case BB and possibly other blocks were previously
     in the loop, but are now outside the loop.  Thus, we need
     to update the loop structures.  */
  if (single_succ_p (bb)
      && loop_outer (bb->loop_father)
      && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
    loops_state_set (LOOPS_NEED_FIXUP);
}
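
/* A small before/after sketch of the routine above (hypothetical
   blocks; B' is a duplicate whose only wanted successor is C):

     before:  B': x_1 = a_2 + 1;      after:  B': x_1 = a_2 + 1;
		  if (x_1 > 0)                    (falls through)
		    goto C;                        |
		  else goto E;                     v
		 / \                               C
		C   E

   The GIMPLE_COND is removed, the B'->E edge is deleted, and the
   surviving B'->C edge gets probability "always".  */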

/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  */

static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count,
			    bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->count = profile_count::uninitialized ();
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}

/* Main data structure to hold information for duplicates of BB.  */

static hash_table<redirection_data> *redirection_data;

/* Given an outgoing edge E, lookup and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}

/* Similar to copy_phi_args, except that the PHI arg already exists;
   it just does not have a value associated with it.  */

static void
copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
{
  int src_idx = src_e->dest_idx;
  int tgt_idx = tgt_e->dest_idx;

  /* Iterate over each PHI in e->dest.  */
  for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
		     gsi2 = gsi_start_phis (tgt_e->dest);
       !gsi_end_p (gsi);
       gsi_next (&gsi), gsi_next (&gsi2))
    {
      gphi *src_phi = gsi.phi ();
      gphi *dest_phi = gsi2.phi ();
      tree val = gimple_phi_arg_def (src_phi, src_idx);
      source_location locus = gimple_phi_arg_location (src_phi, src_idx);

      SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
      gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
    }
}

/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
   to see if it has a constant value in a flow sensitive manner.  Set
   LOCUS to the location of the constant phi arg and return the value.
   Return DEF directly if PATH is NULL or IDX is zero.  */

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, source_location *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if def has a constant
     value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}
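
/* A worked example of the backtracking above (hypothetical GIMPLE):
   suppose the path is (A->B, B->C), IDX is 1, and DEF is x_2 with

     B: x_2 = PHI <7(A), y_5(Z)>

   Walking backwards from IDX, edge A->B satisfies e->dest == B, the
   PHI argument for that edge is the invariant 7, so the routine
   returns 7 and sets *LOCUS to that argument's location.  Had the
   argument been another SSA name, the loop would break and DEF itself
   would be returned.  */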

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack jump threading PATH from node IDX to see if the arg
   has a constant value; if so, copy the constant value instead of the
   argument itself.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
	       vec<jump_thread_edge *> *path, int idx)
{
  gphi_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = gimple_phi_arg_def (phi, src_indx);
      source_location locus = gimple_phi_arg_location (phi, src_indx);

      if (TREE_CODE (def) == SSA_NAME
	  && !virtual_operand_p (gimple_phi_result (phi)))
	def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.
   PATH and IDX are used to check if the new PHI argument has constant
   value in a flow sensitive manner.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb,
			 vec<jump_thread_edge *> *path, int idx)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2, path, idx);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the start node in the jump threading path
   from which we check whether the new PHI argument has a constant
   value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb, int idx)
{
  edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematical due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated.  Otherwise,
   return FALSE.  */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
				 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	return true;
    }
  return false;
}


/* Compute the amount of profile count coming into the jump threading
   path stored in RD that we are duplicating, returned in
   PATH_IN_COUNT_PTR, as well as the amount of counts flowing out of the
   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
   identify blocks duplicated for jump threading, which have duplicated
   edges that need to be ignored in the analysis.  Return true if path contains
   a joiner, false otherwise.

   In the non-joiner case, this is straightforward - all the counts
   flowing into the jump threading path should flow through the duplicated
   block and out of the duplicated path.

   In the joiner case, it is very tricky.  Some of the counts flowing into
   the original path go offpath at the joiner.  The problem is that while
   we know how much total count goes off-path in the original control flow,
   we don't know how many of the counts corresponding to just the jump
   threading path go offpath at the joiner.

   For example, assume we have the following control flow and identified
   jump threading paths:

		A     B     C
		 \    |    /
	       Ea \   |Eb / Ec
		   \  |  /
		    v v v
		      J       <-- Joiner
		     / \
		Eoff/   \Eon
		   /     \
		  v       v
		Soff     Son  <--- Normal
			 / \
		      Ed/   \ Ee
		       /     \
		      v       v
		      D       E

	    Jump threading paths: A -> J -> Son -> D (path 1)
				  C -> J -> Son -> E (path 2)

   Note that the control flow could be more complicated:
   - Each jump threading path may have more than one incoming edge.  I.e. A and
     Ea could represent multiple incoming blocks/edges that are included in
     path 1.
   - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     before or after the "normal" copy block).  These are not duplicated onto
     the jump threading path, as they are single-successor.
   - Any of the blocks along the path may have other incoming edges that
     are not part of any jump threading path, but add profile counts along
     the path.

   In the above example, after all jump threading is complete, we will
   end up with the following control flow:

	  A          B            C
	  |          |            |
	Ea|          |Eb          |Ec
	  |          |            |
	  v          v            v
	 Ja          J           Jc
	 / \        / \Eon'     / \
    Eona/   \   ---/---\--------   \Eonc
       /     \ /  /     \        \
      v       v v        v        v
    Sona     Soff        Son     Sonc
       \                 / \       /
	\___________  Ed/   \Ee __/
		    \  /     \ /
		     vv       v
		     D        E

   The main issue to notice here is that when we are processing path 1
   (A->J->Son->D) we need to figure out the outgoing edge weights to
   the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
   sum of the incoming weights to D remain Ed.  The problem with simply
   assuming that Ja (and Jc when processing path 2) has the same outgoing
   probabilities to its successors as the original block J, is that after
   all paths are processed and other edges/counts removed (e.g. none
   of Ec will reach D after processing path 2), we may end up with not
   enough count flowing along duplicated edge Sona->D.

   Therefore, in the case of a joiner, we keep track of all counts
   coming in along the current path, as well as from predecessors not
   on any jump threading path (Eb in the above example).  While we
   first assume that the duplicated Eona for Ja->Sona has the same
   probability as the original, we later compensate for other jump
   threading paths that may eliminate edges.  We do that by keeping track
   of all counts coming into the original path that are not in a jump
   thread (Eb in the above example, but as noted earlier, there could
   be other predecessors incoming to the path at various points, such
   as at Son).  Call this cumulative non-path count coming into the path
   before D as Enonpath.  We then ensure that the count from Sona->D is
   at least as big as (Ed - Enonpath), but no bigger than the minimum
   weight along the jump threading path.  The probabilities of both the
   original and duplicated joiner block J and Ja will be adjusted
   accordingly after the updates.  */

static bool
compute_path_counts (struct redirection_data *rd,
		     ssa_local_info_t *local_info,
		     profile_count *path_in_count_ptr,
		     profile_count *path_out_count_ptr)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count nonpath_count = profile_count::zero ();
  bool has_joiner = false;
  profile_count path_in_count = profile_count::zero ();

  /* Start by accumulating incoming edge counts to the path's first bb
     into a couple buckets:
	path_in_count: total count of incoming edges that flow into the
		       current path.
	nonpath_count: total count of incoming edges that are not
		       flowing along *any* path.  These are the counts
		       that will still flow along the original path after
		       all path duplication is done by potentially multiple
		       calls to this routine.
     (any other incoming edge counts are for a different jump threading
     path that will be handled by a later call to this routine.)
     To make this easier, start by recording all incoming edges that flow into
     the current path in a bitmap.  We could add up the path's incoming edge
     counts here, but we still need to walk all the first bb's incoming edges
     below to add up the counts of the other edges not included in this jump
     threading path.  */
  struct el *next, *el;
  auto_bitmap in_edge_srcs;
  for (el = rd->incoming_edges; el; el = next)
    {
      next = el->next;
      bitmap_set_bit (in_edge_srcs, el->e->src->index);
    }
  edge ein;
  edge_iterator ei;
  FOR_EACH_EDGE (ein, ei, e->dest->preds)
    {
      vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
      /* Simply check the incoming edge src against the set captured above.  */
      if (ein_path
	  && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
	{
	  /* It is necessary but not sufficient that the last path edges
	     are identical.  There may be different paths that share the
	     same last path edge in the case where the last edge has a nocopy
	     source block.  */
	  gcc_assert (ein_path->last ()->e == elast);
	  path_in_count += ein->count ();
	}
      else if (!ein_path)
	{
	  /* Keep track of the incoming edges that are not on any jump-threading
	     path.  These counts will still flow out of original path after all
	     jump threading is complete.  */
	  nonpath_count += ein->count ();
	}
    }

  /* Now compute the fraction of the total count coming into the first
     path bb that is from the current threading path.  */
  profile_count total_count = e->dest->count;
  /* Handle incoming profile insanities.  */
  if (total_count < path_in_count)
    path_in_count = total_count;
  profile_probability onpath_scale = path_in_count.probability_in (total_count);

  /* Walk the entire path to do some more computation in order to estimate
     how much of the path_in_count will flow out of the duplicated threading
     path.  In the non-joiner case this is straightforward (it should be
     the same as path_in_count, although we will handle incoming profile
     insanities by setting it equal to the minimum count along the path).

     In the joiner case, we need to estimate how much of the path_in_count
     will stay on the threading path after the joiner's conditional branch.
     We don't really know for sure how much of the counts
     associated with this path go to each successor of the joiner, but we'll
     estimate based on the fraction of the total count coming into the path
     bb that was from the threading paths (computed above in onpath_scale).
     Afterwards, we will need to do some fixup to account for other threading
     paths and possible profile insanities.

     In order to estimate the joiner case's counts we also need to update
     nonpath_count with any additional counts coming into the path.  Other
     blocks along the path may have additional predecessors from outside
     the path.  */
  profile_count path_out_count = path_in_count;
  profile_count min_path_count = path_in_count;
  for (unsigned int i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;
      profile_count cur_count = epath->count ();
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  has_joiner = true;
	  cur_count = cur_count.apply_probability (onpath_scale);
	}
      /* In the joiner case we need to update nonpath_count for any edges
	 coming into the path that will contribute to the count flowing
	 into the path successor.  */
      if (has_joiner && epath != elast)
	{
	  /* Look for other incoming edges after joiner.  */
	  FOR_EACH_EDGE (ein, ei, epath->dest->preds)
	    {
	      if (ein != epath
		  /* Ignore in edges from blocks we have duplicated for a
		     threading path, which have duplicated edge counts until
		     they are redirected by an invocation of this routine.  */
		  && !bitmap_bit_p (local_info->duplicate_blocks,
				    ein->src->index))
		nonpath_count += ein->count ();
	    }
	}
      if (cur_count < path_out_count)
	path_out_count = cur_count;
      if (epath->count () < min_path_count)
	min_path_count = epath->count ();
    }

  /* We computed path_out_count above assuming that this path targeted
     the joiner's on-path successor with the same likelihood as it
     reached the joiner.  However, other thread paths through the joiner
     may take a different path through the normal copy source block
     (i.e. they have a different elast), meaning that they do not
     contribute any counts to this path's elast.  As a result, it may
     turn out that this path must have more count flowing to the on-path
     successor of the joiner.  Essentially, all of this path's elast
     count must be contributed by this path and any nonpath counts
     (since any path through the joiner with a different elast will not
     include a copy of this elast in its duplicated path).
     So ensure that this path's path_out_count is at least the
     difference between elast->count () and nonpath_count.  Otherwise the edge
     counts after threading will not be sane.  */
  if (local_info->need_profile_correction
      && has_joiner && path_out_count < elast->count () - nonpath_count)
    {
      path_out_count = elast->count () - nonpath_count;
      /* But neither can we go above the minimum count along the path
	 we are duplicating.  This can be an issue due to profile
	 insanities coming in to this pass.  */
      if (path_out_count > min_path_count)
	path_out_count = min_path_count;
    }

  *path_in_count_ptr = path_in_count;
  *path_out_count_ptr = path_out_count;
  return has_joiner;
}
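
/* A worked numeric sketch of the correction above (all counts
   hypothetical).  Using the earlier joiner figure, suppose J's count
   is 100 with Ea = 60, Eb = 30, Ec = 10, and Eon = 80, Ed = 70.  While
   processing path 1, onpath_scale is 60/100, so the initial estimate
   is path_out_count = 80 * 0.6 = 48.  But path 2 contributes nothing
   to Ed after threading, so Sona->D must carry at least
   Ed - Enonpath = 70 - 30 = 40; here 48 >= 40 already holds, whereas
   an estimate below 40 would have been raised to 40 (and then clamped
   to the minimum count along the path).  */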


/* Update the counts for both an original path edge EPATH and its
   duplicate EDUP.  The duplicate source block will get a count of
   PATH_IN_COUNT, and the duplicate edge EDUP will have a count of
   PATH_OUT_COUNT.  */
static void
update_profile (edge epath, edge edup, profile_count path_in_count,
		profile_count path_out_count)
{

  /* First update the duplicated block's count.  */
  if (edup)
    {
      basic_block dup_block = edup->src;

      /* Edup's count is reduced by path_out_count.  We need to redistribute
	 probabilities to the remaining edges.  */

      edge esucc;
      edge_iterator ei;
      profile_probability edup_prob
	= path_out_count.probability_in (path_in_count);

      /* Either scale up or down the remaining edges.
	 probabilities are always in range <0,1> and thus we can't do
	 both by same loop.  */
      if (edup->probability > edup_prob)
	{
	  profile_probability rev_scale
	    = (profile_probability::always () - edup->probability)
	      / (profile_probability::always () - edup_prob);
	  FOR_EACH_EDGE (esucc, ei, dup_block->succs)
	    if (esucc != edup)
	      esucc->probability /= rev_scale;
	}
      else if (edup->probability < edup_prob)
	{
	  profile_probability scale
	    = (profile_probability::always () - edup_prob)
	      / (profile_probability::always () - edup->probability);
	  FOR_EACH_EDGE (esucc, ei, dup_block->succs)
	    if (esucc != edup)
	      esucc->probability *= scale;
	}
      if (edup_prob.initialized_p ())
	edup->probability = edup_prob;

      gcc_assert (!dup_block->count.initialized_p ());
      dup_block->count = path_in_count;
    }

  if (path_in_count == profile_count::zero ())
    return;

  profile_count final_count = epath->count () - path_out_count;

  /* Now update the original block's count in the
     opposite manner - remove the counts that will flow
     into the duplicated block.  Handle underflow due to precision/
     rounding issues.  */
  epath->src->count -= path_in_count;

  /* Next update this path edge's original and duplicated counts.  We know
     that the duplicated path will have path_out_count flowing
     out of it (in the joiner case this is the count along the duplicated path
     out of the duplicated joiner).  This count can then be removed from the
     original path edge.  */

  edge esucc;
  edge_iterator ei;
  profile_probability epath_prob = final_count.probability_in (epath->src->count);

  if (epath->probability > epath_prob)
    {
      profile_probability rev_scale
	= (profile_probability::always () - epath->probability)
	  / (profile_probability::always () - epath_prob);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	if (esucc != epath)
	  esucc->probability /= rev_scale;
    }
  else if (epath->probability < epath_prob)
    {
      profile_probability scale
	= (profile_probability::always () - epath_prob)
	  / (profile_probability::always () - epath->probability);
      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
	if (esucc != epath)
	  esucc->probability *= scale;
    }
  if (epath_prob.initialized_p ())
    epath->probability = epath_prob;
}
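
/* A numeric sketch of the rescaling above (hypothetical values): if
   EDUP initially has probability 0.8 and the desired edup_prob is 0.5,
   the remaining out-probability mass grows from 0.2 to 0.5, so each
   sibling edge is divided by rev_scale = 0.2 / 0.5 = 0.4, i.e.
   multiplied by 2.5; a sibling at 0.2 becomes 0.5 and the successor
   probabilities again sum to 1.  */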

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge elast = path->last ()->e;
  profile_count path_in_count = profile_count::zero ();
  profile_count path_out_count = profile_count::zero ();

  /* First determine how much profile count to move from original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
					 &path_in_count, &path_out_count);

  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through a joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  gcc_assert (has_joiner);

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      e2 = redirect_edge_and_branch (victim, elast->dest);
	      /* If we redirected the edge, then we need to copy PHI arguments
		 at the target.  If the edge already existed (e2 != victim
		 case), then the PHIs in the target already have the correct
		 arguments.  */
	      if (e2 == victim)
		copy_phi_args (e2->dest, elast, e2,
			       path, multi_incomings ? 0 : i);
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  The path duplicate's
	     incoming count is the total for all edges
	     incoming to this jump threading path computed earlier.
	     And we know that the duplicated path will have path_out_count
	     flowing out of it (i.e. along the duplicated path out of the
	     duplicated joiner).  */
	  update_profile (epath, e2, path_in_count, path_out_count);
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then path_out_count is the sum
	     of the counts along all incoming edges to the thread path
	     (path_in_count).  If we had a joiner, it is the count along
	     the duplicated joiner path edge computed earlier.  */
	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
			  path_out_count, path_out_count);
	}
      else
	{
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the counts that were moved
	     to the duplicated path from the counts flowing through
	     this block on the original path.  Since all the no-copy edges
	     are after any joiner, the removed count is the same as
	     path_out_count.

	     If we didn't have a joiner, then path_out_count is the sum
	     of the counts along all incoming edges to the thread path
	     (path_in_count).  If we had a joiner, it is the count along
	     the duplicated joiner path edge computed earlier.  */
	  update_profile (epath, NULL, path_out_count, path_out_count);
	}

      /* Increment the index into the duplicated path when we processed
	 a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  count++;
	}
    }
}
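
/* An illustrative walk of the loop above (hypothetical path): for a
   path whose elements have types

     [0] incoming edge                 (not processed by the loop)
     [1] EDGE_COPY_SRC_JOINER_BLOCK    -> uses dup_blocks[0], count 0->1
     [2] EDGE_COPY_SRC_BLOCK           -> uses dup_blocks[1]
     [3] EDGE_NO_COPY_SRC_BLOCK        -> no duplicate, profile only

   the joiner copy keeps its control statement and has one outgoing
   edge redirected, the normal copy loses its control statement and
   gets a single fallthru edge, and the no-copy block only has its
   counts adjusted via update_profile.  */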

/* Hash table traversal callback routine to create duplicate blocks.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
				      &local_info->duplicate_blocks);
	  break;
	}
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0,
				  &local_info->duplicate_blocks);
      local_info->template_block = rd->dup_blocks[0];

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd, 0,
				  &local_info->duplicate_blocks);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}
113538fd1498Szrj
113638fd1498Szrj /* We did not create any outgoing edges for the template block during
113738fd1498Szrj block creation. This hash table traversal callback creates the
113838fd1498Szrj outgoing edge for the template block. */
113938fd1498Szrj
114038fd1498Szrj inline int
114138fd1498Szrj ssa_fixup_template_block (struct redirection_data **slot,
114238fd1498Szrj ssa_local_info_t *local_info)
114338fd1498Szrj {
114438fd1498Szrj struct redirection_data *rd = *slot;
114538fd1498Szrj
114638fd1498Szrj   /* If this is the template block, halt the traversal after updating
114738fd1498Szrj it appropriately.
114838fd1498Szrj
114938fd1498Szrj      If we were threading through a joiner block, then we want
115038fd1498Szrj to keep its control statement and redirect an outgoing edge.
115138fd1498Szrj Else we want to remove the control statement & edges, then create
115238fd1498Szrj a new outgoing edge. In both cases we may need to update PHIs. */
115338fd1498Szrj if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
115438fd1498Szrj {
115538fd1498Szrj ssa_fix_duplicate_block_edges (rd, local_info);
115638fd1498Szrj return 0;
115738fd1498Szrj }
115838fd1498Szrj
115938fd1498Szrj return 1;
116038fd1498Szrj }
116138fd1498Szrj
116238fd1498Szrj /* Hash table traversal callback to redirect each incoming edge
116338fd1498Szrj associated with this hash table element to its new destination. */
116438fd1498Szrj
116538fd1498Szrj int
116638fd1498Szrj ssa_redirect_edges (struct redirection_data **slot,
116738fd1498Szrj ssa_local_info_t *local_info)
116838fd1498Szrj {
116938fd1498Szrj struct redirection_data *rd = *slot;
117038fd1498Szrj struct el *next, *el;
117138fd1498Szrj
117238fd1498Szrj /* Walk over all the incoming edges associated with this hash table
117338fd1498Szrj entry. */
117438fd1498Szrj for (el = rd->incoming_edges; el; el = next)
117538fd1498Szrj {
117638fd1498Szrj edge e = el->e;
117738fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
117838fd1498Szrj
117938fd1498Szrj /* Go ahead and free this element from the list. Doing this now
118038fd1498Szrj avoids the need for another list walk when we destroy the hash
118138fd1498Szrj table. */
118238fd1498Szrj next = el->next;
118338fd1498Szrj free (el);
118438fd1498Szrj
118538fd1498Szrj thread_stats.num_threaded_edges++;
118638fd1498Szrj
118738fd1498Szrj if (rd->dup_blocks[0])
118838fd1498Szrj {
118938fd1498Szrj edge e2;
119038fd1498Szrj
119138fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
119238fd1498Szrj fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
119338fd1498Szrj e->src->index, e->dest->index, rd->dup_blocks[0]->index);
119438fd1498Szrj
119538fd1498Szrj /* Redirect the incoming edge (possibly to the joiner block) to the
119638fd1498Szrj appropriate duplicate block. */
119738fd1498Szrj e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
119838fd1498Szrj gcc_assert (e == e2);
119938fd1498Szrj flush_pending_stmts (e2);
120038fd1498Szrj }
120138fd1498Szrj
120238fd1498Szrj /* Go ahead and clear E->aux. It's not needed anymore and failure
120338fd1498Szrj to clear it will cause all kinds of unpleasant problems later. */
120438fd1498Szrj delete_jump_thread_path (path);
120538fd1498Szrj e->aux = NULL;
120638fd1498Szrj
120738fd1498Szrj }
120838fd1498Szrj
120938fd1498Szrj /* Indicate that we actually threaded one or more jumps. */
121038fd1498Szrj if (rd->incoming_edges)
121138fd1498Szrj local_info->jumps_threaded = true;
121238fd1498Szrj
121338fd1498Szrj return 1;
121438fd1498Szrj }
121538fd1498Szrj
121638fd1498Szrj /* Return true if this block has no executable statements other than
121738fd1498Szrj    a simple control flow instruction.  When the number of outgoing edges
121838fd1498Szrj is one, this is equivalent to a "forwarder" block. */
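/* As an illustration (hypothetical GIMPLE), a block containing only

     <bb 5>:
     goto <bb 7>;

   or only a terminating conditional such as

     <bb 5>:
     if (x_1 > 10) goto <bb 6>; else goto <bb 7>;

   satisfies this predicate; any other executable statement before the
   terminator makes it return false.  */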
121938fd1498Szrj
122038fd1498Szrj static bool
122138fd1498Szrj redirection_block_p (basic_block bb)
122238fd1498Szrj {
122338fd1498Szrj gimple_stmt_iterator gsi;
122438fd1498Szrj
122538fd1498Szrj /* Advance to the first executable statement. */
122638fd1498Szrj gsi = gsi_start_bb (bb);
122738fd1498Szrj while (!gsi_end_p (gsi)
122838fd1498Szrj && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
122938fd1498Szrj || is_gimple_debug (gsi_stmt (gsi))
123038fd1498Szrj || gimple_nop_p (gsi_stmt (gsi))
123138fd1498Szrj || gimple_clobber_p (gsi_stmt (gsi))))
123238fd1498Szrj gsi_next (&gsi);
123338fd1498Szrj
123438fd1498Szrj /* Check if this is an empty block. */
123538fd1498Szrj if (gsi_end_p (gsi))
123638fd1498Szrj return true;
123738fd1498Szrj
123838fd1498Szrj /* Test that we've reached the terminating control statement. */
123938fd1498Szrj return gsi_stmt (gsi)
124038fd1498Szrj && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
124138fd1498Szrj || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
124238fd1498Szrj || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
124338fd1498Szrj }
124438fd1498Szrj
124538fd1498Szrj /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
124638fd1498Szrj is reached via one or more specific incoming edges, we know which
124738fd1498Szrj outgoing edge from BB will be traversed.
124838fd1498Szrj
124938fd1498Szrj We want to redirect those incoming edges to the target of the
125038fd1498Szrj appropriate outgoing edge. Doing so avoids a conditional branch
125138fd1498Szrj and may expose new optimization opportunities. Note that we have
125238fd1498Szrj to update dominator tree and SSA graph after such changes.
125338fd1498Szrj
125438fd1498Szrj The key to keeping the SSA graph update manageable is to duplicate
125538fd1498Szrj the side effects occurring in BB so that those side effects still
125638fd1498Szrj occur on the paths which bypass BB after redirecting edges.
125738fd1498Szrj
125838fd1498Szrj We accomplish this by creating duplicates of BB and arranging for
125938fd1498Szrj the duplicates to unconditionally pass control to one specific
126038fd1498Szrj successor of BB. We then revector the incoming edges into BB to
126138fd1498Szrj the appropriate duplicate of BB.
126238fd1498Szrj
126338fd1498Szrj If NOLOOP_ONLY is true, we only perform the threading as long as it
126438fd1498Szrj does not affect the structure of the loops in a nontrivial way.
126538fd1498Szrj
126638fd1498Szrj If JOINERS is true, then thread through joiner blocks as well. */
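/* Schematically (hypothetical blocks), when the condition in BB is
   known to be true on the edge A->BB:

     Before:   A --> BB --> C       After:   A --> BB' --> C
               D --> BB --> E                D --> BB  --> C or E

   BB' is a duplicate of BB whose control statement has been removed,
   so it passes control unconditionally to C while still executing
   BB's side effects.  */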
126738fd1498Szrj
126838fd1498Szrj static bool
126938fd1498Szrj thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
127038fd1498Szrj {
127138fd1498Szrj /* E is an incoming edge into BB that we may or may not want to
127238fd1498Szrj redirect to a duplicate of BB. */
127338fd1498Szrj edge e, e2;
127438fd1498Szrj edge_iterator ei;
127538fd1498Szrj ssa_local_info_t local_info;
127638fd1498Szrj
127738fd1498Szrj local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
127838fd1498Szrj local_info.need_profile_correction = false;
127938fd1498Szrj
128038fd1498Szrj /* To avoid scanning a linear array for the element we need we instead
128138fd1498Szrj use a hash table. For normal code there should be no noticeable
128238fd1498Szrj difference. However, if we have a block with a large number of
128338fd1498Szrj incoming and outgoing edges such linear searches can get expensive. */
128438fd1498Szrj redirection_data
128538fd1498Szrj = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
128638fd1498Szrj
128738fd1498Szrj /* Record each unique threaded destination into a hash table for
128838fd1498Szrj efficient lookups. */
128938fd1498Szrj edge last = NULL;
129038fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
129138fd1498Szrj {
129238fd1498Szrj if (e->aux == NULL)
129338fd1498Szrj continue;
129438fd1498Szrj
129538fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
129638fd1498Szrj
129738fd1498Szrj if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
129838fd1498Szrj || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
129938fd1498Szrj continue;
130038fd1498Szrj
130138fd1498Szrj e2 = path->last ()->e;
130238fd1498Szrj if (!e2 || noloop_only)
130338fd1498Szrj {
130438fd1498Szrj /* If NOLOOP_ONLY is true, we only allow threading through the
130538fd1498Szrj header of a loop to exit edges. */
130638fd1498Szrj
130738fd1498Szrj 	  /* One case occurs when there was a loop header buried in a jump
130838fd1498Szrj threading path that crosses loop boundaries. We do not try
130938fd1498Szrj and thread this elsewhere, so just cancel the jump threading
131038fd1498Szrj request by clearing the AUX field now. */
131138fd1498Szrj if (bb->loop_father != e2->src->loop_father
1312*58e805e6Szrj && (!loop_exit_edge_p (e2->src->loop_father, e2)
1313*58e805e6Szrj || flow_loop_nested_p (bb->loop_father,
1314*58e805e6Szrj e2->dest->loop_father)))
131538fd1498Szrj {
131638fd1498Szrj /* Since this case is not handled by our special code
131738fd1498Szrj to thread through a loop header, we must explicitly
131838fd1498Szrj cancel the threading request here. */
131938fd1498Szrj delete_jump_thread_path (path);
132038fd1498Szrj e->aux = NULL;
132138fd1498Szrj continue;
132238fd1498Szrj }
132338fd1498Szrj
132438fd1498Szrj /* Another case occurs when trying to thread through our
132538fd1498Szrj own loop header, possibly from inside the loop. We will
132638fd1498Szrj thread these later. */
132738fd1498Szrj unsigned int i;
132838fd1498Szrj for (i = 1; i < path->length (); i++)
132938fd1498Szrj {
133038fd1498Szrj if ((*path)[i]->e->src == bb->loop_father->header
133138fd1498Szrj && (!loop_exit_edge_p (bb->loop_father, e2)
133238fd1498Szrj || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
133338fd1498Szrj break;
133438fd1498Szrj }
133538fd1498Szrj
133638fd1498Szrj if (i != path->length ())
133738fd1498Szrj continue;
133838fd1498Szrj
133938fd1498Szrj /* Loop parallelization can be confused by the result of
134038fd1498Szrj threading through the loop exit test back into the loop.
134138fd1498Szrj 	     However, threading those jumps seems to help other code.
134238fd1498Szrj
134338fd1498Szrj I have been unable to find anything related to the shape of
134438fd1498Szrj 	     the CFG, the contents of the affected blocks, etc. which would
134538fd1498Szrj 	     allow a more sensible test than what we're using below, which
134638fd1498Szrj 	     merely avoids the optimization when parallelizing loops.  */
134738fd1498Szrj if (flag_tree_parallelize_loops > 1)
134838fd1498Szrj {
134938fd1498Szrj for (i = 1; i < path->length (); i++)
135038fd1498Szrj if (bb->loop_father == e2->src->loop_father
135138fd1498Szrj && loop_exits_from_bb_p (bb->loop_father,
135238fd1498Szrj (*path)[i]->e->src)
135338fd1498Szrj && !loop_exit_edge_p (bb->loop_father, e2))
135438fd1498Szrj break;
135538fd1498Szrj
135638fd1498Szrj if (i != path->length ())
135738fd1498Szrj {
135838fd1498Szrj delete_jump_thread_path (path);
135938fd1498Szrj e->aux = NULL;
136038fd1498Szrj continue;
136138fd1498Szrj }
136238fd1498Szrj }
136338fd1498Szrj }
136438fd1498Szrj
136538fd1498Szrj /* Insert the outgoing edge into the hash table if it is not
136638fd1498Szrj already in the hash table. */
136738fd1498Szrj lookup_redirection_data (e, INSERT);
136838fd1498Szrj
136938fd1498Szrj /* When we have thread paths through a common joiner with different
137038fd1498Szrj final destinations, then we may need corrections to deal with
137138fd1498Szrj profile insanities. See the big comment before compute_path_counts. */
137238fd1498Szrj if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
137338fd1498Szrj {
137438fd1498Szrj if (!last)
137538fd1498Szrj last = e2;
137638fd1498Szrj else if (e2 != last)
137738fd1498Szrj local_info.need_profile_correction = true;
137838fd1498Szrj }
137938fd1498Szrj }
138038fd1498Szrj
138138fd1498Szrj /* We do not update dominance info. */
138238fd1498Szrj free_dominance_info (CDI_DOMINATORS);
138338fd1498Szrj
138438fd1498Szrj /* We know we only thread through the loop header to loop exits.
138538fd1498Szrj Let the basic block duplication hook know we are not creating
138638fd1498Szrj a multiple entry loop. */
138738fd1498Szrj if (noloop_only
138838fd1498Szrj && bb == bb->loop_father->header)
138938fd1498Szrj set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
139038fd1498Szrj
139138fd1498Szrj /* Now create duplicates of BB.
139238fd1498Szrj
139338fd1498Szrj Note that for a block with a high outgoing degree we can waste
139438fd1498Szrj a lot of time and memory creating and destroying useless edges.
139538fd1498Szrj
139638fd1498Szrj So we first duplicate BB and remove the control structure at the
139738fd1498Szrj tail of the duplicate as well as all outgoing edges from the
139838fd1498Szrj duplicate. We then use that duplicate block as a template for
139938fd1498Szrj the rest of the duplicates. */
140038fd1498Szrj local_info.template_block = NULL;
140138fd1498Szrj local_info.bb = bb;
140238fd1498Szrj local_info.jumps_threaded = false;
140338fd1498Szrj redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
140438fd1498Szrj (&local_info);
140538fd1498Szrj
140638fd1498Szrj /* The template does not have an outgoing edge. Create that outgoing
140738fd1498Szrj      edge and update PHI nodes at the edge's target as necessary.
140838fd1498Szrj
140938fd1498Szrj We do this after creating all the duplicates to avoid creating
141038fd1498Szrj unnecessary edges. */
141138fd1498Szrj redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
141238fd1498Szrj (&local_info);
141338fd1498Szrj
141438fd1498Szrj /* The hash table traversals above created the duplicate blocks (and the
141538fd1498Szrj statements within the duplicate blocks). This loop creates PHI nodes for
141638fd1498Szrj the duplicated blocks and redirects the incoming edges into BB to reach
141738fd1498Szrj the duplicates of BB. */
141838fd1498Szrj redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
141938fd1498Szrj (&local_info);
142038fd1498Szrj
142138fd1498Szrj /* Done with this block. Clear REDIRECTION_DATA. */
142238fd1498Szrj delete redirection_data;
142338fd1498Szrj redirection_data = NULL;
142438fd1498Szrj
142538fd1498Szrj if (noloop_only
142638fd1498Szrj && bb == bb->loop_father->header)
142738fd1498Szrj set_loop_copy (bb->loop_father, NULL);
142838fd1498Szrj
142938fd1498Szrj BITMAP_FREE (local_info.duplicate_blocks);
143038fd1498Szrj local_info.duplicate_blocks = NULL;
143138fd1498Szrj
143238fd1498Szrj /* Indicate to our caller whether or not any jumps were threaded. */
143338fd1498Szrj return local_info.jumps_threaded;
143438fd1498Szrj }
143538fd1498Szrj
143638fd1498Szrj /* Wrapper for thread_block_1 so that we can first handle jump
143738fd1498Szrj thread paths which do not involve copying joiner blocks, then
143838fd1498Szrj handle jump thread paths which have joiner blocks.
143938fd1498Szrj
144038fd1498Szrj By doing things this way we can be as aggressive as possible and
144138fd1498Szrj not worry that copying a joiner block will create a jump threading
144238fd1498Szrj opportunity. */
144338fd1498Szrj
144438fd1498Szrj static bool
144538fd1498Szrj thread_block (basic_block bb, bool noloop_only)
144638fd1498Szrj {
144738fd1498Szrj bool retval;
144838fd1498Szrj retval = thread_block_1 (bb, noloop_only, false);
144938fd1498Szrj retval |= thread_block_1 (bb, noloop_only, true);
145038fd1498Szrj return retval;
145138fd1498Szrj }
145238fd1498Szrj
145338fd1498Szrj /* Callback for dfs_enumerate_from. Returns true if BB is different
145438fd1498Szrj from STOP and DBDS_CE_STOP. */
145538fd1498Szrj
145638fd1498Szrj static basic_block dbds_ce_stop;
145738fd1498Szrj static bool
145838fd1498Szrj dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
145938fd1498Szrj {
146038fd1498Szrj return (bb != (const_basic_block) stop
146138fd1498Szrj && bb != dbds_ce_stop);
146238fd1498Szrj }
146338fd1498Szrj
146438fd1498Szrj /* Evaluates the dominance relationship of latch of the LOOP and BB, and
146538fd1498Szrj returns the state. */
146638fd1498Szrj
146738fd1498Szrj enum bb_dom_status
146838fd1498Szrj determine_bb_domination_status (struct loop *loop, basic_block bb)
146938fd1498Szrj {
147038fd1498Szrj basic_block *bblocks;
147138fd1498Szrj unsigned nblocks, i;
147238fd1498Szrj bool bb_reachable = false;
147338fd1498Szrj edge_iterator ei;
147438fd1498Szrj edge e;
147538fd1498Szrj
147638fd1498Szrj /* This function assumes BB is a successor of LOOP->header.
147738fd1498Szrj If that is not the case return DOMST_NONDOMINATING which
147838fd1498Szrj is always safe. */
147938fd1498Szrj {
148038fd1498Szrj bool ok = false;
148138fd1498Szrj
148238fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
148338fd1498Szrj {
148438fd1498Szrj if (e->src == loop->header)
148538fd1498Szrj {
148638fd1498Szrj ok = true;
148738fd1498Szrj break;
148838fd1498Szrj }
148938fd1498Szrj }
149038fd1498Szrj
149138fd1498Szrj if (!ok)
149238fd1498Szrj return DOMST_NONDOMINATING;
149338fd1498Szrj }
149438fd1498Szrj
149538fd1498Szrj if (bb == loop->latch)
149638fd1498Szrj return DOMST_DOMINATING;
149738fd1498Szrj
149838fd1498Szrj /* Check that BB dominates LOOP->latch, and that it is back-reachable
149938fd1498Szrj from it. */
150038fd1498Szrj
150138fd1498Szrj bblocks = XCNEWVEC (basic_block, loop->num_nodes);
150238fd1498Szrj dbds_ce_stop = loop->header;
150338fd1498Szrj nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
150438fd1498Szrj bblocks, loop->num_nodes, bb);
150538fd1498Szrj for (i = 0; i < nblocks; i++)
150638fd1498Szrj FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
150738fd1498Szrj {
150838fd1498Szrj if (e->src == loop->header)
150938fd1498Szrj {
151038fd1498Szrj free (bblocks);
151138fd1498Szrj return DOMST_NONDOMINATING;
151238fd1498Szrj }
151338fd1498Szrj if (e->src == bb)
151438fd1498Szrj bb_reachable = true;
151538fd1498Szrj }
151638fd1498Szrj
151738fd1498Szrj free (bblocks);
151838fd1498Szrj return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
151938fd1498Szrj }
152038fd1498Szrj
152138fd1498Szrj /* Thread jumps through the header of LOOP. Returns true if cfg changes.
152238fd1498Szrj If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
152338fd1498Szrj to the inside of the loop. */
152438fd1498Szrj
152538fd1498Szrj static bool
152638fd1498Szrj thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
152738fd1498Szrj {
152838fd1498Szrj basic_block header = loop->header;
152938fd1498Szrj edge e, tgt_edge, latch = loop_latch_edge (loop);
153038fd1498Szrj edge_iterator ei;
153138fd1498Szrj basic_block tgt_bb, atgt_bb;
153238fd1498Szrj enum bb_dom_status domst;
153338fd1498Szrj
153438fd1498Szrj /* We have already threaded through headers to exits, so all the threading
153538fd1498Szrj requests now are to the inside of the loop. We need to avoid creating
153638fd1498Szrj irreducible regions (i.e., loops with more than one entry block), and
153738fd1498Szrj    also loops with several latch edges, or new subloops of the loop (although
153838fd1498Szrj there are cases where it might be appropriate, it is difficult to decide,
153938fd1498Szrj and doing it wrongly may confuse other optimizers).
154038fd1498Szrj
154138fd1498Szrj We could handle more general cases here. However, the intention is to
154238fd1498Szrj preserve some information about the loop, which is impossible if its
154338fd1498Szrj structure changes significantly, in a way that is not well understood.
154438fd1498Szrj    Thus we only handle a few important special cases, in which updating
154538fd1498Szrj    the loop-carried information should also be feasible:
154638fd1498Szrj
154738fd1498Szrj 1) Propagation of latch edge to a block that dominates the latch block
154838fd1498Szrj of a loop. This aims to handle the following idiom:
154938fd1498Szrj
155038fd1498Szrj first = 1;
155138fd1498Szrj while (1)
155238fd1498Szrj {
155338fd1498Szrj if (first)
155438fd1498Szrj initialize;
155538fd1498Szrj first = 0;
155638fd1498Szrj body;
155738fd1498Szrj }
155838fd1498Szrj
155938fd1498Szrj After threading the latch edge, this becomes
156038fd1498Szrj
156138fd1498Szrj first = 1;
156238fd1498Szrj if (first)
156338fd1498Szrj initialize;
156438fd1498Szrj while (1)
156538fd1498Szrj {
156638fd1498Szrj first = 0;
156738fd1498Szrj body;
156838fd1498Szrj }
156938fd1498Szrj
157038fd1498Szrj The original header of the loop is moved out of it, and we may thread
157138fd1498Szrj the remaining edges through it without further constraints.
157238fd1498Szrj
157338fd1498Szrj 2) All entry edges are propagated to a single basic block that dominates
157438fd1498Szrj the latch block of the loop. This aims to handle the following idiom
157538fd1498Szrj (normally created for "for" loops):
157638fd1498Szrj
157738fd1498Szrj i = 0;
157838fd1498Szrj while (1)
157938fd1498Szrj {
158038fd1498Szrj if (i >= 100)
158138fd1498Szrj break;
158238fd1498Szrj body;
158338fd1498Szrj i++;
158438fd1498Szrj }
158538fd1498Szrj
158638fd1498Szrj This becomes
158738fd1498Szrj
158838fd1498Szrj i = 0;
158938fd1498Szrj while (1)
159038fd1498Szrj {
159138fd1498Szrj body;
159238fd1498Szrj i++;
159338fd1498Szrj if (i >= 100)
159438fd1498Szrj break;
159538fd1498Szrj }
159638fd1498Szrj */
159738fd1498Szrj
159838fd1498Szrj /* Threading through the header won't improve the code if the header has just
159938fd1498Szrj one successor. */
160038fd1498Szrj if (single_succ_p (header))
160138fd1498Szrj goto fail;
160238fd1498Szrj
160338fd1498Szrj if (!may_peel_loop_headers && !redirection_block_p (loop->header))
160438fd1498Szrj goto fail;
160538fd1498Szrj else
160638fd1498Szrj {
160738fd1498Szrj tgt_bb = NULL;
160838fd1498Szrj tgt_edge = NULL;
160938fd1498Szrj FOR_EACH_EDGE (e, ei, header->preds)
161038fd1498Szrj {
161138fd1498Szrj if (!e->aux)
161238fd1498Szrj {
161338fd1498Szrj if (e == latch)
161438fd1498Szrj continue;
161538fd1498Szrj
161638fd1498Szrj 	      /* If the latch is not threaded, and there is a header
161738fd1498Szrj 		 edge that is not threaded, we would create a loop
161838fd1498Szrj 		 with multiple entries.  */
161938fd1498Szrj goto fail;
162038fd1498Szrj }
162138fd1498Szrj
162238fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
162338fd1498Szrj
162438fd1498Szrj if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
162538fd1498Szrj goto fail;
162638fd1498Szrj tgt_edge = (*path)[1]->e;
162738fd1498Szrj atgt_bb = tgt_edge->dest;
162838fd1498Szrj if (!tgt_bb)
162938fd1498Szrj tgt_bb = atgt_bb;
163038fd1498Szrj 	  /* Two targets of threading would make us create a loop
163138fd1498Szrj 	     with multiple entries.  */
163238fd1498Szrj else if (tgt_bb != atgt_bb)
163338fd1498Szrj goto fail;
163438fd1498Szrj }
163538fd1498Szrj
163638fd1498Szrj if (!tgt_bb)
163738fd1498Szrj {
163838fd1498Szrj /* There are no threading requests. */
163938fd1498Szrj return false;
164038fd1498Szrj }
164138fd1498Szrj
164238fd1498Szrj /* Redirecting to empty loop latch is useless. */
164338fd1498Szrj if (tgt_bb == loop->latch
164438fd1498Szrj && empty_block_p (loop->latch))
164538fd1498Szrj goto fail;
164638fd1498Szrj }
164738fd1498Szrj
164838fd1498Szrj /* The target block must dominate the loop latch, otherwise we would be
164938fd1498Szrj creating a subloop. */
165038fd1498Szrj domst = determine_bb_domination_status (loop, tgt_bb);
165138fd1498Szrj if (domst == DOMST_NONDOMINATING)
165238fd1498Szrj goto fail;
165338fd1498Szrj if (domst == DOMST_LOOP_BROKEN)
165438fd1498Szrj {
165538fd1498Szrj /* If the loop ceased to exist, mark it as such, and thread through its
165638fd1498Szrj original header. */
165738fd1498Szrj mark_loop_for_removal (loop);
165838fd1498Szrj return thread_block (header, false);
165938fd1498Szrj }
166038fd1498Szrj
166138fd1498Szrj if (tgt_bb->loop_father->header == tgt_bb)
166238fd1498Szrj {
166338fd1498Szrj /* If the target of the threading is a header of a subloop, we need
166438fd1498Szrj to create a preheader for it, so that the headers of the two loops
166538fd1498Szrj do not merge. */
166638fd1498Szrj if (EDGE_COUNT (tgt_bb->preds) > 2)
166738fd1498Szrj {
166838fd1498Szrj tgt_bb = create_preheader (tgt_bb->loop_father, 0);
166938fd1498Szrj gcc_assert (tgt_bb != NULL);
167038fd1498Szrj }
167138fd1498Szrj else
167238fd1498Szrj tgt_bb = split_edge (tgt_edge);
167338fd1498Szrj }
167438fd1498Szrj
167538fd1498Szrj basic_block new_preheader;
167638fd1498Szrj
167738fd1498Szrj   /* Now consider the case where entry edges are redirected to the new entry
167838fd1498Szrj block. Remember one entry edge, so that we can find the new
167938fd1498Szrj preheader (its destination after threading). */
168038fd1498Szrj FOR_EACH_EDGE (e, ei, header->preds)
168138fd1498Szrj {
168238fd1498Szrj if (e->aux)
168338fd1498Szrj break;
168438fd1498Szrj }
168538fd1498Szrj
168638fd1498Szrj /* The duplicate of the header is the new preheader of the loop. Ensure
168738fd1498Szrj that it is placed correctly in the loop hierarchy. */
168838fd1498Szrj set_loop_copy (loop, loop_outer (loop));
168938fd1498Szrj
169038fd1498Szrj thread_block (header, false);
169138fd1498Szrj set_loop_copy (loop, NULL);
169238fd1498Szrj new_preheader = e->dest;
169338fd1498Szrj
169438fd1498Szrj /* Create the new latch block. This is always necessary, as the latch
169538fd1498Szrj must have only a single successor, but the original header had at
169638fd1498Szrj least two successors. */
169738fd1498Szrj loop->latch = NULL;
169838fd1498Szrj mfb_kj_edge = single_succ_edge (new_preheader);
169938fd1498Szrj loop->header = mfb_kj_edge->dest;
170038fd1498Szrj latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
170138fd1498Szrj loop->header = latch->dest;
170238fd1498Szrj loop->latch = latch->src;
170338fd1498Szrj return true;
170438fd1498Szrj
170538fd1498Szrj fail:
170638fd1498Szrj /* We failed to thread anything. Cancel the requests. */
170738fd1498Szrj FOR_EACH_EDGE (e, ei, header->preds)
170838fd1498Szrj {
170938fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
171038fd1498Szrj
171138fd1498Szrj if (path)
171238fd1498Szrj {
171338fd1498Szrj delete_jump_thread_path (path);
171438fd1498Szrj e->aux = NULL;
171538fd1498Szrj }
171638fd1498Szrj }
171738fd1498Szrj return false;
171838fd1498Szrj }
171938fd1498Szrj
172038fd1498Szrj /* E1 and E2 are edges into the same basic block. Return TRUE if the
172138fd1498Szrj PHI arguments associated with those edges are equal or there are no
172238fd1498Szrj PHI arguments, otherwise return FALSE. */
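/* For instance (hypothetical SSA names), given

     x_5 = PHI <a_1 (E1), a_1 (E2), b_2 (E3)>

   the arguments on E1 and E2 are both a_1, so this predicate holds
   for that pair of edges but not for the pair E1 and E3.  */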
172338fd1498Szrj
172438fd1498Szrj static bool
172538fd1498Szrj phi_args_equal_on_edges (edge e1, edge e2)
172638fd1498Szrj {
172738fd1498Szrj gphi_iterator gsi;
172838fd1498Szrj int indx1 = e1->dest_idx;
172938fd1498Szrj int indx2 = e2->dest_idx;
173038fd1498Szrj
173138fd1498Szrj for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
173238fd1498Szrj {
173338fd1498Szrj gphi *phi = gsi.phi ();
173438fd1498Szrj
173538fd1498Szrj if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
173638fd1498Szrj gimple_phi_arg_def (phi, indx2), 0))
173738fd1498Szrj return false;
173838fd1498Szrj }
173938fd1498Szrj return true;
174038fd1498Szrj }
174138fd1498Szrj
174238fd1498Szrj /* Return the number of non-debug statements and non-virtual PHIs in a
174338fd1498Szrj block. */
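/* E.g. (illustrative) a block with two non-virtual PHIs, one virtual
   PHI, two assignments and one debug statement counts as 4: the
   virtual PHI and the debug statement are ignored.  */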
174438fd1498Szrj
174538fd1498Szrj static unsigned int
174638fd1498Szrj count_stmts_and_phis_in_block (basic_block bb)
174738fd1498Szrj {
174838fd1498Szrj unsigned int num_stmts = 0;
174938fd1498Szrj
175038fd1498Szrj gphi_iterator gpi;
175138fd1498Szrj for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
175238fd1498Szrj if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
175338fd1498Szrj num_stmts++;
175438fd1498Szrj
175538fd1498Szrj gimple_stmt_iterator gsi;
175638fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
175738fd1498Szrj {
175838fd1498Szrj gimple *stmt = gsi_stmt (gsi);
175938fd1498Szrj if (!is_gimple_debug (stmt))
176038fd1498Szrj num_stmts++;
176138fd1498Szrj }
176238fd1498Szrj
176338fd1498Szrj return num_stmts;
176438fd1498Szrj }
176538fd1498Szrj
176638fd1498Szrj
176738fd1498Szrj /* Walk through the registered jump threads and convert them into a
176838fd1498Szrj form convenient for this pass.
176938fd1498Szrj
177038fd1498Szrj Any block which has incoming edges threaded to outgoing edges
177138fd1498Szrj will have its entry in THREADED_BLOCK set.
177238fd1498Szrj
177338fd1498Szrj Any threaded edge will have its new outgoing edge stored in the
177438fd1498Szrj original edge's AUX field.
177538fd1498Szrj
177638fd1498Szrj This form avoids the need to walk all the edges in the CFG to
177738fd1498Szrj discover blocks which need processing and avoids unnecessary
177838fd1498Szrj hash table lookups to map from threaded edge to new target. */
177938fd1498Szrj
178038fd1498Szrj static void
178138fd1498Szrj mark_threaded_blocks (bitmap threaded_blocks)
178238fd1498Szrj {
178338fd1498Szrj unsigned int i;
178438fd1498Szrj bitmap_iterator bi;
178538fd1498Szrj auto_bitmap tmp;
178638fd1498Szrj basic_block bb;
178738fd1498Szrj edge e;
178838fd1498Szrj edge_iterator ei;
178938fd1498Szrj
179038fd1498Szrj /* It is possible to have jump threads in which one is a subpath
179138fd1498Szrj      of the other.  I.e., (A, B), (B, C), (C, D) where B is a joiner
179238fd1498Szrj block and (B, C), (C, D) where no joiner block exists.
179338fd1498Szrj
179438fd1498Szrj When this occurs ignore the jump thread request with the joiner
179538fd1498Szrj block. It's totally subsumed by the simpler jump thread request.
179638fd1498Szrj
179738fd1498Szrj      This results in less block copying and simpler CFGs.  More importantly,
179838fd1498Szrj when we duplicate the joiner block, B, in this case we will create
179938fd1498Szrj a new threading opportunity that we wouldn't be able to optimize
180038fd1498Szrj until the next jump threading iteration.
180138fd1498Szrj
180238fd1498Szrj So first convert the jump thread requests which do not require a
180338fd1498Szrj joiner block. */
180438fd1498Szrj for (i = 0; i < paths.length (); i++)
180538fd1498Szrj {
180638fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
180738fd1498Szrj
180838fd1498Szrj if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
180938fd1498Szrj {
181038fd1498Szrj edge e = (*path)[0]->e;
181138fd1498Szrj e->aux = (void *)path;
181238fd1498Szrj bitmap_set_bit (tmp, e->dest->index);
181338fd1498Szrj }
181438fd1498Szrj }
181538fd1498Szrj
181638fd1498Szrj /* Now iterate again, converting cases where we want to thread
181738fd1498Szrj through a joiner block, but only if no other edge on the path
181838fd1498Szrj already has a jump thread attached to it. We do this in two passes,
181938fd1498Szrj to avoid situations where the order in the paths vec can hide overlapping
182038fd1498Szrj threads (the path is recorded on the incoming edge, so we would miss
182138fd1498Szrj cases where the second path starts at a downstream edge on the same
182238fd1498Szrj path). First record all joiner paths, deleting any in the unexpected
182338fd1498Szrj case where there is already a path for that incoming edge. */
182438fd1498Szrj for (i = 0; i < paths.length ();)
182538fd1498Szrj {
182638fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
182738fd1498Szrj
182838fd1498Szrj if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
182938fd1498Szrj {
183038fd1498Szrj /* Attach the path to the starting edge if none is yet recorded. */
183138fd1498Szrj if ((*path)[0]->e->aux == NULL)
183238fd1498Szrj {
183338fd1498Szrj (*path)[0]->e->aux = path;
183438fd1498Szrj i++;
183538fd1498Szrj }
183638fd1498Szrj else
183738fd1498Szrj {
183838fd1498Szrj paths.unordered_remove (i);
183938fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
184038fd1498Szrj dump_jump_thread_path (dump_file, *path, false);
184138fd1498Szrj delete_jump_thread_path (path);
184238fd1498Szrj }
184338fd1498Szrj }
184438fd1498Szrj else
184538fd1498Szrj {
184638fd1498Szrj i++;
184738fd1498Szrj }
184838fd1498Szrj }
184938fd1498Szrj
185038fd1498Szrj /* Second, look for paths that have any other jump thread attached to
185138fd1498Szrj them, and either finish converting them or cancel them. */
185238fd1498Szrj for (i = 0; i < paths.length ();)
185338fd1498Szrj {
185438fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
185538fd1498Szrj edge e = (*path)[0]->e;
185638fd1498Szrj
185738fd1498Szrj if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
185838fd1498Szrj {
185938fd1498Szrj unsigned int j;
186038fd1498Szrj for (j = 1; j < path->length (); j++)
186138fd1498Szrj if ((*path)[j]->e->aux != NULL)
186238fd1498Szrj break;
186338fd1498Szrj
186438fd1498Szrj /* If we iterated through the entire path without exiting the loop,
186538fd1498Szrj 	     then we are good to go; record it.  */
186638fd1498Szrj if (j == path->length ())
186738fd1498Szrj {
186838fd1498Szrj bitmap_set_bit (tmp, e->dest->index);
186938fd1498Szrj i++;
187038fd1498Szrj }
187138fd1498Szrj else
187238fd1498Szrj {
187338fd1498Szrj e->aux = NULL;
187438fd1498Szrj paths.unordered_remove (i);
187538fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
187638fd1498Szrj dump_jump_thread_path (dump_file, *path, false);
187738fd1498Szrj delete_jump_thread_path (path);
187838fd1498Szrj }
187938fd1498Szrj }
188038fd1498Szrj else
188138fd1498Szrj {
188238fd1498Szrj i++;
188338fd1498Szrj }
188438fd1498Szrj }
188538fd1498Szrj
188638fd1498Szrj /* When optimizing for size, prune all thread paths where statement
188738fd1498Szrj duplication is necessary.
188838fd1498Szrj
188938fd1498Szrj      We walk the jump thread path looking for copied blocks.  There are
189038fd1498Szrj two types of copied blocks.
189138fd1498Szrj
189238fd1498Szrj EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
189338fd1498Szrj cancel the jump threading request when optimizing for size.
189438fd1498Szrj
189538fd1498Szrj EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
189638fd1498Szrj will be killed by threading. If threading does not kill all of
189738fd1498Szrj its statements, then we should cancel the jump threading request
189838fd1498Szrj when optimizing for size. */
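  /* For instance (hypothetical), an EDGE_COPY_SRC_BLOCK whose only real
     statement feeds the threaded condition is free to copy, since
     threading kills that statement; a copied block with surviving side
     effects would grow the code, so its path is cancelled under -Os.  */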
189938fd1498Szrj if (optimize_function_for_size_p (cfun))
190038fd1498Szrj {
190138fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
190238fd1498Szrj {
190338fd1498Szrj FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
190438fd1498Szrj if (e->aux)
190538fd1498Szrj {
190638fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
190738fd1498Szrj
190838fd1498Szrj unsigned int j;
190938fd1498Szrj for (j = 1; j < path->length (); j++)
191038fd1498Szrj {
191138fd1498Szrj bb = (*path)[j]->e->src;
191238fd1498Szrj if (redirection_block_p (bb))
191338fd1498Szrj ;
191438fd1498Szrj else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
191538fd1498Szrj || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
191638fd1498Szrj && (count_stmts_and_phis_in_block (bb)
191738fd1498Szrj != estimate_threading_killed_stmts (bb))))
191838fd1498Szrj break;
191938fd1498Szrj }
192038fd1498Szrj
192138fd1498Szrj if (j != path->length ())
192238fd1498Szrj {
192338fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
192438fd1498Szrj dump_jump_thread_path (dump_file, *path, 0);
192538fd1498Szrj delete_jump_thread_path (path);
192638fd1498Szrj e->aux = NULL;
192738fd1498Szrj }
192838fd1498Szrj else
192938fd1498Szrj bitmap_set_bit (threaded_blocks, i);
193038fd1498Szrj }
193138fd1498Szrj }
193238fd1498Szrj }
193338fd1498Szrj else
193438fd1498Szrj bitmap_copy (threaded_blocks, tmp);
193538fd1498Szrj
193638fd1498Szrj /* If we have a joiner block (J) which has two successors S1 and S2 and
193738fd1498Szrj we are threading though S1 and the final destination of the thread
193838fd1498Szrj is S2, then we must verify that any PHI nodes in S2 have the same
193938fd1498Szrj PHI arguments for the edge J->S2 and J->S1->...->S2.
194038fd1498Szrj
194138fd1498Szrj We used to detect this prior to registering the jump thread, but
194238fd1498Szrj that prohibits propagation of edge equivalences into non-dominated
194338fd1498Szrj PHI nodes as the equivalency test might occur before propagation.
194438fd1498Szrj
194538fd1498Szrj This must also occur after we truncate any jump threading paths
194638fd1498Szrj as this scenario may only show up after truncation.
194738fd1498Szrj
194838fd1498Szrj This works for now, but will need improvement as part of the FSA
194938fd1498Szrj optimization.
195038fd1498Szrj
195138fd1498Szrj Note since we've moved the thread request data to the edges,
195238fd1498Szrj we have to iterate on those rather than the threaded_edges vector. */
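  /* Schematically (hypothetical blocks):

       J --> S1 --> ... --> S2
       J ------------------> S2

     If a PHI in S2 would receive different arguments on the direct edge
     J->S2 and on the threaded path J->S1->...->S2, the thread through
     S1 is cancelled below.  */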
195338fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
195438fd1498Szrj {
195538fd1498Szrj bb = BASIC_BLOCK_FOR_FN (cfun, i);
195638fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
195738fd1498Szrj {
195838fd1498Szrj if (e->aux)
195938fd1498Szrj {
196038fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
196138fd1498Szrj bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
196238fd1498Szrj
196338fd1498Szrj if (have_joiner)
196438fd1498Szrj {
196538fd1498Szrj basic_block joiner = e->dest;
196638fd1498Szrj edge final_edge = path->last ()->e;
196738fd1498Szrj basic_block final_dest = final_edge->dest;
196838fd1498Szrj edge e2 = find_edge (joiner, final_dest);
196938fd1498Szrj
197038fd1498Szrj if (e2 && !phi_args_equal_on_edges (e2, final_edge))
197138fd1498Szrj {
197238fd1498Szrj delete_jump_thread_path (path);
197338fd1498Szrj e->aux = NULL;
197438fd1498Szrj }
197538fd1498Szrj }
197638fd1498Szrj }
197738fd1498Szrj }
197838fd1498Szrj }
197938fd1498Szrj
198038fd1498Szrj /* Look for jump threading paths which cross multiple loop headers.
198138fd1498Szrj
198238fd1498Szrj The code to thread through loop headers will change the CFG in ways
198338fd1498Szrj that invalidate the cached loop iteration information. So we must
198438fd1498Szrj detect that case and wipe the cached information. */
198538fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
198638fd1498Szrj {
198738fd1498Szrj basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
198838fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
198938fd1498Szrj {
199038fd1498Szrj if (e->aux)
199138fd1498Szrj {
199238fd1498Szrj vec<jump_thread_edge *> *path = THREAD_PATH (e);
199338fd1498Szrj
199438fd1498Szrj for (unsigned int i = 0, crossed_headers = 0;
199538fd1498Szrj i < path->length ();
199638fd1498Szrj i++)
199738fd1498Szrj {
199838fd1498Szrj basic_block dest = (*path)[i]->e->dest;
199938fd1498Szrj basic_block src = (*path)[i]->e->src;
200038fd1498Szrj /* If we enter a loop. */
200138fd1498Szrj if (flow_loop_nested_p (src->loop_father, dest->loop_father))
200238fd1498Szrj ++crossed_headers;
200338fd1498Szrj /* If we step from a block outside an irreducible region
200438fd1498Szrj to a block inside an irreducible region, then we have
200538fd1498Szrj crossed into a loop. */
200638fd1498Szrj else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
200738fd1498Szrj && (dest->flags & BB_IRREDUCIBLE_LOOP))
200838fd1498Szrj ++crossed_headers;
200938fd1498Szrj if (crossed_headers > 1)
201038fd1498Szrj {
201138fd1498Szrj vect_free_loop_info_assumptions
201238fd1498Szrj ((*path)[path->length () - 1]->e->dest->loop_father);
201338fd1498Szrj break;
201438fd1498Szrj }
201538fd1498Szrj }
201638fd1498Szrj }
201738fd1498Szrj }
201838fd1498Szrj }
201938fd1498Szrj }
202038fd1498Szrj
202138fd1498Szrj
202238fd1498Szrj /* Verify that the REGION is a valid jump thread. A jump thread is a special
202338fd1498Szrj case of SEME Single Entry Multiple Exits region in which all nodes in the
202438fd1498Szrj REGION have exactly one incoming edge. The only exception is the first block
202538fd1498Szrj that may not have been connected to the rest of the cfg yet. */
202638fd1498Szrj
202738fd1498Szrj DEBUG_FUNCTION void
202838fd1498Szrj verify_jump_thread (basic_block *region, unsigned n_region)
202938fd1498Szrj {
203038fd1498Szrj for (unsigned i = 0; i < n_region; i++)
203138fd1498Szrj gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
203238fd1498Szrj }
203338fd1498Szrj
203438fd1498Szrj /* Return true when BB is one of the first N items in BBS. */
203538fd1498Szrj
203638fd1498Szrj static inline bool
203738fd1498Szrj bb_in_bbs (basic_block bb, basic_block *bbs, int n)
203838fd1498Szrj {
203938fd1498Szrj for (int i = 0; i < n; i++)
204038fd1498Szrj if (bb == bbs[i])
204138fd1498Szrj return true;
204238fd1498Szrj
204338fd1498Szrj return false;
204438fd1498Szrj }
204538fd1498Szrj
204638fd1498Szrj /* Duplicates a jump-thread path of N_REGION basic blocks.
204738fd1498Szrj The ENTRY edge is redirected to the duplicate of the region.
204838fd1498Szrj
204938fd1498Szrj Remove the last conditional statement in the last basic block in the REGION,
205038fd1498Szrj and create a single fallthru edge pointing to the same destination as the
205138fd1498Szrj EXIT edge.
205238fd1498Szrj
205338fd1498Szrj Returns false if it is unable to copy the region, true otherwise. */
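/* For a path E0 = (A->B), E1 = (B->C), E2 = (C->D) (hypothetical
   blocks), the caller passes ENTRY = E0, EXIT = E2 and REGION =
   {B, C}; B and C are copied, the branch ending the copy of C is
   removed, and a fallthru edge to D takes its place.  */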
205438fd1498Szrj
205538fd1498Szrj static bool
205638fd1498Szrj duplicate_thread_path (edge entry, edge exit, basic_block *region,
205738fd1498Szrj unsigned n_region)
205838fd1498Szrj {
205938fd1498Szrj unsigned i;
206038fd1498Szrj struct loop *loop = entry->dest->loop_father;
206138fd1498Szrj edge exit_copy;
206238fd1498Szrj edge redirected;
206338fd1498Szrj profile_count curr_count;
206438fd1498Szrj
206538fd1498Szrj if (!can_copy_bbs_p (region, n_region))
206638fd1498Szrj return false;
206738fd1498Szrj
206838fd1498Szrj /* Some sanity checking. Note that we do not check for all possible
206938fd1498Szrj      misuses of the functions.  I.e. if you ask to copy something weird,
207038fd1498Szrj it will work, but the state of structures probably will not be
207138fd1498Szrj correct. */
207238fd1498Szrj for (i = 0; i < n_region; i++)
207338fd1498Szrj {
207438fd1498Szrj /* We do not handle subloops, i.e. all the blocks must belong to the
207538fd1498Szrj same loop. */
207638fd1498Szrj if (region[i]->loop_father != loop)
207738fd1498Szrj return false;
207838fd1498Szrj }
207938fd1498Szrj
208038fd1498Szrj initialize_original_copy_tables ();
208138fd1498Szrj
208238fd1498Szrj set_loop_copy (loop, loop);
208338fd1498Szrj
208438fd1498Szrj basic_block *region_copy = XNEWVEC (basic_block, n_region);
208538fd1498Szrj copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
208638fd1498Szrj split_edge_bb_loc (entry), false);
208738fd1498Szrj
208838fd1498Szrj /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
208938fd1498Szrj following code ensures that all the edges exiting the jump-thread path are
209038fd1498Szrj redirected back to the original code: these edges are exceptions
209138fd1498Szrj invalidating the property that is propagated by executing all the blocks of
209238fd1498Szrj the jump-thread path in order. */
209338fd1498Szrj
209438fd1498Szrj curr_count = entry->count ();
209538fd1498Szrj
209638fd1498Szrj for (i = 0; i < n_region; i++)
209738fd1498Szrj {
209838fd1498Szrj edge e;
209938fd1498Szrj edge_iterator ei;
210038fd1498Szrj basic_block bb = region_copy[i];
210138fd1498Szrj
210238fd1498Szrj /* Watch inconsistent profile. */
210338fd1498Szrj if (curr_count > region[i]->count)
210438fd1498Szrj curr_count = region[i]->count;
210538fd1498Szrj /* Scale current BB. */
210638fd1498Szrj if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
210738fd1498Szrj {
210838fd1498Szrj /* In the middle of the path we only scale the frequencies.
210938fd1498Szrj 	     In the last BB we need to update probabilities of outgoing edges
211038fd1498Szrj 	     because we know which one is taken on the threaded path.  */
211138fd1498Szrj if (i + 1 != n_region)
211238fd1498Szrj scale_bbs_frequencies_profile_count (region + i, 1,
211338fd1498Szrj region[i]->count - curr_count,
211438fd1498Szrj region[i]->count);
211538fd1498Szrj else
211638fd1498Szrj update_bb_profile_for_threading (region[i],
211738fd1498Szrj curr_count,
211838fd1498Szrj exit);
211938fd1498Szrj scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
212038fd1498Szrj region_copy[i]->count);
212138fd1498Szrj }
212238fd1498Szrj
212338fd1498Szrj if (single_succ_p (bb))
212438fd1498Szrj {
212538fd1498Szrj /* Make sure the successor is the next node in the path. */
212638fd1498Szrj gcc_assert (i + 1 == n_region
212738fd1498Szrj || region_copy[i + 1] == single_succ_edge (bb)->dest);
212838fd1498Szrj if (i + 1 != n_region)
212938fd1498Szrj {
213038fd1498Szrj curr_count = single_succ_edge (bb)->count ();
213138fd1498Szrj }
213238fd1498Szrj continue;
213338fd1498Szrj }
213438fd1498Szrj
213538fd1498Szrj /* Special case the last block on the path: make sure that it does not
213638fd1498Szrj jump back on the copied path, including back to itself. */
213738fd1498Szrj if (i + 1 == n_region)
213838fd1498Szrj {
213938fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
214038fd1498Szrj if (bb_in_bbs (e->dest, region_copy, n_region))
214138fd1498Szrj {
214238fd1498Szrj basic_block orig = get_bb_original (e->dest);
214338fd1498Szrj if (orig)
214438fd1498Szrj redirect_edge_and_branch_force (e, orig);
214538fd1498Szrj }
214638fd1498Szrj continue;
214738fd1498Szrj }
214838fd1498Szrj
214938fd1498Szrj /* Redirect all other edges jumping to non-adjacent blocks back to the
215038fd1498Szrj original code. */
215138fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
215238fd1498Szrj if (region_copy[i + 1] != e->dest)
215338fd1498Szrj {
215438fd1498Szrj basic_block orig = get_bb_original (e->dest);
215538fd1498Szrj if (orig)
215638fd1498Szrj redirect_edge_and_branch_force (e, orig);
215738fd1498Szrj }
215838fd1498Szrj else
215938fd1498Szrj {
216038fd1498Szrj curr_count = e->count ();
216138fd1498Szrj }
216238fd1498Szrj }
216338fd1498Szrj
216438fd1498Szrj
216538fd1498Szrj if (flag_checking)
216638fd1498Szrj verify_jump_thread (region_copy, n_region);
216738fd1498Szrj
216838fd1498Szrj /* Remove the last branch in the jump thread path. */
216938fd1498Szrj remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
217038fd1498Szrj
217138fd1498Szrj /* And fixup the flags on the single remaining edge. */
217238fd1498Szrj edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
217338fd1498Szrj fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
217438fd1498Szrj fix_e->flags |= EDGE_FALLTHRU;
217538fd1498Szrj
217638fd1498Szrj edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
217738fd1498Szrj
217838fd1498Szrj if (e)
217938fd1498Szrj {
218038fd1498Szrj rescan_loop_exit (e, true, false);
218138fd1498Szrj e->probability = profile_probability::always ();
218238fd1498Szrj }
218338fd1498Szrj
218438fd1498Szrj /* Redirect the entry and add the phi node arguments. */
218538fd1498Szrj if (entry->dest == loop->header)
218638fd1498Szrj mark_loop_for_removal (loop);
218738fd1498Szrj redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
218838fd1498Szrj gcc_assert (redirected != NULL);
218938fd1498Szrj flush_pending_stmts (entry);
219038fd1498Szrj
219138fd1498Szrj /* Add the other PHI node arguments. */
219238fd1498Szrj add_phi_args_after_copy (region_copy, n_region, NULL);
219338fd1498Szrj
219438fd1498Szrj free (region_copy);
219538fd1498Szrj
219638fd1498Szrj free_original_copy_tables ();
219738fd1498Szrj return true;
219838fd1498Szrj }
219938fd1498Szrj
220038fd1498Szrj /* Return true when PATH is a valid jump-thread path. */
220138fd1498Szrj
220238fd1498Szrj static bool
220338fd1498Szrj valid_jump_thread_path (vec<jump_thread_edge *> *path)
220438fd1498Szrj {
220538fd1498Szrj unsigned len = path->length ();
220638fd1498Szrj
220738fd1498Szrj /* Check that the path is connected. */
220838fd1498Szrj for (unsigned int j = 0; j < len - 1; j++)
220938fd1498Szrj {
221038fd1498Szrj edge e = (*path)[j]->e;
221138fd1498Szrj if (e->dest != (*path)[j+1]->e->src)
221238fd1498Szrj return false;
221338fd1498Szrj }
221438fd1498Szrj return true;
221538fd1498Szrj }
221638fd1498Szrj
221738fd1498Szrj /* Remove any queued jump threads that include edge E.
221838fd1498Szrj
221938fd1498Szrj    We don't actually remove them here, just record the edges into a
222038fd1498Szrj hash table. That way we can do the search once per iteration of
222138fd1498Szrj DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
222238fd1498Szrj
222338fd1498Szrj void
222438fd1498Szrj remove_jump_threads_including (edge_def *e)
222538fd1498Szrj {
222638fd1498Szrj if (!paths.exists ())
222738fd1498Szrj return;
222838fd1498Szrj
222938fd1498Szrj if (!removed_edges)
223038fd1498Szrj removed_edges = new hash_table<struct removed_edges> (17);
223138fd1498Szrj
223238fd1498Szrj edge *slot = removed_edges->find_slot (e, INSERT);
223338fd1498Szrj *slot = e;
223438fd1498Szrj }
223538fd1498Szrj
223638fd1498Szrj /* Walk through all blocks and thread incoming edges to the appropriate
223738fd1498Szrj outgoing edge for each edge pair recorded in THREADED_EDGES.
223838fd1498Szrj
223938fd1498Szrj It is the caller's responsibility to fix the dominance information
224038fd1498Szrj and rewrite duplicated SSA_NAMEs back into SSA form.
224138fd1498Szrj
224238fd1498Szrj If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
224338fd1498Szrj loop headers if it does not simplify the loop.
224438fd1498Szrj
224538fd1498Szrj Returns true if one or more edges were threaded, false otherwise. */
224638fd1498Szrj
224738fd1498Szrj bool
224838fd1498Szrj thread_through_all_blocks (bool may_peel_loop_headers)
224938fd1498Szrj {
225038fd1498Szrj bool retval = false;
225138fd1498Szrj unsigned int i;
225238fd1498Szrj struct loop *loop;
225338fd1498Szrj auto_bitmap threaded_blocks;
225438fd1498Szrj
225538fd1498Szrj if (!paths.exists ())
225638fd1498Szrj {
225738fd1498Szrj retval = false;
225838fd1498Szrj goto out;
225938fd1498Szrj }
226038fd1498Szrj
226138fd1498Szrj memset (&thread_stats, 0, sizeof (thread_stats));
226238fd1498Szrj
226338fd1498Szrj /* Remove any paths that referenced removed edges. */
226438fd1498Szrj if (removed_edges)
226538fd1498Szrj for (i = 0; i < paths.length (); )
226638fd1498Szrj {
226738fd1498Szrj unsigned int j;
226838fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
226938fd1498Szrj
227038fd1498Szrj for (j = 0; j < path->length (); j++)
227138fd1498Szrj {
227238fd1498Szrj edge e = (*path)[j]->e;
227338fd1498Szrj if (removed_edges->find_slot (e, NO_INSERT))
227438fd1498Szrj break;
227538fd1498Szrj }
227638fd1498Szrj
227738fd1498Szrj if (j != path->length ())
227838fd1498Szrj {
227938fd1498Szrj delete_jump_thread_path (path);
228038fd1498Szrj paths.unordered_remove (i);
228138fd1498Szrj continue;
228238fd1498Szrj }
228338fd1498Szrj i++;
228438fd1498Szrj }
228538fd1498Szrj
228638fd1498Szrj /* Jump-thread all FSM threads before other jump-threads. */
228738fd1498Szrj for (i = 0; i < paths.length ();)
228838fd1498Szrj {
228938fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
229038fd1498Szrj edge entry = (*path)[0]->e;
229138fd1498Szrj
229238fd1498Szrj /* Only code-generate FSM jump-threads in this loop. */
229338fd1498Szrj if ((*path)[0]->type != EDGE_FSM_THREAD)
229438fd1498Szrj {
229538fd1498Szrj i++;
229638fd1498Szrj continue;
229738fd1498Szrj }
229838fd1498Szrj
229938fd1498Szrj /* Do not jump-thread twice from the same block. */
230038fd1498Szrj if (bitmap_bit_p (threaded_blocks, entry->src->index)
230138fd1498Szrj /* We may not want to realize this jump thread path
230238fd1498Szrj for various reasons. So check it first. */
230338fd1498Szrj || !valid_jump_thread_path (path))
230438fd1498Szrj {
230538fd1498Szrj /* Remove invalid FSM jump-thread paths. */
230638fd1498Szrj delete_jump_thread_path (path);
230738fd1498Szrj paths.unordered_remove (i);
230838fd1498Szrj continue;
230938fd1498Szrj }
231038fd1498Szrj
231138fd1498Szrj unsigned len = path->length ();
231238fd1498Szrj edge exit = (*path)[len - 1]->e;
231338fd1498Szrj basic_block *region = XNEWVEC (basic_block, len - 1);
231438fd1498Szrj
231538fd1498Szrj for (unsigned int j = 0; j < len - 1; j++)
231638fd1498Szrj region[j] = (*path)[j]->e->dest;
231738fd1498Szrj
231838fd1498Szrj if (duplicate_thread_path (entry, exit, region, len - 1))
231938fd1498Szrj {
232038fd1498Szrj /* We do not update dominance info. */
232138fd1498Szrj free_dominance_info (CDI_DOMINATORS);
232238fd1498Szrj bitmap_set_bit (threaded_blocks, entry->src->index);
232338fd1498Szrj retval = true;
232438fd1498Szrj thread_stats.num_threaded_edges++;
232538fd1498Szrj }
232638fd1498Szrj
232738fd1498Szrj delete_jump_thread_path (path);
232838fd1498Szrj paths.unordered_remove (i);
232938fd1498Szrj free (region);
233038fd1498Szrj }
233138fd1498Szrj
233238fd1498Szrj /* Remove from PATHS all the jump-threads starting with an edge already
233338fd1498Szrj jump-threaded. */
233438fd1498Szrj for (i = 0; i < paths.length ();)
233538fd1498Szrj {
233638fd1498Szrj vec<jump_thread_edge *> *path = paths[i];
233738fd1498Szrj edge entry = (*path)[0]->e;
233838fd1498Szrj
233938fd1498Szrj /* Do not jump-thread twice from the same block. */
234038fd1498Szrj if (bitmap_bit_p (threaded_blocks, entry->src->index))
234138fd1498Szrj {
234238fd1498Szrj delete_jump_thread_path (path);
234338fd1498Szrj paths.unordered_remove (i);
234438fd1498Szrj }
234538fd1498Szrj else
234638fd1498Szrj i++;
234738fd1498Szrj }
234838fd1498Szrj
234938fd1498Szrj bitmap_clear (threaded_blocks);
235038fd1498Szrj
235138fd1498Szrj mark_threaded_blocks (threaded_blocks);
235238fd1498Szrj
235338fd1498Szrj initialize_original_copy_tables ();
235438fd1498Szrj
235538fd1498Szrj /* The order in which we process jump threads can be important.
235638fd1498Szrj
235738fd1498Szrj Consider if we have two jump threading paths A and B. If the
235838fd1498Szrj target edge of A is the starting edge of B and we thread path A
235938fd1498Szrj first, then we create an additional incoming edge into B->dest that
236038fd1498Szrj      we cannot discover as a jump threading path on this iteration.
236138fd1498Szrj
236238fd1498Szrj If we instead thread B first, then the edge into B->dest will have
236338fd1498Szrj already been redirected before we process path A and path A will
236438fd1498Szrj      naturally, with no further work, target the redirected path for B.
236538fd1498Szrj
236638fd1498Szrj      A post-order is sufficient here.  Compute the ordering first, then
236738fd1498Szrj process the blocks. */
236838fd1498Szrj if (!bitmap_empty_p (threaded_blocks))
236938fd1498Szrj {
237038fd1498Szrj int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
237138fd1498Szrj unsigned int postorder_num = post_order_compute (postorder, false, false);
237238fd1498Szrj for (unsigned int i = 0; i < postorder_num; i++)
237338fd1498Szrj {
237438fd1498Szrj unsigned int indx = postorder[i];
237538fd1498Szrj if (bitmap_bit_p (threaded_blocks, indx))
237638fd1498Szrj {
237738fd1498Szrj basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
237838fd1498Szrj retval |= thread_block (bb, true);
237938fd1498Szrj }
238038fd1498Szrj }
238138fd1498Szrj free (postorder);
238238fd1498Szrj }
238338fd1498Szrj
238438fd1498Szrj /* Then perform the threading through loop headers. We start with the
238538fd1498Szrj innermost loop, so that the changes in cfg we perform won't affect
238638fd1498Szrj further threading. */
238738fd1498Szrj FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
238838fd1498Szrj {
238938fd1498Szrj if (!loop->header
239038fd1498Szrj || !bitmap_bit_p (threaded_blocks, loop->header->index))
239138fd1498Szrj continue;
239238fd1498Szrj
239338fd1498Szrj retval |= thread_through_loop_header (loop, may_peel_loop_headers);
239438fd1498Szrj }
239538fd1498Szrj
  /* All jump threading paths should have been resolved at this
     point.  Verify that is the case.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->preds)
	gcc_assert (e->aux == NULL);
    }

  statistics_counter_event (cfun, "Jumps threaded",
			    thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  paths.release ();

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

 out:
  delete removed_edges;
  removed_edges = NULL;
  return retval;
}

/* Delete the jump threading path PATH.  We have to explicitly delete
   each entry in the vector, then the container.  */

void
delete_jump_thread_path (vec<jump_thread_edge *> *path)
{
  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  path->release ();
  delete path;
}

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   PATH records the opportunity: its first edge is the edge we can
   thread and its last edge is the new target, i.e., we are effectively
   recording that the first edge's destination can be changed to the
   last edge's destination after fixing the SSA graph.  */

void
register_jump_thread (vec<jump_thread_edge *> *path)
{
  if (!dbg_cnt (registered_jump_thread))
    {
      delete_jump_thread_path (path);
      return;
    }

  /* First make sure there are no NULL outgoing edges on the jump threading
     path.  That can happen for jumping to a constant address.  */
  for (unsigned int i = 0; i < path->length (); i++)
    {
      if ((*path)[i]->e == NULL)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
	      dump_jump_thread_path (dump_file, *path, false);
	    }

	  delete_jump_thread_path (path);
	  return;
	}

      /* Only the FSM threader is allowed to thread across
	 backedges in the CFG.  */
      if (flag_checking
	  && (*path)[0]->type != EDGE_FSM_THREAD)
	gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_jump_thread_path (dump_file, *path, true);

  if (!paths.exists ())
    paths.create (5);

  paths.safe_push (path);
}
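
/* A minimal usage sketch (hypothetical client code, not part of this
   file): a pass that proves the conditional ending E1->dest always
   exits through E2 could queue the opportunity as

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e1, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH passes to the queue; it is released either when
   the thread is cancelled or after the CFG and SSA updates are
   applied.  */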

/* Return the number of uses of T within BB when all of T's uses are
   inside BB.  If any use of T is outside BB, return -1 when BB holds
   at most one use of T, or -2 when BB holds more than one use.  */
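/* For instance (hypothetical SSA name x_1): two uses of x_1 inside BB
   and none elsewhere yields 2, so x_1's def can be counted dead once
   both uses die; any use outside BB yields -1 or -2, telling the
   caller that the def must survive regardless of what happens in
   BB.  */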
static int
uses_in_bb (tree t, basic_block bb)
{
  int uses = 0;
  bool outside_bb = false;

  imm_use_iterator iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;

      if (gimple_bb (USE_STMT (use_p)) != bb)
	outside_bb = true;
      else
	uses++;

      if (outside_bb && uses > 1)
	return -2;
    }

  if (outside_bb)
    return -1;

  return uses;
}

/* Starting from the final control flow stmt in BB, assuming it will
   be removed, follow uses in to-be-removed stmts back to their defs
   and count how many defs will become dead and be removed as
   well.  */
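
/* For example (hypothetical GIMPLE): if BB ends in

     a_1 = b_2 + 1;
     if (a_1 > c_3) goto ...;

   and a_1 has no uses outside BB, removing the GIMPLE_COND leaves
   a_1's def dead too, so the estimate is 2.  */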

unsigned int
estimate_threading_killed_stmts (basic_block bb)
{
  int killed_stmts = 0;
  hash_map<tree, int> ssa_remaining_uses;
  auto_vec<gimple *, 4> dead_worklist;

  /* If the block has only two predecessors, threading will turn each
     PHI result into a copy of one of its arguments, so count the PHIs
     as dead stmts.  */
  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;

  if (drop_all_phis)
    for (gphi_iterator gsi = gsi_start_phis (bb);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	tree dst = gimple_phi_result (phi);

	/* We don't count virtual PHIs as stmts in
	   record_temporary_equivalences_from_phis.  */
	if (virtual_operand_p (dst))
	  continue;

	killed_stmts++;
      }

  if (gsi_end_p (gsi_last_bb (bb)))
    return killed_stmts;

  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return killed_stmts;

  /* The control statement is always dead.  */
  killed_stmts++;
  dead_worklist.quick_push (stmt);
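
  /* Propagate deadness backwards: whenever the last remaining use of
     a name within BB dies and the name has no uses outside BB, the
     name's defining stmt is counted dead and, for non-PHI defs, queued
     so that its own operands can be examined in turn.  */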
  while (!dead_worklist.is_empty ())
    {
      stmt = dead_worklist.pop ();

      ssa_op_iter iter;
      use_operand_p use_p;
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  tree t = USE_FROM_PTR (use_p);
	  gimple *def = SSA_NAME_DEF_STMT (t);

	  if (gimple_bb (def) == bb
	      && (gimple_code (def) != GIMPLE_PHI
		  || !drop_all_phis)
	      && !gimple_has_side_effects (def))
	    {
	      int *usesp = ssa_remaining_uses.get (t);
	      int uses;

	      if (usesp)
		uses = *usesp;
	      else
		uses = uses_in_bb (t, bb);

	      gcc_assert (uses);

	      /* Don't bother recording the expected use count if we
		 won't find any further uses within BB.  */
	      if (!usesp && (uses < -1 || uses > 1))
		{
		  usesp = &ssa_remaining_uses.get_or_insert (t);
		  *usesp = uses;
		}

	      if (uses < 0)
		continue;

	      --uses;
	      if (usesp)
		*usesp = uses;

	      if (!uses)
		{
		  killed_stmts++;
		  if (usesp)
		    ssa_remaining_uses.remove (t);
		  if (gimple_code (def) != GIMPLE_PHI)
		    dead_worklist.safe_push (def);
		}
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file, "threading bb %i kills %i stmts\n",
	     bb->index, killed_stmts);

  return killed_stmts;
}
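
/* A caller would typically weigh this estimate against the number of
   stmts that threading has to duplicate, e.g. (hypothetical sketch,
   names made up):

     int copied = count_stmts_on_path (path);
     if (copied - (int) estimate_threading_killed_stmts (bb) > limit)
       cancel_thread (path);

   so that paths whose duplicated code mostly dies are still deemed
   profitable.  */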