138fd1498Szrj /* The tracer pass for the GNU compiler.
238fd1498Szrj Contributed by Jan Hubicka, SuSE Labs.
338fd1498Szrj Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
438fd1498Szrj Copyright (C) 2001-2018 Free Software Foundation, Inc.
538fd1498Szrj
638fd1498Szrj This file is part of GCC.
738fd1498Szrj
838fd1498Szrj GCC is free software; you can redistribute it and/or modify it
938fd1498Szrj under the terms of the GNU General Public License as published by
1038fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
1138fd1498Szrj any later version.
1238fd1498Szrj
1338fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
1438fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1538fd1498Szrj or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
1638fd1498Szrj License for more details.
1738fd1498Szrj
1838fd1498Szrj You should have received a copy of the GNU General Public License
1938fd1498Szrj along with GCC; see the file COPYING3. If not see
2038fd1498Szrj <http://www.gnu.org/licenses/>. */
2138fd1498Szrj
2238fd1498Szrj /* This pass performs the tail duplication needed for superblock formation.
2338fd1498Szrj For more information see:
2438fd1498Szrj
2538fd1498Szrj Design and Analysis of Profile-Based Optimization in Compaq's
2638fd1498Szrj Compilation Tools for Alpha; Journal of Instruction-Level
2738fd1498Szrj Parallelism 3 (2000) 1-25
2838fd1498Szrj
2938fd1498Szrj Unlike Compaq's implementation we don't do the loop peeling as most
3038fd1498Szrj probably a better job can be done by a special pass and we don't
3138fd1498Szrj need to worry too much about the code size implications as the tail
3238fd1498Szrj duplicates are crossjumped again if optimizations are not
3338fd1498Szrj performed. */
3438fd1498Szrj
3538fd1498Szrj
3638fd1498Szrj #include "config.h"
3738fd1498Szrj #include "system.h"
3838fd1498Szrj #include "coretypes.h"
3938fd1498Szrj #include "backend.h"
4038fd1498Szrj #include "rtl.h"
4138fd1498Szrj #include "tree.h"
4238fd1498Szrj #include "gimple.h"
4338fd1498Szrj #include "cfghooks.h"
4438fd1498Szrj #include "tree-pass.h"
4538fd1498Szrj #include "profile.h"
4638fd1498Szrj #include "cfganal.h"
4738fd1498Szrj #include "params.h"
4838fd1498Szrj #include "gimple-iterator.h"
4938fd1498Szrj #include "tree-cfg.h"
5038fd1498Szrj #include "tree-ssa.h"
5138fd1498Szrj #include "tree-inline.h"
5238fd1498Szrj #include "cfgloop.h"
5338fd1498Szrj #include "fibonacci_heap.h"
5438fd1498Szrj #include "tracer.h"
5538fd1498Szrj
5638fd1498Szrj static int count_insns (basic_block);
5738fd1498Szrj static bool better_p (const_edge, const_edge);
5838fd1498Szrj static edge find_best_successor (basic_block);
5938fd1498Szrj static edge find_best_predecessor (basic_block);
6038fd1498Szrj static int find_trace (basic_block, basic_block *);
6138fd1498Szrj
6238fd1498Szrj /* Minimal outgoing edge probability considered for superblock formation. */
6338fd1498Szrj static int probability_cutoff;
6438fd1498Szrj static int branch_ratio_cutoff;
6538fd1498Szrj
6638fd1498Szrj /* A bit BB->index is set if BB has already been seen, i.e. it is
6738fd1498Szrj connected to some trace already. */
6838fd1498Szrj static sbitmap bb_seen;
6938fd1498Szrj
7038fd1498Szrj static inline void
mark_bb_seen(basic_block bb)7138fd1498Szrj mark_bb_seen (basic_block bb)
7238fd1498Szrj {
7338fd1498Szrj unsigned int size = SBITMAP_SIZE (bb_seen);
7438fd1498Szrj
7538fd1498Szrj if ((unsigned int)bb->index >= size)
7638fd1498Szrj bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
7738fd1498Szrj
7838fd1498Szrj bitmap_set_bit (bb_seen, bb->index);
7938fd1498Szrj }
8038fd1498Szrj
8138fd1498Szrj static inline bool
bb_seen_p(basic_block bb)8238fd1498Szrj bb_seen_p (basic_block bb)
8338fd1498Szrj {
8438fd1498Szrj return bitmap_bit_p (bb_seen, bb->index);
8538fd1498Szrj }
8638fd1498Szrj
8738fd1498Szrj /* Return true if we should ignore the basic block for purposes of tracing. */
8838fd1498Szrj bool
ignore_bb_p(const_basic_block bb)8938fd1498Szrj ignore_bb_p (const_basic_block bb)
9038fd1498Szrj {
9138fd1498Szrj if (bb->index < NUM_FIXED_BLOCKS)
9238fd1498Szrj return true;
9338fd1498Szrj if (optimize_bb_for_size_p (bb))
9438fd1498Szrj return true;
9538fd1498Szrj
9638fd1498Szrj if (gimple *g = last_stmt (CONST_CAST_BB (bb)))
9738fd1498Szrj {
9838fd1498Szrj /* A transaction is a single entry multiple exit region. It
9938fd1498Szrj must be duplicated in its entirety or not at all. */
10038fd1498Szrj if (gimple_code (g) == GIMPLE_TRANSACTION)
10138fd1498Szrj return true;
10238fd1498Szrj
10338fd1498Szrj /* An IFN_UNIQUE call must be duplicated as part of its group,
10438fd1498Szrj or not at all. */
10538fd1498Szrj if (is_gimple_call (g)
10638fd1498Szrj && gimple_call_internal_p (g)
10738fd1498Szrj && gimple_call_internal_unique_p (g))
10838fd1498Szrj return true;
10938fd1498Szrj }
11038fd1498Szrj
11138fd1498Szrj return false;
11238fd1498Szrj }
11338fd1498Szrj
11438fd1498Szrj /* Return number of instructions in the block. */
11538fd1498Szrj
11638fd1498Szrj static int
count_insns(basic_block bb)11738fd1498Szrj count_insns (basic_block bb)
11838fd1498Szrj {
11938fd1498Szrj gimple_stmt_iterator gsi;
12038fd1498Szrj gimple *stmt;
12138fd1498Szrj int n = 0;
12238fd1498Szrj
12338fd1498Szrj for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
12438fd1498Szrj {
12538fd1498Szrj stmt = gsi_stmt (gsi);
12638fd1498Szrj n += estimate_num_insns (stmt, &eni_size_weights);
12738fd1498Szrj }
12838fd1498Szrj return n;
12938fd1498Szrj }
13038fd1498Szrj
13138fd1498Szrj /* Return true if E1 is more frequent than E2. */
13238fd1498Szrj static bool
better_p(const_edge e1,const_edge e2)13338fd1498Szrj better_p (const_edge e1, const_edge e2)
13438fd1498Szrj {
135*58e805e6Szrj if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
13638fd1498Szrj return e1->count () > e2->count ();
13738fd1498Szrj /* This is needed to avoid changes in the decision after
13838fd1498Szrj CFG is modified. */
13938fd1498Szrj if (e1->src != e2->src)
14038fd1498Szrj return e1->src->index > e2->src->index;
14138fd1498Szrj return e1->dest->index > e2->dest->index;
14238fd1498Szrj }
14338fd1498Szrj
14438fd1498Szrj /* Return most frequent successor of basic block BB. */
14538fd1498Szrj
14638fd1498Szrj static edge
find_best_successor(basic_block bb)14738fd1498Szrj find_best_successor (basic_block bb)
14838fd1498Szrj {
14938fd1498Szrj edge e;
15038fd1498Szrj edge best = NULL;
15138fd1498Szrj edge_iterator ei;
15238fd1498Szrj
15338fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
154*58e805e6Szrj {
155*58e805e6Szrj if (!e->count ().initialized_p ())
156*58e805e6Szrj return NULL;
15738fd1498Szrj if (!best || better_p (e, best))
15838fd1498Szrj best = e;
159*58e805e6Szrj }
16038fd1498Szrj if (!best || ignore_bb_p (best->dest))
16138fd1498Szrj return NULL;
162*58e805e6Szrj if (!best->probability.initialized_p ()
163*58e805e6Szrj || best->probability.to_reg_br_prob_base () <= probability_cutoff)
16438fd1498Szrj return NULL;
16538fd1498Szrj return best;
16638fd1498Szrj }
16738fd1498Szrj
16838fd1498Szrj /* Return most frequent predecessor of basic block BB. */
16938fd1498Szrj
17038fd1498Szrj static edge
find_best_predecessor(basic_block bb)17138fd1498Szrj find_best_predecessor (basic_block bb)
17238fd1498Szrj {
17338fd1498Szrj edge e;
17438fd1498Szrj edge best = NULL;
17538fd1498Szrj edge_iterator ei;
17638fd1498Szrj
17738fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
178*58e805e6Szrj {
179*58e805e6Szrj if (!e->count ().initialized_p ())
180*58e805e6Szrj return NULL;
18138fd1498Szrj if (!best || better_p (e, best))
18238fd1498Szrj best = e;
183*58e805e6Szrj }
18438fd1498Szrj if (!best || ignore_bb_p (best->src))
18538fd1498Szrj return NULL;
186*58e805e6Szrj if (bb->count.initialized_p ()
187*58e805e6Szrj && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
188*58e805e6Szrj < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
18938fd1498Szrj return NULL;
19038fd1498Szrj return best;
19138fd1498Szrj }
19238fd1498Szrj
19338fd1498Szrj /* Find the trace using bb and record it in the TRACE array.
19438fd1498Szrj Return number of basic blocks recorded. */
19538fd1498Szrj
19638fd1498Szrj static int
find_trace(basic_block bb,basic_block * trace)19738fd1498Szrj find_trace (basic_block bb, basic_block *trace)
19838fd1498Szrj {
19938fd1498Szrj int i = 0;
20038fd1498Szrj edge e;
20138fd1498Szrj
20238fd1498Szrj if (dump_file)
20338fd1498Szrj fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
20438fd1498Szrj
20538fd1498Szrj while ((e = find_best_predecessor (bb)) != NULL)
20638fd1498Szrj {
20738fd1498Szrj basic_block bb2 = e->src;
20838fd1498Szrj if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
20938fd1498Szrj || find_best_successor (bb2) != e)
21038fd1498Szrj break;
21138fd1498Szrj if (dump_file)
21238fd1498Szrj fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
21338fd1498Szrj bb = bb2;
21438fd1498Szrj }
21538fd1498Szrj if (dump_file)
21638fd1498Szrj fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
21738fd1498Szrj trace[i++] = bb;
21838fd1498Szrj
21938fd1498Szrj /* Follow the trace in forward direction. */
22038fd1498Szrj while ((e = find_best_successor (bb)) != NULL)
22138fd1498Szrj {
22238fd1498Szrj bb = e->dest;
22338fd1498Szrj if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
22438fd1498Szrj || find_best_predecessor (bb) != e)
22538fd1498Szrj break;
22638fd1498Szrj if (dump_file)
22738fd1498Szrj fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
22838fd1498Szrj trace[i++] = bb;
22938fd1498Szrj }
23038fd1498Szrj if (dump_file)
23138fd1498Szrj fprintf (dump_file, "\n");
23238fd1498Szrj return i;
23338fd1498Szrj }
23438fd1498Szrj
23538fd1498Szrj /* Duplicate block BB2, placing it after BB in the CFG. Return the
23638fd1498Szrj newly created block. */
23738fd1498Szrj basic_block
transform_duplicate(basic_block bb,basic_block bb2)23838fd1498Szrj transform_duplicate (basic_block bb, basic_block bb2)
23938fd1498Szrj {
24038fd1498Szrj edge e;
24138fd1498Szrj basic_block copy;
24238fd1498Szrj
24338fd1498Szrj e = find_edge (bb, bb2);
24438fd1498Szrj
24538fd1498Szrj copy = duplicate_block (bb2, e, bb);
24638fd1498Szrj flush_pending_stmts (e);
24738fd1498Szrj
24838fd1498Szrj add_phi_args_after_copy (©, 1, NULL);
24938fd1498Szrj
25038fd1498Szrj return (copy);
25138fd1498Szrj }
25238fd1498Szrj
25338fd1498Szrj /* Look for basic blocks in frequency order, construct traces and tail duplicate
25438fd1498Szrj if profitable. */
25538fd1498Szrj
25638fd1498Szrj static bool
tail_duplicate(void)25738fd1498Szrj tail_duplicate (void)
25838fd1498Szrj {
25938fd1498Szrj auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
26038fd1498Szrj blocks.safe_grow_cleared (last_basic_block_for_fn (cfun));
26138fd1498Szrj
26238fd1498Szrj basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
26338fd1498Szrj int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
26438fd1498Szrj int ninsns = 0, nduplicated = 0;
26538fd1498Szrj gcov_type weighted_insns = 0, traced_insns = 0;
26638fd1498Szrj fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
26738fd1498Szrj gcov_type cover_insns;
26838fd1498Szrj int max_dup_insns;
26938fd1498Szrj basic_block bb;
27038fd1498Szrj bool changed = false;
27138fd1498Szrj
27238fd1498Szrj /* Create an oversized sbitmap to reduce the chance that we need to
27338fd1498Szrj resize it. */
27438fd1498Szrj bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
27538fd1498Szrj bitmap_clear (bb_seen);
27638fd1498Szrj initialize_original_copy_tables ();
27738fd1498Szrj
27838fd1498Szrj if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
27938fd1498Szrj probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
28038fd1498Szrj else
28138fd1498Szrj probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
28238fd1498Szrj probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
28338fd1498Szrj
28438fd1498Szrj branch_ratio_cutoff =
28538fd1498Szrj (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
28638fd1498Szrj
28738fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
28838fd1498Szrj {
28938fd1498Szrj int n = count_insns (bb);
29038fd1498Szrj if (!ignore_bb_p (bb))
29138fd1498Szrj blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
29238fd1498Szrj
29338fd1498Szrj counts [bb->index] = n;
29438fd1498Szrj ninsns += n;
29538fd1498Szrj weighted_insns += n * bb->count.to_frequency (cfun);
29638fd1498Szrj }
29738fd1498Szrj
29838fd1498Szrj if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
29938fd1498Szrj cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
30038fd1498Szrj else
30138fd1498Szrj cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
30238fd1498Szrj cover_insns = (weighted_insns * cover_insns + 50) / 100;
30338fd1498Szrj max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
30438fd1498Szrj
30538fd1498Szrj while (traced_insns < cover_insns && nduplicated < max_dup_insns
30638fd1498Szrj && !heap.empty ())
30738fd1498Szrj {
30838fd1498Szrj basic_block bb = heap.extract_min ();
30938fd1498Szrj int n, pos;
31038fd1498Szrj
31138fd1498Szrj if (!bb)
31238fd1498Szrj break;
31338fd1498Szrj
31438fd1498Szrj blocks[bb->index] = NULL;
31538fd1498Szrj
31638fd1498Szrj if (ignore_bb_p (bb))
31738fd1498Szrj continue;
31838fd1498Szrj gcc_assert (!bb_seen_p (bb));
31938fd1498Szrj
32038fd1498Szrj n = find_trace (bb, trace);
32138fd1498Szrj
32238fd1498Szrj bb = trace[0];
32338fd1498Szrj traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
32438fd1498Szrj if (blocks[bb->index])
32538fd1498Szrj {
32638fd1498Szrj heap.delete_node (blocks[bb->index]);
32738fd1498Szrj blocks[bb->index] = NULL;
32838fd1498Szrj }
32938fd1498Szrj
33038fd1498Szrj for (pos = 1; pos < n; pos++)
33138fd1498Szrj {
33238fd1498Szrj basic_block bb2 = trace[pos];
33338fd1498Szrj
33438fd1498Szrj if (blocks[bb2->index])
33538fd1498Szrj {
33638fd1498Szrj heap.delete_node (blocks[bb2->index]);
33738fd1498Szrj blocks[bb2->index] = NULL;
33838fd1498Szrj }
33938fd1498Szrj traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
34038fd1498Szrj if (EDGE_COUNT (bb2->preds) > 1
34138fd1498Szrj && can_duplicate_block_p (bb2)
34238fd1498Szrj /* We have the tendency to duplicate the loop header
34338fd1498Szrj of all do { } while loops. Do not do that - it is
34438fd1498Szrj not profitable and it might create a loop with multiple
34538fd1498Szrj entries or at least rotate the loop. */
34638fd1498Szrj && bb2->loop_father->header != bb2)
34738fd1498Szrj {
34838fd1498Szrj nduplicated += counts [bb2->index];
34938fd1498Szrj basic_block copy = transform_duplicate (bb, bb2);
35038fd1498Szrj
35138fd1498Szrj /* Reconsider the original copy of block we've duplicated.
35238fd1498Szrj Removing the most common predecessor may make it to be
35338fd1498Szrj head. */
35438fd1498Szrj blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
35538fd1498Szrj
35638fd1498Szrj if (dump_file)
35738fd1498Szrj fprintf (dump_file, "Duplicated %i as %i [%i]\n",
35838fd1498Szrj bb2->index, copy->index, copy->count.to_frequency (cfun));
35938fd1498Szrj
36038fd1498Szrj bb2 = copy;
36138fd1498Szrj changed = true;
36238fd1498Szrj }
36338fd1498Szrj mark_bb_seen (bb2);
36438fd1498Szrj bb = bb2;
36538fd1498Szrj /* In case the trace became infrequent, stop duplicating. */
36638fd1498Szrj if (ignore_bb_p (bb))
36738fd1498Szrj break;
36838fd1498Szrj }
36938fd1498Szrj if (dump_file)
37038fd1498Szrj fprintf (dump_file, " covered now %.1f\n\n",
37138fd1498Szrj traced_insns * 100.0 / weighted_insns);
37238fd1498Szrj }
37338fd1498Szrj if (dump_file)
37438fd1498Szrj fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
37538fd1498Szrj nduplicated * 100 / ninsns);
37638fd1498Szrj
37738fd1498Szrj free_original_copy_tables ();
37838fd1498Szrj sbitmap_free (bb_seen);
37938fd1498Szrj free (trace);
38038fd1498Szrj free (counts);
38138fd1498Szrj
38238fd1498Szrj return changed;
38338fd1498Szrj }
38438fd1498Szrj
38538fd1498Szrj namespace {
38638fd1498Szrj
38738fd1498Szrj const pass_data pass_data_tracer =
38838fd1498Szrj {
38938fd1498Szrj GIMPLE_PASS, /* type */
39038fd1498Szrj "tracer", /* name */
39138fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
39238fd1498Szrj TV_TRACER, /* tv_id */
39338fd1498Szrj 0, /* properties_required */
39438fd1498Szrj 0, /* properties_provided */
39538fd1498Szrj 0, /* properties_destroyed */
39638fd1498Szrj 0, /* todo_flags_start */
39738fd1498Szrj TODO_update_ssa, /* todo_flags_finish */
39838fd1498Szrj };
39938fd1498Szrj
40038fd1498Szrj class pass_tracer : public gimple_opt_pass
40138fd1498Szrj {
40238fd1498Szrj public:
pass_tracer(gcc::context * ctxt)40338fd1498Szrj pass_tracer (gcc::context *ctxt)
40438fd1498Szrj : gimple_opt_pass (pass_data_tracer, ctxt)
40538fd1498Szrj {}
40638fd1498Szrj
40738fd1498Szrj /* opt_pass methods: */
gate(function *)40838fd1498Szrj virtual bool gate (function *)
40938fd1498Szrj {
41038fd1498Szrj return (optimize > 0 && flag_tracer && flag_reorder_blocks);
41138fd1498Szrj }
41238fd1498Szrj
41338fd1498Szrj virtual unsigned int execute (function *);
41438fd1498Szrj
41538fd1498Szrj }; // class pass_tracer
41638fd1498Szrj
41738fd1498Szrj unsigned int
execute(function * fun)41838fd1498Szrj pass_tracer::execute (function *fun)
41938fd1498Szrj {
42038fd1498Szrj bool changed;
42138fd1498Szrj
42238fd1498Szrj if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
42338fd1498Szrj return 0;
42438fd1498Szrj
42538fd1498Szrj mark_dfs_back_edges ();
42638fd1498Szrj if (dump_file)
42738fd1498Szrj brief_dump_cfg (dump_file, dump_flags);
42838fd1498Szrj
42938fd1498Szrj /* Trace formation is done on the fly inside tail_duplicate */
43038fd1498Szrj changed = tail_duplicate ();
43138fd1498Szrj if (changed)
43238fd1498Szrj {
43338fd1498Szrj free_dominance_info (CDI_DOMINATORS);
43438fd1498Szrj /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */
43538fd1498Szrj loops_state_set (LOOPS_NEED_FIXUP);
43638fd1498Szrj }
43738fd1498Szrj
43838fd1498Szrj if (dump_file)
43938fd1498Szrj brief_dump_cfg (dump_file, dump_flags);
44038fd1498Szrj
44138fd1498Szrj return changed ? TODO_cleanup_cfg : 0;
44238fd1498Szrj }
44338fd1498Szrj } // anon namespace
44438fd1498Szrj
44538fd1498Szrj gimple_opt_pass *
make_pass_tracer(gcc::context * ctxt)44638fd1498Szrj make_pass_tracer (gcc::context *ctxt)
44738fd1498Szrj {
44838fd1498Szrj return new pass_tracer (ctxt);
44938fd1498Szrj }
450