/* Inlining decision heuristics.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth, growth and
     so on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inlining

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result,
       the callees of the function seen by the early inliner were already
       optimized and the results of early inlining add a lot of optimization
       opportunities for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate abstraction
       penalty in C++ code (via combination of inlining and early
       optimization) and thus improve quality of analysis done by real IPA
       optimizers.

       Because of lack of whole unit knowledge, the pass cannot really make
       good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the
       cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing
       so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining
       leads to devirtualization and other modification of the callgraph, so
       functions may become unreachable during the process.  Also functions
       declared as extern inline or virtual functions are removed, since after
       inlining we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to reduction of code size by eliminating
       the need for an offline copy of the function.  */
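
/* As a purely illustrative example (not taken from any testcase): a static
   helper used exactly once, such as

     static int helper (int x) { return x * x; }
     int user (int x) { return helper (x) + 1; }

   is handled by step 3 above: the call is inlined and the offline body of
   helper is discarded, so the unit usually shrinks even though helper was
   never declared inline.  */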

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "params.h"
#include "profile.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
#include "attribs.h"
#include "asan.h"

typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;

/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;

/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into a tiny wrapper, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the caller.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */
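/* Rough numeric sketch of the limit computed below, assuming the commonly
   documented defaults of 100 for --param large-function-growth and 2700 for
   --param large-function-insns (both may differ per release or target):
   if the largest body on the inline chain has self size 4000, the limit is
   4000 + 4000 * 100 / 100 = 8000, and inlining is refused only when the
   estimated size after inlining exceeds both 2700 and 8000.  */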

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_fn_summary *info, *what_info, *outer_info = ipa_fn_summaries->get (to);

  /* Look for the function e->caller is inlined into.  While doing
     so, work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that, not on the self size of the
     outer function and not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      info = ipa_fn_summaries->get (to);
      if (limit < info->self_size)
        limit = info->self_size;
      if (stack_size_limit < info->estimated_self_stack_size)
        stack_size_limit = info->estimated_self_stack_size;
      if (to->global.inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  what_info = ipa_fn_summaries->get (what);

  if (limit < what_info->self_size)
    limit = what_info->self_size;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= info->size
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: The stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);

  inlined_stack = (outer_info->stack_frame_offset
                   + outer_info->estimated_self_stack_size
                   + what_info->estimated_stack_size);
  /* Check the new stack consumption against the stack consumption at the
     place the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit overoptimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > info->estimated_stack_size
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}

/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_file)
    {
      fprintf (dump_file, " not inlinable: %s -> %s, %s\n",
               e->caller->dump_name (),
               e->callee->dump_name (),
               cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
           || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
          && e->caller->lto_file_data
          && e->callee->ultimate_alias_target ()->lto_file_data)
        {
          fprintf (dump_file, " LTO objects: %s, %s\n",
                   e->caller->lto_file_data->file_name,
                   e->callee->ultimate_alias_target ()->lto_file_data->file_name);
        }
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
        cl_target_option_print_diff
          (dump_file, 2, target_opts_for_fn (e->caller->decl),
           target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
        cl_optimization_print_diff
          (dump_file, 2, opts_for_fn (e->caller->decl),
           opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}

/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Allow inlining always_inline functions into no_sanitize_address
     functions.  */
  if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
      && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
           == sanitize_flags_p (SANITIZE_ADDRESS, callee))
          && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
              == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
          && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
              == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
}
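
/* Illustration only (no specific testcase implied): under -fsanitize=address,
   an always_inline callee such as

     static inline int __attribute__ ((always_inline)) load (const int *p)
     { return *p; }

   may still be inlined into a caller declared with
   __attribute__ ((no_sanitize_address)) thanks to the early return above,
   while two functions that simply differ in address-sanitizer instrumentation
   do not match and are rejected by the caller of this predicate.  */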

/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
   (opts_for_fn (caller->decl)->x_##flag \
    != opts_for_fn (callee->decl)->x_##flag \
    && (!always_inline \
        || opts_for_fn (caller->decl)->x_##flag \
           < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
   (opts_for_fn (caller->decl)->x_##flag \
    != opts_for_fn (callee->decl)->x_##flag \
    && (!always_inline \
        || opts_for_fn (caller->decl)->x_##flag \
           > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where exact match is needed for correctness.  */
#define check_match(flag) \
   (opts_for_fn (caller->decl)->x_##flag \
    != opts_for_fn (callee->decl)->x_##flag)
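
/* A hedged reading of the macros above (caller, callee and always_inline are
   the locals of the function below that expands them): check_maybe_up
   (flag_errno_math) is true - i.e. counts as a blocking mismatch - whenever
   the two functions disagree on the flag, except that for always_inline
   callees a difference is tolerated as long as the caller's value is the
   greater one, which is the "safe" direction named in the comment.  */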

/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
                   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = e->caller->global.inlined_to
                        ? e->caller->global.inlined_to : e->caller;
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
                 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (!ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }
  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}

/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
                             bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = e->caller->global.inlined_to
                        ? e->caller->global.inlined_to : e->caller;
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
                            DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
        (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
         && lookup_attribute ("always_inline",
                              DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
         below and inlined across the optimization boundary.
         Enabling the checks below breaks several packages by refusing
         to inline library always_inline functions.  See PR65873.
         Disable the check for early inlining for now until a better solution
         is found.  */
      if (always_inline && early)
        ;
      /* There are some options that change IL semantics which means
         we cannot inline in these cases for correctness reasons.
         Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
               || check_match (flag_trapv)
               || check_match (flag_pcc_struct_return)
               /* When caller or callee does FP math, be sure FP codegen flags
                  are compatible.  */
               || ((caller_info->fp_expressions && callee_info->fp_expressions)
                   && (check_maybe_up (flag_rounding_math)
                       || check_maybe_up (flag_trapping_math)
                       || check_maybe_down (flag_unsafe_math_optimizations)
                       || check_maybe_down (flag_finite_math_only)
                       || check_maybe_up (flag_signaling_nans)
                       || check_maybe_down (flag_cx_limited_range)
                       || check_maybe_up (flag_signed_zeros)
                       || check_maybe_down (flag_associative_math)
                       || check_maybe_down (flag_reciprocal_math)
                       || check_maybe_down (flag_fp_int_builtin_inexact)
                       /* Strictly speaking only when the callee contains
                          function calls that may end up setting errno.  */
                       || check_maybe_up (flag_errno_math)))
               /* We do not want to make code compiled with exceptions to be
                  brought into a non-EH function unless we know that the callee
                  does not throw.
                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
               || (check_maybe_up (flag_non_call_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               || (check_maybe_up (flag_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               /* When devirtualization is disabled for the callee, it is not
                  safe to inline it as we possibly mangled the type info.
                  Allow early inlining of always inlines.  */
               || (!early && check_maybe_down (flag_devirtualize)))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
        ;
      /* When user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If an explicit optimize attribute is not used, the mismatch is caused
         by different command line options used to build different units.
         Do not care about COMDAT functions - those are intended to be
         optimized with the optimization flags of the module they are used in.
         Also do not care about mixing up size/speed optimization when
         DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
                && !lookup_attribute ("optimize",
                                      DECL_ATTRIBUTES (caller->decl)))
               || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        ;
      /* If the mismatch is caused by merging two LTO units with different
         optimization flags we want to be a bit nicer.  However never inline
         if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
               || !opt_for_fn (caller->decl, optimize))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If callee is optimized for size and caller is not, allow inlining if
         code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
         is inline (and thus likely a unified comdat).  This will allow the
         caller to run faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               > opt_for_fn (caller->decl, optimize_size))
        {
          int growth = estimate_edge_growth (e);
          if (growth > 0
              && (!DECL_DECLARED_INLINE_P (callee->decl)
                  && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
                                    MAX_INLINE_INSNS_AUTO)))
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
      /* If callee is more aggressively optimized for performance than caller,
         we generally want to inline only cheap (runtime wise) functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               < opt_for_fn (caller->decl, optimize_size)
               || (opt_for_fn (callee->decl, optimize)
                   > opt_for_fn (caller->decl, optimize)))
        {
          if (estimate_edge_time (e)
              >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }

    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}


/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_file)
        fprintf (dump_file, " edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}


/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}


/* Return true if we are interested in inlining a small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that, before the profile summary, all
     hot paths' IR looks exactly the same as in the profiled binary.  As a
     result, in the einliner, we will disregard the size limit and inline
     those callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      int n;

      if (growth <= 0)
        ;
      else if (!e->maybe_hot_p ()
               && growth > 0)
        {
          if (dump_file)
            fprintf (dump_file, " will not early inline: %s->%s, "
                     "call is cold and code would grow by %i\n",
                     e->caller->dump_name (),
                     callee->dump_name (),
                     growth);
          want_inline = false;
        }
      else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
        {
          if (dump_file)
            fprintf (dump_file, " will not early inline: %s->%s, "
                     "growth %i exceeds --param early-inlining-insns\n",
                     e->caller->dump_name (),
                     callee->dump_name (),
                     growth);
          want_inline = false;
        }
      else if ((n = num_calls (callee)) != 0
               && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
        {
          if (dump_file)
            fprintf (dump_file, " will not early inline: %s->%s, "
                     "growth %i exceeds --param early-inlining-insns "
                     "divided by number of calls\n",
                     e->caller->dump_name (),
                     callee->dump_name (),
                     growth);
          want_inline = false;
        }
    }
  return want_inline;
}
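
/* A worked example of the last test above, assuming --param
   early-inlining-insns is 14 (a commonly documented default; the exact value
   depends on the release): a callee whose inlining grows the caller by 6
   instructions and which still contains 2 non-builtin calls is rejected,
   since 6 * (2 + 1) = 18 > 14, whereas the same callee with no remaining
   calls passes the earlier growth test (6 <= 14) and is inlined.  */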

/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
                             sreal uninlined_call_time)
{
  cgraph_node *caller = (edge->caller->global.inlined_to
                         ? edge->caller->global.inlined_to
                         : edge->caller);

  sreal freq = edge->sreal_frequency ();
  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
                           sreal time)
{
  cgraph_node *caller = (edge->caller->global.inlined_to
                         ? edge->caller->global.inlined_to
                         : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  sreal freq = edge->sreal_frequency ();
  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match the one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}

/* Return true if the speedup for inlining E is bigger than
   PARAM_INLINE_MIN_SPEEDUP.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal time = compute_uninlined_call_time (e, unspec_time);
  sreal inlined_time = compute_inlined_call_time (e, spec_time);

  if ((time - inlined_time) * 100
      > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
    return true;
  return false;
}
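
/* Hedged numeric sketch of the test above, assuming --param inline-min-speedup
   of 15 (the default may differ per release): if the caller plus the call
   currently costs time 200 and inlining brings the estimate down to 160,
   then (200 - 160) * 100 = 4000 > 200 * 15 = 3000, so the edge is treated
   as a big speedup.  */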

/* Return true if we are interested in inlining a small function.
   When REPORT is true, report reason to dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check whether the function can be a good
     inline candidate.  At the moment we allow inline hints to
     promote non-inline functions to inline and we increase
     MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
           && ipa_fn_summaries->get (callee)->min_size
              - ipa_call_summaries->get (e)->call_stmt_size
              > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
            || e->count.ipa ().nonzero_p ())
           && ipa_fn_summaries->get (callee)->min_size
              - ipa_call_summaries->get (e)->call_stmt_size
              > 16 * MAX_INLINE_INSNS_SINGLE)
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
                          ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
                          : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      bool big_speedup = big_speedup_p (e);

      if (growth <= 0)
        ;
      /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
         hints suggest that inlining the given function is very profitable.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= MAX_INLINE_INSNS_SINGLE
               && ((!big_speedup
                    && !(hints & (INLINE_HINT_indirect_call
                                  | INLINE_HINT_known_hot
                                  | INLINE_HINT_loop_iterations
                                  | INLINE_HINT_array_index
                                  | INLINE_HINT_loop_stride)))
                   || growth >= MAX_INLINE_INSNS_SINGLE * 16))
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !opt_for_fn (e->caller->decl, flag_inline_functions))
        {
          /* growth_likely_positive is expensive, always test it last.  */
          if (growth >= MAX_INLINE_INSNS_SINGLE
              || growth_likely_positive (callee, growth))
            {
              e->inline_failed = CIF_NOT_DECLARED_INLINED;
              want_inline = false;
            }
        }
      /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
         Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
         inlining the given function is very profitable.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !big_speedup
               && !(hints & INLINE_HINT_known_hot)
               && growth >= ((hints & (INLINE_HINT_indirect_call
                                       | INLINE_HINT_loop_iterations
                                       | INLINE_HINT_array_index
                                       | INLINE_HINT_loop_stride))
                             ? MAX (MAX_INLINE_INSNS_AUTO,
                                    MAX_INLINE_INSNS_SINGLE)
                             : MAX_INLINE_INSNS_AUTO))
        {
          /* growth_likely_positive is expensive, always test it last.  */
          if (growth >= MAX_INLINE_INSNS_SINGLE
              || growth_likely_positive (callee, growth))
            {
              e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
              want_inline = false;
            }
        }
      /* If call is cold, do not inline when function body would grow.  */
      else if (!e->maybe_hot_p ()
               && (growth >= MAX_INLINE_INSNS_SINGLE
                   || growth_likely_positive (callee, growth)))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}

/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->global.inlined_to
           && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of a self recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self recursive function very small.
     Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self recursive call within a loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of the libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
          <= caller_freq
             * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_file)
    fprintf (dump_file, " not inlining recursively: %s\n", reason);
  return want_inline;
}
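
/* Small worked example of the peeling bound above (numbers are illustrative
   only): with a max_depth of 8, max_prob starts at 1 - 1/8 = 0.875; at depth 3
   it has been squared twice, giving 0.875^4 which is roughly 0.59, so another
   copy is peeled only while the recursive edge frequency stays below about
   59% of the caller frequency.  */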

/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
          || !opt_for_fn (e->caller->decl, optimize))
        return true;
      if (!can_inline_edge_p (e, true))
        return true;
      if (e->recursive_p ())
        return true;
      if (!can_inline_edge_by_limits_p (e, true))
        return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
        *(bool *)has_hot_call = true;
    }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}

/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->global.inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (estimate_growth (node) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
                                         true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}

/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->global.inlined_to
                         ? edge->caller->global.inlined_to
                         : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
                        - callee_info->time * 101).to_int () <= 0
                       || callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= callee_info->size);

  if (dump)
    {
      fprintf (dump_file, " Badness calculation for %s -> %s\n",
               edge->caller->dump_name (),
               edge->callee->dump_name ());
      fprintf (dump_file, " size growth %i, time %f unspec %f ",
               growth,
               edge_time.to_double (),
               unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
        fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
        fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
                 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
        fprintf (dump_file, " max: function is external\n");
      return sreal::max ();
    }
  /* When the profile is available, compute badness as:

                 time_saved * caller_count
     goodness = -------------------------------------------------
                growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use a negative value to make calls with profile appear hotter
     than calls without.  */
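  /* Illustrative numbers only: saving time 4 per call with caller count 1000
     against caller growth 30, overall growth 100 and combined size 500 gives
     goodness = 4 * 1000 / (30 * 100 * 500); badness is its negation, so
     bigger savings and hotter callers sort earlier in the heap.  */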
108638fd1498Szrj else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
108738fd1498Szrj || caller->count.ipa ().nonzero_p ())
108838fd1498Szrj {
108938fd1498Szrj sreal numerator, denominator;
109038fd1498Szrj int overall_growth;
109138fd1498Szrj sreal inlined_time = compute_inlined_call_time (edge, edge_time);
109238fd1498Szrj
109338fd1498Szrj numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
109438fd1498Szrj - inlined_time);
109538fd1498Szrj if (numerator <= 0)
109638fd1498Szrj numerator = ((sreal) 1 >> 8);
109738fd1498Szrj if (caller->count.ipa ().nonzero_p ())
109838fd1498Szrj numerator *= caller->count.ipa ().to_gcov_type ();
109938fd1498Szrj else if (caller->count.ipa ().initialized_p ())
110038fd1498Szrj numerator = numerator >> 11;
110138fd1498Szrj denominator = growth;
110238fd1498Szrj
110338fd1498Szrj overall_growth = callee_info->growth;
110438fd1498Szrj
110538fd1498Szrj /* Look for inliner wrappers of the form:
110638fd1498Szrj
110738fd1498Szrj inline_caller ()
110838fd1498Szrj {
110938fd1498Szrj do_fast_job...
111038fd1498Szrj if (need_more_work)
111138fd1498Szrj noninline_callee ();
111238fd1498Szrj }
111338fd1498Szrj Withhout panilizing this case, we usually inline noninline_callee
111438fd1498Szrj into the inline_caller because overall_growth is small preventing
111538fd1498Szrj further inlining of inline_caller.
111638fd1498Szrj
111738fd1498Szrj Penalize only callgraph edges to functions with small overall
111838fd1498Szrj growth ...
111938fd1498Szrj */
112038fd1498Szrj if (growth > overall_growth
112138fd1498Szrj /* ... and having only one caller which is not inlined ... */
112238fd1498Szrj && callee_info->single_caller
112338fd1498Szrj && !edge->caller->global.inlined_to
112438fd1498Szrj /* ... and edges executed only conditionally ... */
112538fd1498Szrj && edge->sreal_frequency () < 1
112638fd1498Szrj /* ... consider case where callee is not inline but caller is ... */
112738fd1498Szrj && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
112838fd1498Szrj && DECL_DECLARED_INLINE_P (caller->decl))
112938fd1498Szrj /* ... or when early optimizers decided to split and edge
113038fd1498Szrj frequency still indicates splitting is a win ... */
113138fd1498Szrj || (callee->split_part && !caller->split_part
113238fd1498Szrj && edge->sreal_frequency () * 100
113338fd1498Szrj < PARAM_VALUE
113438fd1498Szrj (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
113538fd1498Szrj /* ... and do not overwrite user specified hints. */
113638fd1498Szrj && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
113738fd1498Szrj || DECL_DECLARED_INLINE_P (caller->decl)))))
113838fd1498Szrj {
113938fd1498Szrj struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
114038fd1498Szrj int caller_growth = caller_info->growth;
114138fd1498Szrj
114238fd1498Szrj /* Only apply the penalty when caller looks like inline candidate,
114338fd1498Szrj and it is not called once and. */
114438fd1498Szrj if (!caller_info->single_caller && overall_growth < caller_growth
114538fd1498Szrj && caller_info->inlinable
114638fd1498Szrj && caller_info->size
114738fd1498Szrj < (DECL_DECLARED_INLINE_P (caller->decl)
114838fd1498Szrj ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
114938fd1498Szrj {
115038fd1498Szrj if (dump)
115138fd1498Szrj fprintf (dump_file,
115238fd1498Szrj " Wrapper penalty. Increasing growth %i to %i\n",
115338fd1498Szrj overall_growth, caller_growth);
115438fd1498Szrj overall_growth = caller_growth;
115538fd1498Szrj }
115638fd1498Szrj }
115738fd1498Szrj if (overall_growth > 0)
115838fd1498Szrj {
115938fd1498Szrj /* Strongly preffer functions with few callers that can be inlined
116038fd1498Szrj fully. The square root here leads to smaller binaries at average.
116138fd1498Szrj Watch however for extreme cases and return to linear function
116238fd1498Szrj when growth is large. */
116338fd1498Szrj if (overall_growth < 256)
116438fd1498Szrj overall_growth *= overall_growth;
116538fd1498Szrj else
116638fd1498Szrj overall_growth += 256 * 256 - 256;
116738fd1498Szrj denominator *= overall_growth;
116838fd1498Szrj }
1169*58e805e6Szrj denominator *= ipa_fn_summaries->get (caller)->self_size + growth;
117038fd1498Szrj
117138fd1498Szrj badness = - numerator / denominator;
117238fd1498Szrj
117338fd1498Szrj if (dump)
117438fd1498Szrj {
117538fd1498Szrj fprintf (dump_file,
117638fd1498Szrj " %f: guessed profile. frequency %f, count %" PRId64
117738fd1498Szrj " caller count %" PRId64
117838fd1498Szrj " time w/o inlining %f, time with inlining %f"
117938fd1498Szrj " overall growth %i (current) %i (original)"
118038fd1498Szrj " %i (compensated)\n",
118138fd1498Szrj badness.to_double (),
118238fd1498Szrj edge->sreal_frequency ().to_double (),
118338fd1498Szrj edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
118438fd1498Szrj caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
118538fd1498Szrj compute_uninlined_call_time (edge,
118638fd1498Szrj unspec_edge_time).to_double (),
118738fd1498Szrj inlined_time.to_double (),
118838fd1498Szrj estimate_growth (callee),
118938fd1498Szrj callee_info->growth, overall_growth);
119038fd1498Szrj }
119138fd1498Szrj }
119238fd1498Szrj /* When a function-local profile is not available or it does not give
119338fd1498Szrj useful information (i.e. frequency is zero), base the cost on
119438fd1498Szrj loop nest and overall size growth, so we optimize for the overall number
119538fd1498Szrj of functions fully inlined in the program. */
119638fd1498Szrj else
119738fd1498Szrj {
119838fd1498Szrj int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
119938fd1498Szrj badness = growth;
120038fd1498Szrj
120138fd1498Szrj /* Decrease badness if call is nested. */
120238fd1498Szrj if (badness > 0)
120338fd1498Szrj badness = badness >> nest;
120438fd1498Szrj else
120538fd1498Szrj badness = badness << nest;
120638fd1498Szrj if (dump)
120738fd1498Szrj fprintf (dump_file, " %f: no profile. nest %i\n",
120838fd1498Szrj badness.to_double (), nest);
120938fd1498Szrj }
121038fd1498Szrj gcc_checking_assert (badness != 0);
121138fd1498Szrj
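  /* Recursive edges are strongly discouraged: shifting the key by 4 binary
     orders of magnitude (a factor of 16) moves it toward the unattractive
     end of the min-heap.  */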
121238fd1498Szrj if (edge->recursive_p ())
121338fd1498Szrj badness = badness.shift (badness > 0 ? 4 : -4);
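  /* The remaining hints nudge the key by powers of two: optimization hints
     (indirect call, known loop bounds, ...) and declared-inline or
     always_inline callees make the edge more attractive, while SCC
     membership and cross-module calls make it less so.  */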
121438fd1498Szrj if ((hints & (INLINE_HINT_indirect_call
121538fd1498Szrj | INLINE_HINT_loop_iterations
121638fd1498Szrj | INLINE_HINT_array_index
121738fd1498Szrj | INLINE_HINT_loop_stride))
121838fd1498Szrj || callee_info->growth <= 0)
121938fd1498Szrj badness = badness.shift (badness > 0 ? -2 : 2);
122038fd1498Szrj if (hints & (INLINE_HINT_same_scc))
122138fd1498Szrj badness = badness.shift (badness > 0 ? 3 : -3);
122238fd1498Szrj else if (hints & (INLINE_HINT_in_scc))
122338fd1498Szrj badness = badness.shift (badness > 0 ? 2 : -2);
122438fd1498Szrj else if (hints & (INLINE_HINT_cross_module))
122538fd1498Szrj badness = badness.shift (badness > 0 ? 1 : -1);
122638fd1498Szrj if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
122738fd1498Szrj badness = badness.shift (badness > 0 ? -4 : 4);
122838fd1498Szrj else if ((hints & INLINE_HINT_declared_inline))
122938fd1498Szrj badness = badness.shift (badness > 0 ? -3 : 3);
123038fd1498Szrj if (dump)
123138fd1498Szrj fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
123238fd1498Szrj return badness;
123338fd1498Szrj }
123438fd1498Szrj
123538fd1498Szrj /* Recompute badness of EDGE and update its key in HEAP if needed. */
123638fd1498Szrj static inline void
123738fd1498Szrj update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
123838fd1498Szrj {
123938fd1498Szrj sreal badness = edge_badness (edge, false);
124038fd1498Szrj if (edge->aux)
124138fd1498Szrj {
124238fd1498Szrj edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
124338fd1498Szrj gcc_checking_assert (n->get_data () == edge);
124438fd1498Szrj
124538fd1498Szrj /* fibonacci_heap::replace_key does busy updating of the
124638fd1498Szrj heap that is unnecessarily expensive.
124738fd1498Szrj We do lazy increases: after extracting the minimum, if the key
124838fd1498Szrj turns out to be out of date, it is re-inserted into the heap
124938fd1498Szrj with the correct value. */
125038fd1498Szrj if (badness < n->get_key ())
125138fd1498Szrj {
125238fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
125338fd1498Szrj {
125438fd1498Szrj fprintf (dump_file,
125538fd1498Szrj " decreasing badness %s -> %s, %f to %f\n",
125638fd1498Szrj edge->caller->dump_name (),
125738fd1498Szrj edge->callee->dump_name (),
125838fd1498Szrj n->get_key ().to_double (),
125938fd1498Szrj badness.to_double ());
126038fd1498Szrj }
126138fd1498Szrj heap->decrease_key (n, badness);
126238fd1498Szrj }
126338fd1498Szrj }
126438fd1498Szrj else
126538fd1498Szrj {
126638fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
126738fd1498Szrj {
126838fd1498Szrj fprintf (dump_file,
126938fd1498Szrj " enqueuing call %s -> %s, badness %f\n",
127038fd1498Szrj edge->caller->dump_name (),
127138fd1498Szrj edge->callee->dump_name (),
127238fd1498Szrj badness.to_double ());
127338fd1498Szrj }
127438fd1498Szrj edge->aux = heap->insert (badness, edge);
127538fd1498Szrj }
127638fd1498Szrj }
127738fd1498Szrj
127838fd1498Szrj
127938fd1498Szrj /* NODE was inlined.
128038fd1498Szrj All caller edges need to be reset because
128138fd1498Szrj size estimates change. Similarly callee edges need to be reset
128238fd1498Szrj because better context may be known. */
128338fd1498Szrj
128438fd1498Szrj static void
128538fd1498Szrj reset_edge_caches (struct cgraph_node *node)
128638fd1498Szrj {
128738fd1498Szrj struct cgraph_edge *edge;
128838fd1498Szrj struct cgraph_edge *e = node->callees;
128938fd1498Szrj struct cgraph_node *where = node;
129038fd1498Szrj struct ipa_ref *ref;
129138fd1498Szrj
129238fd1498Szrj if (where->global.inlined_to)
129338fd1498Szrj where = where->global.inlined_to;
129438fd1498Szrj
129538fd1498Szrj for (edge = where->callers; edge; edge = edge->next_caller)
129638fd1498Szrj if (edge->inline_failed)
129738fd1498Szrj reset_edge_growth_cache (edge);
129838fd1498Szrj
129938fd1498Szrj FOR_EACH_ALIAS (where, ref)
130038fd1498Szrj reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
130138fd1498Szrj
130238fd1498Szrj if (!e)
130338fd1498Szrj return;
130438fd1498Szrj
130538fd1498Szrj while (true)
130638fd1498Szrj if (!e->inline_failed && e->callee->callees)
130738fd1498Szrj e = e->callee->callees;
130838fd1498Szrj else
130938fd1498Szrj {
131038fd1498Szrj if (e->inline_failed)
131138fd1498Szrj reset_edge_growth_cache (e);
131238fd1498Szrj if (e->next_callee)
131338fd1498Szrj e = e->next_callee;
131438fd1498Szrj else
131538fd1498Szrj {
131638fd1498Szrj do
131738fd1498Szrj {
131838fd1498Szrj if (e->caller == node)
131938fd1498Szrj return;
132038fd1498Szrj e = e->caller->callers;
132138fd1498Szrj }
132238fd1498Szrj while (!e->next_callee);
132338fd1498Szrj e = e->next_callee;
132438fd1498Szrj }
132538fd1498Szrj }
132638fd1498Szrj }
132738fd1498Szrj
132838fd1498Szrj /* Recompute HEAP nodes for each caller of NODE.
132938fd1498Szrj UPDATED_NODES track nodes we already visited, to avoid redundant work.
133038fd1498Szrj When CHECK_INLINABLITY_FOR is set, re-check only that edge for whether
133138fd1498Szrj it is still inlinable. Otherwise check all edges. */
133238fd1498Szrj
133338fd1498Szrj static void
133438fd1498Szrj update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
133538fd1498Szrj bitmap updated_nodes,
133638fd1498Szrj struct cgraph_edge *check_inlinablity_for)
133738fd1498Szrj {
133838fd1498Szrj struct cgraph_edge *edge;
133938fd1498Szrj struct ipa_ref *ref;
134038fd1498Szrj
134138fd1498Szrj if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
134238fd1498Szrj || node->global.inlined_to)
134338fd1498Szrj return;
134438fd1498Szrj if (!bitmap_set_bit (updated_nodes, node->uid))
134538fd1498Szrj return;
134638fd1498Szrj
134738fd1498Szrj FOR_EACH_ALIAS (node, ref)
134838fd1498Szrj {
134938fd1498Szrj struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
135038fd1498Szrj update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
135138fd1498Szrj }
135238fd1498Szrj
135338fd1498Szrj for (edge = node->callers; edge; edge = edge->next_caller)
135438fd1498Szrj if (edge->inline_failed)
135538fd1498Szrj {
135638fd1498Szrj if (!check_inlinablity_for
135738fd1498Szrj || check_inlinablity_for == edge)
135838fd1498Szrj {
135938fd1498Szrj if (can_inline_edge_p (edge, false)
136038fd1498Szrj && want_inline_small_function_p (edge, false)
136138fd1498Szrj && can_inline_edge_by_limits_p (edge, false))
136238fd1498Szrj update_edge_key (heap, edge);
136338fd1498Szrj else if (edge->aux)
136438fd1498Szrj {
136538fd1498Szrj report_inline_failed_reason (edge);
136638fd1498Szrj heap->delete_node ((edge_heap_node_t *) edge->aux);
136738fd1498Szrj edge->aux = NULL;
136838fd1498Szrj }
136938fd1498Szrj }
137038fd1498Szrj else if (edge->aux)
137138fd1498Szrj update_edge_key (heap, edge);
137238fd1498Szrj }
137338fd1498Szrj }
137438fd1498Szrj
137538fd1498Szrj /* Recompute HEAP nodes for each uninlined call in NODE.
137638fd1498Szrj This is used when we know that edge badnesses are going only to increase
137738fd1498Szrj (we introduced a new call site) and thus all we need is to insert newly
137838fd1498Szrj created edges into the heap. */
137938fd1498Szrj
138038fd1498Szrj static void
138138fd1498Szrj update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
138238fd1498Szrj bitmap updated_nodes)
138338fd1498Szrj {
138438fd1498Szrj struct cgraph_edge *e = node->callees;
138538fd1498Szrj
138638fd1498Szrj if (!e)
138738fd1498Szrj return;
138838fd1498Szrj while (true)
138938fd1498Szrj if (!e->inline_failed && e->callee->callees)
139038fd1498Szrj e = e->callee->callees;
139138fd1498Szrj else
139238fd1498Szrj {
139338fd1498Szrj enum availability avail;
139438fd1498Szrj struct cgraph_node *callee;
139538fd1498Szrj /* We do not reset the callee growth cache here. Since we added a new call,
139638fd1498Szrj growth could only have increased and consequently the badness metric
139738fd1498Szrj does not need updating. */
139838fd1498Szrj if (e->inline_failed
139938fd1498Szrj && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
140038fd1498Szrj && ipa_fn_summaries->get (callee)->inlinable
140138fd1498Szrj && avail >= AVAIL_AVAILABLE
140238fd1498Szrj && !bitmap_bit_p (updated_nodes, callee->uid))
140338fd1498Szrj {
140438fd1498Szrj if (can_inline_edge_p (e, false)
140538fd1498Szrj && want_inline_small_function_p (e, false)
140638fd1498Szrj && can_inline_edge_by_limits_p (e, false))
140738fd1498Szrj update_edge_key (heap, e);
140838fd1498Szrj else if (e->aux)
140938fd1498Szrj {
141038fd1498Szrj report_inline_failed_reason (e);
141138fd1498Szrj heap->delete_node ((edge_heap_node_t *) e->aux);
141238fd1498Szrj e->aux = NULL;
141338fd1498Szrj }
141438fd1498Szrj }
141538fd1498Szrj if (e->next_callee)
141638fd1498Szrj e = e->next_callee;
141738fd1498Szrj else
141838fd1498Szrj {
141938fd1498Szrj do
142038fd1498Szrj {
142138fd1498Szrj if (e->caller == node)
142238fd1498Szrj return;
142338fd1498Szrj e = e->caller->callers;
142438fd1498Szrj }
142538fd1498Szrj while (!e->next_callee);
142638fd1498Szrj e = e->next_callee;
142738fd1498Szrj }
142838fd1498Szrj }
142938fd1498Szrj }
143038fd1498Szrj
143138fd1498Szrj /* Enqueue all recursive calls from NODE into the priority queue, ordered by
143238fd1498Szrj how much we want to recursively inline each call. */
143338fd1498Szrj
143438fd1498Szrj static void
143538fd1498Szrj lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
143638fd1498Szrj edge_heap_t *heap)
143738fd1498Szrj {
143838fd1498Szrj struct cgraph_edge *e;
143938fd1498Szrj enum availability avail;
144038fd1498Szrj
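  /* The heap is a min-heap keyed by badness, so insert each edge with its
     negated call frequency: the most frequently executed recursive call
     sites are then extracted first.  */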
144138fd1498Szrj for (e = where->callees; e; e = e->next_callee)
144238fd1498Szrj if (e->callee == node
144338fd1498Szrj || (e->callee->ultimate_alias_target (&avail, e->caller) == node
144438fd1498Szrj && avail > AVAIL_INTERPOSABLE))
144538fd1498Szrj heap->insert (-e->sreal_frequency (), e);
144638fd1498Szrj for (e = where->callees; e; e = e->next_callee)
144738fd1498Szrj if (!e->inline_failed)
144838fd1498Szrj lookup_recursive_calls (node, e->callee, heap);
144938fd1498Szrj }
145038fd1498Szrj
145138fd1498Szrj /* Decide on recursive inlining: in the case the function has recursive calls,
145238fd1498Szrj inline until the body size reaches the given limit. If any new indirect edges
145338fd1498Szrj are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
145438fd1498Szrj is NULL. */
145538fd1498Szrj
145638fd1498Szrj static bool
145738fd1498Szrj recursive_inlining (struct cgraph_edge *edge,
145838fd1498Szrj vec<cgraph_edge *> *new_edges)
145938fd1498Szrj {
146038fd1498Szrj int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
146138fd1498Szrj edge_heap_t heap (sreal::min ());
146238fd1498Szrj struct cgraph_node *node;
146338fd1498Szrj struct cgraph_edge *e;
146438fd1498Szrj struct cgraph_node *master_clone = NULL, *next;
146538fd1498Szrj int depth = 0;
146638fd1498Szrj int n = 0;
146738fd1498Szrj
146838fd1498Szrj node = edge->caller;
146938fd1498Szrj if (node->global.inlined_to)
147038fd1498Szrj node = node->global.inlined_to;
147138fd1498Szrj
147238fd1498Szrj if (DECL_DECLARED_INLINE_P (node->decl))
147338fd1498Szrj limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
147438fd1498Szrj
147538fd1498Szrj /* Make sure that function is small enough to be considered for inlining. */
147638fd1498Szrj if (estimate_size_after_inlining (node, edge) >= limit)
147738fd1498Szrj return false;
147838fd1498Szrj lookup_recursive_calls (node, node, &heap);
147938fd1498Szrj if (heap.empty ())
148038fd1498Szrj return false;
148138fd1498Szrj
148238fd1498Szrj if (dump_file)
148338fd1498Szrj fprintf (dump_file,
148438fd1498Szrj " Performing recursive inlining on %s\n",
148538fd1498Szrj node->name ());
148638fd1498Szrj
148738fd1498Szrj /* Do the inlining and update the list of recursive calls during the process. */
148838fd1498Szrj while (!heap.empty ())
148938fd1498Szrj {
149038fd1498Szrj struct cgraph_edge *curr = heap.extract_min ();
149138fd1498Szrj struct cgraph_node *cnode, *dest = curr->callee;
149238fd1498Szrj
149338fd1498Szrj if (!can_inline_edge_p (curr, true)
149438fd1498Szrj || !can_inline_edge_by_limits_p (curr, true))
149538fd1498Szrj continue;
149638fd1498Szrj
149738fd1498Szrj /* MASTER_CLONE is produced in the case we already started modifying
149838fd1498Szrj the function. Be sure to redirect the edge to the original body before
149938fd1498Szrj estimating growth, otherwise we will be seeing growth after inlining
150038fd1498Szrj into the already modified body. */
150138fd1498Szrj if (master_clone)
150238fd1498Szrj {
150338fd1498Szrj curr->redirect_callee (master_clone);
150438fd1498Szrj reset_edge_growth_cache (curr);
150538fd1498Szrj }
150638fd1498Szrj
150738fd1498Szrj if (estimate_size_after_inlining (node, curr) > limit)
150838fd1498Szrj {
150938fd1498Szrj curr->redirect_callee (dest);
151038fd1498Szrj reset_edge_growth_cache (curr);
151138fd1498Szrj break;
151238fd1498Szrj }
151338fd1498Szrj
151438fd1498Szrj depth = 1;
151538fd1498Szrj for (cnode = curr->caller;
151638fd1498Szrj cnode->global.inlined_to; cnode = cnode->callers->caller)
151738fd1498Szrj if (node->decl
151838fd1498Szrj == curr->callee->ultimate_alias_target ()->decl)
151938fd1498Szrj depth++;
152038fd1498Szrj
152138fd1498Szrj if (!want_inline_self_recursive_call_p (curr, node, false, depth))
152238fd1498Szrj {
152338fd1498Szrj curr->redirect_callee (dest);
152438fd1498Szrj reset_edge_growth_cache (curr);
152538fd1498Szrj continue;
152638fd1498Szrj }
152738fd1498Szrj
152838fd1498Szrj if (dump_file)
152938fd1498Szrj {
153038fd1498Szrj fprintf (dump_file,
153138fd1498Szrj " Inlining call of depth %i", depth);
153238fd1498Szrj if (node->count.nonzero_p ())
153338fd1498Szrj {
153438fd1498Szrj fprintf (dump_file, " called approx. %.2f times per call",
153538fd1498Szrj (double)curr->count.to_gcov_type ()
153638fd1498Szrj / node->count.to_gcov_type ());
153738fd1498Szrj }
153838fd1498Szrj fprintf (dump_file, "\n");
153938fd1498Szrj }
154038fd1498Szrj if (!master_clone)
154138fd1498Szrj {
154238fd1498Szrj /* We need original clone to copy around. */
154338fd1498Szrj master_clone = node->create_clone (node->decl, node->count,
154438fd1498Szrj false, vNULL, true, NULL, NULL);
154538fd1498Szrj for (e = master_clone->callees; e; e = e->next_callee)
154638fd1498Szrj if (!e->inline_failed)
154738fd1498Szrj clone_inlined_nodes (e, true, false, NULL);
154838fd1498Szrj curr->redirect_callee (master_clone);
154938fd1498Szrj reset_edge_growth_cache (curr);
155038fd1498Szrj }
155138fd1498Szrj
155238fd1498Szrj inline_call (curr, false, new_edges, &overall_size, true);
155338fd1498Szrj lookup_recursive_calls (node, curr->callee, &heap);
155438fd1498Szrj n++;
155538fd1498Szrj }
155638fd1498Szrj
155738fd1498Szrj if (!heap.empty () && dump_file)
155838fd1498Szrj fprintf (dump_file, " Recursive inlining growth limit met.\n");
155938fd1498Szrj
156038fd1498Szrj if (!master_clone)
156138fd1498Szrj return false;
156238fd1498Szrj
156338fd1498Szrj if (dump_file)
156438fd1498Szrj fprintf (dump_file,
156538fd1498Szrj "\n Inlined %i times, "
156638fd1498Szrj "body grown from size %i to %i, time %f to %f\n", n,
156738fd1498Szrj ipa_fn_summaries->get (master_clone)->size,
156838fd1498Szrj ipa_fn_summaries->get (node)->size,
156938fd1498Szrj ipa_fn_summaries->get (master_clone)->time.to_double (),
157038fd1498Szrj ipa_fn_summaries->get (node)->time.to_double ());
157138fd1498Szrj
157238fd1498Szrj /* Remove the master clone we used for inlining. We rely on the fact that
157338fd1498Szrj clones inlined into the master clone get queued just before the master
157438fd1498Szrj clone, so we don't need recursion. */
157538fd1498Szrj for (node = symtab->first_function (); node != master_clone;
157638fd1498Szrj node = next)
157738fd1498Szrj {
157838fd1498Szrj next = symtab->next_function (node);
157938fd1498Szrj if (node->global.inlined_to == master_clone)
158038fd1498Szrj node->remove ();
158138fd1498Szrj }
158238fd1498Szrj master_clone->remove ();
158338fd1498Szrj return true;
158438fd1498Szrj }
158538fd1498Szrj
158638fd1498Szrj
158738fd1498Szrj /* Given the whole-compilation-unit size estimate INSNS, compute how large
158838fd1498Szrj we can allow the unit to grow. */
158938fd1498Szrj
159038fd1498Szrj static int
159138fd1498Szrj compute_max_insns (int insns)
159238fd1498Szrj {
159338fd1498Szrj int max_insns = insns;
159438fd1498Szrj if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
159538fd1498Szrj max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
159638fd1498Szrj
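  /* Sketch of the formula, assuming --param inline-unit-growth=20: a unit of
     50000 insns may grow to 50000 * 120 / 100 == 60000, while units smaller
     than PARAM_LARGE_UNIT_INSNS are treated as if they had that size so tiny
     programs still get useful growth headroom.  */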
159738fd1498Szrj return ((int64_t) max_insns
159838fd1498Szrj * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
159938fd1498Szrj }
160038fd1498Szrj
160138fd1498Szrj
160238fd1498Szrj /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
160338fd1498Szrj
160438fd1498Szrj static void
160538fd1498Szrj add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
160638fd1498Szrj {
160738fd1498Szrj while (new_edges.length () > 0)
160838fd1498Szrj {
160938fd1498Szrj struct cgraph_edge *edge = new_edges.pop ();
161038fd1498Szrj
161138fd1498Szrj gcc_assert (!edge->aux);
161238fd1498Szrj if (edge->inline_failed
161338fd1498Szrj && can_inline_edge_p (edge, true)
161438fd1498Szrj && want_inline_small_function_p (edge, true)
161538fd1498Szrj && can_inline_edge_by_limits_p (edge, true))
161638fd1498Szrj edge->aux = heap->insert (edge_badness (edge, false), edge);
161738fd1498Szrj }
161838fd1498Szrj }
161938fd1498Szrj
162038fd1498Szrj /* Remove EDGE from the fibheap. */
162138fd1498Szrj
162238fd1498Szrj static void
162338fd1498Szrj heap_edge_removal_hook (struct cgraph_edge *e, void *data)
162438fd1498Szrj {
162538fd1498Szrj if (e->aux)
162638fd1498Szrj {
162738fd1498Szrj ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
162838fd1498Szrj e->aux = NULL;
162938fd1498Szrj }
163038fd1498Szrj }
163138fd1498Szrj
163238fd1498Szrj /* Return true if speculation of edge E seems useful.
163338fd1498Szrj If ANTICIPATE_INLINING is true, be conservative and hope that E
163438fd1498Szrj may get inlined. */
163538fd1498Szrj
163638fd1498Szrj bool
163738fd1498Szrj speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
163838fd1498Szrj {
163938fd1498Szrj enum availability avail;
164038fd1498Szrj struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
164138fd1498Szrj e->caller);
164238fd1498Szrj struct cgraph_edge *direct, *indirect;
164338fd1498Szrj struct ipa_ref *ref;
164438fd1498Szrj
164538fd1498Szrj gcc_assert (e->speculative && !e->indirect_unknown_callee);
164638fd1498Szrj
164738fd1498Szrj if (!e->maybe_hot_p ())
164838fd1498Szrj return false;
164938fd1498Szrj
165038fd1498Szrj /* See if IP optimizations found something potentially useful about the
165138fd1498Szrj function. For now we look only for CONST/PURE flags. Almost everything
165238fd1498Szrj else we propagate is useless. */
165338fd1498Szrj if (avail >= AVAIL_AVAILABLE)
165438fd1498Szrj {
165538fd1498Szrj int ecf_flags = flags_from_decl_or_type (target->decl);
165638fd1498Szrj if (ecf_flags & ECF_CONST)
165738fd1498Szrj {
165838fd1498Szrj e->speculative_call_info (direct, indirect, ref);
165938fd1498Szrj if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
166038fd1498Szrj return true;
166138fd1498Szrj }
166238fd1498Szrj else if (ecf_flags & ECF_PURE)
166338fd1498Szrj {
166438fd1498Szrj e->speculative_call_info (direct, indirect, ref);
166538fd1498Szrj if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
166638fd1498Szrj return true;
166738fd1498Szrj }
166838fd1498Szrj }
166938fd1498Szrj /* If we did not manage to inline the function nor redirect
167038fd1498Szrj to an ipa-cp clone (recognized by the local flag being set),
167138fd1498Szrj keeping the speculation is probably pointless unless the hardware
167238fd1498Szrj is missing an indirect call predictor. */
167338fd1498Szrj if (!anticipate_inlining && e->inline_failed && !target->local.local)
167438fd1498Szrj return false;
167538fd1498Szrj /* For overwritable targets there is not much to do. */
167638fd1498Szrj if (e->inline_failed
167738fd1498Szrj && (!can_inline_edge_p (e, false)
167838fd1498Szrj || !can_inline_edge_by_limits_p (e, false, true)))
167938fd1498Szrj return false;
168038fd1498Szrj /* OK, speculation seems interesting. */
168138fd1498Szrj return true;
168238fd1498Szrj }
168338fd1498Szrj
168438fd1498Szrj /* We know that EDGE is not going to be inlined.
168538fd1498Szrj See if we can remove speculation. */
168638fd1498Szrj
168738fd1498Szrj static void
168838fd1498Szrj resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
168938fd1498Szrj {
169038fd1498Szrj if (edge->speculative && !speculation_useful_p (edge, false))
169138fd1498Szrj {
169238fd1498Szrj struct cgraph_node *node = edge->caller;
169338fd1498Szrj struct cgraph_node *where = node->global.inlined_to
169438fd1498Szrj ? node->global.inlined_to : node;
169538fd1498Szrj auto_bitmap updated_nodes;
169638fd1498Szrj
169738fd1498Szrj if (edge->count.ipa ().initialized_p ())
169838fd1498Szrj spec_rem += edge->count.ipa ();
169938fd1498Szrj edge->resolve_speculation ();
170038fd1498Szrj reset_edge_caches (where);
170138fd1498Szrj ipa_update_overall_fn_summary (where);
170238fd1498Szrj update_caller_keys (edge_heap, where,
170338fd1498Szrj updated_nodes, NULL);
170438fd1498Szrj update_callee_keys (edge_heap, where,
170538fd1498Szrj updated_nodes);
170638fd1498Szrj }
170738fd1498Szrj }
170838fd1498Szrj
170938fd1498Szrj /* Return true if NODE should be accounted for in the overall size estimate.
171038fd1498Szrj Skip all nodes optimized for size so we can measure the growth of the hot
171138fd1498Szrj part of the program regardless of the cold padding. */
171238fd1498Szrj
171338fd1498Szrj bool
171438fd1498Szrj inline_account_function_p (struct cgraph_node *node)
171538fd1498Szrj {
171638fd1498Szrj return (!DECL_EXTERNAL (node->decl)
171738fd1498Szrj && !opt_for_fn (node->decl, optimize_size)
171838fd1498Szrj && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
171938fd1498Szrj }
172038fd1498Szrj
172138fd1498Szrj /* Count the number of callers of NODE and store it into DATA (which
172238fd1498Szrj points to an int). Worker for cgraph_for_node_and_aliases. */
172338fd1498Szrj
172438fd1498Szrj static bool
172538fd1498Szrj sum_callers (struct cgraph_node *node, void *data)
172638fd1498Szrj {
172738fd1498Szrj struct cgraph_edge *e;
172838fd1498Szrj int *num_calls = (int *)data;
172938fd1498Szrj
173038fd1498Szrj for (e = node->callers; e; e = e->next_caller)
173138fd1498Szrj (*num_calls)++;
173238fd1498Szrj return false;
173338fd1498Szrj }
173438fd1498Szrj
173538fd1498Szrj /* We use a greedy algorithm for inlining of small functions:
173638fd1498Szrj all inline candidates are put into a prioritized heap ordered by
173738fd1498Szrj increasing badness.
173838fd1498Szrj
173938fd1498Szrj The inlining of small functions is bounded by the unit growth parameters. */
174038fd1498Szrj
174138fd1498Szrj static void
174238fd1498Szrj inline_small_functions (void)
174338fd1498Szrj {
174438fd1498Szrj struct cgraph_node *node;
174538fd1498Szrj struct cgraph_edge *edge;
174638fd1498Szrj edge_heap_t edge_heap (sreal::min ());
174738fd1498Szrj auto_bitmap updated_nodes;
174838fd1498Szrj int min_size, max_size;
174938fd1498Szrj auto_vec<cgraph_edge *> new_indirect_edges;
175038fd1498Szrj int initial_size = 0;
175138fd1498Szrj struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
175238fd1498Szrj struct cgraph_edge_hook_list *edge_removal_hook_holder;
175338fd1498Szrj new_indirect_edges.create (8);
175438fd1498Szrj
175538fd1498Szrj edge_removal_hook_holder
175638fd1498Szrj = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
175738fd1498Szrj
175838fd1498Szrj /* Compute overall unit size and other global parameters used by badness
175938fd1498Szrj metrics. */
176038fd1498Szrj
176138fd1498Szrj max_count = profile_count::uninitialized ();
1762*58e805e6Szrj ipa_reduced_postorder (order, true, NULL);
176338fd1498Szrj free (order);
176438fd1498Szrj
176538fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
176638fd1498Szrj if (!node->global.inlined_to)
176738fd1498Szrj {
176838fd1498Szrj if (!node->alias && node->analyzed
176938fd1498Szrj && (node->has_gimple_body_p () || node->thunk.thunk_p)
177038fd1498Szrj && opt_for_fn (node->decl, optimize))
177138fd1498Szrj {
177238fd1498Szrj struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
177338fd1498Szrj struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
177438fd1498Szrj
177538fd1498Szrj /* Do not account external functions; they will be optimized out
177638fd1498Szrj if not inlined. Also only count the non-cold portion of the program. */
177738fd1498Szrj if (inline_account_function_p (node))
177838fd1498Szrj initial_size += info->size;
177938fd1498Szrj info->growth = estimate_growth (node);
178038fd1498Szrj
178138fd1498Szrj int num_calls = 0;
178238fd1498Szrj node->call_for_symbol_and_aliases (sum_callers, &num_calls,
178338fd1498Szrj true);
178438fd1498Szrj if (num_calls == 1)
178538fd1498Szrj info->single_caller = true;
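	  /* Number strongly connected components of the callgraph so that
	     calls whose caller and callee share a cycle can be penalized
	     (the same_scc/in_scc hints used in edge_badness).  */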
178638fd1498Szrj if (dfs && dfs->next_cycle)
178738fd1498Szrj {
178838fd1498Szrj struct cgraph_node *n2;
178938fd1498Szrj int id = dfs->scc_no + 1;
179038fd1498Szrj for (n2 = node; n2;
179138fd1498Szrj n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
179238fd1498Szrj if (opt_for_fn (n2->decl, optimize))
179338fd1498Szrj {
179438fd1498Szrj struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
179538fd1498Szrj if (info2->scc_no)
179638fd1498Szrj break;
179738fd1498Szrj info2->scc_no = id;
179838fd1498Szrj }
179938fd1498Szrj }
180038fd1498Szrj }
180138fd1498Szrj
180238fd1498Szrj for (edge = node->callers; edge; edge = edge->next_caller)
180338fd1498Szrj max_count = max_count.max (edge->count.ipa ());
180438fd1498Szrj }
180538fd1498Szrj ipa_free_postorder_info ();
180638fd1498Szrj initialize_growth_caches ();
180738fd1498Szrj
180838fd1498Szrj if (dump_file)
180938fd1498Szrj fprintf (dump_file,
181038fd1498Szrj "\nDeciding on inlining of small functions. Starting with size %i.\n",
181138fd1498Szrj initial_size);
181238fd1498Szrj
181338fd1498Szrj overall_size = initial_size;
181438fd1498Szrj max_size = compute_max_insns (overall_size);
181538fd1498Szrj min_size = overall_size;
181638fd1498Szrj
181738fd1498Szrj /* Populate the heap with all edges we might inline. */
181838fd1498Szrj
181938fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
182038fd1498Szrj {
182138fd1498Szrj bool update = false;
182238fd1498Szrj struct cgraph_edge *next = NULL;
182338fd1498Szrj bool has_speculative = false;
182438fd1498Szrj
182538fd1498Szrj if (!opt_for_fn (node->decl, optimize))
182638fd1498Szrj continue;
182738fd1498Szrj
182838fd1498Szrj if (dump_file)
182938fd1498Szrj fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
183038fd1498Szrj
183138fd1498Szrj for (edge = node->callees; edge; edge = next)
183238fd1498Szrj {
183338fd1498Szrj next = edge->next_callee;
183438fd1498Szrj if (edge->inline_failed
183538fd1498Szrj && !edge->aux
183638fd1498Szrj && can_inline_edge_p (edge, true)
183738fd1498Szrj && want_inline_small_function_p (edge, true)
183838fd1498Szrj && can_inline_edge_by_limits_p (edge, true)
183938fd1498Szrj && edge->inline_failed)
184038fd1498Szrj {
184138fd1498Szrj gcc_assert (!edge->aux);
184238fd1498Szrj update_edge_key (&edge_heap, edge);
184338fd1498Szrj }
184438fd1498Szrj if (edge->speculative)
184538fd1498Szrj has_speculative = true;
184638fd1498Szrj }
184738fd1498Szrj if (has_speculative)
184838fd1498Szrj for (edge = node->callees; edge; edge = next)
184938fd1498Szrj if (edge->speculative && !speculation_useful_p (edge,
185038fd1498Szrj edge->aux != NULL))
185138fd1498Szrj {
185238fd1498Szrj edge->resolve_speculation ();
185338fd1498Szrj update = true;
185438fd1498Szrj }
185538fd1498Szrj if (update)
185638fd1498Szrj {
185738fd1498Szrj struct cgraph_node *where = node->global.inlined_to
185838fd1498Szrj ? node->global.inlined_to : node;
185938fd1498Szrj ipa_update_overall_fn_summary (where);
186038fd1498Szrj reset_edge_caches (where);
186138fd1498Szrj update_caller_keys (&edge_heap, where,
186238fd1498Szrj updated_nodes, NULL);
186338fd1498Szrj update_callee_keys (&edge_heap, where,
186438fd1498Szrj updated_nodes);
186538fd1498Szrj bitmap_clear (updated_nodes);
186638fd1498Szrj }
186738fd1498Szrj }
186838fd1498Szrj
186938fd1498Szrj gcc_assert (in_lto_p
187038fd1498Szrj || !(max_count > 0)
187138fd1498Szrj || (profile_info && flag_branch_probabilities));
187238fd1498Szrj
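  /* Main greedy loop: repeatedly take the edge with the smallest (most
     negative) badness and inline it, unless limits or a stale key say
     otherwise.  */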
187338fd1498Szrj while (!edge_heap.empty ())
187438fd1498Szrj {
187538fd1498Szrj int old_size = overall_size;
187638fd1498Szrj struct cgraph_node *where, *callee;
187738fd1498Szrj sreal badness = edge_heap.min_key ();
187838fd1498Szrj sreal current_badness;
187938fd1498Szrj int growth;
188038fd1498Szrj
188138fd1498Szrj edge = edge_heap.extract_min ();
188238fd1498Szrj gcc_assert (edge->aux);
188338fd1498Szrj edge->aux = NULL;
188438fd1498Szrj if (!edge->inline_failed || !edge->callee->analyzed)
188538fd1498Szrj continue;
188638fd1498Szrj
188738fd1498Szrj #if CHECKING_P
188838fd1498Szrj /* Be sure that caches are maintained consistently.
188938fd1498Szrj This check is affected by scaling roundoff errors when compiling for
189038fd1498Szrj IPA, thus we skip it in that case. */
189138fd1498Szrj if (!edge->callee->count.ipa_p ()
189238fd1498Szrj && (!max_count.initialized_p () || !max_count.nonzero_p ()))
189338fd1498Szrj {
189438fd1498Szrj sreal cached_badness = edge_badness (edge, false);
189538fd1498Szrj
189638fd1498Szrj int old_size_est = estimate_edge_size (edge);
189738fd1498Szrj sreal old_time_est = estimate_edge_time (edge);
189838fd1498Szrj int old_hints_est = estimate_edge_hints (edge);
189938fd1498Szrj
190038fd1498Szrj reset_edge_growth_cache (edge);
190138fd1498Szrj gcc_assert (old_size_est == estimate_edge_size (edge));
190238fd1498Szrj gcc_assert (old_time_est == estimate_edge_time (edge));
190338fd1498Szrj /* FIXME:
190438fd1498Szrj
190538fd1498Szrj gcc_assert (old_hints_est == estimate_edge_hints (edge));
190638fd1498Szrj
190738fd1498Szrj fails with profile feedback because some hints depend on the
190838fd1498Szrj maybe_hot_edge_p predicate and, because the callee gets inlined into
190938fd1498Szrj other calls, the edge may become cold.
191038fd1498Szrj This ought to be fixed by computing relative probabilities
191138fd1498Szrj for the given invocation, but that will be better done once the whole
191238fd1498Szrj code is converted to sreals. Disable for now and revert to the "wrong"
191338fd1498Szrj value so the enabled/disabled checking paths agree. */
191438fd1498Szrj edge_growth_cache[edge->uid].hints = old_hints_est + 1;
191538fd1498Szrj
191638fd1498Szrj /* When updating the edge costs, we only decrease badness in the keys.
191738fd1498Szrj Increases of badness are handled lazily; when we see a key with an
191838fd1498Szrj out-of-date value on it, we re-insert it now. */
191938fd1498Szrj current_badness = edge_badness (edge, false);
192038fd1498Szrj gcc_assert (cached_badness == current_badness);
192138fd1498Szrj gcc_assert (current_badness >= badness);
192238fd1498Szrj }
192338fd1498Szrj else
192438fd1498Szrj current_badness = edge_badness (edge, false);
192538fd1498Szrj #else
192638fd1498Szrj current_badness = edge_badness (edge, false);
192738fd1498Szrj #endif
192838fd1498Szrj if (current_badness != badness)
192938fd1498Szrj {
193038fd1498Szrj if (edge_heap.min () && current_badness > edge_heap.min_key ())
193138fd1498Szrj {
193238fd1498Szrj edge->aux = edge_heap.insert (current_badness, edge);
193338fd1498Szrj continue;
193438fd1498Szrj }
193538fd1498Szrj else
193638fd1498Szrj badness = current_badness;
193738fd1498Szrj }
193838fd1498Szrj
193938fd1498Szrj if (!can_inline_edge_p (edge, true)
194038fd1498Szrj || !can_inline_edge_by_limits_p (edge, true))
194138fd1498Szrj {
194238fd1498Szrj resolve_noninline_speculation (&edge_heap, edge);
194338fd1498Szrj continue;
194438fd1498Szrj }
194538fd1498Szrj
194638fd1498Szrj callee = edge->callee->ultimate_alias_target ();
194738fd1498Szrj growth = estimate_edge_growth (edge);
194838fd1498Szrj if (dump_file)
194938fd1498Szrj {
195038fd1498Szrj fprintf (dump_file,
195138fd1498Szrj "\nConsidering %s with %i size\n",
195238fd1498Szrj callee->dump_name (),
195338fd1498Szrj ipa_fn_summaries->get (callee)->size);
195438fd1498Szrj fprintf (dump_file,
195538fd1498Szrj " to be inlined into %s in %s:%i\n"
195638fd1498Szrj " Estimated badness is %f, frequency %.2f.\n",
195738fd1498Szrj edge->caller->dump_name (),
195838fd1498Szrj edge->call_stmt
195938fd1498Szrj && (LOCATION_LOCUS (gimple_location ((const gimple *)
196038fd1498Szrj edge->call_stmt))
196138fd1498Szrj > BUILTINS_LOCATION)
196238fd1498Szrj ? gimple_filename ((const gimple *) edge->call_stmt)
196338fd1498Szrj : "unknown",
196438fd1498Szrj edge->call_stmt
196538fd1498Szrj ? gimple_lineno ((const gimple *) edge->call_stmt)
196638fd1498Szrj : -1,
196738fd1498Szrj badness.to_double (),
196838fd1498Szrj edge->sreal_frequency ().to_double ());
196938fd1498Szrj if (edge->count.ipa ().initialized_p ())
197038fd1498Szrj {
197138fd1498Szrj fprintf (dump_file, " Called ");
197238fd1498Szrj edge->count.ipa ().dump (dump_file);
197338fd1498Szrj fprintf (dump_file, " times\n");
197438fd1498Szrj }
197538fd1498Szrj if (dump_flags & TDF_DETAILS)
197638fd1498Szrj edge_badness (edge, true);
197738fd1498Szrj }
197838fd1498Szrj
197938fd1498Szrj if (overall_size + growth > max_size
198038fd1498Szrj && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
198138fd1498Szrj {
198238fd1498Szrj edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
198338fd1498Szrj report_inline_failed_reason (edge);
198438fd1498Szrj resolve_noninline_speculation (&edge_heap, edge);
198538fd1498Szrj continue;
198638fd1498Szrj }
198738fd1498Szrj
198838fd1498Szrj if (!want_inline_small_function_p (edge, true))
198938fd1498Szrj {
199038fd1498Szrj resolve_noninline_speculation (&edge_heap, edge);
199138fd1498Szrj continue;
199238fd1498Szrj }
199338fd1498Szrj
199438fd1498Szrj /* Heuristics for inlining small functions work poorly for
199538fd1498Szrj recursive calls, where the effect is similar to loop unrolling.
199638fd1498Szrj When inlining such an edge seems profitable, leave the decision to
199738fd1498Szrj the specialized recursive inliner. */
199838fd1498Szrj if (edge->recursive_p ())
199938fd1498Szrj {
200038fd1498Szrj where = edge->caller;
200138fd1498Szrj if (where->global.inlined_to)
200238fd1498Szrj where = where->global.inlined_to;
200338fd1498Szrj if (!recursive_inlining (edge,
200438fd1498Szrj opt_for_fn (edge->caller->decl,
200538fd1498Szrj flag_indirect_inlining)
200638fd1498Szrj ? &new_indirect_edges : NULL))
200738fd1498Szrj {
200838fd1498Szrj edge->inline_failed = CIF_RECURSIVE_INLINING;
200938fd1498Szrj resolve_noninline_speculation (&edge_heap, edge);
201038fd1498Szrj continue;
201138fd1498Szrj }
201238fd1498Szrj reset_edge_caches (where);
201338fd1498Szrj /* Recursive inliner inlines all recursive calls of the function
201438fd1498Szrj at once. Consequently we need to update all callee keys. */
201538fd1498Szrj if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
201638fd1498Szrj add_new_edges_to_heap (&edge_heap, new_indirect_edges);
201738fd1498Szrj update_callee_keys (&edge_heap, where, updated_nodes);
201838fd1498Szrj bitmap_clear (updated_nodes);
201938fd1498Szrj }
202038fd1498Szrj else
202138fd1498Szrj {
202238fd1498Szrj struct cgraph_node *outer_node = NULL;
202338fd1498Szrj int depth = 0;
202438fd1498Szrj
202538fd1498Szrj /* Consider the case where a self-recursive function A is inlined
202638fd1498Szrj into B. This is a desirable optimization in some cases, since it
202738fd1498Szrj leads to an effect similar to loop peeling and we might completely
202838fd1498Szrj optimize out the recursive call. However we must be extra
202938fd1498Szrj selective. */
203038fd1498Szrj
203138fd1498Szrj where = edge->caller;
203238fd1498Szrj while (where->global.inlined_to)
203338fd1498Szrj {
203438fd1498Szrj if (where->decl == callee->decl)
203538fd1498Szrj outer_node = where, depth++;
203638fd1498Szrj where = where->callers->caller;
203738fd1498Szrj }
203838fd1498Szrj if (outer_node
203938fd1498Szrj && !want_inline_self_recursive_call_p (edge, outer_node,
204038fd1498Szrj true, depth))
204138fd1498Szrj {
204238fd1498Szrj edge->inline_failed
204338fd1498Szrj = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
204438fd1498Szrj ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
204538fd1498Szrj resolve_noninline_speculation (&edge_heap, edge);
204638fd1498Szrj continue;
204738fd1498Szrj }
204838fd1498Szrj else if (depth && dump_file)
204938fd1498Szrj fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
205038fd1498Szrj
205138fd1498Szrj gcc_checking_assert (!callee->global.inlined_to);
205238fd1498Szrj inline_call (edge, true, &new_indirect_edges, &overall_size, true);
205338fd1498Szrj add_new_edges_to_heap (&edge_heap, new_indirect_edges);
205438fd1498Szrj
205538fd1498Szrj reset_edge_caches (edge->callee);
205638fd1498Szrj
205738fd1498Szrj update_callee_keys (&edge_heap, where, updated_nodes);
205838fd1498Szrj }
205938fd1498Szrj where = edge->caller;
206038fd1498Szrj if (where->global.inlined_to)
206138fd1498Szrj where = where->global.inlined_to;
206238fd1498Szrj
206338fd1498Szrj /* Our profitability metric can depend on local properties
206438fd1498Szrj such as the number of inlinable calls and the size of the function body.
206538fd1498Szrj After inlining, these properties might change for the function we
206638fd1498Szrj inlined into (since its body size changed) and for the functions
206738fd1498Szrj called by the function we inlined (since the number of their inlinable
206838fd1498Szrj callers might change). */
206938fd1498Szrj update_caller_keys (&edge_heap, where, updated_nodes, NULL);
207038fd1498Szrj /* Offline copy count has possibly changed, recompute if profile is
207138fd1498Szrj available. */
207238fd1498Szrj struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
207338fd1498Szrj if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
207438fd1498Szrj update_callee_keys (&edge_heap, n, updated_nodes);
207538fd1498Szrj bitmap_clear (updated_nodes);
207638fd1498Szrj
207738fd1498Szrj if (dump_file)
207838fd1498Szrj {
207938fd1498Szrj fprintf (dump_file,
208038fd1498Szrj " Inlined %s into %s which now has time %f and size %i, "
208138fd1498Szrj "net change of %+i.\n",
208238fd1498Szrj xstrdup_for_dump (edge->callee->name ()),
208338fd1498Szrj xstrdup_for_dump (edge->caller->name ()),
208438fd1498Szrj ipa_fn_summaries->get (edge->caller)->time.to_double (),
208538fd1498Szrj ipa_fn_summaries->get (edge->caller)->size,
208638fd1498Szrj overall_size - old_size);
208738fd1498Szrj }
208838fd1498Szrj if (min_size > overall_size)
208938fd1498Szrj {
209038fd1498Szrj min_size = overall_size;
209138fd1498Szrj max_size = compute_max_insns (min_size);
209238fd1498Szrj
209338fd1498Szrj if (dump_file)
209438fd1498Szrj fprintf (dump_file, "New minimal size reached: %i\n", min_size);
209538fd1498Szrj }
209638fd1498Szrj }
209738fd1498Szrj
209838fd1498Szrj free_growth_caches ();
209938fd1498Szrj if (dump_file)
210038fd1498Szrj fprintf (dump_file,
210138fd1498Szrj "Unit growth for small function inlining: %i->%i (%i%%)\n",
210238fd1498Szrj initial_size, overall_size,
210338fd1498Szrj initial_size ? overall_size * 100 / (initial_size) - 100: 0);
210438fd1498Szrj symtab->remove_edge_removal_hook (edge_removal_hook_holder);
210538fd1498Szrj }
210638fd1498Szrj
210738fd1498Szrj /* Flatten NODE. Performed both during early inlining and
210838fd1498Szrj at IPA inlining time. */
210938fd1498Szrj
211038fd1498Szrj static void
211138fd1498Szrj flatten_function (struct cgraph_node *node, bool early)
211238fd1498Szrj {
211338fd1498Szrj struct cgraph_edge *e;
211438fd1498Szrj
211538fd1498Szrj /* We shouldn't be called recursively when we are being processed. */
211638fd1498Szrj gcc_assert (node->aux == NULL);
211738fd1498Szrj
211838fd1498Szrj node->aux = (void *) node;
211938fd1498Szrj
212038fd1498Szrj for (e = node->callees; e; e = e->next_callee)
212138fd1498Szrj {
212238fd1498Szrj struct cgraph_node *orig_callee;
212338fd1498Szrj struct cgraph_node *callee = e->callee->ultimate_alias_target ();
212438fd1498Szrj
212538fd1498Szrj /* We've hit a cycle? It is time to give up. */
212638fd1498Szrj if (callee->aux)
212738fd1498Szrj {
212838fd1498Szrj if (dump_file)
212938fd1498Szrj fprintf (dump_file,
213038fd1498Szrj "Not inlining %s into %s to avoid cycle.\n",
213138fd1498Szrj xstrdup_for_dump (callee->name ()),
213238fd1498Szrj xstrdup_for_dump (e->caller->name ()));
213338fd1498Szrj if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
213438fd1498Szrj e->inline_failed = CIF_RECURSIVE_INLINING;
213538fd1498Szrj continue;
213638fd1498Szrj }
213738fd1498Szrj
213838fd1498Szrj /* When the edge is already inlined, we just need to recurse into
213938fd1498Szrj it in order to fully flatten the leaves. */
214038fd1498Szrj if (!e->inline_failed)
214138fd1498Szrj {
214238fd1498Szrj flatten_function (callee, early);
214338fd1498Szrj continue;
214438fd1498Szrj }
214538fd1498Szrj
214638fd1498Szrj /* The flatten attribute needs to be processed during late inlining. For
214738fd1498Szrj extra code quality we however do flattening during early optimization,
214838fd1498Szrj too. */
214938fd1498Szrj if (!early
215038fd1498Szrj ? !can_inline_edge_p (e, true)
215138fd1498Szrj && !can_inline_edge_by_limits_p (e, true)
215238fd1498Szrj : !can_early_inline_edge_p (e))
215338fd1498Szrj continue;
215438fd1498Szrj
215538fd1498Szrj if (e->recursive_p ())
215638fd1498Szrj {
215738fd1498Szrj if (dump_file)
215838fd1498Szrj fprintf (dump_file, "Not inlining: recursive call.\n");
215938fd1498Szrj continue;
216038fd1498Szrj }
216138fd1498Szrj
216238fd1498Szrj if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
216338fd1498Szrj != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
216438fd1498Szrj {
216538fd1498Szrj if (dump_file)
216638fd1498Szrj fprintf (dump_file, "Not inlining: SSA form does not match.\n");
216738fd1498Szrj continue;
216838fd1498Szrj }
216938fd1498Szrj
217038fd1498Szrj /* Inline the edge and flatten the inline clone. Avoid
217138fd1498Szrj recursing through the original node if the node was cloned. */
217238fd1498Szrj if (dump_file)
217338fd1498Szrj fprintf (dump_file, " Inlining %s into %s.\n",
217438fd1498Szrj xstrdup_for_dump (callee->name ()),
217538fd1498Szrj xstrdup_for_dump (e->caller->name ()));
217638fd1498Szrj orig_callee = callee;
217738fd1498Szrj inline_call (e, true, NULL, NULL, false);
217838fd1498Szrj if (e->callee != orig_callee)
217938fd1498Szrj orig_callee->aux = (void *) node;
218038fd1498Szrj flatten_function (e->callee, early);
218138fd1498Szrj if (e->callee != orig_callee)
218238fd1498Szrj orig_callee->aux = NULL;
218338fd1498Szrj }
218438fd1498Szrj
218538fd1498Szrj node->aux = NULL;
218638fd1498Szrj if (!node->global.inlined_to)
218738fd1498Szrj ipa_update_overall_fn_summary (node);
218838fd1498Szrj }
218938fd1498Szrj
219038fd1498Szrj /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
219138fd1498Szrj DATA points to the number of calls originally found, so we avoid infinite
219238fd1498Szrj recursion. */
219338fd1498Szrj
219438fd1498Szrj static bool
219538fd1498Szrj inline_to_all_callers_1 (struct cgraph_node *node, void *data,
219638fd1498Szrj hash_set<cgraph_node *> *callers)
219738fd1498Szrj {
219838fd1498Szrj int *num_calls = (int *)data;
219938fd1498Szrj bool callee_removed = false;
220038fd1498Szrj
220138fd1498Szrj while (node->callers && !node->global.inlined_to)
220238fd1498Szrj {
220338fd1498Szrj struct cgraph_node *caller = node->callers->caller;
220438fd1498Szrj
220538fd1498Szrj if (!can_inline_edge_p (node->callers, true)
220638fd1498Szrj || !can_inline_edge_by_limits_p (node->callers, true)
220738fd1498Szrj || node->callers->recursive_p ())
220838fd1498Szrj {
220938fd1498Szrj if (dump_file)
221038fd1498Szrj fprintf (dump_file, "Uninlinable call found; giving up.\n");
221138fd1498Szrj *num_calls = 0;
221238fd1498Szrj return false;
221338fd1498Szrj }
221438fd1498Szrj
221538fd1498Szrj if (dump_file)
221638fd1498Szrj {
221738fd1498Szrj fprintf (dump_file,
221838fd1498Szrj "\nInlining %s size %i.\n",
221938fd1498Szrj node->name (),
222038fd1498Szrj ipa_fn_summaries->get (node)->size);
222138fd1498Szrj fprintf (dump_file,
222238fd1498Szrj " Called once from %s %i insns.\n",
222338fd1498Szrj node->callers->caller->name (),
222438fd1498Szrj ipa_fn_summaries->get (node->callers->caller)->size);
222538fd1498Szrj }
222638fd1498Szrj
222738fd1498Szrj /* Remember which callers we inlined to, delaying updating the
222838fd1498Szrj overall summary. */
222938fd1498Szrj callers->add (node->callers->caller);
223038fd1498Szrj inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
223138fd1498Szrj if (dump_file)
223238fd1498Szrj fprintf (dump_file,
223338fd1498Szrj " Inlined into %s which now has %i size\n",
223438fd1498Szrj caller->name (),
223538fd1498Szrj ipa_fn_summaries->get (caller)->size);
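      /* Stop once we have processed the number of calls found up front;
	 this guards against endlessly chasing calls created by the
	 inlining itself.  */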
223638fd1498Szrj if (!(*num_calls)--)
223738fd1498Szrj {
223838fd1498Szrj if (dump_file)
223938fd1498Szrj fprintf (dump_file, "New calls found; giving up.\n");
224038fd1498Szrj return callee_removed;
224138fd1498Szrj }
224238fd1498Szrj if (callee_removed)
224338fd1498Szrj return true;
224438fd1498Szrj }
224538fd1498Szrj return false;
224638fd1498Szrj }
224738fd1498Szrj
224838fd1498Szrj /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
224938fd1498Szrj update. */
225038fd1498Szrj
225138fd1498Szrj static bool
225238fd1498Szrj inline_to_all_callers (struct cgraph_node *node, void *data)
225338fd1498Szrj {
225438fd1498Szrj hash_set<cgraph_node *> callers;
225538fd1498Szrj bool res = inline_to_all_callers_1 (node, data, &callers);
225638fd1498Szrj /* Perform the delayed update of the overall summary of all callers
225738fd1498Szrj processed. This avoids quadratic behavior in the cases where
225838fd1498Szrj we have a lot of calls to the same function. */
225938fd1498Szrj for (hash_set<cgraph_node *>::iterator i = callers.begin ();
226038fd1498Szrj i != callers.end (); ++i)
226138fd1498Szrj ipa_update_overall_fn_summary (*i);
226238fd1498Szrj return res;
226338fd1498Szrj }
226438fd1498Szrj
226538fd1498Szrj /* Output overall time estimate. */
226638fd1498Szrj static void
226738fd1498Szrj dump_overall_stats (void)
226838fd1498Szrj {
226938fd1498Szrj sreal sum_weighted = 0, sum = 0;
227038fd1498Szrj struct cgraph_node *node;
227138fd1498Szrj
227238fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
227338fd1498Szrj if (!node->global.inlined_to
227438fd1498Szrj && !node->alias)
227538fd1498Szrj {
227638fd1498Szrj sreal time = ipa_fn_summaries->get (node)->time;
227738fd1498Szrj sum += time;
227838fd1498Szrj if (node->count.ipa ().initialized_p ())
227938fd1498Szrj sum_weighted += time * node->count.ipa ().to_gcov_type ();
228038fd1498Szrj }
228138fd1498Szrj fprintf (dump_file, "Overall time estimate: "
228238fd1498Szrj "%f weighted by profile: "
228338fd1498Szrj "%f\n", sum.to_double (), sum_weighted.to_double ());
228438fd1498Szrj }
228538fd1498Szrj
228638fd1498Szrj /* Output some useful stats about inlining. */
228738fd1498Szrj
228838fd1498Szrj static void
228938fd1498Szrj dump_inline_stats (void)
229038fd1498Szrj {
229138fd1498Szrj int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
229238fd1498Szrj int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
229338fd1498Szrj int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
229438fd1498Szrj int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
229538fd1498Szrj int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
229638fd1498Szrj int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
229738fd1498Szrj int64_t reason[CIF_N_REASONS][2];
229838fd1498Szrj sreal reason_freq[CIF_N_REASONS];
229938fd1498Szrj int i;
230038fd1498Szrj struct cgraph_node *node;
230138fd1498Szrj
230238fd1498Szrj memset (reason, 0, sizeof (reason));
230338fd1498Szrj for (i=0; i < CIF_N_REASONS; i++)
230438fd1498Szrj reason_freq[i] = 0;
230538fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
230638fd1498Szrj {
230738fd1498Szrj struct cgraph_edge *e;
230838fd1498Szrj for (e = node->callees; e; e = e->next_callee)
230938fd1498Szrj {
231038fd1498Szrj if (e->inline_failed)
231138fd1498Szrj {
231238fd1498Szrj if (e->count.ipa ().initialized_p ())
231338fd1498Szrj reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
231438fd1498Szrj reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
231538fd1498Szrj reason[(int) e->inline_failed][1] ++;
231638fd1498Szrj if (DECL_VIRTUAL_P (e->callee->decl)
231738fd1498Szrj && e->count.ipa ().initialized_p ())
231838fd1498Szrj {
231938fd1498Szrj if (e->indirect_inlining_edge)
232038fd1498Szrj noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
232138fd1498Szrj else
232238fd1498Szrj noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
232338fd1498Szrj }
232438fd1498Szrj else if (e->count.ipa ().initialized_p ())
232538fd1498Szrj {
232638fd1498Szrj if (e->indirect_inlining_edge)
232738fd1498Szrj noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
232838fd1498Szrj else
232938fd1498Szrj noninlined_cnt += e->count.ipa ().to_gcov_type ();
233038fd1498Szrj }
233138fd1498Szrj }
233238fd1498Szrj else if (e->count.ipa ().initialized_p ())
233338fd1498Szrj {
233438fd1498Szrj if (e->speculative)
233538fd1498Szrj {
233638fd1498Szrj if (DECL_VIRTUAL_P (e->callee->decl))
233738fd1498Szrj inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
233838fd1498Szrj else
233938fd1498Szrj inlined_speculative += e->count.ipa ().to_gcov_type ();
234038fd1498Szrj }
234138fd1498Szrj else if (DECL_VIRTUAL_P (e->callee->decl))
234238fd1498Szrj {
234338fd1498Szrj if (e->indirect_inlining_edge)
234438fd1498Szrj inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
234538fd1498Szrj else
234638fd1498Szrj inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
234738fd1498Szrj }
234838fd1498Szrj else
234938fd1498Szrj {
235038fd1498Szrj if (e->indirect_inlining_edge)
235138fd1498Szrj inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
235238fd1498Szrj else
235338fd1498Szrj inlined_cnt += e->count.ipa ().to_gcov_type ();
235438fd1498Szrj }
235538fd1498Szrj }
235638fd1498Szrj }
235738fd1498Szrj for (e = node->indirect_calls; e; e = e->next_callee)
235838fd1498Szrj if (e->indirect_info->polymorphic
235938fd1498Szrj && e->count.ipa ().initialized_p ())
236038fd1498Szrj indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
236138fd1498Szrj else if (e->count.ipa ().initialized_p ())
236238fd1498Szrj indirect_cnt += e->count.ipa ().to_gcov_type ();
236338fd1498Szrj }
236438fd1498Szrj if (max_count.initialized_p ())
236538fd1498Szrj {
236638fd1498Szrj fprintf (dump_file,
236738fd1498Szrj "Inlined %" PRId64 " + speculative "
236838fd1498Szrj "%" PRId64 " + speculative polymorphic "
236938fd1498Szrj "%" PRId64 " + previously indirect "
237038fd1498Szrj "%" PRId64 " + virtual "
237138fd1498Szrj "%" PRId64 " + virtual and previously indirect "
237238fd1498Szrj "%" PRId64 "\n" "Not inlined "
237338fd1498Szrj "%" PRId64 " + previously indirect "
237438fd1498Szrj "%" PRId64 " + virtual "
237538fd1498Szrj "%" PRId64 " + virtual and previously indirect "
237638fd1498Szrj "%" PRId64 " + still indirect "
237738fd1498Szrj "%" PRId64 " + still indirect polymorphic "
237838fd1498Szrj "%" PRId64 "\n", inlined_cnt,
237938fd1498Szrj inlined_speculative, inlined_speculative_ply,
238038fd1498Szrj inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
238138fd1498Szrj noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
238238fd1498Szrj noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
238338fd1498Szrj fprintf (dump_file, "Removed speculations ");
238438fd1498Szrj spec_rem.dump (dump_file);
238538fd1498Szrj fprintf (dump_file, "\n");
238638fd1498Szrj }
238738fd1498Szrj dump_overall_stats ();
238838fd1498Szrj fprintf (dump_file, "\nWhy inlining failed?\n");
238938fd1498Szrj for (i = 0; i < CIF_N_REASONS; i++)
239038fd1498Szrj if (reason[i][1])
239138fd1498Szrj fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
239238fd1498Szrj cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
239338fd1498Szrj (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
239438fd1498Szrj }
239538fd1498Szrj
239638fd1498Szrj /* Called when node is removed. */
239738fd1498Szrj
239838fd1498Szrj static void
239938fd1498Szrj flatten_remove_node_hook (struct cgraph_node *node, void *data)
240038fd1498Szrj {
240138fd1498Szrj if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
240238fd1498Szrj return;
240338fd1498Szrj
240438fd1498Szrj hash_set<struct cgraph_node *> *removed
240538fd1498Szrj = (hash_set<struct cgraph_node *> *) data;
240638fd1498Szrj removed->add (node);
240738fd1498Szrj }
240838fd1498Szrj
240938fd1498Szrj /* Decide on the inlining. We do so in topological order to avoid
241038fd1498Szrj the expense of updating data structures. */
241138fd1498Szrj
241238fd1498Szrj static unsigned int
241338fd1498Szrj ipa_inline (void)
241438fd1498Szrj {
241538fd1498Szrj struct cgraph_node *node;
241638fd1498Szrj int nnodes;
241738fd1498Szrj struct cgraph_node **order;
241838fd1498Szrj int i, j;
241938fd1498Szrj int cold;
242038fd1498Szrj bool remove_functions = false;
242138fd1498Szrj
242238fd1498Szrj order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
242338fd1498Szrj
242438fd1498Szrj if (dump_file)
242538fd1498Szrj ipa_dump_fn_summaries (dump_file);
242638fd1498Szrj
242738fd1498Szrj nnodes = ipa_reverse_postorder (order);
242838fd1498Szrj spec_rem = profile_count::zero ();
242938fd1498Szrj
243038fd1498Szrj FOR_EACH_FUNCTION (node)
243138fd1498Szrj {
243238fd1498Szrj node->aux = 0;
243338fd1498Szrj
243438fd1498Szrj /* Recompute the default reasons for inlining because they may have
243538fd1498Szrj changed during merging. */
243638fd1498Szrj if (in_lto_p)
243738fd1498Szrj {
243838fd1498Szrj for (cgraph_edge *e = node->callees; e; e = e->next_callee)
243938fd1498Szrj {
244038fd1498Szrj gcc_assert (e->inline_failed);
244138fd1498Szrj initialize_inline_failed (e);
244238fd1498Szrj }
244338fd1498Szrj for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
244438fd1498Szrj initialize_inline_failed (e);
244538fd1498Szrj }
244638fd1498Szrj }
244738fd1498Szrj
244838fd1498Szrj if (dump_file)
244938fd1498Szrj fprintf (dump_file, "\nFlattening functions:\n");
245038fd1498Szrj
245138fd1498Szrj /* First shrink the order array so that it only contains nodes with
245238fd1498Szrj the flatten attribute.  */
245338fd1498Szrj for (i = nnodes - 1, j = i; i >= 0; i--)
245438fd1498Szrj {
245538fd1498Szrj node = order[i];
245638fd1498Szrj if (lookup_attribute ("flatten",
245738fd1498Szrj DECL_ATTRIBUTES (node->decl)) != NULL)
245838fd1498Szrj order[j--] = order[i];
245938fd1498Szrj }
246038fd1498Szrj
246138fd1498Szrj /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
246238fd1498Szrj nodes with the flatten attribute.  If there is more than one such
246338fd1498Szrj node, we need to register a node removal hook, as flatten_function
246438fd1498Szrj could remove other nodes with the flatten attribute.  See PR82801.  */
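/* For instance (hypothetical code), given
     static void __attribute__ ((flatten)) inner (void) { work (); }
     void __attribute__ ((flatten)) outer (void) { inner (); }
   flattening outer may inline the only call to inner and allow the now
   unreachable inner node to be removed while it still sits in the order
   array; the hook records such removals so the loop below can skip them.  */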
246538fd1498Szrj struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
246638fd1498Szrj hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
246738fd1498Szrj if (j < nnodes - 2)
246838fd1498Szrj {
246938fd1498Szrj flatten_removed_nodes = new hash_set<struct cgraph_node *>;
247038fd1498Szrj node_removal_hook_holder
247138fd1498Szrj = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
247238fd1498Szrj flatten_removed_nodes);
247338fd1498Szrj }
247438fd1498Szrj
247538fd1498Szrj /* In the first pass handle functions to be flattened. Do this with
247638fd1498Szrj a priority so none of our later choices will make this impossible. */
247738fd1498Szrj for (i = nnodes - 1; i > j; i--)
247838fd1498Szrj {
247938fd1498Szrj node = order[i];
248038fd1498Szrj if (flatten_removed_nodes
248138fd1498Szrj && flatten_removed_nodes->contains (node))
248238fd1498Szrj continue;
248338fd1498Szrj
248438fd1498Szrj /* Handle nodes to be flattened.
248538fd1498Szrj Ideally, when processing callees we would stop inlining at the
248638fd1498Szrj entry of cycles, possibly cloning that entry point and trying
248738fd1498Szrj to flatten it, turning it into a self-recursive
248838fd1498Szrj function.  */
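/* A user-level example (illustrative only):
     __attribute__ ((flatten)) void hot_loop (void) { step1 (); step2 (); }
   requests that every call reachable from hot_loop's body be inlined into
   it where possible.  */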
248938fd1498Szrj if (dump_file)
249038fd1498Szrj fprintf (dump_file, "Flattening %s\n", node->name ());
249138fd1498Szrj flatten_function (node, false);
249238fd1498Szrj }
249338fd1498Szrj
249438fd1498Szrj if (j < nnodes - 2)
249538fd1498Szrj {
249638fd1498Szrj symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
249738fd1498Szrj delete flatten_removed_nodes;
249838fd1498Szrj }
249938fd1498Szrj free (order);
250038fd1498Szrj
250138fd1498Szrj if (dump_file)
250238fd1498Szrj dump_overall_stats ();
250338fd1498Szrj
250438fd1498Szrj inline_small_functions ();
250538fd1498Szrj
250638fd1498Szrj gcc_assert (symtab->state == IPA_SSA);
250738fd1498Szrj symtab->state = IPA_SSA_AFTER_INLINING;
250838fd1498Szrj /* Do the first after-inlining removal.  We want to remove all "stale" extern
250938fd1498Szrj inline functions and virtual functions so we really know what is called
251038fd1498Szrj once.  */
251138fd1498Szrj symtab->remove_unreachable_nodes (dump_file);
251238fd1498Szrj
251338fd1498Szrj /* Inline functions with the property that after inlining into all callers
251438fd1498Szrj the code size will shrink because the out-of-line copy is eliminated.
251538fd1498Szrj We do this regardless of the callee size as long as the function growth
251638fd1498Szrj limits are met.  */
251738fd1498Szrj if (dump_file)
251838fd1498Szrj fprintf (dump_file,
251938fd1498Szrj "\nDeciding on functions to be inlined into all callers and "
252038fd1498Szrj "removing useless speculations:\n");
252138fd1498Szrj
252238fd1498Szrj /* Inlining one function called once has a good chance of preventing
252338fd1498Szrj the inlining of another function into the same callee.  Ideally we should
252438fd1498Szrj work in priority order, but inlining hot functions first
252538fd1498Szrj is probably a good cut without the extra pain of maintaining a queue.
252638fd1498Szrj
252738fd1498Szrj ??? This does not really fit the bill perfectly: inlining a function
252838fd1498Szrj into a callee often leads to better optimization of the callee due to
252938fd1498Szrj the increased context available for optimization.
253038fd1498Szrj For example, if main() calls a function that outputs help
253138fd1498Szrj and then a function that does the main optimization, we should inline
253238fd1498Szrj the second with priority even if both calls are cold by themselves.
253338fd1498Szrj
253438fd1498Szrj We probably want to implement a new predicate replacing our use of
253538fd1498Szrj maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
253638fd1498Szrj to be hot.  */
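/* Illustration of the example above (hypothetical code):

     int main (int argc, char **argv)
     {
       if (argc < 2)
	 print_help ();
       else
	 run_optimizer ();
     }

   Both calls may be cold, yet run_optimizer should be inlined with priority
   because the extra context helps optimize the hot work it does.  */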
253738fd1498Szrj for (cold = 0; cold <= 1; cold ++)
253838fd1498Szrj {
253938fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
254038fd1498Szrj {
254138fd1498Szrj struct cgraph_edge *edge, *next;
254238fd1498Szrj bool update = false;
254338fd1498Szrj
254438fd1498Szrj if (!opt_for_fn (node->decl, optimize)
254538fd1498Szrj || !opt_for_fn (node->decl, flag_inline_functions_called_once))
254638fd1498Szrj continue;
254738fd1498Szrj
254838fd1498Szrj for (edge = node->callees; edge; edge = next)
254938fd1498Szrj {
255038fd1498Szrj next = edge->next_callee;
255138fd1498Szrj if (edge->speculative && !speculation_useful_p (edge, false))
255238fd1498Szrj {
255338fd1498Szrj if (edge->count.ipa ().initialized_p ())
255438fd1498Szrj spec_rem += edge->count.ipa ();
255538fd1498Szrj edge->resolve_speculation ();
255638fd1498Szrj update = true;
255738fd1498Szrj remove_functions = true;
255838fd1498Szrj }
255938fd1498Szrj }
256038fd1498Szrj if (update)
256138fd1498Szrj {
256238fd1498Szrj struct cgraph_node *where = node->global.inlined_to
256338fd1498Szrj ? node->global.inlined_to : node;
256438fd1498Szrj reset_edge_caches (where);
256538fd1498Szrj ipa_update_overall_fn_summary (where);
256638fd1498Szrj }
256738fd1498Szrj if (want_inline_function_to_all_callers_p (node, cold))
256838fd1498Szrj {
256938fd1498Szrj int num_calls = 0;
257038fd1498Szrj node->call_for_symbol_and_aliases (sum_callers, &num_calls,
257138fd1498Szrj true);
257238fd1498Szrj while (node->call_for_symbol_and_aliases
257338fd1498Szrj (inline_to_all_callers, &num_calls, true))
257438fd1498Szrj ;
257538fd1498Szrj remove_functions = true;
257638fd1498Szrj }
257738fd1498Szrj }
257838fd1498Szrj }
257938fd1498Szrj
258038fd1498Szrj /* Free ipa-prop structures if they are no longer needed. */
258138fd1498Szrj ipa_free_all_structures_after_iinln ();
258238fd1498Szrj
258338fd1498Szrj if (dump_file)
258438fd1498Szrj {
258538fd1498Szrj fprintf (dump_file,
258638fd1498Szrj "\nInlined %i calls, eliminated %i functions\n\n",
258738fd1498Szrj ncalls_inlined, nfunctions_inlined);
258838fd1498Szrj dump_inline_stats ();
258938fd1498Szrj }
259038fd1498Szrj
259138fd1498Szrj if (dump_file)
259238fd1498Szrj ipa_dump_fn_summaries (dump_file);
259338fd1498Szrj return remove_functions ? TODO_remove_functions : 0;
259438fd1498Szrj }
259538fd1498Szrj
259638fd1498Szrj /* Inline always-inline function calls in NODE. */
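/* For example (illustrative only), a callee declared as
     static inline void fast_path (void) __attribute__ ((always_inline));
   has DECL_DISREGARD_INLINE_LIMITS set and is inlined here regardless of the
   usual size limits; if it cannot be inlined, an error is diagnosed later in
   expand_call_inline.  */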
259738fd1498Szrj
259838fd1498Szrj static bool
259938fd1498Szrj inline_always_inline_functions (struct cgraph_node *node)
260038fd1498Szrj {
260138fd1498Szrj struct cgraph_edge *e;
260238fd1498Szrj bool inlined = false;
260338fd1498Szrj
260438fd1498Szrj for (e = node->callees; e; e = e->next_callee)
260538fd1498Szrj {
260638fd1498Szrj struct cgraph_node *callee = e->callee->ultimate_alias_target ();
260738fd1498Szrj if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
260838fd1498Szrj continue;
260938fd1498Szrj
261038fd1498Szrj if (e->recursive_p ())
261138fd1498Szrj {
261238fd1498Szrj if (dump_file)
261338fd1498Szrj fprintf (dump_file, " Not inlining recursive call to %s.\n",
261438fd1498Szrj e->callee->name ());
261538fd1498Szrj e->inline_failed = CIF_RECURSIVE_INLINING;
261638fd1498Szrj continue;
261738fd1498Szrj }
261838fd1498Szrj
261938fd1498Szrj if (!can_early_inline_edge_p (e))
262038fd1498Szrj {
262138fd1498Szrj /* Set inlined to true if the callee is marked "always_inline" but
262238fd1498Szrj is not inlinable. This will allow flagging an error later in
262338fd1498Szrj expand_call_inline in tree-inline.c. */
262438fd1498Szrj if (lookup_attribute ("always_inline",
262538fd1498Szrj DECL_ATTRIBUTES (callee->decl)) != NULL)
262638fd1498Szrj inlined = true;
262738fd1498Szrj continue;
262838fd1498Szrj }
262938fd1498Szrj
263038fd1498Szrj if (dump_file)
263138fd1498Szrj fprintf (dump_file, " Inlining %s into %s (always_inline).\n",
263238fd1498Szrj xstrdup_for_dump (e->callee->name ()),
263338fd1498Szrj xstrdup_for_dump (e->caller->name ()));
263438fd1498Szrj inline_call (e, true, NULL, NULL, false);
263538fd1498Szrj inlined = true;
263638fd1498Szrj }
263738fd1498Szrj if (inlined)
263838fd1498Szrj ipa_update_overall_fn_summary (node);
263938fd1498Szrj
264038fd1498Szrj return inlined;
264138fd1498Szrj }
264238fd1498Szrj
264338fd1498Szrj /* Inline small eligible callees of NODE during early inlining.  Return
264438fd1498Szrj true if anything was inlined.  */
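/* Candidates are callees that are either declared inline or covered by
   -finline-small-functions / -finline-functions in the caller, and they must
   also pass want_early_inline_function_p.  */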
264538fd1498Szrj
264638fd1498Szrj static bool
264738fd1498Szrj early_inline_small_functions (struct cgraph_node *node)
264838fd1498Szrj {
264938fd1498Szrj struct cgraph_edge *e;
265038fd1498Szrj bool inlined = false;
265138fd1498Szrj
265238fd1498Szrj for (e = node->callees; e; e = e->next_callee)
265338fd1498Szrj {
265438fd1498Szrj struct cgraph_node *callee = e->callee->ultimate_alias_target ();
265538fd1498Szrj if (!ipa_fn_summaries->get (callee)->inlinable
265638fd1498Szrj || !e->inline_failed)
265738fd1498Szrj continue;
265838fd1498Szrj
265938fd1498Szrj /* Do not consider functions not declared inline. */
266038fd1498Szrj if (!DECL_DECLARED_INLINE_P (callee->decl)
266138fd1498Szrj && !opt_for_fn (node->decl, flag_inline_small_functions)
266238fd1498Szrj && !opt_for_fn (node->decl, flag_inline_functions))
266338fd1498Szrj continue;
266438fd1498Szrj
266538fd1498Szrj if (dump_file)
266638fd1498Szrj fprintf (dump_file, "Considering inline candidate %s.\n",
266738fd1498Szrj callee->name ());
266838fd1498Szrj
266938fd1498Szrj if (!can_early_inline_edge_p (e))
267038fd1498Szrj continue;
267138fd1498Szrj
267238fd1498Szrj if (e->recursive_p ())
267338fd1498Szrj {
267438fd1498Szrj if (dump_file)
267538fd1498Szrj fprintf (dump_file, " Not inlining: recursive call.\n");
267638fd1498Szrj continue;
267738fd1498Szrj }
267838fd1498Szrj
267938fd1498Szrj if (!want_early_inline_function_p (e))
268038fd1498Szrj continue;
268138fd1498Szrj
268238fd1498Szrj if (dump_file)
268338fd1498Szrj fprintf (dump_file, " Inlining %s into %s.\n",
268438fd1498Szrj xstrdup_for_dump (callee->name ()),
268538fd1498Szrj xstrdup_for_dump (e->caller->name ()));
268638fd1498Szrj inline_call (e, true, NULL, NULL, false);
268738fd1498Szrj inlined = true;
268838fd1498Szrj }
268938fd1498Szrj
269038fd1498Szrj if (inlined)
269138fd1498Szrj ipa_update_overall_fn_summary (node);
269238fd1498Szrj
269338fd1498Szrj return inlined;
269438fd1498Szrj }
269538fd1498Szrj
269638fd1498Szrj unsigned int
269738fd1498Szrj early_inliner (function *fun)
269838fd1498Szrj {
269938fd1498Szrj struct cgraph_node *node = cgraph_node::get (current_function_decl);
270038fd1498Szrj struct cgraph_edge *edge;
270138fd1498Szrj unsigned int todo = 0;
270238fd1498Szrj int iterations = 0;
270338fd1498Szrj bool inlined = false;
270438fd1498Szrj
270538fd1498Szrj if (seen_error ())
270638fd1498Szrj return 0;
270738fd1498Szrj
270838fd1498Szrj /* Do nothing if the data structures for the IPA inliner are already
270938fd1498Szrj computed.  This happens when some pass decides to construct a new
271038fd1498Szrj function and cgraph_add_new_function calls lowering passes and early
271138fd1498Szrj optimization on it.  This may confuse us when the early inliner decides
271238fd1498Szrj to inline a call to a function clone, because function clones don't have
271338fd1498Szrj a parameter list in ipa-prop matching their signature.  */
271438fd1498Szrj if (ipa_node_params_sum)
271538fd1498Szrj return 0;
271638fd1498Szrj
271738fd1498Szrj if (flag_checking)
271838fd1498Szrj node->verify ();
271938fd1498Szrj node->remove_all_references ();
272038fd1498Szrj
272138fd1498Szrj /* Rebuild this reference because it doesn't depend on the
272238fd1498Szrj function's body and it's required to pass cgraph_node
272338fd1498Szrj verification.  */
272438fd1498Szrj if (node->instrumented_version
272538fd1498Szrj && !node->instrumentation_clone)
272638fd1498Szrj node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
272738fd1498Szrj
272838fd1498Szrj /* Even when not optimizing or not inlining, inline always-inline
272938fd1498Szrj functions.  */
273038fd1498Szrj inlined = inline_always_inline_functions (node);
273138fd1498Szrj
273238fd1498Szrj if (!optimize
273338fd1498Szrj || flag_no_inline
273438fd1498Szrj || !flag_early_inlining
273538fd1498Szrj /* Never inline regular functions into always-inline functions
273638fd1498Szrj during incremental inlining.  This is unfortunate, as functions
273738fd1498Szrj calling always-inline functions will get less optimized, but at the
273838fd1498Szrj same time inlining functions that call an always-inline
273938fd1498Szrj function into an always-inline function might introduce
274038fd1498Szrj cycles of edges to be always inlined in the callgraph.
274138fd1498Szrj
274238fd1498Szrj We might want to be smarter and just avoid this type of inlining.  */
274338fd1498Szrj || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
274438fd1498Szrj && lookup_attribute ("always_inline",
274538fd1498Szrj DECL_ATTRIBUTES (node->decl))))
274638fd1498Szrj ;
274738fd1498Szrj else if (lookup_attribute ("flatten",
274838fd1498Szrj DECL_ATTRIBUTES (node->decl)) != NULL)
274938fd1498Szrj {
275038fd1498Szrj /* When the function is marked to be flattened, recursively inline
275138fd1498Szrj all calls in it. */
275238fd1498Szrj if (dump_file)
275338fd1498Szrj fprintf (dump_file,
275438fd1498Szrj "Flattening %s\n", node->name ());
275538fd1498Szrj flatten_function (node, true);
275638fd1498Szrj inlined = true;
275738fd1498Szrj }
275838fd1498Szrj else
275938fd1498Szrj {
276038fd1498Szrj /* If some always_inline functions were inlined, apply the changes.
276138fd1498Szrj This way we will not account always-inline bodies into growth limits,
276238fd1498Szrj and moreover we will inline calls from always-inlines that we skipped
276338fd1498Szrj previously because of the conditional above.  */
276438fd1498Szrj if (inlined)
276538fd1498Szrj {
276638fd1498Szrj timevar_push (TV_INTEGRATION);
276738fd1498Szrj todo |= optimize_inline_calls (current_function_decl);
276838fd1498Szrj /* optimize_inline_calls call above might have introduced new
276938fd1498Szrj statements that don't have inline parameters computed. */
277038fd1498Szrj for (edge = node->callees; edge; edge = edge->next_callee)
277138fd1498Szrj {
277238fd1498Szrj struct ipa_call_summary *es = ipa_call_summaries->get (edge);
277338fd1498Szrj es->call_stmt_size
277438fd1498Szrj = estimate_num_insns (edge->call_stmt, &eni_size_weights);
277538fd1498Szrj es->call_stmt_time
277638fd1498Szrj = estimate_num_insns (edge->call_stmt, &eni_time_weights);
277738fd1498Szrj }
277838fd1498Szrj ipa_update_overall_fn_summary (node);
277938fd1498Szrj inlined = false;
278038fd1498Szrj timevar_pop (TV_INTEGRATION);
278138fd1498Szrj }
278238fd1498Szrj /* We iterate incremental inlining to get trivial cases of indirect
278338fd1498Szrj inlining. */
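/* For example (illustrative only):

     extern void hook (void);
     static void run (void (*cb) (void)) { cb (); }
     void f (void) { run (hook); }

   inlining run into f turns the indirect call into a direct call to hook,
   which a later iteration can then inline.  The iteration count is bounded
   by --param early-inliner-max-iterations.  */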
278438fd1498Szrj while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
278538fd1498Szrj && early_inline_small_functions (node))
278638fd1498Szrj {
278738fd1498Szrj timevar_push (TV_INTEGRATION);
278838fd1498Szrj todo |= optimize_inline_calls (current_function_decl);
278938fd1498Szrj
279038fd1498Szrj /* Technically we ought to recompute inline parameters so the new
279138fd1498Szrj iteration of the early inliner works as expected.  However, the values
279238fd1498Szrj are approximately right, and thus we only need to update edge
279338fd1498Szrj info that might be cleared out for newly discovered edges.  */
279438fd1498Szrj for (edge = node->callees; edge; edge = edge->next_callee)
279538fd1498Szrj {
279638fd1498Szrj /* We have no summary for new bound store calls yet. */
279738fd1498Szrj struct ipa_call_summary *es = ipa_call_summaries->get (edge);
279838fd1498Szrj es->call_stmt_size
279938fd1498Szrj = estimate_num_insns (edge->call_stmt, &eni_size_weights);
280038fd1498Szrj es->call_stmt_time
280138fd1498Szrj = estimate_num_insns (edge->call_stmt, &eni_time_weights);
280238fd1498Szrj
280338fd1498Szrj if (edge->callee->decl
280438fd1498Szrj && !gimple_check_call_matching_types (
280538fd1498Szrj edge->call_stmt, edge->callee->decl, false))
280638fd1498Szrj {
280738fd1498Szrj edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
280838fd1498Szrj edge->call_stmt_cannot_inline_p = true;
280938fd1498Szrj }
281038fd1498Szrj }
281138fd1498Szrj if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
281238fd1498Szrj ipa_update_overall_fn_summary (node);
281338fd1498Szrj timevar_pop (TV_INTEGRATION);
281438fd1498Szrj iterations++;
281538fd1498Szrj inlined = false;
281638fd1498Szrj }
281738fd1498Szrj if (dump_file)
281838fd1498Szrj fprintf (dump_file, "Iterations: %i\n", iterations);
281938fd1498Szrj }
282038fd1498Szrj
282138fd1498Szrj if (inlined)
282238fd1498Szrj {
282338fd1498Szrj timevar_push (TV_INTEGRATION);
282438fd1498Szrj todo |= optimize_inline_calls (current_function_decl);
282538fd1498Szrj timevar_pop (TV_INTEGRATION);
282638fd1498Szrj }
282738fd1498Szrj
282838fd1498Szrj fun->always_inline_functions_inlined = true;
282938fd1498Szrj
283038fd1498Szrj return todo;
283138fd1498Szrj }
283238fd1498Szrj
283338fd1498Szrj /* Do inlining of small functions.  Doing so early helps profiling and other
283438fd1498Szrj passes be somewhat more effective and avoids some code duplication in the
283538fd1498Szrj later real inlining pass for testcases with very many function calls.  */
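/* Note that when -fno-early-inlining is given or optimization is disabled,
   early_inliner above still handles always-inline callees but skips the
   small-function iteration.  */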
283638fd1498Szrj
283738fd1498Szrj namespace {
283838fd1498Szrj
283938fd1498Szrj const pass_data pass_data_early_inline =
284038fd1498Szrj {
284138fd1498Szrj GIMPLE_PASS, /* type */
284238fd1498Szrj "einline", /* name */
284338fd1498Szrj OPTGROUP_INLINE, /* optinfo_flags */
284438fd1498Szrj TV_EARLY_INLINING, /* tv_id */
284538fd1498Szrj PROP_ssa, /* properties_required */
284638fd1498Szrj 0, /* properties_provided */
284738fd1498Szrj 0, /* properties_destroyed */
284838fd1498Szrj 0, /* todo_flags_start */
284938fd1498Szrj 0, /* todo_flags_finish */
285038fd1498Szrj };
285138fd1498Szrj
285238fd1498Szrj class pass_early_inline : public gimple_opt_pass
285338fd1498Szrj {
285438fd1498Szrj public:
285538fd1498Szrj pass_early_inline (gcc::context *ctxt)
285638fd1498Szrj : gimple_opt_pass (pass_data_early_inline, ctxt)
285738fd1498Szrj {}
285838fd1498Szrj
285938fd1498Szrj /* opt_pass methods: */
286038fd1498Szrj virtual unsigned int execute (function *);
286138fd1498Szrj
286238fd1498Szrj }; // class pass_early_inline
286338fd1498Szrj
286438fd1498Szrj unsigned int
286538fd1498Szrj pass_early_inline::execute (function *fun)
286638fd1498Szrj {
286738fd1498Szrj return early_inliner (fun);
286838fd1498Szrj }
286938fd1498Szrj
287038fd1498Szrj } // anon namespace
287138fd1498Szrj
287238fd1498Szrj gimple_opt_pass *
287338fd1498Szrj make_pass_early_inline (gcc::context *ctxt)
287438fd1498Szrj {
287538fd1498Szrj return new pass_early_inline (ctxt);
287638fd1498Szrj }
287738fd1498Szrj
287838fd1498Szrj namespace {
287938fd1498Szrj
288038fd1498Szrj const pass_data pass_data_ipa_inline =
288138fd1498Szrj {
288238fd1498Szrj IPA_PASS, /* type */
288338fd1498Szrj "inline", /* name */
288438fd1498Szrj OPTGROUP_INLINE, /* optinfo_flags */
288538fd1498Szrj TV_IPA_INLINING, /* tv_id */
288638fd1498Szrj 0, /* properties_required */
288738fd1498Szrj 0, /* properties_provided */
288838fd1498Szrj 0, /* properties_destroyed */
288938fd1498Szrj 0, /* todo_flags_start */
289038fd1498Szrj ( TODO_dump_symtab ), /* todo_flags_finish */
289138fd1498Szrj };
289238fd1498Szrj
289338fd1498Szrj class pass_ipa_inline : public ipa_opt_pass_d
289438fd1498Szrj {
289538fd1498Szrj public:
289638fd1498Szrj pass_ipa_inline (gcc::context *ctxt)
289738fd1498Szrj : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
289838fd1498Szrj NULL, /* generate_summary */
289938fd1498Szrj NULL, /* write_summary */
290038fd1498Szrj NULL, /* read_summary */
290138fd1498Szrj NULL, /* write_optimization_summary */
290238fd1498Szrj NULL, /* read_optimization_summary */
290338fd1498Szrj NULL, /* stmt_fixup */
290438fd1498Szrj 0, /* function_transform_todo_flags_start */
290538fd1498Szrj inline_transform, /* function_transform */
290638fd1498Szrj NULL) /* variable_transform */
290738fd1498Szrj {}
290838fd1498Szrj
290938fd1498Szrj /* opt_pass methods: */
291038fd1498Szrj virtual unsigned int execute (function *) { return ipa_inline (); }
291138fd1498Szrj
291238fd1498Szrj }; // class pass_ipa_inline
291338fd1498Szrj
291438fd1498Szrj } // anon namespace
291538fd1498Szrj
291638fd1498Szrj ipa_opt_pass_d *
291738fd1498Szrj make_pass_ipa_inline (gcc::context *ctxt)
291838fd1498Szrj {
291938fd1498Szrj return new pass_ipa_inline (ctxt);
292038fd1498Szrj }