xref: /netbsd-src/external/gpl3/gcc/dist/gcc/ipa-inline.cc (revision 2683f5b185977c9184701f18c843971cd908b00e)
1 /* Inlining decision heuristics.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Jan Hubicka
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /*  Inlining decision heuristics
22 
23     The implementation of the inliner is organized as follows:
24 
25     inlining heuristics limits
26 
27       can_inline_edge_p allows checking that a particular inlining is
28       permitted by the limits specified by the user (allowed function growth,
29       stack growth and so on).
30 
31       Functions are inlined when it is obvious the result is profitable (such
32       as functions called once or when inlining reduces code size).
33       In addition to that we perform inlining of small functions and recursive
34       inlining.
35 
36     inlining heuristics
37 
38        The inliner itself is split into two passes:
39 
40        pass_early_inlining
41 
42 	 Simple local inlining pass inlining callees into current function.
43 	 This pass makes no use of whole unit analysis and thus it can do only
44 	 very simple decisions based on local properties.
45 
46 	 The strength of the pass is that it is run in topological order
47 	 (reverse postorder) on the callgraph. Functions are converted into SSA
48 	 form just before this pass and optimized subsequently. As a result, the
49 	 callees of the function seen by the early inliner have already been
50 	 optimized, and the results of early inlining add a lot of optimization
51 	 opportunities for local optimization.
52 
53 	 The pass handles the obvious inlining decisions within the compilation
54 	 unit - inlining auto inline functions, inlining for size and
55 	 flattening.
56 
57 	 The main strength of the pass is the ability to eliminate the
58 	 abstraction penalty in C++ code (via a combination of inlining and
59 	 early optimization) and thus improve the quality of analysis done by
60 	 real IPA optimizers.
61 
62 	 Because of the lack of whole-unit knowledge, the pass cannot really
63 	 make good code size/performance tradeoffs.  It does, however, perform
64 	 very simple speculative inlining, allowing code size to grow by
65 	 EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
66 	 the optimizations performed later are very likely to eliminate the cost.
67 
68        pass_ipa_inline
69 
70 	 This is the real inliner able to handle inlining with whole program
71 	 knowledge. It performs the following steps:
72 
73 	 1) inlining of small functions.  This is implemented by a greedy
74 	 algorithm ordering all inlinable cgraph edges by their badness and
75 	 inlining them in this order as long as the inline limits allow doing so.
76 
77 	 This heuristic is not very good at inlining recursive calls.  Recursive
78 	 calls can be inlined with results similar to loop unrolling.  To do so,
79 	 a special-purpose recursive inliner is executed on the function when a
80 	 recursive edge is met as a viable candidate.
81 
82 	 2) Unreachable functions are removed from the callgraph.  Inlining leads
83 	 to devirtualization and other modifications of the callgraph, so functions
84 	 may become unreachable during the process. Also functions declared as
85 	 extern inline or virtual functions are removed, since after inlining
86 	 we no longer need the offline bodies.
87 
88 	 3) Functions called once and not exported from the unit are inlined.
89 	 This should almost always lead to a reduction of code size by eliminating
90 	 the need for an offline copy of the function.  */
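
/* As an informal illustration of step 1 above (a sketch only, not the actual
   implementation; the real driver loop lives in inline_small_functions later
   in this file), the greedy small-function inliner behaves roughly like:

     for each inlinable cgraph edge E:
       insert E into a fibonacci heap keyed by edge_badness (E)
     while the heap is not empty:
       E = extract the edge with the smallest badness
       if can_inline_edge_p (E) and can_inline_edge_by_limits_p (E)
	  and want_inline_small_function_p (E):
	 inline_call (E) and recompute keys of the affected edges

   The helper names above are the real ones defined below; the control flow
   is simplified.  */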
91 
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122 
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125 
126 /* Statistics we collect about inlining algorithm.  */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130 
131 /* Return false when inlining edge E would lead to violating
132    limits on function unit growth or stack usage growth.
133 
134    The relative function body growth limit is present generally
135    to avoid problems with non-linear behavior of the compiler.
136    To allow inlining huge functions into a tiny wrapper, the limit
137    is always based on the bigger of the two functions considered.
138 
139    For stack growth limits we always base the limit on the stack usage
140    of the caller.  We want to prevent applications from segfaulting
141    on stack overflow when functions with huge stack frames get
142    inlined. */
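
/* A small worked example (all numbers are illustrative only): if the largest
   self_size seen on the chain of callers is 100 and
   param_large_function_growth is 100 (percent), the growth limit becomes
   100 + 100 * 100 / 100 = 200 units.  An inlining whose estimated result size
   does not shrink below the callee's size, exceeds param_large_function_insns
   and exceeds this limit is rejected with CIF_LARGE_FUNCTION_GROWTH_LIMIT.
   The stack check below is analogous: stack_size_limit is scaled up by
   param_stack_frame_growth percent and compared against the already
   accumulated frame of the caller chain plus the callee's estimated stack
   size.  */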
143 
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147   struct cgraph_node *to = e->caller;
148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
149   int newsize;
150   int limit = 0;
151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153 
154   /* Look for the function e->caller is inlined to.  While doing
155      so, work out the largest function body on the way.  As
156      described above, we want to base our function growth
157      limits on that, not on the self size of the
158      outer function nor on the self size of the inline code
159      we immediately inline to.  This is the most relaxed
160      interpretation of the rule "do not grow large functions
161      too much in order to prevent the compiler from exploding".  */
162   while (true)
163     {
164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
165       if (limit < size_info->self_size)
166 	limit = size_info->self_size;
167       if (stack_size_limit < size_info->estimated_self_stack_size)
168 	stack_size_limit = size_info->estimated_self_stack_size;
169       if (to->inlined_to)
170         to = to->callers->caller;
171       else
172 	break;
173     }
174 
175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177 
178   if (limit < what_size_info->self_size)
179     limit = what_size_info->self_size;
180 
181   limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182 
183   /* Check the size after inlining against the function limits.  But allow
184      the function to shrink if it went over the limits by forced inlining.  */
185   newsize = estimate_size_after_inlining (to, e);
186   if (newsize >= ipa_size_summaries->get (what)->size
187       && newsize > opt_for_fn (to->decl, param_large_function_insns)
188       && newsize > limit)
189     {
190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191       return false;
192     }
193 
194   if (!what_info->estimated_stack_size)
195     return true;
196 
197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
198      due to large I/O data structures used by the Fortran front-end.
199      We ought to ignore this limit when we know that the edge is executed
200      on every invocation of the caller (i.e. its call statement dominates
201      the exit block).  We do not track this information yet.  */
202   stack_size_limit += ((gcov_type)stack_size_limit
203 		       * opt_for_fn (to->decl, param_stack_frame_growth)
204 		       / 100);
205 
206   inlined_stack = (ipa_get_stack_frame_offset (to)
207 		   + outer_info->estimated_self_stack_size
208 		   + what_info->estimated_stack_size);
209   /* Compare the new stack consumption with the stack consumption at the
210      place the stack is used.  */
211   if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213 	 inline call, we can inline, too.
214 	 This bit overoptimistically assumes that we are good at stack
215 	 packing.  */
216       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217       && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218     {
219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220       return false;
221     }
222   return true;
223 }
224 
225 /* Dump info about why inlining has failed.  */
226 
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230   if (dump_enabled_p ())
231     {
232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 		       "  not inlinable: %C -> %C, %s\n",
234 		       e->caller, e->callee,
235 		       cgraph_inline_failed_string (e->inline_failed));
236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 	  && e->caller->lto_file_data
239 	  && e->callee->ultimate_alias_target ()->lto_file_data)
240 	{
241 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 			   "  LTO objects: %s, %s\n",
243 			   e->caller->lto_file_data->file_name,
244 			   e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 	}
246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 	if (dump_file)
248 	  cl_target_option_print_diff
249 	    (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 	     target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 	if (dump_file)
253 	  cl_optimization_print_diff
254 	    (dump_file, 2, opts_for_fn (e->caller->decl),
255 	     opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256     }
257 }
258 
259  /* Decide whether sanitizer-related attributes allow inlining. */
260 
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264   if (!caller || !callee)
265     return true;
266 
267   /* Follow clang and allow inlining for always_inline functions.  */
268   if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269     return true;
270 
271   const sanitize_code codes[] =
272     {
273       SANITIZE_ADDRESS,
274       SANITIZE_THREAD,
275       SANITIZE_UNDEFINED,
276       SANITIZE_UNDEFINED_NONDEFAULT,
277       SANITIZE_POINTER_COMPARE,
278       SANITIZE_POINTER_SUBTRACT
279     };
280 
281   for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282     if (sanitize_flags_p (codes[i], caller)
283 	!= sanitize_flags_p (codes[i], callee))
284       return false;
285 
286   if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
287     return false;
288 
289   return true;
290 }
291 
292 /* Used for flags where it is safe to inline when caller's value is
293    greater than callee's.  */
294 #define check_maybe_up(flag) \
295       (opts_for_fn (caller->decl)->x_##flag		\
296        != opts_for_fn (callee->decl)->x_##flag		\
297        && (!always_inline 				\
298 	   || opts_for_fn (caller->decl)->x_##flag	\
299 	      < opts_for_fn (callee->decl)->x_##flag))
300 /* Used for flags where it is safe to inline when caller's value is
301    smaller than callee's.  */
302 #define check_maybe_down(flag) \
303       (opts_for_fn (caller->decl)->x_##flag		\
304        != opts_for_fn (callee->decl)->x_##flag		\
305        && (!always_inline 				\
306 	   || opts_for_fn (caller->decl)->x_##flag	\
307 	      > opts_for_fn (callee->decl)->x_##flag))
308 /* Used for flags where exact match is needed for correctness.  */
309 #define check_match(flag) \
310       (opts_for_fn (caller->decl)->x_##flag		\
311        != opts_for_fn (callee->decl)->x_##flag)
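
/* For illustration, check_maybe_up (flag_trapping_math) expands roughly to

     (opts_for_fn (caller->decl)->x_flag_trapping_math
      != opts_for_fn (callee->decl)->x_flag_trapping_math
      && (!always_inline
	  || opts_for_fn (caller->decl)->x_flag_trapping_math
	     < opts_for_fn (callee->decl)->x_flag_trapping_math))

   i.e. the flag values differ and, for an always_inline callee, the mismatch
   only counts when the caller's value is smaller (the unsafe direction).  */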
312 
313 /* Decide if we can inline the edge and possibly update
314    inline_failed reason.
315    We check whether inlining is possible at all and whether
316    caller growth limits allow doing so.
317 
318    If REPORT is true, output the reason to the dump file.  */
319 
320 static bool
321 can_inline_edge_p (struct cgraph_edge *e, bool report,
322 		   bool early = false)
323 {
324   gcc_checking_assert (e->inline_failed);
325 
326   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
327     {
328       if (report)
329         report_inline_failed_reason (e);
330       return false;
331     }
332 
333   bool inlinable = true;
334   enum availability avail;
335   cgraph_node *caller = (e->caller->inlined_to
336 			 ? e->caller->inlined_to : e->caller);
337   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
338 
339   if (!callee->definition)
340     {
341       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
342       inlinable = false;
343     }
344   if (!early && (!opt_for_fn (callee->decl, optimize)
345 		 || !opt_for_fn (caller->decl, optimize)))
346     {
347       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
348       inlinable = false;
349     }
350   else if (callee->calls_comdat_local)
351     {
352       e->inline_failed = CIF_USES_COMDAT_LOCAL;
353       inlinable = false;
354     }
355   else if (avail <= AVAIL_INTERPOSABLE)
356     {
357       e->inline_failed = CIF_OVERWRITABLE;
358       inlinable = false;
359     }
360   /* All edges with call_stmt_cannot_inline_p should have inline_failed
361      initialized to one of FINAL_ERROR reasons.  */
362   else if (e->call_stmt_cannot_inline_p)
363     gcc_unreachable ();
364   /* Don't inline if the functions have different EH personalities.  */
365   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
366 	   && DECL_FUNCTION_PERSONALITY (callee->decl)
367 	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
368 	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
369     {
370       e->inline_failed = CIF_EH_PERSONALITY;
371       inlinable = false;
372     }
373   /* TM pure functions should not be inlined into non-TM_pure
374      functions.  */
375   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
376     {
377       e->inline_failed = CIF_UNSPECIFIED;
378       inlinable = false;
379     }
380   /* Check compatibility of target optimization options.  */
381   else if (!targetm.target_option.can_inline_p (caller->decl,
382 						callee->decl))
383     {
384       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
385       inlinable = false;
386     }
387   else if (ipa_fn_summaries->get (callee) == NULL
388 	   || !ipa_fn_summaries->get (callee)->inlinable)
389     {
390       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
391       inlinable = false;
392     }
393   /* Don't inline a function with mismatched sanitization attributes. */
394   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
395     {
396       e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
397       inlinable = false;
398     }
399 
400   if (!inlinable && report)
401     report_inline_failed_reason (e);
402   return inlinable;
403 }
404 
405 /* Return the inline_insns_single limit for function N.  If HINT or HINT2
406    is true, scale up the bound.  */
407 
408 static int
409 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
410 {
411   if (hint && hint2)
412     {
413       int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
414       spd = spd * spd;
415       if (spd > 1000000)
416 	spd = 1000000;
417       return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
418     }
419   if (hint || hint2)
420     return opt_for_fn (n->decl, param_max_inline_insns_single)
421 	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
422   return opt_for_fn (n->decl, param_max_inline_insns_single);
423 }
424 
425 /* Return the inline_insns_auto limit for function N.  If HINT or HINT2
426    is true, scale up the bound.  */
427 
428 static int
429 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
430 {
431   int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
432   if (hint && hint2)
433     {
434       int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
435       spd = spd * spd;
436       if (spd > 1000000)
437 	spd = 1000000;
438       return max_inline_insns_auto * spd / 100;
439     }
440   if (hint || hint2)
441     return max_inline_insns_auto
442 	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
443   return max_inline_insns_auto;
444 }
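
/* Worked example of the scaling above (the parameter values are illustrative,
   not necessarily the build defaults): with a base limit of 100 insns and
   param_inline_heuristics_hint_percent set to 200, a single matching hint
   raises the bound to 100 * 200 / 100 = 200 insns; when both hints match, the
   percentage itself is squared (and capped at 1000000) before being applied,
   so the bound grows much faster.  */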
445 
446 /* Decide if we can inline the edge and possibly update
447    inline_failed reason.
448    We check whether inlining is possible at all and whether
449    caller growth limits allow doing so.
450 
451    If REPORT is true, output the reason to the dump file.
452 
453    If DISREGARD_LIMITS is true, ignore size limits.  */
454 
455 static bool
456 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
457 		             bool disregard_limits = false, bool early = false)
458 {
459   gcc_checking_assert (e->inline_failed);
460 
461   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
462     {
463       if (report)
464         report_inline_failed_reason (e);
465       return false;
466     }
467 
468   bool inlinable = true;
469   enum availability avail;
470   cgraph_node *caller = (e->caller->inlined_to
471 			 ? e->caller->inlined_to : e->caller);
472   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
473   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
474   tree callee_tree
475     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
476   /* Check if caller growth allows the inlining.  */
477   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
478       && !disregard_limits
479       && !lookup_attribute ("flatten",
480      		 DECL_ATTRIBUTES (caller->decl))
481       && !caller_growth_limits (e))
482     inlinable = false;
483   else if (callee->externally_visible
484 	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
485 	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
486     {
487       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
488       inlinable = false;
489     }
490   /* Don't inline a function with a higher optimization level than the
491      caller.  FIXME: this is really just the tip of the iceberg of handling
492      the optimization attribute.  */
493   else if (caller_tree != callee_tree)
494     {
495       bool always_inline =
496 	     (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
497 	      && lookup_attribute ("always_inline",
498 				   DECL_ATTRIBUTES (callee->decl)));
499       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
500       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
501 
502      /* Until GCC 4.9 we did not check the semantics-altering flags
503 	below and inlined across optimization boundaries.
504 	Enabling the checks below breaks several packages by refusing
505 	to inline library always_inline functions.  See PR65873.
506 	Disable the check for early inlining for now until a better
507 	solution is found.  */
508      if (always_inline && early)
509 	;
510       /* There are some options that change IL semantics which means
511          we cannot inline in these cases for correctness reasons.
512 	 Not even for functions declared always_inline.  */
513      else if (check_match (flag_wrapv)
514 	      || check_match (flag_trapv)
515 	      || check_match (flag_pcc_struct_return)
516 	      || check_maybe_down (optimize_debug)
517 	      /* When the caller or callee does FP math, be sure the FP codegen
518 		 flags are compatible.  */
519 	      || ((caller_info->fp_expressions && callee_info->fp_expressions)
520 		  && (check_maybe_up (flag_rounding_math)
521 		      || check_maybe_up (flag_trapping_math)
522 		      || check_maybe_down (flag_unsafe_math_optimizations)
523 		      || check_maybe_down (flag_finite_math_only)
524 		      || check_maybe_up (flag_signaling_nans)
525 		      || check_maybe_down (flag_cx_limited_range)
526 		      || check_maybe_up (flag_signed_zeros)
527 		      || check_maybe_down (flag_associative_math)
528 		      || check_maybe_down (flag_reciprocal_math)
529 		      || check_maybe_down (flag_fp_int_builtin_inexact)
530 		      /* Strictly speaking only when the callee contains function
531 			 calls that may end up setting errno.  */
532 		      || check_maybe_up (flag_errno_math)))
533 	      /* We do not want code compiled with exceptions to be
534 		 brought into a non-EH function unless we know that the callee
535 		 does not throw.
536 		 This is tracked by DECL_FUNCTION_PERSONALITY.  */
537 	      || (check_maybe_up (flag_non_call_exceptions)
538 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
539 	      || (check_maybe_up (flag_exceptions)
540 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
541 	      /* When devirtualization is disabled for the callee, it is not safe
542 		 to inline it as we may have mangled the type info.
543 		 Allow early inlining of always inlines.  */
544 	      || (!early && check_maybe_down (flag_devirtualize)))
545 	{
546 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
547 	  inlinable = false;
548 	}
549       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
550       else if (always_inline)
551 	;
552       /* When the user added an attribute to the callee, honor it.  */
553       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
554 	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
555 	{
556 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
557 	  inlinable = false;
558 	}
559       /* If an explicit optimize attribute is not used, the mismatch is caused
560 	 by different command line options used to build different units.
561 	 Do not care about COMDAT functions - those are intended to be
562          optimized with the optimization flags of the module they are used in.
563 	 Also do not care about mixing up size/speed optimization when
564 	 DECL_DISREGARD_INLINE_LIMITS is set.  */
565       else if ((callee->merged_comdat
566 	        && !lookup_attribute ("optimize",
567 				      DECL_ATTRIBUTES (caller->decl)))
568 	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
569 	;
570       /* If the mismatch is caused by merging two LTO units with different
571 	 optimization flags we want to be a bit nicer.  However never inline
572 	 if one of the functions is not optimized at all.  */
573       else if (!opt_for_fn (callee->decl, optimize)
574       	       || !opt_for_fn (caller->decl, optimize))
575 	{
576 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
577 	  inlinable = false;
578 	}
579       /* If the callee is optimized for size and the caller is not, allow
580 	 inlining if the code shrinks or we are within the
581 	 param_max_inline_insns_single limit and the callee is inline (and thus
582 	 likely a unified comdat).  This will allow the caller to run faster.  */
583       else if (opt_for_fn (callee->decl, optimize_size)
584 	       > opt_for_fn (caller->decl, optimize_size))
585 	{
586 	  int growth = estimate_edge_growth (e);
587 	  if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
588 	      && (!DECL_DECLARED_INLINE_P (callee->decl)
589 		  && growth >= MAX (inline_insns_single (caller, false, false),
590 				    inline_insns_auto (caller, false, false))))
591 	    {
592 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
593 	      inlinable = false;
594 	    }
595 	}
596       /* If the callee is more aggressively optimized for performance than the
597 	 caller, we generally want to inline only cheap (runtime-wise) functions.  */
598       else if (opt_for_fn (callee->decl, optimize_size)
599 	       < opt_for_fn (caller->decl, optimize_size)
600 	       || (opt_for_fn (callee->decl, optimize)
601 		   > opt_for_fn (caller->decl, optimize)))
602 	{
603 	  if (estimate_edge_time (e)
604 	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
605 	    {
606 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
607 	      inlinable = false;
608 	    }
609 	}
610 
611     }
612 
613   if (!inlinable && report)
614     report_inline_failed_reason (e);
615   return inlinable;
616 }
617 
618 
619 /* Return true if the edge E is inlinable during early inlining.  */
620 
621 static bool
622 can_early_inline_edge_p (struct cgraph_edge *e)
623 {
624   cgraph_node *caller = (e->caller->inlined_to
625 			 ? e->caller->inlined_to : e->caller);
626   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
627   /* The early inliner might get called at WPA stage when an IPA pass adds a
628      new function.  In this case we cannot really do any early inlining
629      because function bodies are missing.  */
630   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
631     return false;
632   if (!gimple_has_body_p (callee->decl))
633     {
634       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
635       return false;
636     }
637   /* In the early inliner some of the callees may not be in SSA form yet
638      (i.e. the callgraph is cyclic and we did not process
639      the callee by the early inliner yet).  We don't have a CIF code for this
640      case; later we will re-do the decision in the real inliner.  */
641   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
642       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
643     {
644       if (dump_enabled_p ())
645 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
646 			 "  edge not inlinable: not in SSA form\n");
647       return false;
648     }
649   else if (profile_arc_flag
650 	   && ((lookup_attribute ("no_profile_instrument_function",
651 				 DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
652 	       != (lookup_attribute ("no_profile_instrument_function",
653 				     DECL_ATTRIBUTES (callee->decl)) == NULL_TREE)))
654     return false;
655 
656   if (!can_inline_edge_p (e, true, true)
657       || !can_inline_edge_by_limits_p (e, true, false, true))
658     return false;
659   return true;
660 }
661 
662 
663 /* Return the number of calls in N.  Ignore cheap builtins.  */
664 
665 static int
666 num_calls (struct cgraph_node *n)
667 {
668   struct cgraph_edge *e;
669   int num = 0;
670 
671   for (e = n->callees; e; e = e->next_callee)
672     if (!is_inexpensive_builtin (e->callee->decl))
673       num++;
674   return num;
675 }
676 
677 
678 /* Return true if we are interested in inlining a small function.  */
679 
680 static bool
681 want_early_inline_function_p (struct cgraph_edge *e)
682 {
683   bool want_inline = true;
684   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
685 
686   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
687     ;
688   /* For AutoFDO, we need to make sure that, before the profile summary, all
689      hot paths' IR looks exactly the same as in the profiled binary.  As a
690      result, in the einliner, we will disregard the size limit and inline
691      those callsites that are:
692        * inlined in the profiled binary, and
693        * the cloned callee has enough samples to be considered "hot".  */
694   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
695     ;
696   else if (!DECL_DECLARED_INLINE_P (callee->decl)
697 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
698     {
699       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
700       report_inline_failed_reason (e);
701       want_inline = false;
702     }
703   else
704     {
705       /* First take care of very large functions.  */
706       int min_growth = estimate_min_edge_growth (e), growth = 0;
707       int n;
708       int early_inlining_insns = param_early_inlining_insns;
709 
710       if (min_growth > early_inlining_insns)
711 	{
712 	  if (dump_enabled_p ())
713 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
714 			     "  will not early inline: %C->%C, "
715 			     "call is cold and code would grow "
716 			     "at least by %i\n",
717 			     e->caller, callee,
718 			     min_growth);
719 	  want_inline = false;
720 	}
721       else
722         growth = estimate_edge_growth (e);
723 
724 
725       if (!want_inline || growth <= param_max_inline_insns_size)
726 	;
727       else if (!e->maybe_hot_p ())
728 	{
729 	  if (dump_enabled_p ())
730 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
731 			     "  will not early inline: %C->%C, "
732 			     "call is cold and code would grow by %i\n",
733 			     e->caller, callee,
734 			     growth);
735 	  want_inline = false;
736 	}
737       else if (growth > early_inlining_insns)
738 	{
739 	  if (dump_enabled_p ())
740 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
741 			     "  will not early inline: %C->%C, "
742 			     "growth %i exceeds --param early-inlining-insns\n",
743 			     e->caller, callee, growth);
744 	  want_inline = false;
745 	}
746       else if ((n = num_calls (callee)) != 0
747 	       && growth * (n + 1) > early_inlining_insns)
748 	{
749 	  if (dump_enabled_p ())
750 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
751 			     "  will not early inline: %C->%C, "
752 			     "growth %i exceeds --param early-inlining-insns "
753 			     "divided by number of calls\n",
754 			     e->caller, callee, growth);
755 	  want_inline = false;
756 	}
757     }
758   return want_inline;
759 }
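
/* Illustrative example of the last test above (numbers are made up): if
   inlining is estimated to grow the caller by 6 insns, the callee itself
   still contains 2 non-trivial calls and --param early-inlining-insns is 14,
   then 6 * (2 + 1) = 18 > 14 and the call site is rejected; with a single
   remaining call, 6 * 2 = 12 <= 14 and the earlier checks decide.  */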
760 
761 /* Compute the time of the edge->caller + edge->callee execution when inlining
762    does not happen.  */
763 
764 inline sreal
765 compute_uninlined_call_time (struct cgraph_edge *edge,
766 			     sreal uninlined_call_time,
767 			     sreal freq)
768 {
769   cgraph_node *caller = (edge->caller->inlined_to
770 			 ? edge->caller->inlined_to
771 			 : edge->caller);
772 
773   if (freq > 0)
774     uninlined_call_time *= freq;
775   else
776     uninlined_call_time = uninlined_call_time >> 11;
777 
778   sreal caller_time = ipa_fn_summaries->get (caller)->time;
779   return uninlined_call_time + caller_time;
780 }
781 
782 /* Same as compute_uninlined_call_time but compute the time when inlining
783    does happen.  */
784 
785 inline sreal
786 compute_inlined_call_time (struct cgraph_edge *edge,
787 			   sreal time,
788 			   sreal freq)
789 {
790   cgraph_node *caller = (edge->caller->inlined_to
791 			 ? edge->caller->inlined_to
792 			 : edge->caller);
793   sreal caller_time = ipa_fn_summaries->get (caller)->time;
794 
795   if (freq > 0)
796     time *= freq;
797   else
798     time = time >> 11;
799 
800   /* This calculation should match one in ipa-inline-analysis.cc
801      (estimate_edge_size_and_time).  */
802   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
803   time += caller_time;
804   if (time <= 0)
805     time = ((sreal) 1) >> 8;
806   gcc_checking_assert (time >= 0);
807   return time;
808 }
809 
810 /* Determine the time saved by inlining EDGE of frequency FREQ,
811    where the callee's runtime without inlining is UNINLINED_TIME
812    and with inlining it is INLINED_TIME.  */
813 
814 inline sreal
815 inlining_speedup (struct cgraph_edge *edge,
816     		  sreal freq,
817 		  sreal uninlined_time,
818 		  sreal inlined_time)
819 {
820   sreal speedup = uninlined_time - inlined_time;
821   /* Handling of call_time should match one in ipa-inline-fnsummary.c
822      (estimate_edge_size_and_time).  */
823   sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
824 
825   if (freq > 0)
826     {
827       speedup = (speedup + call_time);
828       if (freq != 1)
829        speedup = speedup * freq;
830     }
831   else if (freq == 0)
832     speedup = speedup >> 11;
833   gcc_checking_assert (speedup >= 0);
834   return speedup;
835 }
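
/* Worked example for inlining_speedup (all numbers invented): if the callee
   costs 50 time units when called out of line and 45 units once specialized
   into this call site, the call statement itself costs 5 units and the edge
   frequency is 2, the estimated speedup is (50 - 45 + 5) * 2 = 20 units.  */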
836 
837 /* Return true if the speedup for inlining E is bigger than
838    param_inline_min_speedup.  */
839 
840 static bool
841 big_speedup_p (struct cgraph_edge *e)
842 {
843   sreal unspec_time;
844   sreal spec_time = estimate_edge_time (e, &unspec_time);
845   sreal freq = e->sreal_frequency ();
846   sreal time = compute_uninlined_call_time (e, unspec_time, freq);
847   sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
848   cgraph_node *caller = (e->caller->inlined_to
849 			 ? e->caller->inlined_to
850 			 : e->caller);
851   int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
852 
853   if ((time - inlined_time) * 100 > time * limit)
854     return true;
855   return false;
856 }
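
/* For example, with param_inline_min_speedup set to 15 (an illustrative
   value), big_speedup_p returns true only when inlining is expected to shave
   more than 15% off the combined caller + callee runtime computed above,
   i.e. (time - inlined_time) * 100 > time * 15.  */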
857 
858 /* Return true if we are interested in inlining a small function.
859    When REPORT is true, report the reason to the dump file.  */
860 
861 static bool
862 want_inline_small_function_p (struct cgraph_edge *e, bool report)
863 {
864   bool want_inline = true;
865   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
866   cgraph_node *to  = (e->caller->inlined_to
867 		      ? e->caller->inlined_to : e->caller);
868 
869   /* Allow this function to be called before can_inline_edge_p,
870      since it's usually cheaper.  */
871   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
872     want_inline = false;
873   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
874     ;
875   else if (!DECL_DECLARED_INLINE_P (callee->decl)
876 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
877     {
878       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
879       want_inline = false;
880     }
881   /* Do a fast and conservative check whether the function can be a good
882      inline candidate.  */
883   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
884 	   && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
885 	   && ipa_fn_summaries->get (callee)->min_size
886 		- ipa_call_summaries->get (e)->call_stmt_size
887 	      > inline_insns_auto (e->caller, true, true))
888     {
889       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
890       want_inline = false;
891     }
892   else if ((DECL_DECLARED_INLINE_P (callee->decl)
893 	    || e->count.ipa ().nonzero_p ())
894 	   && ipa_fn_summaries->get (callee)->min_size
895 		- ipa_call_summaries->get (e)->call_stmt_size
896 	      > inline_insns_single (e->caller, true, true))
897     {
898       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
899 			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
900 			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
901       want_inline = false;
902     }
903   else
904     {
905       int growth = estimate_edge_growth (e);
906       ipa_hints hints = estimate_edge_hints (e);
907       /* We have two independent groups of hints.  If one hint matches in each
908 	 of the groups, the limits are increased.  If both groups match, the
909 	 limit is increased even more.  */
910       bool apply_hints = (hints & (INLINE_HINT_indirect_call
911 				   | INLINE_HINT_known_hot
912 				   | INLINE_HINT_loop_iterations
913 				   | INLINE_HINT_loop_stride));
914       bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
915 
916       if (growth <= opt_for_fn (to->decl,
917 				param_max_inline_insns_size))
918 	;
919       /* Apply the param_max_inline_insns_single limit.  Do not do so when
920 	 hints suggest that inlining the given function is very profitable.
921 	 Avoid computing big_speedup_p when it is not necessary to change the
922 	 outcome of the decision.  */
923       else if (DECL_DECLARED_INLINE_P (callee->decl)
924 	       && growth >= inline_insns_single (e->caller, apply_hints,
925 						 apply_hints2)
926 	       && (apply_hints || apply_hints2
927 		   || growth >= inline_insns_single (e->caller, true,
928 						     apply_hints2)
929 		   || !big_speedup_p (e)))
930 	{
931           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
932 	  want_inline = false;
933 	}
934       else if (!DECL_DECLARED_INLINE_P (callee->decl)
935 	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
936 	       && growth >= opt_for_fn (to->decl,
937 					param_max_inline_insns_small))
938 	{
939 	  /* growth_positive_p is expensive, always test it last.  */
940 	  if (growth >= inline_insns_single (e->caller, false, false)
941 	      || growth_positive_p (callee, e, growth))
942 	    {
943               e->inline_failed = CIF_NOT_DECLARED_INLINED;
944 	      want_inline = false;
945  	    }
946 	}
947       /* Apply the param_max_inline_insns_auto limit for functions not declared
948 	 inline.  Bypass the limit when the speedup seems big.  */
949       else if (!DECL_DECLARED_INLINE_P (callee->decl)
950 	       && growth >= inline_insns_auto (e->caller, apply_hints,
951 					       apply_hints2)
952 	       && (apply_hints || apply_hints2
953 		   || growth >= inline_insns_auto (e->caller, true,
954 						   apply_hints2)
955 		   || !big_speedup_p (e)))
956 	{
957 	  /* growth_positive_p is expensive, always test it last.  */
958 	  if (growth >= inline_insns_single (e->caller, false, false)
959 	      || growth_positive_p (callee, e, growth))
960 	    {
961 	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
962 	      want_inline = false;
963  	    }
964 	}
965       /* If call is cold, do not inline when function body would grow. */
966       else if (!e->maybe_hot_p ()
967 	       && (growth >= inline_insns_single (e->caller, false, false)
968 		   || growth_positive_p (callee, e, growth)))
969 	{
970           e->inline_failed = CIF_UNLIKELY_CALL;
971 	  want_inline = false;
972 	}
973     }
974   if (!want_inline && report)
975     report_inline_failed_reason (e);
976   return want_inline;
977 }
978 
979 /* EDGE is a self-recursive edge.
980    We handle two cases - when function A is inlined into itself
981    or when function A is being inlined into another inline copy of function
982    A within function B.
983 
984    In the first case OUTER_NODE points to the toplevel copy of A, while
985    in the second case OUTER_NODE points to the outermost copy of A in B.
986 
987    In both cases we want to be extra selective since
988    inlining the call will just cause new recursive calls to appear.  */
989 
990 static bool
991 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
992 				   struct cgraph_node *outer_node,
993 				   bool peeling,
994 				   int depth)
995 {
996   char const *reason = NULL;
997   bool want_inline = true;
998   sreal caller_freq = 1;
999   int max_depth = opt_for_fn (outer_node->decl,
1000 			      param_max_inline_recursive_depth_auto);
1001 
1002   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1003     max_depth = opt_for_fn (outer_node->decl,
1004 			    param_max_inline_recursive_depth);
1005 
1006   if (!edge->maybe_hot_p ())
1007     {
1008       reason = "recursive call is cold";
1009       want_inline = false;
1010     }
1011   else if (depth > max_depth)
1012     {
1013       reason = "--param max-inline-recursive-depth exceeded.";
1014       want_inline = false;
1015     }
1016   else if (outer_node->inlined_to
1017 	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1018     {
1019       reason = "caller frequency is 0";
1020       want_inline = false;
1021     }
1022 
1023   if (!want_inline)
1024     ;
1025   /* Inlining of a self-recursive function into a copy of itself within
1026      another function is a transformation similar to loop peeling.
1027 
1028      Peeling is profitable if we can inline enough copies to make the
1029      probability of an actual call to the self-recursive function very small.
1030      Be sure that the probability of recursion is small.
1031 
1032      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1033      This way the expected number of recursions is at most max_depth.  */
1034   else if (peeling)
1035     {
1036       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1037       int i;
1038       for (i = 1; i < depth; i++)
1039 	max_prob = max_prob * max_prob;
1040       if (edge->sreal_frequency () >= max_prob * caller_freq)
1041 	{
1042 	  reason = "frequency of recursive call is too large";
1043 	  want_inline = false;
1044 	}
1045     }
1046   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
1047      recursion depth is large.  We reduce function call overhead and increase
1048      the chance that things fit in the hardware return predictor.
1049 
1050      Recursive inlining might however increase the cost of stack frame setup,
1051      actually slowing down functions whose recursion tree is wide rather than
1052      deep.
1053 
1054      Deciding reliably on when to do recursive inlining without profile feedback
1055      is tricky.  For now we disable recursive inlining when the probability of
1056      self recursion is low.
1057 
1058      Recursive inlining of a self-recursive call within a loop also results in
1059      large loop depths that generally optimize badly.  We may want to throttle
1060      down inlining in those cases.  In particular this seems to happen in one
1061      of the libstdc++ rb tree methods.  */
1062   else
1063     {
1064       if (edge->sreal_frequency () * 100
1065           <= caller_freq
1066 	     * opt_for_fn (outer_node->decl,
1067 			   param_min_inline_recursive_probability))
1068 	{
1069 	  reason = "frequency of recursive call is too small";
1070 	  want_inline = false;
1071 	}
1072     }
1073   if (!want_inline && dump_enabled_p ())
1074     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1075 		     "   not inlining recursively: %s\n", reason);
1076   return want_inline;
1077 }
1078 
1079 /* Return true when NODE has an uninlinable caller;
1080    set HAS_HOT_CALL if it has a hot call.
1081    Worker for cgraph_for_node_and_aliases.  */
1082 
1083 static bool
1084 check_callers (struct cgraph_node *node, void *has_hot_call)
1085 {
1086   struct cgraph_edge *e;
1087   for (e = node->callers; e; e = e->next_caller)
1088     {
1089       if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1090 	  || !opt_for_fn (e->caller->decl, optimize))
1091 	return true;
1092       if (!can_inline_edge_p (e, true))
1093 	return true;
1094       if (e->recursive_p ())
1095 	return true;
1096       if (!can_inline_edge_by_limits_p (e, true))
1097 	return true;
1098       /* Inlining large functions to a large loop depth is often harmful
1099 	 because of the register pressure it implies.  */
1100       if ((int)ipa_call_summaries->get (e)->loop_depth
1101 	  > param_inline_functions_called_once_loop_depth)
1102 	return true;
1103       /* Do not produce gigantic functions.  */
1104       if (estimate_size_after_inlining (e->caller->inlined_to ?
1105 					e->caller->inlined_to : e->caller, e)
1106 	  > param_inline_functions_called_once_insns)
1107 	return true;
1108       if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1109 	*(bool *)has_hot_call = true;
1110     }
1111   return false;
1112 }
1113 
1114 /* If NODE has a caller, return true.  */
1115 
1116 static bool
1117 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1118 {
1119   if (node->callers)
1120     return true;
1121   return false;
1122 }
1123 
1124 /* Decide if inlining NODE would reduce unit size by eliminating
1125    the offline copy of the function.
1126    When COLD is true, cold calls are considered, too.  */
1127 
1128 static bool
1129 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1130 {
1131   bool has_hot_call = false;
1132 
1133   /* Aliases get inlined along with the function they alias.  */
1134   if (node->alias)
1135     return false;
1136   /* Already inlined?  */
1137   if (node->inlined_to)
1138     return false;
1139   /* Does it have callers?  */
1140   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1141     return false;
1142   /* Inlining into all callers would increase size?  */
1143   if (growth_positive_p (node, NULL, INT_MIN) > 0)
1144     return false;
1145   /* All inlines must be possible.  */
1146   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1147 					 true))
1148     return false;
1149   if (!cold && !has_hot_call)
1150     return false;
1151   return true;
1152 }
1153 
1154 /* Return true if WHERE of SIZE is a possible candidate for the wrapper
1155    heuristics in edge_badness.  */
1156 
1157 static bool
1158 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1159 {
1160   return size < (DECL_DECLARED_INLINE_P (where->decl)
1161 		 ? inline_insns_single (where, false, false)
1162 		 : inline_insns_auto (where, false, false));
1163 }
1164 
1165 /* A cost model driving the inlining heuristics in such a way that edges with
1166    the smallest badness are inlined first.  After each inlining is performed
1167    the costs of all caller edges of the nodes affected are recomputed so the
1168    metrics may accurately depend on values such as the number of inlinable
1169    callers of the function or the function body size.  */
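
/* A rough numeric illustration of the guessed-profile badness computed below
   (all quantities invented): with an estimated time saving of 4, edge growth
   of 10, caller size of 100 and overall callee growth of 20 (squared to 400,
   being below 256), badness comes out near -4 / (10 * 400 * (100 + 10)).
   Edges with smaller (more negative) badness are popped from the heap and
   inlined first.  */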
1170 
1171 static sreal
1172 edge_badness (struct cgraph_edge *edge, bool dump)
1173 {
1174   sreal badness;
1175   int growth;
1176   sreal edge_time, unspec_edge_time;
1177   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1178   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1179   ipa_hints hints;
1180   cgraph_node *caller = (edge->caller->inlined_to
1181 			 ? edge->caller->inlined_to
1182 			 : edge->caller);
1183 
1184   growth = estimate_edge_growth (edge);
1185   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1186   hints = estimate_edge_hints (edge);
1187   gcc_checking_assert (edge_time >= 0);
1188   /* Check that inlined time is better, but tolerate some roundoff issues.
1189      FIXME: When callee profile drops to 0 we account calls more.  This
1190      should be fixed by never doing that.  */
1191   gcc_checking_assert ((edge_time * 100
1192 			- callee_info->time * 101).to_int () <= 0
1193 			|| callee->count.ipa ().initialized_p ());
1194   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1195 
1196   if (dump)
1197     {
1198       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1199 	       edge->caller->dump_name (),
1200 	       edge->callee->dump_name ());
1201       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1202 	       growth,
1203 	       edge_time.to_double (),
1204 	       unspec_edge_time.to_double ());
1205       ipa_dump_hints (dump_file, hints);
1206       if (big_speedup_p (edge))
1207 	fprintf (dump_file, " big_speedup");
1208       fprintf (dump_file, "\n");
1209     }
1210 
1211   /* Always prefer inlining that saves code size.  */
1212   if (growth <= 0)
1213     {
1214       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1215       if (dump)
1216 	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1217 		 growth);
1218     }
1219    /* Inlining into EXTERNAL functions is not going to change anything unless
1220       they are themselves inlined.  */
1221    else if (DECL_EXTERNAL (caller->decl))
1222     {
1223       if (dump)
1224 	fprintf (dump_file, "      max: function is external\n");
1225       return sreal::max ();
1226     }
1227   /* When the profile is available, compute badness as:
1228 
1229                  time_saved * caller_count
1230      goodness =  -------------------------------------------------
1231 	         growth_of_caller * overall_growth * combined_size
1232 
1233      badness = - goodness
1234 
1235      Again use a negative value to make calls with a profile appear hotter
1236      than calls without.
1237   */
1238   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1239 	   || caller->count.ipa ().nonzero_p ())
1240     {
1241       sreal numerator, denominator;
1242       int overall_growth;
1243       sreal freq = edge->sreal_frequency ();
1244 
1245       numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1246       if (numerator <= 0)
1247 	numerator = ((sreal) 1 >> 8);
1248       if (caller->count.ipa ().nonzero_p ())
1249 	numerator *= caller->count.ipa ().to_gcov_type ();
1250       else if (caller->count.ipa ().initialized_p ())
1251 	numerator = numerator >> 11;
1252       denominator = growth;
1253 
1254       overall_growth = callee_info->growth;
1255 
1256       /* Look for inliner wrappers of the form:
1257 
1258 	 inline_caller ()
1259 	   {
1260 	     do_fast_job...
1261 	     if (need_more_work)
1262 	       noninline_callee ();
1263 	   }
1264 	 Without penalizing this case, we usually inline noninline_callee
1265 	 into the inline_caller because overall_growth is small preventing
1266 	 further inlining of inline_caller.
1267 
1268 	 Penalize only callgraph edges to functions with small overall
1269 	 growth ...
1270 	*/
1271       if (growth > overall_growth
1272 	  /* ... and having only one caller which is not inlined ... */
1273 	  && callee_info->single_caller
1274 	  && !edge->caller->inlined_to
1275 	  /* ... and edges executed only conditionally ... */
1276 	  && freq < 1
1277 	  /* ... consider case where callee is not inline but caller is ... */
1278 	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1279 	       && DECL_DECLARED_INLINE_P (caller->decl))
1280 	      /* ... or when early optimizers decided to split and edge
1281 		 frequency still indicates splitting is a win ... */
1282 	      || (callee->split_part && !caller->split_part
1283 		  && freq * 100
1284 			 < opt_for_fn (caller->decl,
1285 				       param_partial_inlining_entry_probability)
1286 		  /* ... and do not overwrite user specified hints.   */
1287 		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1288 		      || DECL_DECLARED_INLINE_P (caller->decl)))))
1289 	{
1290 	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1291 	  int caller_growth = caller_info->growth;
1292 
1293 	  /* Only apply the penalty when the caller looks like an inline
1294 	     candidate and is not called once.  */
1295 	  if (!caller_info->single_caller && overall_growth < caller_growth
1296 	      && caller_info->inlinable
1297 	      && wrapper_heuristics_may_apply
1298 	     	 (caller, ipa_size_summaries->get (caller)->size))
1299 	    {
1300 	      if (dump)
1301 		fprintf (dump_file,
1302 			 "     Wrapper penalty. Increasing growth %i to %i\n",
1303 			 overall_growth, caller_growth);
1304 	      overall_growth = caller_growth;
1305 	    }
1306 	}
1307       if (overall_growth > 0)
1308         {
1309 	  /* Strongly prefer functions with few callers that can be inlined
1310 	     fully.  The squaring here leads to smaller binaries on average.
1311 	     Watch however for extreme cases and return to a linear function
1312 	     when growth is large.  */
1313 	  if (overall_growth < 256)
1314 	    overall_growth *= overall_growth;
1315 	  else
1316 	    overall_growth += 256 * 256 - 256;
1317 	  denominator *= overall_growth;
1318         }
1319       denominator *= ipa_size_summaries->get (caller)->size + growth;
1320 
1321       badness = - numerator / denominator;
1322 
1323       if (dump)
1324 	{
1325 	  fprintf (dump_file,
1326 		   "      %f: guessed profile. frequency %f, count %" PRId64
1327 		   " caller count %" PRId64
1328 		   " time saved %f"
1329 		   " overall growth %i (current) %i (original)"
1330 		   " %i (compensated)\n",
1331 		   badness.to_double (),
1332 		   freq.to_double (),
1333 		   edge->count.ipa ().initialized_p ()
1334 		   ? edge->count.ipa ().to_gcov_type () : -1,
1335 		   caller->count.ipa ().initialized_p ()
1336 		   ? caller->count.ipa ().to_gcov_type () : -1,
1337 		   inlining_speedup (edge, freq, unspec_edge_time,
1338 				     edge_time).to_double (),
1339 		   estimate_growth (callee),
1340 		   callee_info->growth, overall_growth);
1341 	}
1342     }
1343   /* When the function-local profile is not available or does not give
1344      useful information (i.e. the frequency is zero), base the cost on
1345      loop nest and overall size growth, so we optimize for the overall number
1346      of functions fully inlined in the program.  */
1347   else
1348     {
1349       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1350       badness = growth;
1351 
1352       /* Decrease badness if call is nested.  */
1353       if (badness > 0)
1354 	badness = badness >> nest;
1355       else
1356 	badness = badness << nest;
1357       if (dump)
1358 	fprintf (dump_file, "      %f: no profile. nest %i\n",
1359 		 badness.to_double (), nest);
1360     }
1361   gcc_checking_assert (badness != 0);
1362 
1363   if (edge->recursive_p ())
1364     badness = badness.shift (badness > 0 ? 4 : -4);
1365   if ((hints & (INLINE_HINT_indirect_call
1366 		| INLINE_HINT_loop_iterations
1367 		| INLINE_HINT_loop_stride))
1368       || callee_info->growth <= 0)
1369     badness = badness.shift (badness > 0 ? -2 : 2);
1370   if (hints & INLINE_HINT_builtin_constant_p)
1371     badness = badness.shift (badness > 0 ? -4 : 4);
1372   if (hints & (INLINE_HINT_same_scc))
1373     badness = badness.shift (badness > 0 ? 3 : -3);
1374   else if (hints & (INLINE_HINT_in_scc))
1375     badness = badness.shift (badness > 0 ? 2 : -2);
1376   else if (hints & (INLINE_HINT_cross_module))
1377     badness = badness.shift (badness > 0 ? 1 : -1);
1378   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1379     badness = badness.shift (badness > 0 ? -4 : 4);
1380   else if ((hints & INLINE_HINT_declared_inline))
1381     badness = badness.shift (badness > 0 ? -3 : 3);
1382   if (dump)
1383     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1384   return badness;
1385 }
1386 
1387 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1388 static inline void
1389 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1390 {
1391   sreal badness = edge_badness (edge, false);
1392   if (edge->aux)
1393     {
1394       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1395       gcc_checking_assert (n->get_data () == edge);
1396 
1397       /* fibonacci_heap::replace_key does busy updating of the
1398 	 heap that is unnecessarily expensive.
1399 	 We do lazy increases: after extracting the minimum, if the key
1400 	 turns out to be out of date, the edge is re-inserted into the heap
1401 	 with the correct value.  */
1402       if (badness < n->get_key ())
1403 	{
1404 	  if (dump_file && (dump_flags & TDF_DETAILS))
1405 	    {
1406 	      fprintf (dump_file,
1407 		       "  decreasing badness %s -> %s, %f to %f\n",
1408 		       edge->caller->dump_name (),
1409 		       edge->callee->dump_name (),
1410 		       n->get_key ().to_double (),
1411 		       badness.to_double ());
1412 	    }
1413 	  heap->decrease_key (n, badness);
1414 	}
1415     }
1416   else
1417     {
1418        if (dump_file && (dump_flags & TDF_DETAILS))
1419 	 {
1420 	   fprintf (dump_file,
1421 		    "  enqueuing call %s -> %s, badness %f\n",
1422 		    edge->caller->dump_name (),
1423 		    edge->callee->dump_name (),
1424 		    badness.to_double ());
1425 	 }
1426       edge->aux = heap->insert (badness, edge);
1427     }
1428 }
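
/* For illustration (not GCC code): a sketch of the lazy update scheme used
   here.  Only decreases go through decrease_key; an increase is noticed when
   the edge is popped with a stale key and the caller (inline_small_functions)
   re-inserts it with the freshly computed badness.  std::priority_queue and
   the toy current_key array stand in for the fibonacci heap and edge_badness.

     #include <queue>
     #include <vector>
     #include <cstdio>

     struct entry { double key; int id; };
     struct cmp
     {
       bool operator() (const entry &a, const entry &b) const
       { return a.key > b.key; }		// min-heap on key
     };

     static double current_key[2] = { 5.0, 1.0 };	// "real" badness

     int
     main (void)
     {
       std::priority_queue<entry, std::vector<entry>, cmp> heap;
       heap.push ({5.0, 0});
       heap.push ({1.0, 1});
       current_key[1] = 9.0;	// badness increased; the heap key is now stale
       while (!heap.empty ())
	 {
	   entry e = heap.top (); heap.pop ();
	   if (e.key != current_key[e.id])	// stale: re-insert lazily
	     {
	       heap.push ({current_key[e.id], e.id});
	       continue;
	     }
	   printf ("process %i with key %f\n", e.id, e.key);
	 }
       return 0;
     }
*/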
1429 
1430 
1431 /* NODE was inlined.
1432    All caller edges need to be reset because
1433    size estimates change.  Similarly callees need resetting
1434    because a better context may be known.  */
1435 
1436 static void
1437 reset_edge_caches (struct cgraph_node *node)
1438 {
1439   struct cgraph_edge *edge;
1440   struct cgraph_edge *e = node->callees;
1441   struct cgraph_node *where = node;
1442   struct ipa_ref *ref;
1443 
1444   if (where->inlined_to)
1445     where = where->inlined_to;
1446 
1447   reset_node_cache (where);
1448 
1449   if (edge_growth_cache != NULL)
1450     for (edge = where->callers; edge; edge = edge->next_caller)
1451       if (edge->inline_failed)
1452 	edge_growth_cache->remove (edge);
1453 
1454   FOR_EACH_ALIAS (where, ref)
1455     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1456 
1457   if (!e)
1458     return;
1459 
1460   while (true)
1461     if (!e->inline_failed && e->callee->callees)
1462       e = e->callee->callees;
1463     else
1464       {
1465 	if (edge_growth_cache != NULL && e->inline_failed)
1466 	  edge_growth_cache->remove (e);
1467 	if (e->next_callee)
1468 	  e = e->next_callee;
1469 	else
1470 	  {
1471 	    do
1472 	      {
1473 		if (e->caller == node)
1474 		  return;
1475 		e = e->caller->callers;
1476 	      }
1477 	    while (!e->next_callee);
1478 	    e = e->next_callee;
1479 	  }
1480       }
1481 }
1482 
1483 /* Recompute HEAP nodes for each caller of NODE.
1484    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1485    When CHECK_INLINABLITY_FOR is set, re-check only that edge for
1486    inlinability.  Otherwise check all edges.  */
1487 
1488 static void
1489 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1490 		    bitmap updated_nodes,
1491 		    struct cgraph_edge *check_inlinablity_for)
1492 {
1493   struct cgraph_edge *edge;
1494   struct ipa_ref *ref;
1495 
1496   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1497       || node->inlined_to)
1498     return;
1499   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1500     return;
1501 
1502   FOR_EACH_ALIAS (node, ref)
1503     {
1504       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1505       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1506     }
1507 
1508   for (edge = node->callers; edge; edge = edge->next_caller)
1509     if (edge->inline_failed)
1510       {
1511         if (!check_inlinablity_for
1512 	    || check_inlinablity_for == edge)
1513 	  {
1514 	    if (can_inline_edge_p (edge, false)
1515 		&& want_inline_small_function_p (edge, false)
1516 		&& can_inline_edge_by_limits_p (edge, false))
1517 	      update_edge_key (heap, edge);
1518 	    else if (edge->aux)
1519 	      {
1520 		report_inline_failed_reason (edge);
1521 		heap->delete_node ((edge_heap_node_t *) edge->aux);
1522 		edge->aux = NULL;
1523 	      }
1524 	  }
1525 	else if (edge->aux)
1526 	  update_edge_key (heap, edge);
1527       }
1528 }
1529 
1530 /* Recompute HEAP nodes for each uninlined call in NODE.
1531    If UPDATE_SINCE is non-NULL, check whether edges called within that
1532    function are inlinable (typically UPDATE_SINCE is the inline clone we
1533    introduced where all edges have a new context).
1534 
1535    This is used when we know that edge badnesses are only going to increase
1536    (we introduced a new call site) and thus all we need to do is insert the
1537    newly created edges into the heap.  */
1538 
1539 static void
1540 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1541 		    struct cgraph_node *update_since,
1542 		    bitmap updated_nodes)
1543 {
1544   struct cgraph_edge *e = node->callees;
1545   bool check_inlinability = update_since == node;
1546 
1547   if (!e)
1548     return;
1549   while (true)
1550     if (!e->inline_failed && e->callee->callees)
1551       {
1552 	if (e->callee == update_since)
1553 	  check_inlinability = true;
1554         e = e->callee->callees;
1555       }
1556     else
1557       {
1558 	enum availability avail;
1559 	struct cgraph_node *callee;
1560 	if (!check_inlinability)
1561 	  {
1562 	    if (e->aux
1563 		&& !bitmap_bit_p (updated_nodes,
1564 		 		  e->callee->ultimate_alias_target
1565 				    (&avail, e->caller)->get_uid ()))
1566 	      update_edge_key (heap, e);
1567 	  }
1568 	/* We do not reset the callee growth cache here.  Since we added a new call,
1569 	   growth should have just increased and consequently the badness metric
1570 	   does not need updating.  */
1571 	else if (e->inline_failed
1572 		 && (callee = e->callee->ultimate_alias_target (&avail,
1573 		  						e->caller))
1574 		 && avail >= AVAIL_AVAILABLE
1575 		 && ipa_fn_summaries->get (callee) != NULL
1576 		 && ipa_fn_summaries->get (callee)->inlinable
1577 		 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1578 	  {
1579 	    if (can_inline_edge_p (e, false)
1580 		&& want_inline_small_function_p (e, false)
1581 		&& can_inline_edge_by_limits_p (e, false))
1582 	      {
1583 		gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1584 		gcc_checking_assert (check_inlinability || e->aux);
1585 	        update_edge_key (heap, e);
1586 	      }
1587 	    else if (e->aux)
1588 	      {
1589 		report_inline_failed_reason (e);
1590 		heap->delete_node ((edge_heap_node_t *) e->aux);
1591 		e->aux = NULL;
1592 	      }
1593 	  }
1594 	/* In case we redirected to an unreachable node, we only need to remove
1595 	   the fibheap entry.  */
1596 	else if (e->aux)
1597 	  {
1598 	    heap->delete_node ((edge_heap_node_t *) e->aux);
1599 	    e->aux = NULL;
1600 	  }
1601 	if (e->next_callee)
1602 	  e = e->next_callee;
1603 	else
1604 	  {
1605 	    do
1606 	      {
1607 		if (e->caller == node)
1608 		  return;
1609 		if (e->caller == update_since)
1610 		  check_inlinability = false;
1611 		e = e->caller->callers;
1612 	      }
1613 	    while (!e->next_callee);
1614 	    e = e->next_callee;
1615 	  }
1616       }
1617 }
1618 
1619 /* Enqueue all recursive calls from NODE into the priority queue, ordered by
1620    how likely we are to want to recursively inline the call.  */
1621 
1622 static void
1623 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1624 			edge_heap_t *heap)
1625 {
1626   struct cgraph_edge *e;
1627   enum availability avail;
1628 
1629   for (e = where->callees; e; e = e->next_callee)
1630     if (e->callee == node
1631 	|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
1632 	    && avail > AVAIL_INTERPOSABLE))
1633       heap->insert (-e->sreal_frequency (), e);
1634   for (e = where->callees; e; e = e->next_callee)
1635     if (!e->inline_failed)
1636       lookup_recursive_calls (node, e->callee, heap);
1637 }
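
/* For illustration (not GCC code): the key is the negated call frequency, so
   the min-heap above hands back the hottest recursive call first.  A toy
   equivalent using the standard library:

     #include <queue>
     #include <vector>
     #include <functional>
     #include <cstdio>

     int
     main (void)
     {
       typedef std::pair<double, int> keyed;	// <negated frequency, call id>
       std::priority_queue<keyed, std::vector<keyed>, std::greater<keyed> > q;
       q.push (keyed (-0.9, 1));	// hot recursive call
       q.push (keyed (-0.1, 2));	// cold recursive call
       printf ("first: %i\n", q.top ().second);	// prints 1, the hot call
       return 0;
     }
*/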
1638 
1639 /* Decide on recursive inlining: in case the function has recursive calls,
1640    inline until the body size reaches the given limit.  If any new indirect
1641    edges are discovered in the process, add them to *NEW_EDGES, unless
1642    NEW_EDGES is NULL.  */
1643 
1644 static bool
1645 recursive_inlining (struct cgraph_edge *edge,
1646 		    vec<cgraph_edge *> *new_edges)
1647 {
1648   cgraph_node *to  = (edge->caller->inlined_to
1649 		      ? edge->caller->inlined_to : edge->caller);
1650   int limit = opt_for_fn (to->decl,
1651 			  param_max_inline_insns_recursive_auto);
1652   edge_heap_t heap (sreal::min ());
1653   struct cgraph_node *node;
1654   struct cgraph_edge *e;
1655   struct cgraph_node *master_clone = NULL, *next;
1656   int depth = 0;
1657   int n = 0;
1658 
1659   node = edge->caller;
1660   if (node->inlined_to)
1661     node = node->inlined_to;
1662 
1663   if (DECL_DECLARED_INLINE_P (node->decl))
1664     limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1665 
1666   /* Make sure the function is small enough to be considered for inlining.  */
1667   if (estimate_size_after_inlining (node, edge) >= limit)
1668     return false;
1669   lookup_recursive_calls (node, node, &heap);
1670   if (heap.empty ())
1671     return false;
1672 
1673   if (dump_file)
1674     fprintf (dump_file,
1675 	     "  Performing recursive inlining on %s\n", node->dump_name ());
1676 
1677   /* Do the inlining and update the list of recursive calls as we go.  */
1678   while (!heap.empty ())
1679     {
1680       struct cgraph_edge *curr = heap.extract_min ();
1681       struct cgraph_node *cnode, *dest = curr->callee;
1682 
1683       if (!can_inline_edge_p (curr, true)
1684 	  || !can_inline_edge_by_limits_p (curr, true))
1685 	continue;
1686 
1687       /* MASTER_CLONE is produced in the case we already started modifying
1688	 the function.  Be sure to redirect the edge to the original body before
1689	 estimating growth; otherwise we would be seeing growth after inlining
1690	 the already modified body.  */
1691       if (master_clone)
1692 	{
1693 	  curr->redirect_callee (master_clone);
1694 	  if (edge_growth_cache != NULL)
1695 	    edge_growth_cache->remove (curr);
1696 	}
1697 
1698       if (estimate_size_after_inlining (node, curr) > limit)
1699 	{
1700 	  curr->redirect_callee (dest);
1701 	  if (edge_growth_cache != NULL)
1702 	    edge_growth_cache->remove (curr);
1703 	  break;
1704 	}
1705 
1706       depth = 1;
1707       for (cnode = curr->caller;
1708 	   cnode->inlined_to; cnode = cnode->callers->caller)
1709 	if (node->decl
1710 	    == curr->callee->ultimate_alias_target ()->decl)
1711           depth++;
1712 
1713       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1714 	{
1715 	  curr->redirect_callee (dest);
1716 	  if (edge_growth_cache != NULL)
1717 	    edge_growth_cache->remove (curr);
1718 	  continue;
1719 	}
1720 
1721       if (dump_file)
1722 	{
1723 	  fprintf (dump_file,
1724 		   "   Inlining call of depth %i", depth);
1725 	  if (node->count.nonzero_p () && curr->count.initialized_p ())
1726 	    {
1727 	      fprintf (dump_file, " called approx. %.2f times per call",
1728 		       (double)curr->count.to_gcov_type ()
1729 		       / node->count.to_gcov_type ());
1730 	    }
1731 	  fprintf (dump_file, "\n");
1732 	}
1733       if (!master_clone)
1734 	{
1735 	  /* We need original clone to copy around.  */
1736 	  master_clone = node->create_clone (node->decl, node->count,
1737 	    false, vNULL, true, NULL, NULL);
1738 	  for (e = master_clone->callees; e; e = e->next_callee)
1739 	    if (!e->inline_failed)
1740 	      clone_inlined_nodes (e, true, false, NULL);
1741 	  curr->redirect_callee (master_clone);
1742 	  if (edge_growth_cache != NULL)
1743 	    edge_growth_cache->remove (curr);
1744 	}
1745 
1746       inline_call (curr, false, new_edges, &overall_size, true);
1747       reset_node_cache (node);
1748       lookup_recursive_calls (node, curr->callee, &heap);
1749       n++;
1750     }
1751 
1752   if (!heap.empty () && dump_file)
1753     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1754 
1755   if (!master_clone)
1756     return false;
1757 
1758   if (dump_enabled_p ())
1759     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1760 		     "\n   Inlined %i times, "
1761 		     "body grown from size %i to %i, time %f to %f\n", n,
1762 		     ipa_size_summaries->get (master_clone)->size,
1763 		     ipa_size_summaries->get (node)->size,
1764 		     ipa_fn_summaries->get (master_clone)->time.to_double (),
1765 		     ipa_fn_summaries->get (node)->time.to_double ());
1766 
1767   /* Remove the master clone we used for inlining.  We rely on the fact that
1768      clones inlined into the master clone get queued just before the master
1769      clone, so we don't need recursion.  */
1770   for (node = symtab->first_function (); node != master_clone;
1771        node = next)
1772     {
1773       next = symtab->next_function (node);
1774       if (node->inlined_to == master_clone)
1775 	node->remove ();
1776     }
1777   master_clone->remove ();
1778   return true;
1779 }
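
/* A hand-written sketch (not produced by GCC) of what the loop above achieves
   on a self-recursive function: the recursive calls are repeatedly redirected
   to an unmodified master clone and inlined, which is roughly equivalent to
   unrolling the recursion a few levels deep.

     int fact (int n)			// original, calls itself
     {
       return n <= 1 ? 1 : n * fact (n - 1);
     }

     int fact_unrolled (int n)		// shape after two inline steps
     {
       if (n <= 1) return 1;
       int n1 = n - 1;
       if (n1 <= 1) return n * 1;
       int n2 = n1 - 1;
       return n * (n1 * fact (n2));	// remaining call goes to the offline copy
     }
*/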
1780 
1781 
1782 /* Given the whole-compilation-unit size estimate INSNS, compute how large we
1783    can allow the unit to grow.  */
1784 
1785 static int64_t
1786 compute_max_insns (cgraph_node *node, int insns)
1787 {
1788   int max_insns = insns;
1789   if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1790     max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1791 
1792   return ((int64_t) max_insns
1793 	  * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1794 }
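
/* A worked example of the limit above; the parameter values are illustrative
   only, the real ones are read with opt_for_fn.

     #include <stdio.h>
     #include <stdint.h>

     static int64_t
     max_insns_sketch (int insns, int large_unit, int unit_growth_pct)
     {
       int max_insns = insns < large_unit ? large_unit : insns;
       return (int64_t) max_insns * (100 + unit_growth_pct) / 100;
     }

     int
     main (void)
     {
       // a 10000-insn unit with 40% allowed growth may reach 14000 insns
       printf ("%lld\n", (long long) max_insns_sketch (10000, 10000, 40));
       // a tiny unit is first rounded up to the large-unit floor, same limit
       printf ("%lld\n", (long long) max_insns_sketch (500, 10000, 40));
       return 0;
     }
*/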
1795 
1796 
1797 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1798 
1799 static void
1800 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1801 {
1802   while (new_edges.length () > 0)
1803     {
1804       struct cgraph_edge *edge = new_edges.pop ();
1805 
1806       gcc_assert (!edge->aux);
1807       gcc_assert (edge->callee);
1808       if (edge->inline_failed
1809 	  && can_inline_edge_p (edge, true)
1810 	  && want_inline_small_function_p (edge, true)
1811 	  && can_inline_edge_by_limits_p (edge, true))
1812         edge->aux = heap->insert (edge_badness (edge, false), edge);
1813     }
1814 }
1815 
1816 /* Remove EDGE from the fibheap.  */
1817 
1818 static void
1819 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1820 {
1821   if (e->aux)
1822     {
1823       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1824       e->aux = NULL;
1825     }
1826 }
1827 
1828 /* Return true if speculation of edge E seems useful.
1829    If ANTICIPATE_INLINING is true, be conservative and hope that E
1830    may get inlined.  */
1831 
1832 bool
1833 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1834 {
1835   /* If we have already decided to inline the edge, it seems useful.  */
1836   if (!e->inline_failed)
1837     return true;
1838 
1839   enum availability avail;
1840   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1841 								 e->caller);
1842 
1843   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1844 
1845   if (!e->maybe_hot_p ())
1846     return false;
1847 
1848   /* See if IP optimizations found something potentially useful about the
1849      function.  For now we look only for CONST/PURE flags.  Almost everything
1850      else we propagate is useless.  */
1851   if (avail >= AVAIL_AVAILABLE)
1852     {
1853       int ecf_flags = flags_from_decl_or_type (target->decl);
1854       if (ecf_flags & ECF_CONST)
1855         {
1856 	  if (!(e->speculative_call_indirect_edge ()->indirect_info
1857 		->ecf_flags & ECF_CONST))
1858 	    return true;
1859         }
1860       else if (ecf_flags & ECF_PURE)
1861         {
1862 	  if (!(e->speculative_call_indirect_edge ()->indirect_info
1863 		->ecf_flags & ECF_PURE))
1864 	    return true;
1865         }
1866     }
1867   /* If we did not manage to inline the function nor redirect
1868      to an ipa-cp clone (those are recognized by having the local flag set),
1869      it is probably pointless to inline it unless the hardware is missing an
1870      indirect call predictor.  */
1871   if (!anticipate_inlining && !target->local)
1872     return false;
1873   /* For overwritable targets there is not much to do.  */
1874   if (!can_inline_edge_p (e, false)
1875       || !can_inline_edge_by_limits_p (e, false, true))
1876     return false;
1877   /* OK, speculation seems interesting.  */
1878   return true;
1879 }
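
/* For readers unfamiliar with speculative edges, a rough sketch of the source
   level transformation they describe (the function names are made up; GCC
   performs the rewrite internally for indirect and virtual calls):

     extern void likely_hook (void);

     void call_hook (void (*hook) (void))
     {
       hook ();				// indirect call, callee unknown
     }

     void call_hook_speculated (void (*hook) (void))
     {
       if (hook == likely_hook)		// guard added by speculation
	 likely_hook ();		// direct call, may later be inlined
       else
	 hook ();			// fallback indirect call
     }

   speculation_useful_p decides whether keeping such a guard pays off when the
   direct call does not end up inlined.  */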
1880 
1881 /* We know that EDGE is not going to be inlined.
1882    See if we can remove speculation.  */
1883 
1884 static void
1885 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1886 {
1887   if (edge->speculative && !speculation_useful_p (edge, false))
1888     {
1889       struct cgraph_node *node = edge->caller;
1890       struct cgraph_node *where = node->inlined_to
1891 				  ? node->inlined_to : node;
1892       auto_bitmap updated_nodes;
1893 
1894       if (edge->count.ipa ().initialized_p ())
1895         spec_rem += edge->count.ipa ();
1896       cgraph_edge::resolve_speculation (edge);
1897       reset_edge_caches (where);
1898       ipa_update_overall_fn_summary (where);
1899       update_caller_keys (edge_heap, where,
1900 			  updated_nodes, NULL);
1901       update_callee_keys (edge_heap, where, NULL,
1902 			  updated_nodes);
1903     }
1904 }
1905 
1906 /* Return true if NODE should be accounted for in the overall size estimate.
1907    Skip all nodes optimized for size so we can measure the growth of the hot
1908    part of the program regardless of padding.  */
1909 
1910 bool
1911 inline_account_function_p (struct cgraph_node *node)
1912 {
1913    return (!DECL_EXTERNAL (node->decl)
1914 	   && !opt_for_fn (node->decl, optimize_size)
1915 	   && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1916 }
1917 
1918 /* Count the number of callers of NODE and store it into DATA (which
1919    points to an int).  Worker for cgraph_for_node_and_aliases.  */
1920 
1921 static bool
1922 sum_callers (struct cgraph_node *node, void *data)
1923 {
1924   struct cgraph_edge *e;
1925   int *num_calls = (int *)data;
1926 
1927   for (e = node->callers; e; e = e->next_caller)
1928     (*num_calls)++;
1929   return false;
1930 }
1931 
1932 /* We only propagate across edges with non-interposable callee.  */
1933 
1934 inline bool
1935 ignore_edge_p (struct cgraph_edge *e)
1936 {
1937   enum availability avail;
1938   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1939   return (avail <= AVAIL_INTERPOSABLE);
1940 }
1941 
1942 /* We use a greedy algorithm for inlining small functions:
1943    all inline candidates are put into a prioritized heap ordered by
1944    increasing badness.
1945 
1946    The inlining of small functions is bounded by the unit growth parameters.  */
1947 
1948 static void
1949 inline_small_functions (void)
1950 {
1951   struct cgraph_node *node;
1952   struct cgraph_edge *edge;
1953   edge_heap_t edge_heap (sreal::min ());
1954   auto_bitmap updated_nodes;
1955   int min_size;
1956   auto_vec<cgraph_edge *> new_indirect_edges;
1957   int initial_size = 0;
1958   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1959   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1960   new_indirect_edges.create (8);
1961 
1962   edge_removal_hook_holder
1963     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1964 
1965   /* Compute overall unit size and other global parameters used by badness
1966      metrics.  */
1967 
1968   max_count = profile_count::uninitialized ();
1969   ipa_reduced_postorder (order, true, ignore_edge_p);
1970   free (order);
1971 
1972   FOR_EACH_DEFINED_FUNCTION (node)
1973     if (!node->inlined_to)
1974       {
1975 	if (!node->alias && node->analyzed
1976 	    && (node->has_gimple_body_p () || node->thunk)
1977 	    && opt_for_fn (node->decl, optimize))
1978 	  {
1979 	    class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1980 	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1981 
1982 	    /* Do not account external functions; they will be optimized out
1983 	       if not inlined.  Also only count the non-cold portion of the program.  */
1984 	    if (inline_account_function_p (node))
1985 	      initial_size += ipa_size_summaries->get (node)->size;
1986 	    info->growth = estimate_growth (node);
1987 
1988 	    int num_calls = 0;
1989 	    node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1990 					       true);
1991 	    if (num_calls == 1)
1992 	      info->single_caller = true;
1993 	    if (dfs && dfs->next_cycle)
1994 	      {
1995 		struct cgraph_node *n2;
1996 		int id = dfs->scc_no + 1;
1997 		for (n2 = node; n2;
1998 		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1999 		  if (opt_for_fn (n2->decl, optimize))
2000 		    {
2001 		      ipa_fn_summary *info2 = ipa_fn_summaries->get
2002 			 (n2->inlined_to ? n2->inlined_to : n2);
2003 		      if (info2->scc_no)
2004 			break;
2005 		      info2->scc_no = id;
2006 		    }
2007 	      }
2008 	  }
2009 
2010 	for (edge = node->callers; edge; edge = edge->next_caller)
2011 	  max_count = max_count.max (edge->count.ipa ());
2012       }
2013   ipa_free_postorder_info ();
2014   initialize_growth_caches ();
2015 
2016   if (dump_file)
2017     fprintf (dump_file,
2018 	     "\nDeciding on inlining of small functions.  Starting with size %i.\n",
2019 	     initial_size);
2020 
2021   overall_size = initial_size;
2022   min_size = overall_size;
2023 
2024   /* Populate the heap with all edges we might inline.  */
2025 
2026   FOR_EACH_DEFINED_FUNCTION (node)
2027     {
2028       bool update = false;
2029       struct cgraph_edge *next = NULL;
2030       bool has_speculative = false;
2031 
2032       if (!opt_for_fn (node->decl, optimize)
2033 	  /* With -Og we do not want to perform IPA inlining of small
2034 	     functions since there are no scalar cleanups after it
2035 	     that would realize the anticipated win.  All abstraction
2036 	     is removed during early inlining.  */
2037 	  || opt_for_fn (node->decl, optimize_debug))
2038 	continue;
2039 
2040       if (dump_file)
2041 	fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2042 
2043       for (edge = node->callees; edge; edge = edge->next_callee)
2044 	{
2045 	  if (edge->inline_failed
2046 	      && !edge->aux
2047 	      && can_inline_edge_p (edge, true)
2048 	      && want_inline_small_function_p (edge, true)
2049 	      && can_inline_edge_by_limits_p (edge, true)
2050 	      && edge->inline_failed)
2051 	    {
2052 	      gcc_assert (!edge->aux);
2053 	      update_edge_key (&edge_heap, edge);
2054 	    }
2055 	  if (edge->speculative)
2056 	    has_speculative = true;
2057 	}
2058       if (has_speculative)
2059 	for (edge = node->callees; edge; edge = next)
2060 	  {
2061 	    next = edge->next_callee;
2062 	    if (edge->speculative
2063 		&& !speculation_useful_p (edge, edge->aux != NULL))
2064 	      {
2065 		cgraph_edge::resolve_speculation (edge);
2066 		update = true;
2067 	      }
2068 	  }
2069       if (update)
2070 	{
2071 	  struct cgraph_node *where = node->inlined_to
2072 				      ? node->inlined_to : node;
2073 	  ipa_update_overall_fn_summary (where);
2074 	  reset_edge_caches (where);
2075           update_caller_keys (&edge_heap, where,
2076 			      updated_nodes, NULL);
2077           update_callee_keys (&edge_heap, where, NULL,
2078 			      updated_nodes);
2079           bitmap_clear (updated_nodes);
2080 	}
2081     }
2082 
2083   gcc_assert (in_lto_p
2084 	      || !(max_count > 0)
2085 	      || (profile_info && flag_branch_probabilities));
2086 
2087   while (!edge_heap.empty ())
2088     {
2089       int old_size = overall_size;
2090       struct cgraph_node *where, *callee;
2091       sreal badness = edge_heap.min_key ();
2092       sreal current_badness;
2093       int growth;
2094 
2095       edge = edge_heap.extract_min ();
2096       gcc_assert (edge->aux);
2097       edge->aux = NULL;
2098       if (!edge->inline_failed || !edge->callee->analyzed)
2099 	continue;
2100 
2101       /* Be sure that caches are maintained consistently.
2102	 This check is affected by scaling roundoff errors when compiling for
2103	 IPA, so we skip it in that case.  */
2104       if (flag_checking && !edge->callee->count.ipa_p ()
2105 	  && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2106 	{
2107 	  sreal cached_badness = edge_badness (edge, false);
2108 
2109 	  int old_size_est = estimate_edge_size (edge);
2110 	  sreal old_time_est = estimate_edge_time (edge);
2111 	  int old_hints_est = estimate_edge_hints (edge);
2112 
2113 	  if (edge_growth_cache != NULL)
2114 	    edge_growth_cache->remove (edge);
2115 	  reset_node_cache (edge->caller->inlined_to
2116 			    ? edge->caller->inlined_to
2117 			    : edge->caller);
2118 	  gcc_assert (old_size_est == estimate_edge_size (edge));
2119 	  gcc_assert (old_time_est == estimate_edge_time (edge));
2120 	  /* FIXME:
2121 
2122 	     gcc_assert (old_hints_est == estimate_edge_hints (edge));
2123 
2124 	     fails with profile feedback because some hints depend on the
2125 	     maybe_hot_edge_p predicate and, because the callee gets inlined into
2126 	     other calls, the edge may become cold.
2127 	     This ought to be fixed by computing relative probabilities
2128 	     for the given invocation, but that will be better done once the whole
2129 	     code is converted to sreals.  Disable for now and revert to the "wrong"
2130 	     value so enable/disable checking paths agree.  */
2131 	  edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2132 
2133 	  /* When updating the edge costs, we only decrease badness in the keys.
2134 	     Increases of badness are handled lazily; when we see a key with an
2135 	     out-of-date value, we re-insert the edge into the heap now.  */
2136 	  current_badness = edge_badness (edge, false);
2137 	  gcc_assert (cached_badness == current_badness);
2138 	  gcc_assert (current_badness >= badness);
2139 	}
2140       else
2141         current_badness = edge_badness (edge, false);
2142       if (current_badness != badness)
2143 	{
2144 	  if (edge_heap.min () && current_badness > edge_heap.min_key ())
2145 	    {
2146 	      edge->aux = edge_heap.insert (current_badness, edge);
2147 	      continue;
2148 	    }
2149 	  else
2150 	    badness = current_badness;
2151 	}
2152 
2153       if (!can_inline_edge_p (edge, true)
2154 	  || !can_inline_edge_by_limits_p (edge, true))
2155 	{
2156 	  resolve_noninline_speculation (&edge_heap, edge);
2157 	  continue;
2158 	}
2159 
2160       callee = edge->callee->ultimate_alias_target ();
2161       growth = estimate_edge_growth (edge);
2162       if (dump_file)
2163 	{
2164 	  fprintf (dump_file,
2165 		   "\nConsidering %s with %i size\n",
2166 		   callee->dump_name (),
2167 		   ipa_size_summaries->get (callee)->size);
2168 	  fprintf (dump_file,
2169 		   " to be inlined into %s in %s:%i\n"
2170 		   " Estimated badness is %f, frequency %.2f.\n",
2171 		   edge->caller->dump_name (),
2172 		   edge->call_stmt
2173 		   && (LOCATION_LOCUS (gimple_location ((const gimple *)
2174 							edge->call_stmt))
2175 		       > BUILTINS_LOCATION)
2176 		   ? gimple_filename ((const gimple *) edge->call_stmt)
2177 		   : "unknown",
2178 		   edge->call_stmt
2179 		   ? gimple_lineno ((const gimple *) edge->call_stmt)
2180 		   : -1,
2181 		   badness.to_double (),
2182 		   edge->sreal_frequency ().to_double ());
2183 	  if (edge->count.ipa ().initialized_p ())
2184 	    {
2185 	      fprintf (dump_file, " Called ");
2186 	      edge->count.ipa ().dump (dump_file);
2187 	      fprintf (dump_file, " times\n");
2188             }
2189 	  if (dump_flags & TDF_DETAILS)
2190 	    edge_badness (edge, true);
2191 	}
2192 
2193       where = edge->caller;
2194 
2195       if (overall_size + growth > compute_max_insns (where, min_size)
2196 	  && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2197 	{
2198 	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2199 	  report_inline_failed_reason (edge);
2200 	  resolve_noninline_speculation (&edge_heap, edge);
2201 	  continue;
2202 	}
2203 
2204       if (!want_inline_small_function_p (edge, true))
2205 	{
2206 	  resolve_noninline_speculation (&edge_heap, edge);
2207 	  continue;
2208 	}
2209 
2210       profile_count old_count = callee->count;
2211 
2212       /* Heuristics for inlining small functions work poorly for
2213	 recursive calls, where the effect is similar to loop unrolling.
2214	 When inlining such an edge seems profitable, leave the decision to
2215	 the specialized recursive inliner.  */
2216       if (edge->recursive_p ())
2217 	{
2218 	  if (where->inlined_to)
2219 	    where = where->inlined_to;
2220 	  if (!recursive_inlining (edge,
2221 				   opt_for_fn (edge->caller->decl,
2222 					       flag_indirect_inlining)
2223 				   ? &new_indirect_edges : NULL))
2224 	    {
2225 	      edge->inline_failed = CIF_RECURSIVE_INLINING;
2226 	      resolve_noninline_speculation (&edge_heap, edge);
2227 	      continue;
2228 	    }
2229 	  reset_edge_caches (where);
2230 	  /* Recursive inliner inlines all recursive calls of the function
2231 	     at once. Consequently we need to update all callee keys.  */
2232 	  if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2233 	    add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2234           update_callee_keys (&edge_heap, where, where, updated_nodes);
2235 	  bitmap_clear (updated_nodes);
2236 	}
2237       else
2238 	{
2239 	  struct cgraph_node *outer_node = NULL;
2240 	  int depth = 0;
2241 
2242 	  /* Consider the case where the self-recursive function A is inlined
2243	     into B.  This is a desired optimization in some cases, since it
2244	     leads to an effect similar to loop peeling and we might completely
2245	     optimize out the recursive call.  However we must be extra
2246	     selective.  */
2247 
2248 	  where = edge->caller;
2249 	  while (where->inlined_to)
2250 	    {
2251 	      if (where->decl == callee->decl)
2252 		outer_node = where, depth++;
2253 	      where = where->callers->caller;
2254 	    }
2255 	  if (outer_node
2256 	      && !want_inline_self_recursive_call_p (edge, outer_node,
2257 						     true, depth))
2258 	    {
2259 	      edge->inline_failed
2260 		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2261 		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2262 	      resolve_noninline_speculation (&edge_heap, edge);
2263 	      continue;
2264 	    }
2265 	  else if (depth && dump_file)
2266 	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2267 
2268 	  gcc_checking_assert (!callee->inlined_to);
2269 
2270 	  int old_size = ipa_size_summaries->get (where)->size;
2271 	  sreal old_time = ipa_fn_summaries->get (where)->time;
2272 
2273 	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2274 	  reset_edge_caches (edge->callee);
2275 	  add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2276 
2277 	  /* If the caller's size and time increased, we do not need to update
2278 	     all edges because badness is not going to decrease.  */
2279 	  if (old_size <= ipa_size_summaries->get (where)->size
2280 	      && old_time <= ipa_fn_summaries->get (where)->time
2281 	      /* The wrapper penalty may be non-monotonic in this respect.
2282 	         Fortunately it only affects small functions.  */
2283 	      && !wrapper_heuristics_may_apply (where, old_size))
2284 	    update_callee_keys (&edge_heap, edge->callee, edge->callee,
2285 			   	updated_nodes);
2286 	  else
2287 	    update_callee_keys (&edge_heap, where,
2288 				edge->callee,
2289 			   	updated_nodes);
2290 	}
2291       where = edge->caller;
2292       if (where->inlined_to)
2293 	where = where->inlined_to;
2294 
2295       /* Our profitability metric can depend on local properties
2296	 such as the number of inlinable calls and the size of the function body.
2297	 After inlining these properties might change for the function we
2298	 inlined into (since its body size changed) and for the functions
2299	 called by the function we inlined (since the number of its inlinable
2300	 callers might change).  */
2301       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2302       /* Offline copy count has possibly changed, recompute if profile is
2303 	 available.  */
2304       struct cgraph_node *n
2305 	      = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2306       if (n != edge->callee && n->analyzed && !(n->count == old_count)
2307 	  && n->count.ipa_p ())
2308 	update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2309       bitmap_clear (updated_nodes);
2310 
2311       if (dump_enabled_p ())
2312 	{
2313 	  ipa_fn_summary *s = ipa_fn_summaries->get (where);
2314 
2315 	  /* dump_printf can't handle %+i.  */
2316 	  char buf_net_change[100];
2317 	  snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2318 		    overall_size - old_size);
2319 
2320 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2321 			   " Inlined %C into %C which now has time %f and "
2322 			   "size %i, net change of %s%s.\n",
2323 			   edge->callee, edge->caller,
2324 			   s->time.to_double (),
2325 			   ipa_size_summaries->get (edge->caller)->size,
2326 			   buf_net_change,
2327 			   cross_module_call_p (edge) ? " (cross module)":"");
2328 	}
2329       if (min_size > overall_size)
2330 	{
2331 	  min_size = overall_size;
2332 
2333 	  if (dump_file)
2334 	    fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2335 	}
2336     }
2337 
2338   free_growth_caches ();
2339   if (dump_enabled_p ())
2340     dump_printf (MSG_NOTE,
2341 		 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2342 		 initial_size, overall_size,
2343 		 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2344   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2345 }
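
/* The "Unit growth" percentage printed above is just
   overall_size * 100 / initial_size - 100.  A standalone check with made-up
   numbers:

     #include <stdio.h>

     int
     main (void)
     {
       int initial_size = 12000, overall_size = 13800;
       int growth = initial_size
		    ? overall_size * 100 / initial_size - 100 : 0;
       printf ("Unit growth: %i->%i (%i%%)\n",
	       initial_size, overall_size, growth);	// prints 15%
       return 0;
     }
*/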
2346 
2347 /* Flatten NODE.  Performed both during early inlining and
2348    at IPA inlining time.  */
2349 
2350 static void
2351 flatten_function (struct cgraph_node *node, bool early, bool update)
2352 {
2353   struct cgraph_edge *e;
2354 
2355   /* We shouldn't be called recursively when we are being processed.  */
2356   gcc_assert (node->aux == NULL);
2357 
2358   node->aux = (void *) node;
2359 
2360   for (e = node->callees; e; e = e->next_callee)
2361     {
2362       struct cgraph_node *orig_callee;
2363       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2364 
2365       /* Have we hit a cycle?  It is time to give up.  */
2366       if (callee->aux)
2367 	{
2368 	  if (dump_enabled_p ())
2369 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2370 			     "Not inlining %C into %C to avoid cycle.\n",
2371 			     callee, e->caller);
2372 	  if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2373 	    e->inline_failed = CIF_RECURSIVE_INLINING;
2374 	  continue;
2375 	}
2376 
2377       /* When the edge is already inlined, we just need to recurse into
2378 	 it in order to fully flatten the leaves.  */
2379       if (!e->inline_failed)
2380 	{
2381 	  flatten_function (callee, early, false);
2382 	  continue;
2383 	}
2384 
2385       /* The flatten attribute needs to be processed during late inlining.  For
2386	 extra code quality, however, we also do flattening during early
2387	 optimization.  */
2388       if (!early
2389 	  ? !can_inline_edge_p (e, true)
2390 	    && !can_inline_edge_by_limits_p (e, true)
2391 	  : !can_early_inline_edge_p (e))
2392 	continue;
2393 
2394       if (e->recursive_p ())
2395 	{
2396 	  if (dump_enabled_p ())
2397 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2398 			     "Not inlining: recursive call.\n");
2399 	  continue;
2400 	}
2401 
2402       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2403 	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2404 	{
2405 	  if (dump_enabled_p ())
2406 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2407 			     "Not inlining: SSA form does not match.\n");
2408 	  continue;
2409 	}
2410 
2411       /* Inline the edge and flatten the inline clone.  Avoid
2412          recursing through the original node if the node was cloned.  */
2413       if (dump_enabled_p ())
2414 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2415 			 " Inlining %C into %C.\n",
2416 			 callee, e->caller);
2417       orig_callee = callee;
2418       inline_call (e, true, NULL, NULL, false);
2419       if (e->callee != orig_callee)
2420 	orig_callee->aux = (void *) node;
2421       flatten_function (e->callee, early, false);
2422       if (e->callee != orig_callee)
2423 	orig_callee->aux = NULL;
2424     }
2425 
2426   node->aux = NULL;
2427   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2428   if (update && opt_for_fn (where->decl, optimize))
2429     ipa_update_overall_fn_summary (where);
2430 }
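
/* A usage example of the attribute this function implements (user code, not
   part of GCC): every call reachable from the flattened function is inlined
   into its body, except for cycles and calls that cannot be inlined.

     static int add (int a, int b) { return a + b; }
     static int mul (int a, int b) { return a * b; }

     __attribute__ ((flatten))
     int dot2 (const int *x, const int *y)
     {
       // both mul calls and the add call are inlined into dot2
       return add (mul (x[0], y[0]), mul (x[1], y[1]));
     }
*/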
2431 
2432 /* Inline NODE into all callers.  Worker for cgraph_for_node_and_aliases.
2433    DATA points to the number of calls originally found, so we avoid infinite
2434    recursion.  */
2435 
2436 static bool
2437 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2438 			 hash_set<cgraph_node *> *callers)
2439 {
2440   int *num_calls = (int *)data;
2441   bool callee_removed = false;
2442 
2443   while (node->callers && !node->inlined_to)
2444     {
2445       struct cgraph_node *caller = node->callers->caller;
2446 
2447       if (!can_inline_edge_p (node->callers, true)
2448 	  || !can_inline_edge_by_limits_p (node->callers, true)
2449 	  || node->callers->recursive_p ())
2450 	{
2451 	  if (dump_file)
2452 	    fprintf (dump_file, "Uninlinable call found; giving up.\n");
2453 	  *num_calls = 0;
2454 	  return false;
2455 	}
2456 
2457       if (dump_file)
2458 	{
2459 	  cgraph_node *ultimate = node->ultimate_alias_target ();
2460 	  fprintf (dump_file,
2461 		   "\nInlining %s size %i.\n",
2462 		   ultimate->dump_name (),
2463 		   ipa_size_summaries->get (ultimate)->size);
2464 	  fprintf (dump_file,
2465 		   " Called once from %s %i insns.\n",
2466 		   node->callers->caller->dump_name (),
2467 		   ipa_size_summaries->get (node->callers->caller)->size);
2468 	}
2469 
2470       /* Remember which callers we inlined to, delaying updating the
2471 	 overall summary.  */
2472       callers->add (node->callers->caller);
2473       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2474       if (dump_file)
2475 	fprintf (dump_file,
2476 		 " Inlined into %s which now has %i size\n",
2477 		 caller->dump_name (),
2478 		 ipa_size_summaries->get (caller)->size);
2479       if (!(*num_calls)--)
2480 	{
2481 	  if (dump_file)
2482 	    fprintf (dump_file, "New calls found; giving up.\n");
2483 	  return callee_removed;
2484 	}
2485       if (callee_removed)
2486 	return true;
2487     }
2488   return false;
2489 }
2490 
2491 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2492    update.  */
2493 
2494 static bool
2495 inline_to_all_callers (struct cgraph_node *node, void *data)
2496 {
2497   hash_set<cgraph_node *> callers;
2498   bool res = inline_to_all_callers_1 (node, data, &callers);
2499   /* Perform the delayed update of the overall summary of all callers
2500      processed.  This avoids quadratic behavior in the cases where
2501      we have a lot of calls to the same function.  */
2502   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2503        i != callers.end (); ++i)
2504     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2505   return res;
2506 }
2507 
2508 /* Output overall time estimate.  */
2509 static void
2510 dump_overall_stats (void)
2511 {
2512   sreal sum_weighted = 0, sum = 0;
2513   struct cgraph_node *node;
2514 
2515   FOR_EACH_DEFINED_FUNCTION (node)
2516     if (!node->inlined_to
2517 	&& !node->alias)
2518       {
2519 	ipa_fn_summary *s = ipa_fn_summaries->get (node);
2520 	if (s != NULL)
2521 	  {
2522 	  sum += s->time;
2523 	  if (node->count.ipa ().initialized_p ())
2524 	    sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2525 	  }
2526       }
2527   fprintf (dump_file, "Overall time estimate: "
2528 	   "%f weighted by profile: "
2529 	   "%f\n", sum.to_double (), sum_weighted.to_double ());
2530 }
2531 
2532 /* Output some useful stats about inlining.  */
2533 
2534 static void
2535 dump_inline_stats (void)
2536 {
2537   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2538   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2539   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2540   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2541   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2542   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2543   int64_t reason[CIF_N_REASONS][2];
2544   sreal reason_freq[CIF_N_REASONS];
2545   int i;
2546   struct cgraph_node *node;
2547 
2548   memset (reason, 0, sizeof (reason));
2549   for (i=0; i < CIF_N_REASONS; i++)
2550     reason_freq[i] = 0;
2551   FOR_EACH_DEFINED_FUNCTION (node)
2552   {
2553     struct cgraph_edge *e;
2554     for (e = node->callees; e; e = e->next_callee)
2555       {
2556 	if (e->inline_failed)
2557 	  {
2558 	    if (e->count.ipa ().initialized_p ())
2559 	      reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2560 	    reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2561 	    reason[(int) e->inline_failed][1] ++;
2562 	    if (DECL_VIRTUAL_P (e->callee->decl)
2563 		&& e->count.ipa ().initialized_p ())
2564 	      {
2565 		if (e->indirect_inlining_edge)
2566 		  noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2567 		else
2568 		  noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2569 	      }
2570 	    else if (e->count.ipa ().initialized_p ())
2571 	      {
2572 		if (e->indirect_inlining_edge)
2573 		  noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2574 		else
2575 		  noninlined_cnt += e->count.ipa ().to_gcov_type ();
2576 	      }
2577 	  }
2578 	else if (e->count.ipa ().initialized_p ())
2579 	  {
2580 	    if (e->speculative)
2581 	      {
2582 		if (DECL_VIRTUAL_P (e->callee->decl))
2583 		  inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2584 		else
2585 		  inlined_speculative += e->count.ipa ().to_gcov_type ();
2586 	      }
2587 	    else if (DECL_VIRTUAL_P (e->callee->decl))
2588 	      {
2589 		if (e->indirect_inlining_edge)
2590 		  inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2591 		else
2592 		  inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2593 	      }
2594 	    else
2595 	      {
2596 		if (e->indirect_inlining_edge)
2597 		  inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2598 		else
2599 		  inlined_cnt += e->count.ipa ().to_gcov_type ();
2600 	      }
2601 	  }
2602       }
2603     for (e = node->indirect_calls; e; e = e->next_callee)
2604       if (e->indirect_info->polymorphic
2605 	  && e->count.ipa ().initialized_p ())
2606 	indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2607       else if (e->count.ipa ().initialized_p ())
2608 	indirect_cnt += e->count.ipa ().to_gcov_type ();
2609   }
2610   if (max_count.initialized_p ())
2611     {
2612       fprintf (dump_file,
2613 	       "Inlined %" PRId64 " + speculative "
2614 	       "%" PRId64 " + speculative polymorphic "
2615 	       "%" PRId64 " + previously indirect "
2616 	       "%" PRId64 " + virtual "
2617 	       "%" PRId64 " + virtual and previously indirect "
2618 	       "%" PRId64 "\n" "Not inlined "
2619 	       "%" PRId64 " + previously indirect "
2620 	       "%" PRId64 " + virtual "
2621 	       "%" PRId64 " + virtual and previously indirect "
2622 	       "%" PRId64 " + still indirect "
2623 	       "%" PRId64 " + still indirect polymorphic "
2624 	       "%" PRId64 "\n", inlined_cnt,
2625 	       inlined_speculative, inlined_speculative_ply,
2626 	       inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2627 	       noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2628 	       noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2629       fprintf (dump_file, "Removed speculations ");
2630       spec_rem.dump (dump_file);
2631       fprintf (dump_file, "\n");
2632     }
2633   dump_overall_stats ();
2634   fprintf (dump_file, "\nWhy inlining failed?\n");
2635   for (i = 0; i < CIF_N_REASONS; i++)
2636     if (reason[i][1])
2637       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2638 	       cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2639 	       (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2640 }
2641 
2642 /* Called when node is removed.  */
2643 
2644 static void
2645 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2646 {
2647   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2648     return;
2649 
2650   hash_set<struct cgraph_node *> *removed
2651     = (hash_set<struct cgraph_node *> *) data;
2652   removed->add (node);
2653 }
2654 
2655 /* Decide on the inlining.  We do so in topological order to avoid the
2656    expense of updating data structures.  */
2657 
2658 static unsigned int
2659 ipa_inline (void)
2660 {
2661   struct cgraph_node *node;
2662   int nnodes;
2663   struct cgraph_node **order;
2664   int i, j;
2665   int cold;
2666   bool remove_functions = false;
2667 
2668   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2669 
2670   if (dump_file)
2671     ipa_dump_fn_summaries (dump_file);
2672 
2673   nnodes = ipa_reverse_postorder (order);
2674   spec_rem = profile_count::zero ();
2675 
2676   FOR_EACH_FUNCTION (node)
2677     {
2678       node->aux = 0;
2679 
2680       /* Recompute the default reasons for inlining because they may have
2681 	 changed during merging.  */
2682       if (in_lto_p)
2683 	{
2684 	  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2685 	    {
2686 	      gcc_assert (e->inline_failed);
2687 	      initialize_inline_failed (e);
2688 	    }
2689 	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2690 	    initialize_inline_failed (e);
2691 	}
2692     }
2693 
2694   if (dump_file)
2695     fprintf (dump_file, "\nFlattening functions:\n");
2696 
2697   /* First shrink order array, so that it only contains nodes with
2698      flatten attribute.  */
2699   for (i = nnodes - 1, j = i; i >= 0; i--)
2700     {
2701       node = order[i];
2702       if (node->definition
2703 	  /* Do not try to flatten aliases.  These may happen for example when
2704 	     creating local aliases.  */
2705 	  && !node->alias
2706 	  && lookup_attribute ("flatten",
2707 			       DECL_ATTRIBUTES (node->decl)) != NULL)
2708 	order[j--] = order[i];
2709     }
2710 
2711   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2712      nodes with flatten attribute.  If there is more than one such
2713      node, we need to register a node removal hook, as flatten_function
2714      could remove other nodes with flatten attribute.  See PR82801.  */
2715   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2716   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2717   /*
2718    * XXXMRG: added "nnodes > 1" as -O2 (but not -O) warn:
2719    *    "assuming signed overflow does not occur"
2720    */
2721   if (nnodes > 1 && j < nnodes - 2)
2722     {
2723       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2724       node_removal_hook_holder
2725 	= symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2726 					   flatten_removed_nodes);
2727     }
2728 
2729   /* In the first pass handle functions to be flattened.  Do this with
2730      a priority so none of our later choices will make this impossible.  */
2731   for (i = nnodes - 1; i > j; i--)
2732     {
2733       node = order[i];
2734       if (flatten_removed_nodes
2735 	  && flatten_removed_nodes->contains (node))
2736 	continue;
2737 
2738       /* Handle nodes to be flattened.
2739	 Ideally when processing callees we stop inlining at the
2740	 entry of cycles, possibly cloning that entry point and
2741	 trying to flatten it, turning it into a self-recursive
2742	 function.  */
2743       if (dump_file)
2744 	fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2745       flatten_function (node, false, true);
2746     }
2747 
2748   if (j < nnodes - 2)
2749     {
2750       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2751       delete flatten_removed_nodes;
2752     }
2753   free (order);
2754 
2755   if (dump_file)
2756     dump_overall_stats ();
2757 
2758   inline_small_functions ();
2759 
2760   gcc_assert (symtab->state == IPA_SSA);
2761   symtab->state = IPA_SSA_AFTER_INLINING;
2762   /* Do first after-inlining removal.  We want to remove all "stale" extern
2763      inline functions and virtual functions so we really know what is called
2764      once.  */
2765   symtab->remove_unreachable_nodes (dump_file);
2766 
2767   /* Inline functions with the property that after inlining into all callers
2768      the code size will shrink because the out-of-line copy is eliminated.
2769      We do this regardless of the callee size as long as function growth
2770      limits are met.  */
2771   if (dump_file)
2772     fprintf (dump_file,
2773 	     "\nDeciding on functions to be inlined into all callers and "
2774 	     "removing useless speculations:\n");
2775 
2776   /* Inlining one function called once has a good chance of preventing
2777      inlining of another function into the same callee.  Ideally we should
2778      work in priority order, but probably inlining hot functions first
2779      is a good cut without the extra pain of maintaining the queue.
2780 
2781      ??? This is not really fitting the bill perfectly: inlining a function
2782      into a callee often leads to better optimization of the callee due to
2783      increased context for optimization.
2784      For example if main() calls a function that outputs help
2785      and then a function that does the main optimization, we should inline
2786      the second with priority even if both calls are cold by themselves.
2787 
2788      We probably want to implement a new predicate replacing our use of
2789      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2790      to be hot.  */
2791   for (cold = 0; cold <= 1; cold ++)
2792     {
2793       FOR_EACH_DEFINED_FUNCTION (node)
2794 	{
2795 	  struct cgraph_edge *edge, *next;
2796 	  bool update=false;
2797 
2798 	  if (!opt_for_fn (node->decl, optimize)
2799 	      || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2800 	    continue;
2801 
2802 	  for (edge = node->callees; edge; edge = next)
2803 	    {
2804 	      next = edge->next_callee;
2805 	      if (edge->speculative && !speculation_useful_p (edge, false))
2806 		{
2807 		  if (edge->count.ipa ().initialized_p ())
2808 		    spec_rem += edge->count.ipa ();
2809 		  cgraph_edge::resolve_speculation (edge);
2810 		  update = true;
2811 		  remove_functions = true;
2812 		}
2813 	    }
2814 	  if (update)
2815 	    {
2816 	      struct cgraph_node *where = node->inlined_to
2817 					  ? node->inlined_to : node;
2818 	      reset_edge_caches (where);
2819 	      ipa_update_overall_fn_summary (where);
2820 	    }
2821 	  if (want_inline_function_to_all_callers_p (node, cold))
2822 	    {
2823 	      int num_calls = 0;
2824 	      node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2825 						 true);
2826 	      while (node->call_for_symbol_and_aliases
2827 		       (inline_to_all_callers, &num_calls, true))
2828 		;
2829 	      remove_functions = true;
2830 	    }
2831 	}
2832     }
2833 
2834   if (dump_enabled_p ())
2835     dump_printf (MSG_NOTE,
2836 		 "\nInlined %i calls, eliminated %i functions\n\n",
2837 		 ncalls_inlined, nfunctions_inlined);
2838   if (dump_file)
2839     dump_inline_stats ();
2840 
2841   if (dump_file)
2842     ipa_dump_fn_summaries (dump_file);
2843   return remove_functions ? TODO_remove_functions : 0;
2844 }
2845 
2846 /* Inline always-inline function calls in NODE.  */
2847 
2848 static bool
2849 inline_always_inline_functions (struct cgraph_node *node)
2850 {
2851   struct cgraph_edge *e;
2852   bool inlined = false;
2853 
2854   for (e = node->callees; e; e = e->next_callee)
2855     {
2856       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2857       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2858 	continue;
2859 
2860       if (e->recursive_p ())
2861 	{
2862 	  if (dump_enabled_p ())
2863 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2864 			     "  Not inlining recursive call to %C.\n",
2865 			     e->callee);
2866 	  e->inline_failed = CIF_RECURSIVE_INLINING;
2867 	  continue;
2868 	}
2869 
2870       if (!can_early_inline_edge_p (e))
2871 	{
2872 	  /* Set inlined to true if the callee is marked "always_inline" but
2873 	     is not inlinable.  This will allow flagging an error later in
2874 	     expand_call_inline in tree-inline.cc.  */
2875 	  if (lookup_attribute ("always_inline",
2876 				 DECL_ATTRIBUTES (callee->decl)) != NULL)
2877 	    inlined = true;
2878 	  continue;
2879 	}
2880 
2881       if (dump_enabled_p ())
2882 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2883 			 "  Inlining %C into %C (always_inline).\n",
2884 			 e->callee, e->caller);
2885       inline_call (e, true, NULL, NULL, false);
2886       inlined = true;
2887     }
2888   if (inlined)
2889     ipa_update_overall_fn_summary (node);
2890 
2891   return inlined;
2892 }
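
/* A usage example of what this function handles (user code, not part of GCC):
   calls to always_inline functions are inlined even at -O0 and regardless of
   the size limits; when that is impossible, expand_call_inline later reports
   an error instead of silently emitting a call.

     static inline int __attribute__ ((always_inline))
     clamp_nonneg (int v)
     {
       return v < 0 ? 0 : v;
     }

     int
     sanitize (int v)
     {
       return clamp_nonneg (v);		// always inlined, even at -O0
     }
*/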
2893 
2894 /* Decide on the inlining.  We do so in topological order to avoid the
2895    expense of updating data structures.  */
2896 
2897 static bool
2898 early_inline_small_functions (struct cgraph_node *node)
2899 {
2900   struct cgraph_edge *e;
2901   bool inlined = false;
2902 
2903   for (e = node->callees; e; e = e->next_callee)
2904     {
2905       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2906 
2907       /* We can encounter a not-yet-analyzed function during
2908	 early inlining on callgraphs with strongly
2909	 connected components.  */
2910       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2911       if (s == NULL || !s->inlinable || !e->inline_failed)
2912 	continue;
2913 
2914       /* Do not consider functions not declared inline.  */
2915       if (!DECL_DECLARED_INLINE_P (callee->decl)
2916 	  && !opt_for_fn (node->decl, flag_inline_small_functions)
2917 	  && !opt_for_fn (node->decl, flag_inline_functions))
2918 	continue;
2919 
2920       if (dump_enabled_p ())
2921 	dump_printf_loc (MSG_NOTE, e->call_stmt,
2922 			 "Considering inline candidate %C.\n",
2923 			 callee);
2924 
2925       if (!can_early_inline_edge_p (e))
2926 	continue;
2927 
2928       if (e->recursive_p ())
2929 	{
2930 	  if (dump_enabled_p ())
2931 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2932 			     "  Not inlining: recursive call.\n");
2933 	  continue;
2934 	}
2935 
2936       if (!want_early_inline_function_p (e))
2937 	continue;
2938 
2939       if (dump_enabled_p ())
2940 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2941 			 " Inlining %C into %C.\n",
2942 			 callee, e->caller);
2943       inline_call (e, true, NULL, NULL, false);
2944       inlined = true;
2945     }
2946 
2947   if (inlined)
2948     ipa_update_overall_fn_summary (node);
2949 
2950   return inlined;
2951 }
2952 
2953 unsigned int
2954 early_inliner (function *fun)
2955 {
2956   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2957   struct cgraph_edge *edge;
2958   unsigned int todo = 0;
2959   int iterations = 0;
2960   bool inlined = false;
2961 
2962   if (seen_error ())
2963     return 0;
2964 
2965   /* Do nothing if the data structures for the IPA inliner are already
2966      computed.  This happens when some pass decides to construct a new
2967      function and cgraph_add_new_function calls lowering passes and early
2968      optimization on it.  This may confuse us when the early inliner decides
2969      to inline a call to a function clone, because function clones don't have
2970      parameter lists in ipa-prop matching their signatures.  */
2971   if (ipa_node_params_sum)
2972     return 0;
2973 
2974   if (flag_checking)
2975     node->verify ();
2976   node->remove_all_references ();
2977 
2978   /* Even when not optimizing or not inlining, inline always-inline
2979      functions.  */
2980   inlined = inline_always_inline_functions (node);
2981 
2982   if (!optimize
2983       || flag_no_inline
2984       || !flag_early_inlining
2985       /* Never inline regular functions into always-inline functions
2986 	 during incremental inlining.  This is unfortunate, as functions
2987 	 calling always-inline functions will get less optimized, but at
2988 	 the same time inlining functions that call an always-inline
2989 	 function into an always-inline function might introduce cycles
2990 	 of always-inlined edges in the callgraph.
2991 
2992 	 We might want to be smarter and just avoid this type of inlining.  */
2993       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2994 	  && lookup_attribute ("always_inline",
2995 			       DECL_ATTRIBUTES (node->decl))))
2996     ;
2997   else if (lookup_attribute ("flatten",
2998 			     DECL_ATTRIBUTES (node->decl)) != NULL)
2999     {
3000       /* When the function is marked to be flattened, recursively inline
3001 	 all calls in it.  */
3002       if (dump_enabled_p ())
3003 	dump_printf (MSG_OPTIMIZED_LOCATIONS,
3004 		     "Flattening %C\n", node);
3005       flatten_function (node, true, true);
3006       inlined = true;
3007     }
3008   else
3009     {
3010       /* If some always_inline functions were inlined, apply the changes.
3011 	 This way we will not count always-inline bodies against the growth
3012 	 limits, and moreover we will inline calls from always-inline functions
3013 	 that we skipped previously because of the conditional above.  */
3014       if (inlined)
3015 	{
3016 	  timevar_push (TV_INTEGRATION);
3017 	  todo |= optimize_inline_calls (current_function_decl);
3018 	  /* The optimize_inline_calls call above might have introduced new
3019 	     statements that don't have inline parameters computed.  */
3020 	  for (edge = node->callees; edge; edge = edge->next_callee)
3021 	    {
3022 	      /* We can encounter a not-yet-analyzed function during
3023 		 early inlining on callgraphs with strongly
3024 		 connected components.  */
3025 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3026 	      es->call_stmt_size
3027 		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
3028 	      es->call_stmt_time
3029 		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
3030 	    }
3031 	  ipa_update_overall_fn_summary (node);
3032 	  inlined = false;
3033 	  timevar_pop (TV_INTEGRATION);
3034 	}
3035       /* We iterate incremental inlining to get trivial cases of indirect
3036 	 inlining.  */
3037       while (iterations < opt_for_fn (node->decl,
3038 				      param_early_inliner_max_iterations)
3039 	     && early_inline_small_functions (node))
3040 	{
3041 	  timevar_push (TV_INTEGRATION);
3042 	  todo |= optimize_inline_calls (current_function_decl);
3043 
3044 	  /* Technically we ought to recompute inline parameters so the new
3045 	     iteration of the early inliner works as expected.  We however have
3046 	     approximately right values and thus only need to update the edge
3047 	     info that might be cleared out for newly discovered edges.  */
3048 	  for (edge = node->callees; edge; edge = edge->next_callee)
3049 	    {
3050 	      /* We have no summary for new bound store calls yet.  */
3051 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3052 	      es->call_stmt_size
3053 		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
3054 	      es->call_stmt_time
3055 		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
3056 	    }
3057 	  if (iterations < opt_for_fn (node->decl,
3058 				       param_early_inliner_max_iterations) - 1)
3059 	    ipa_update_overall_fn_summary (node);
3060 	  timevar_pop (TV_INTEGRATION);
3061 	  iterations++;
3062 	  inlined = false;
3063 	}
3064       if (dump_file)
3065 	fprintf (dump_file, "Iterations: %i\n", iterations);
3066     }
3067 
3068   if (inlined)
3069     {
3070       timevar_push (TV_INTEGRATION);
3071       todo |= optimize_inline_calls (current_function_decl);
3072       timevar_pop (TV_INTEGRATION);
3073     }
3074 
3075   fun->always_inline_functions_inlined = true;
3076 
3077   return todo;
3078 }
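
/* Illustrative sketch (editorial note): the flatten branch handled in
   early_inliner above corresponds to user code such as

     __attribute__ ((flatten)) int
     outer (int x)
     {
       return helper1 (x) + helper2 (x);
     }

   (helper1 and helper2 being arbitrary inlinable functions), where
   flatten_function recursively inlines every call in outer, independently of
   the small-function heuristics.  The incremental loop, in turn, is bounded
   by --param early-inliner-max-iterations, so e.g.

     gcc -O2 --param early-inliner-max-iterations=1 file.c

   limits early_inline_small_functions to a single round per function.  */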
3079 
3080 /* Do inlining of small functions.  Doing so early helps profiling and other
3081    passes to be somewhat more effective and avoids some code duplication in
3082    the later real inlining pass for testcases with very many function calls.  */
3083 
3084 namespace {
3085 
3086 const pass_data pass_data_early_inline =
3087 {
3088   GIMPLE_PASS, /* type */
3089   "einline", /* name */
3090   OPTGROUP_INLINE, /* optinfo_flags */
3091   TV_EARLY_INLINING, /* tv_id */
3092   PROP_ssa, /* properties_required */
3093   0, /* properties_provided */
3094   0, /* properties_destroyed */
3095   0, /* todo_flags_start */
3096   0, /* todo_flags_finish */
3097 };
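
/* Editorial note: given the pass name "einline" above, the pass's dump file
   can be requested with -fdump-tree-einline, and because the pass is in
   OPTGROUP_INLINE its remarks can also be selected with -fopt-info-inline.  */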
3098 
3099 class pass_early_inline : public gimple_opt_pass
3100 {
3101 public:
3102   pass_early_inline (gcc::context *ctxt)
3103     : gimple_opt_pass (pass_data_early_inline, ctxt)
3104   {}
3105 
3106   /* opt_pass methods: */
3107   virtual unsigned int execute (function *);
3108 
3109 }; // class pass_early_inline
3110 
3111 unsigned int
3112 pass_early_inline::execute (function *fun)
3113 {
3114   return early_inliner (fun);
3115 }
3116 
3117 } // anon namespace
3118 
3119 gimple_opt_pass *
3120 make_pass_early_inline (gcc::context *ctxt)
3121 {
3122   return new pass_early_inline (ctxt);
3123 }
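
/* Editorial note: make_pass_early_inline is the factory through which the
   pass manager instantiates pass_early_inline; the pass itself is expected
   to be referenced from passes.def.  */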
3124 
3125 namespace {
3126 
3127 const pass_data pass_data_ipa_inline =
3128 {
3129   IPA_PASS, /* type */
3130   "inline", /* name */
3131   OPTGROUP_INLINE, /* optinfo_flags */
3132   TV_IPA_INLINING, /* tv_id */
3133   0, /* properties_required */
3134   0, /* properties_provided */
3135   0, /* properties_destroyed */
3136   0, /* todo_flags_start */
3137   ( TODO_dump_symtab ), /* todo_flags_finish */
3138 };
3139 
3140 class pass_ipa_inline : public ipa_opt_pass_d
3141 {
3142 public:
3143   pass_ipa_inline (gcc::context *ctxt)
3144     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3145 		      NULL, /* generate_summary */
3146 		      NULL, /* write_summary */
3147 		      NULL, /* read_summary */
3148 		      NULL, /* write_optimization_summary */
3149 		      NULL, /* read_optimization_summary */
3150 		      NULL, /* stmt_fixup */
3151 		      0, /* function_transform_todo_flags_start */
3152 		      inline_transform, /* function_transform */
3153 		      NULL) /* variable_transform */
3154   {}
3155 
3156   /* opt_pass methods: */
3157   virtual unsigned int execute (function *) { return ipa_inline (); }
3158 
3159 }; // class pass_ipa_inline
3160 
3161 } // anon namespace
3162 
3163 ipa_opt_pass_d *
3164 make_pass_ipa_inline (gcc::context *ctxt)
3165 {
3166   return new pass_ipa_inline (ctxt);
3167 }
3168
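/* Editorial note: unlike the early GIMPLE pass, pass_ipa_inline is an IPA
   pass whose execute hook runs ipa_inline once over the whole symbol table,
   while the resulting per-function changes are applied later through the
   inline_transform hook registered as function_transform above.  */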