1 /* Interprocedural scalar replacement of aggregates
2    Copyright (C) 2019-2022 Free Software Foundation, Inc.
3    Contributed by Martin Jambor <mjambor@suse.cz>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* IPA-SRA is an interprocedural pass that removes unused function return
22    values (turning functions returning a value which is never used into void
23    functions) and removes unused function parameters.  It can also replace an
24    aggregate parameter by a set of other parameters representing part of the
25    original, turning those passed by reference into new ones which pass the
26    value directly.
27 
28    The pass is a true IPA one, which means that it works in three stages in
29    order to be able to take advantage of LTO.  First, summaries about functions
30    and individual calls are generated.  Function summaries (often called call graph
31    node summaries) contain mainly information about which parameters are
32    potential transformation candidates and which bits of candidates are
33    accessed.  We differentiate between accesses done as a part of a call
34    statement (which might not be necessary if the callee is also transformed)
35    and others (which are mandatory).  Call summaries (often called call graph
36    edge summaries) contain information about which function formal parameters
37    feed into which actual call arguments, so that if two parameters are only
38    used in a sum which is then passed to another function which, in turn,
39    does not use that parameter, all three parameters of the two functions can
40    be eliminated.  Edge summaries also have flags recording whether the return
41    value is used or whether it is merely returned by the caller as well.  In
42    LTO mode these summaries are streamed to the object file in the compilation
43    phase and streamed back in at the WPA analysis stage.
44 
45    The interprocedural analysis phase traverses the graph in topological order
46    in two sweeps, one in each direction.  The first sweep, from callees to
47    callers, performs parameter removal and splitting.  Each strongly-connected
48    component is processed iteratively until the situation in it stabilizes.
49    The sweep from callers to callees is then carried out to remove unused
50    return values in a very similar fashion.
51 
52    Because parameter manipulation has big implications for call redirection
53    which is done only after all call graph nodes materialize, the
54    transformation phase is not part of this pass but is carried out by the
55    clone materialization and edge redirection itself, see comments in
56    ipa-param-manipulation.h for more details.  */
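
/* For illustration, consider the following hypothetical user code, in which
   foo only ever reads s->b and its return value is always used:

     struct S { int a, b, c; };

     static int
     foo (struct S *s)
     {
       return s->b + 1;
     }

     int
     bar (struct S *s)
     {
       return foo (s);
     }

   IPA-SRA may clone foo into a variant along the lines of
   "static int foo.isra (int b)" which receives the value of s->b directly,
   removing both the by-reference aggregate parameter and the dereference
   from the callee; callers such as bar are redirected to load s->b and pass
   it by value.  */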
57 
58 
59 #include "config.h"
60 #include "system.h"
61 #include "coretypes.h"
62 #include "backend.h"
63 #include "tree.h"
64 #include "gimple.h"
65 #include "predict.h"
66 #include "tree-pass.h"
67 #include "ssa.h"
68 #include "cgraph.h"
69 #include "gimple-pretty-print.h"
70 #include "alias.h"
71 #include "tree-eh.h"
72 #include "gimple-iterator.h"
73 #include "gimple-walk.h"
74 #include "tree-dfa.h"
75 #include "tree-sra.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
78 #include "dbgcnt.h"
79 #include "tree-inline.h"
80 #include "ipa-utils.h"
81 #include "builtins.h"
82 #include "cfganal.h"
83 #include "tree-streamer.h"
84 #include "internal-fn.h"
85 
86 /* Bits used to track size of an aggregate in bytes interprocedurally.  */
87 #define ISRA_ARG_SIZE_LIMIT_BITS 16
88 #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
89 /* How many parameters can feed into a call actual argument and still be
90    tracked.  */
91 #define IPA_SRA_MAX_PARAM_FLOW_LEN 7
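
/* With the defaults above, the pass can describe accesses within aggregates
   of at most (1 << 16) - 1 = 65535 bytes, i.e. anything smaller than 64KiB,
   and can track at most seven distinct formal parameters flowing into a
   single actual call argument.  */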
92 
93 /* Structure describing accesses to a specific portion of an aggregate
94    parameter, as given by the offset and size.  Smaller accesses occurring
95    within a function that fall entirely within another access form a tree.
96    The pass cannot analyze parameters with only partially overlapping accesses.  */
97 
98 struct GTY(()) param_access
99 {
100   /* Type that a potential replacement should have.  This field only has
101      meaning in the summary building and transformation phases, when it is
102      reconstructed from the body.  Must not be touched in the IPA analysis
103      stage.  */
104   tree type;
105 
106   /* Alias reference type to be used in MEM_REFs when adjusting caller
107      arguments.  */
108   tree alias_ptr_type;
109 
110   /* Values returned by get_ref_base_and_extent but converted to bytes and
111      stored as unsigned ints.  */
112   unsigned unit_offset;
113   unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
114 
115   /* Set once we are sure that the access will really end up in a potentially
116      transformed function - initially not set for portions of formal parameters
117      that are only used as actual function arguments passed to callees.  */
118   unsigned certain : 1;
119   /* Set if the access has reverse scalar storage order.  */
120   unsigned reverse : 1;
121 };
122 
123 /* This structure has the same purpose as the one above and additionally it
124    contains some fields that are only necessary in the summary generation
125    phase.  */
126 
127 struct gensum_param_access
128 {
129   /* Values returned by get_ref_base_and_extent, i.e. expressed in bits.  */
130   HOST_WIDE_INT offset;
131   HOST_WIDE_INT size;
132 
133   /* If this access has any children (in terms of the definition above), this
134      points to the first one.  */
135   struct gensum_param_access *first_child;
136   /* In intraprocedural SRA, pointer to the next sibling in the access tree as
137      described above.  */
138   struct gensum_param_access *next_sibling;
139 
140   /* Type that a potential replacement should have.  This field only has
141      meaning in the summary building and transformation phases, when it is
142      reconstructed from the body.  Must not be touched in the IPA analysis
143      stage.  */
144   tree type;
145   /* Alias reference type to be used in MEM_REFs when adjusting caller
146      arguments.  */
147   tree alias_ptr_type;
148 
149   /* Whether there have been writes to or reads from this exact location,
150      other than as arguments to function calls that can be tracked.  */
151   bool nonarg;
152 
153   /* Set if the access has reverse scalar storage order.  */
154   bool reverse;
155 };
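
/* As an example of the trees these structures form: if a function passes the
   whole of a small aggregate *p to a callee and also loads one int-sized
   field from it, the parameter gets a root access covering the whole
   aggregate (created for the call argument, with nonarg clear) that has a
   single child access covering the field, with nonarg set because that load
   must survive in the transformed body.  */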
156 
157 /* Summary describing a parameter in the IPA stages.  */
158 
159 struct GTY(()) isra_param_desc
160 {
161   /* List of access representatives of the parameter, sorted according to
162      their offset.  */
163   vec <param_access *, va_gc> *accesses;
164 
165   /* Unit size limit of total size of all replacements.  */
166   unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
167   /* Sum of unit sizes of all certain replacements.  */
168   unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
169 
170   /* A parameter that is used only in call arguments and can be removed if all
171      concerned actual arguments are removed.  */
172   unsigned locally_unused : 1;
173   /* An aggregate that is a candidate for breaking up or complete removal.  */
174   unsigned split_candidate : 1;
175   /* Is this a parameter passing data by reference?  */
176   unsigned by_ref : 1;
177 };
178 
179 /* Structure used when generating summaries that describes a parameter.  */
180 
181 struct gensum_param_desc
182 {
183   /* Roots of param_accesses.  */
184   gensum_param_access *accesses;
185   /* Number of accesses in the access tree rooted in the accesses field above.  */
186   unsigned access_count;
187 
188   /* If locally_unused below is set, this is the number of uses as actual call arguments.  */
189   int call_uses;
190   /* Number of times this parameter has been directly passed to a callee.  */
191   unsigned ptr_pt_count;
192 
193   /* Size limit of total size of all replacements.  */
194   unsigned param_size_limit;
195   /* Sum of sizes of nonarg accesses.  */
196   unsigned nonarg_acc_size;
197 
198   /* A parameter that is used only in call arguments and can be removed if all
199      concerned actual arguments are removed.  */
200   bool locally_unused;
201   /* An aggregate that is a candidate for breaking up or a pointer passing data
202      by reference that is a candidate for being converted to a set of
203      parameters passing those data by value.  */
204   bool split_candidate;
205   /* Is this a parameter passing data by reference?  */
206   bool by_ref;
207 
208   /* The number of this parameter as ordered in the function decl.  */
209   int param_number;
210   /* For parameters passing data by reference, this is the parameter index
211      used to compute indices into bb_dereferences.  */
212   int deref_index;
213 };
214 
215 /* Properly deallocate accesses of DESC.  TODO: Since this data structure is
216    allocated in GC memory, this is not necessary and we can consider removing
217    the function.  */
218 
219 static void
220 free_param_decl_accesses (isra_param_desc *desc)
221 {
222   unsigned len = vec_safe_length (desc->accesses);
223   for (unsigned i = 0; i < len; ++i)
224     ggc_free ((*desc->accesses)[i]);
225   vec_free (desc->accesses);
226 }
227 
228 /* Class used to convey information about functions from the
229    intra-procedural analysis stage to the inter-procedural one.  */
230 
231 class GTY((for_user)) isra_func_summary
232 {
233 public:
234   /* Initialize the object.  */
235 
236   isra_func_summary ()
237     : m_parameters (NULL), m_candidate (false), m_returns_value (false),
238     m_return_ignored (false), m_queued (false)
239   {}
240 
241   /* Destroy m_parameters.  */
242 
243   ~isra_func_summary ();
244 
245   /* Mark the function as not a candidate for any IPA-SRA transformation.
246      Return true if it was a candidate until now.  */
247 
248   bool zap ();
249 
250   /* Vector of parameter descriptors corresponding to the function being
251      analyzed.  */
252   vec<isra_param_desc, va_gc> *m_parameters;
253 
254   /* Whether the node is even a candidate for any IPA-SRA transformation at
255      all.  */
256   unsigned m_candidate : 1;
257 
258   /* Whether the original function returns any value.  */
259   unsigned m_returns_value : 1;
260 
261   /* Set to true if all call statements do not actually use the returned
262      value.  */
263 
264   unsigned m_return_ignored : 1;
265 
266   /* Whether the node is already queued in IPA SRA stack during processing of
267      call graph SCCs.  */
268 
269   unsigned m_queued : 1;
270 };
271 
272 /* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
273    data structure is allocated in GC memory, this is not necessary and we can
274    consider removing the destructor.  */
275 
276 isra_func_summary::~isra_func_summary ()
277 {
278   unsigned len = vec_safe_length (m_parameters);
279   for (unsigned i = 0; i < len; ++i)
280     free_param_decl_accesses (&(*m_parameters)[i]);
281   vec_free (m_parameters);
282 }
283 
284 /* Mark the function as not a candidate for any IPA-SRA transformation.  Return
285    true if it was a candidate until now.  */
286 
287 bool
288 isra_func_summary::zap ()
289 {
290   bool ret = m_candidate;
291   m_candidate = false;
292 
293   /* TODO: see the destructor above.  */
294   unsigned len = vec_safe_length (m_parameters);
295   for (unsigned i = 0; i < len; ++i)
296     free_param_decl_accesses (&(*m_parameters)[i]);
297   vec_free (m_parameters);
298 
299   return ret;
300 }
301 
302 /* Structure to describe which formal parameters feed into a particular actual
303    argument.  */
304 
305 struct isra_param_flow
306 {
307   /* Number of elements in array inputs that contain valid data.  */
308   char length;
309   /* Indices of formal parameters that feed into the described actual argument.
310      If aggregate_pass_through or pointer_pass_through below are true, it must
311      contain exactly one element which is passed through from a formal
312      parameter of the given number.  Otherwise, the array contains indices of
313      the caller's formal parameters which are used to calculate the value of
314      this actual argument.  */
315   unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
316 
317   /* Offset within the formal parameter.  */
318   unsigned unit_offset;
319   /* Size of the portion of the formal parameter that is being passed.  */
320   unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
321 
322   /* True when the value of this actual argument is a portion of a formal
323      parameter.  */
324   unsigned aggregate_pass_through : 1;
325   /* True when the value of this actual argument is a verbatim pass through of an
326      obtained pointer.  */
327   unsigned pointer_pass_through : 1;
328   /* True when it is safe to copy access candidates here from the callee, which
329      would mean introducing dereferences into callers of the caller.  */
330   unsigned safe_to_import_accesses : 1;
331 };
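
/* For example, if the only actual argument of a call "g (a + b)" is computed
   from the caller's formal parameters number 0 and 2, the corresponding
   isra_param_flow has length == 2 and inputs == {0, 2} with no pass-through
   flag set.  If the argument were the pointer parameter number 0 passed
   verbatim, length would be 1, inputs[0] would be 0 and pointer_pass_through
   would be set.  */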
332 
333 /* Structure used to convey information about calls from the intra-procedural
334    analysis stage to inter-procedural one.  */
335 
336 class isra_call_summary
337 {
338 public:
339   isra_call_summary ()
340     : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
341       m_bit_aligned_arg (false), m_before_any_store (false)
342   {}
343 
344   void init_inputs (unsigned arg_count);
345   void dump (FILE *f);
346 
347   /* Information about what formal parameters of the caller are used to compute
348      individual actual arguments of this call.  */
349   auto_vec <isra_param_flow> m_arg_flow;
350 
351   /* Set to true if the call statement does not have a LHS.  */
352   unsigned m_return_ignored : 1;
353 
354   /* Set to true if the LHS of the call statement is only used to construct the
355      return value of the caller.  */
356   unsigned m_return_returned : 1;
357 
358   /* Set when any of the call arguments are not byte-aligned.  */
359   unsigned m_bit_aligned_arg : 1;
360 
361   /* Set to true if the call happened before any (other) store to memory in the
362      caller.  */
363   unsigned m_before_any_store : 1;
364 };
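
/* For instance, given "t = foo (x); return t;" in a caller, the summary of
   the call to foo has m_return_returned set, whereas for a plain "foo (x);"
   whose result is dropped, m_return_ignored is set instead.  */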
365 
366 /* Class to manage function summaries.  */
367 
368 class GTY((user)) ipa_sra_function_summaries
369   : public function_summary <isra_func_summary *>
370 {
371 public:
372   ipa_sra_function_summaries (symbol_table *table, bool ggc):
373     function_summary<isra_func_summary *> (table, ggc) { }
374 
375   virtual void duplicate (cgraph_node *, cgraph_node *,
376 			  isra_func_summary *old_sum,
377 			  isra_func_summary *new_sum);
378 };
379 
380 /* Hook that is called by summary when a node is duplicated.  */
381 
382 void
383 ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
384 				       isra_func_summary *old_sum,
385 				       isra_func_summary *new_sum)
386 {
387   /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
388      useless.  */
389   new_sum->m_candidate  = old_sum->m_candidate;
390   new_sum->m_returns_value = old_sum->m_returns_value;
391   new_sum->m_return_ignored = old_sum->m_return_ignored;
392   gcc_assert (!old_sum->m_queued);
393   new_sum->m_queued = false;
394 
395   unsigned param_count = vec_safe_length (old_sum->m_parameters);
396   if (!param_count)
397     return;
398   vec_safe_reserve_exact (new_sum->m_parameters, param_count);
399   new_sum->m_parameters->quick_grow_cleared (param_count);
400   for (unsigned i = 0; i < param_count; i++)
401     {
402       isra_param_desc *s = &(*old_sum->m_parameters)[i];
403       isra_param_desc *d = &(*new_sum->m_parameters)[i];
404 
405       d->param_size_limit = s->param_size_limit;
406       d->size_reached = s->size_reached;
407       d->locally_unused = s->locally_unused;
408       d->split_candidate = s->split_candidate;
409       d->by_ref = s->by_ref;
410 
411       unsigned acc_count = vec_safe_length (s->accesses);
412       vec_safe_reserve_exact (d->accesses, acc_count);
413       for (unsigned j = 0; j < acc_count; j++)
414 	{
415 	  param_access *from = (*s->accesses)[j];
416 	  param_access *to = ggc_cleared_alloc<param_access> ();
417 	  to->type = from->type;
418 	  to->alias_ptr_type = from->alias_ptr_type;
419 	  to->unit_offset = from->unit_offset;
420 	  to->unit_size = from->unit_size;
421 	  to->certain = from->certain;
422 	  to->reverse = from->reverse;
423 	  d->accesses->quick_push (to);
424 	}
425     }
426 }
427 
428 /* Pointer to the pass function summary holder.  */
429 
430 static GTY(()) ipa_sra_function_summaries *func_sums;
431 
432 /* Class to manage call summaries.  */
433 
434 class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
435 {
436 public:
437   ipa_sra_call_summaries (symbol_table *table):
438     call_summary<isra_call_summary *> (table) { }
439 
440   /* Duplicate info when an edge is cloned.  */
441   virtual void duplicate (cgraph_edge *, cgraph_edge *,
442 			  isra_call_summary *old_sum,
443 			  isra_call_summary *new_sum);
444 };
445 
446 static ipa_sra_call_summaries *call_sums;
447 
448 
449 /* Initialize m_arg_flow of a particular instance of isra_call_summary.
450    ARG_COUNT is the number of actual arguments passed.  */
451 
452 void
453 isra_call_summary::init_inputs (unsigned arg_count)
454 {
455   if (arg_count == 0)
456     {
457       gcc_checking_assert (m_arg_flow.length () == 0);
458       return;
459     }
460   if (m_arg_flow.length () == 0)
461     {
462       m_arg_flow.reserve_exact (arg_count);
463       m_arg_flow.quick_grow_cleared (arg_count);
464     }
465   else
466     gcc_checking_assert (arg_count == m_arg_flow.length ());
467 }
468 
469 /* Dump all information in call summary to F.  */
470 
471 void
472 isra_call_summary::dump (FILE *f)
473 {
474   if (m_return_ignored)
475     fprintf (f, "    return value ignored\n");
476   if (m_return_returned)
477     fprintf (f, "    return value used only to compute caller return value\n");
478   if (m_before_any_store)
479     fprintf (f, "    happens before any store to memory\n");
480   for (unsigned i = 0; i < m_arg_flow.length (); i++)
481     {
482       fprintf (f, "    Parameter %u:\n", i);
483       isra_param_flow *ipf = &m_arg_flow[i];
484 
485       if (ipf->length)
486 	{
487 	  bool first = true;
488 	  fprintf (f, "      Scalar param sources: ");
489 	  for (int j = 0; j < ipf->length; j++)
490 	    {
491 	      if (!first)
492 		fprintf (f, ", ");
493 	      else
494 		first = false;
495 	      fprintf (f, "%i", (int) ipf->inputs[j]);
496 	    }
497 	  fprintf (f, "\n");
498 	}
499       if (ipf->aggregate_pass_through)
500 	fprintf (f, "      Aggregate pass through from the param given above, "
501 		 "unit offset: %u , unit size: %u\n",
502 		 ipf->unit_offset, ipf->unit_size);
503       if (ipf->pointer_pass_through)
504 	fprintf (f, "      Pointer pass through from the param given above, "
505 		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
506     }
507 }
508 
509 /* Duplicate edge summary when an edge is cloned.  */
510 
511 void
512 ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
513 				   isra_call_summary *old_sum,
514 				   isra_call_summary *new_sum)
515 {
516   unsigned arg_count = old_sum->m_arg_flow.length ();
517   new_sum->init_inputs (arg_count);
518   for (unsigned i = 0; i < arg_count; i++)
519     new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
520 
521   new_sum->m_return_ignored = old_sum->m_return_ignored;
522   new_sum->m_return_returned = old_sum->m_return_returned;
523   new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
524   new_sum->m_before_any_store = old_sum->m_before_any_store;
525 }
526 
527 
528 /* With all GTY stuff done, we can move to anonymous namespace.  */
529 namespace {
530 /* Quick mapping from a decl to its param descriptor.  */
531 
532 hash_map<tree, gensum_param_desc *> *decl2desc;
533 
534 /* Countdown of allowed Alias Analysis steps during summary building.  */
535 
536 int aa_walking_limit;
537 
538 /* This is a table which, for each basic block and parameter, holds the
539    maximum distance (offset + size) into that parameter which is known to be
540    dereferenced and accessed in that BB.  */
541 HOST_WIDE_INT *bb_dereferences = NULL;
542 /* How many by-reference parameters there are in the current function.  */
543 int by_ref_count;
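
/* The table is addressed as bb->index * by_ref_count + deref_index (see
   mark_param_dereference below); e.g. with two by-reference candidates, the
   dereference distance of the candidate with deref_index 1 in basic block 7
   is kept in bb_dereferences[7 * 2 + 1].  */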
544 
545 /* Bitmap of BBs that can cause the function to "stop" progressing by
546    returning, throwing externally, looping infinitely or calling a function
547    which might abort, etc.  */
548 bitmap final_bbs;
549 
550 /* Obstack to allocate various small structures required only when generating
551    summary for a function.  */
552 struct obstack gensum_obstack;
553 
554 /* Return false if the function is apparently unsuitable for IPA-SRA based on
555    its attributes, return true otherwise.  NODE is the cgraph node of the
556    current function.  */
557 
558 static bool
559 ipa_sra_preliminary_function_checks (cgraph_node *node)
560 {
561   if (!node->can_change_signature)
562     {
563       if (dump_file)
564 	fprintf (dump_file, "Function cannot change signature.\n");
565       return false;
566     }
567 
568   if (!tree_versionable_function_p (node->decl))
569     {
570       if (dump_file)
571 	fprintf (dump_file, "Function is not versionable.\n");
572       return false;
573     }
574 
575   if (!opt_for_fn (node->decl, optimize)
576       || !opt_for_fn (node->decl, flag_ipa_sra))
577     {
578       if (dump_file)
579 	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
580 		 "function.\n");
581       return false;
582     }
583 
584   if (DECL_VIRTUAL_P (node->decl))
585     {
586       if (dump_file)
587 	fprintf (dump_file, "Function is a virtual method.\n");
588       return false;
589     }
590 
591   struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
592   if (fun->stdarg)
593     {
594       if (dump_file)
595 	fprintf (dump_file, "Function uses stdarg. \n");
596       return false;
597     }
598 
599   if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
600     {
601       if (dump_file)
602 	fprintf (dump_file, "Function type has attributes. \n");
603       return false;
604     }
605 
606   if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
607     {
608       if (dump_file)
609 	fprintf (dump_file, "Always inline function will be inlined "
610 		 "anyway. \n");
611       return false;
612     }
613 
614   return true;
615 }
616 
617 /* Print access tree starting at ACCESS to F.  */
618 
619 static void
620 dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
621 {
622   fprintf (f, "  ");
623   for (unsigned i = 0; i < indent; i++)
624     fprintf (f, " ");
625   fprintf (f, "    * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
626 	   access->offset);
627   fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
628   fprintf (f, ", type: ");
629   print_generic_expr (f, access->type);
630   fprintf (f, ", alias_ptr_type: ");
631   print_generic_expr (f, access->alias_ptr_type);
632   fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
633   for (gensum_param_access *ch = access->first_child;
634        ch;
635        ch = ch->next_sibling)
636     dump_gensum_access (f, ch, indent + 2);
637 }
638 
639 
640 /* Print access ACCESS to F.  */
641 
642 static void
643 dump_isra_access (FILE *f, param_access *access)
644 {
645   fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
646   fprintf (f, ", unit size: %u", access->unit_size);
647   fprintf (f, ", type: ");
648   print_generic_expr (f, access->type);
649   fprintf (f, ", alias_ptr_type: ");
650   print_generic_expr (f, access->alias_ptr_type);
651   if (access->certain)
652     fprintf (f, ", certain");
653   else
654     fprintf (f, ", not certain");
655   if (access->reverse)
656     fprintf (f, ", reverse");
657   fprintf (f, "\n");
658 }
659 
660 /* Dump access ACCESS to stderr.  */
661 
662 DEBUG_FUNCTION void
663 debug_isra_access (param_access *access)
664 {
665   dump_isra_access (stderr, access);
666 }
667 
668 /* Dump DESC to F.  */
669 
670 static void
671 dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
672 {
673   if (desc->locally_unused)
674     fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
675   if (!desc->split_candidate)
676     {
677       fprintf (f, "    not a candidate\n");
678       return;
679     }
680   if (desc->by_ref)
681     fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);
682 
683   for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
684     dump_gensum_access (f, acc, 2);
685 }
686 
687 /* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
688    describe FNDECL, to F.  */
689 
690 static void
691 dump_gensum_param_descriptors (FILE *f, tree fndecl,
692 			       vec<gensum_param_desc> *param_descriptions)
693 {
694   tree parm = DECL_ARGUMENTS (fndecl);
695   for (unsigned i = 0;
696        i < param_descriptions->length ();
697        ++i, parm = DECL_CHAIN (parm))
698     {
699       fprintf (f, "  Descriptor for parameter %i ", i);
700       print_generic_expr (f, parm, TDF_UID);
701       fprintf (f, "\n");
702       dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
703     }
704 }
705 
706 
707 /* Dump DESC to F.  */
708 
709 static void
710 dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
711 {
712   if (desc->locally_unused)
713     {
714       fprintf (f, "    (locally) unused\n");
715     }
716   if (!desc->split_candidate)
717     {
718       fprintf (f, "    not a candidate for splitting\n");
719       return;
720     }
721   fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
722 	   desc->param_size_limit, desc->size_reached,
723 	   desc->by_ref ? ", by_ref" : "");
724 
725   for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
726     {
727       param_access *access = (*desc->accesses)[i];
728       dump_isra_access (f, access);
729     }
730 }
731 
732 /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
733    F.  */
734 
735 static void
736 dump_isra_param_descriptors (FILE *f, tree fndecl,
737 			     isra_func_summary *ifs)
738 {
739   tree parm = DECL_ARGUMENTS (fndecl);
740   if (!ifs->m_parameters)
741     {
742       fprintf (f, "  parameter descriptors not available\n");
743       return;
744     }
745 
746   for (unsigned i = 0;
747        i < ifs->m_parameters->length ();
748        ++i, parm = DECL_CHAIN (parm))
749     {
750       fprintf (f, "  Descriptor for parameter %i ", i);
751       print_generic_expr (f, parm, TDF_UID);
752       fprintf (f, "\n");
753       dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
754     }
755 }
756 
757 /* Add SRC to the inputs of PARAM_FLOW unless that would exceed their storage.
758    Return false if the function fails, otherwise return true.  SRC must fit
759    into an unsigned char.  Used for purposes of transitive unused parameter
760    removal.  */
761 
762 static bool
763 add_src_to_param_flow (isra_param_flow *param_flow, int src)
764 {
765   gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
766   if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
767     return false;
768 
769   param_flow->inputs[(int) param_flow->length] = src;
770   param_flow->length++;
771   return true;
772 }
773 
774 /* Record SRC as the single input of PARAM_FLOW, asserting that any previously
775    recorded input is the same.  Used for purposes of transitive parameter
776    splitting.  */
777 
778 static void
779 set_single_param_flow_source (isra_param_flow *param_flow, int src)
780 {
781   gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
782   if (param_flow->length == 0)
783     {
784       param_flow->inputs[0] = src;
785       param_flow->length = 1;
786     }
787   else if (param_flow->length == 1)
788     gcc_assert (param_flow->inputs[0] == src);
789   else
790     gcc_unreachable ();
791 }
792 
793 /* Assert that there is only a single value in PARAM_FLOW's inputs and return
794    it.  */
795 
796 static unsigned
797 get_single_param_flow_source (const isra_param_flow *param_flow)
798 {
799   gcc_assert (param_flow->length == 1);
800   return param_flow->inputs[0];
801 }
802 
803 /* Inspect all uses of NAME and simple arithmetic calculations involving NAME
804    in FUN represented with NODE and return a negative number if any of them is
805    used for something other than either an actual call argument, a simple
806    arithmetic operation or debug statement.  If there are no such uses, return
807    the number of actual arguments that this parameter eventually feeds to (or
808    zero if there are none).  For each such argument, mark PARM_NUM as one of its
809    sources.  ANALYZED is a bitmap that tracks which SSA names we have already
810    started investigating.  */
811 
812 static int
813 isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
814 			      int parm_num, bitmap analyzed)
815 {
816   int res = 0;
817   imm_use_iterator imm_iter;
818   gimple *stmt;
819 
820   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
821     {
822       if (is_gimple_debug (stmt))
823 	continue;
824 
825       /* TODO: We could handle at least const builtin functions like arithmetic
826 	 operations below.  */
827       if (is_gimple_call (stmt))
828 	{
829 	  int all_uses = 0;
830 	  use_operand_p use_p;
831 	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
832 	    all_uses++;
833 
834 	  gcall *call = as_a <gcall *> (stmt);
835 	  unsigned arg_count;
836 	  if (gimple_call_internal_p (call)
837 	      || (arg_count = gimple_call_num_args (call)) == 0)
838 	    {
839 	      res = -1;
840 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
841 	    }
842 
843 	  cgraph_edge *cs = node->get_edge (stmt);
844 	  gcc_checking_assert (cs);
845 	  isra_call_summary *csum = call_sums->get_create (cs);
846 	  csum->init_inputs (arg_count);
847 
848 	  int simple_uses = 0;
849 	  for (unsigned i = 0; i < arg_count; i++)
850 	    if (gimple_call_arg (call, i) == name)
851 	      {
852 		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
853 		  {
854 		    simple_uses = -1;
855 		    break;
856 		  }
857 		simple_uses++;
858 	      }
859 
860 	  if (simple_uses < 0
861 	      || all_uses != simple_uses)
862 	    {
863 	      res = -1;
864 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
865 	    }
866 	  res += all_uses;
867 	}
868       else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
869 	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
870 		   || gimple_code (stmt) == GIMPLE_PHI))
871 	{
872 	  tree lhs;
873 	  if (gimple_code (stmt) == GIMPLE_PHI)
874 	    lhs = gimple_phi_result (stmt);
875 	  else
876 	    lhs = gimple_assign_lhs (stmt);
877 
878 	  if (TREE_CODE (lhs) != SSA_NAME)
879 	    {
880 	      res = -1;
881 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
882 	    }
883 	  gcc_assert (!gimple_vdef (stmt));
884 	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
885 	    {
886 	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
887 						      analyzed);
888 	      if (tmp < 0)
889 		{
890 		  res = tmp;
891 		  BREAK_FROM_IMM_USE_STMT (imm_iter);
892 		}
893 	      res += tmp;
894 	    }
895 	}
896       else
897 	{
898 	  res = -1;
899 	  BREAK_FROM_IMM_USE_STMT (imm_iter);
900 	}
901     }
902   return res;
903 }
904 
905 /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
906    also described by NODE) and simple arithmetic calculations involving PARM
907    and return true if any of them is used for something other than either an
908    actual call argument, simple arithmetic operation or debug statement.  If
909    there are no such uses, return false and store the number of actual arguments
910    that this parameter eventually feeds to (or zero if there is none) to
911    *CALL_USES_P.  For each such argument, mark PARM_NUM as one of its
912    sources.
913 
914    This function is similar to ptr_parm_has_nonarg_uses but its results are
915    meant for unused parameter removal, as opposed to splitting of parameters
916    passed by reference or converting them to passed by value.  */
917 
918 static bool
919 isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
920 				    int parm_num, int *call_uses_p)
921 {
922   gcc_checking_assert (is_gimple_reg (parm));
923 
924   tree name = ssa_default_def (fun, parm);
925   if (!name || has_zero_uses (name))
926     {
927       *call_uses_p = 0;
928       return false;
929     }
930 
931   /* Edge summaries can only handle callers with fewer than 256 parameters.  */
932   if (parm_num > UCHAR_MAX)
933     return true;
934 
935   bitmap analyzed = BITMAP_ALLOC (NULL);
936   int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
937 						analyzed);
938   BITMAP_FREE (analyzed);
939   if (call_uses < 0)
940     return true;
941   *call_uses_p = call_uses;
942   return false;
943 }
944 
945 /* Scan immediate uses of a default definition SSA name of a parameter PARM and
946    examine whether there are any uses that are not actual call arguments or are
947    otherwise infeasible.  If so, return true, otherwise return false.
948    Create pass-through IPA flow records for any direct uses as call arguments
949    and if returning false, store their number into *PT_COUNT_P.  NODE and FUN
950    must represent the function that is currently analyzed, PARM_NUM must be the
951    index of the analyzed parameter.
952 
953    This function is similar to isra_track_scalar_param_local_uses but its
954    results are meant for splitting of parameters passed by reference or turning
955    them into bits passed by value, as opposed to generic unused parameter
956    removal.  */
957 
958 static bool
959 ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
960 			  int parm_num, unsigned *pt_count_p)
961 {
962   imm_use_iterator ui;
963   gimple *stmt;
964   tree name = ssa_default_def (fun, parm);
965   bool ret = false;
966   unsigned pt_count = 0;
967 
968   if (!name || has_zero_uses (name))
969     return false;
970 
971   /* Edge summaries can only handle callers with fewer than 256 parameters.  */
972   if (parm_num > UCHAR_MAX)
973     return true;
974 
975   FOR_EACH_IMM_USE_STMT (stmt, ui, name)
976     {
977       unsigned uses_ok = 0;
978       use_operand_p use_p;
979 
980       if (is_gimple_debug (stmt))
981 	continue;
982 
983       if (gimple_assign_single_p (stmt))
984 	{
985 	  tree rhs = gimple_assign_rhs1 (stmt);
986 	  if (!TREE_THIS_VOLATILE (rhs))
987 	    {
988 	      while (handled_component_p (rhs))
989 		rhs = TREE_OPERAND (rhs, 0);
990 	      if (TREE_CODE (rhs) == MEM_REF
991 		  && TREE_OPERAND (rhs, 0) == name
992 		  && integer_zerop (TREE_OPERAND (rhs, 1))
993 		  && types_compatible_p (TREE_TYPE (rhs),
994 					 TREE_TYPE (TREE_TYPE (name))))
995 		uses_ok++;
996 	    }
997 	}
998       else if (is_gimple_call (stmt))
999 	{
1000 	  gcall *call = as_a <gcall *> (stmt);
1001 	  unsigned arg_count;
1002 	  if (gimple_call_internal_p (call)
1003 	      || (arg_count = gimple_call_num_args (call)) == 0)
1004 	    {
1005 	      ret = true;
1006 	      BREAK_FROM_IMM_USE_STMT (ui);
1007 	    }
1008 
1009 	  cgraph_edge *cs = node->get_edge (stmt);
1010 	  gcc_checking_assert (cs);
1011 	  isra_call_summary *csum = call_sums->get_create (cs);
1012 	  csum->init_inputs (arg_count);
1013 
1014 	  for (unsigned i = 0; i < arg_count; ++i)
1015 	    {
1016 	      tree arg = gimple_call_arg (stmt, i);
1017 
1018 	      if (arg == name)
1019 		{
1020 		  /* TODO: Allow &MEM_REF[name + offset] here,
1021 		     ipa_param_body_adjustments::modify_call_stmt has to be
1022 		     adjusted too.  */
1023 		  csum->m_arg_flow[i].pointer_pass_through = true;
1024 		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1025 		  pt_count++;
1026 		  uses_ok++;
1027 		  continue;
1028 		}
1029 
1030 	      if (!TREE_THIS_VOLATILE (arg))
1031 		{
1032 		  while (handled_component_p (arg))
1033 		    arg = TREE_OPERAND (arg, 0);
1034 		  if (TREE_CODE (arg) == MEM_REF
1035 		      && TREE_OPERAND (arg, 0) == name
1036 		      && integer_zerop (TREE_OPERAND (arg, 1))
1037 		      && types_compatible_p (TREE_TYPE (arg),
1038 					     TREE_TYPE (TREE_TYPE (name))))
1039 		    uses_ok++;
1040 		}
1041 	    }
1042 	}
1043 
1044       /* If the number of valid uses does not match the number of
1045          uses in this stmt there is an unhandled use.  */
1046       unsigned all_uses = 0;
1047       FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1048 	all_uses++;
1049 
1050       gcc_checking_assert (uses_ok <= all_uses);
1051       if (uses_ok != all_uses)
1052 	{
1053 	  ret = true;
1054 	  BREAK_FROM_IMM_USE_STMT (ui);
1055 	}
1056     }
1057 
1058   *pt_count_p = pt_count;
1059   return ret;
1060 }
1061 
1062 /* Initialize vector of parameter descriptors of NODE.  Return true if there
1063    are any candidates for splitting or unused aggregate parameter removal (the
1064    function may return false if there are candidates for removal of register
1065    parameters) and the function body must be scanned.  */
1066 
1067 static bool
1068 create_parameter_descriptors (cgraph_node *node,
1069 			      vec<gensum_param_desc> *param_descriptions)
1070 {
1071   function *fun = DECL_STRUCT_FUNCTION (node->decl);
1072   bool ret = false;
1073 
1074   int num = 0;
1075   for (tree parm = DECL_ARGUMENTS (node->decl);
1076        parm;
1077        parm = DECL_CHAIN (parm), num++)
1078     {
1079       const char *msg;
1080       gensum_param_desc *desc = &(*param_descriptions)[num];
1081       /* param_descriptions vector is grown cleared in the caller.  */
1082       desc->param_number = num;
1083       decl2desc->put (parm, desc);
1084 
1085       if (dump_file && (dump_flags & TDF_DETAILS))
1086 	print_generic_expr (dump_file, parm, TDF_UID);
1087 
1088       int scalar_call_uses;
1089       tree type = TREE_TYPE (parm);
1090       if (TREE_THIS_VOLATILE (parm))
1091 	{
1092 	  if (dump_file && (dump_flags & TDF_DETAILS))
1093 	    fprintf (dump_file, " not a candidate, is volatile\n");
1094 	  continue;
1095 	}
1096       if (!is_gimple_reg_type (type) && is_va_list_type (type))
1097 	{
1098 	  if (dump_file && (dump_flags & TDF_DETAILS))
1099 	    fprintf (dump_file, " not a candidate, is a va_list type\n");
1100 	  continue;
1101 	}
1102       if (TREE_ADDRESSABLE (parm))
1103 	{
1104 	  if (dump_file && (dump_flags & TDF_DETAILS))
1105 	    fprintf (dump_file, " not a candidate, is addressable\n");
1106 	  continue;
1107 	}
1108       if (TREE_ADDRESSABLE (type))
1109 	{
1110 	  if (dump_file && (dump_flags & TDF_DETAILS))
1111 	    fprintf (dump_file, " not a candidate, type cannot be split\n");
1112 	  continue;
1113 	}
1114 
1115       if (is_gimple_reg (parm)
1116 	  && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1117 						  &scalar_call_uses))
1118 	{
1119 	  if (dump_file && (dump_flags & TDF_DETAILS))
1120 	    fprintf (dump_file, " is a scalar with only %i call uses\n",
1121 		     scalar_call_uses);
1122 
1123 	  desc->locally_unused = true;
1124 	  desc->call_uses = scalar_call_uses;
1125 	}
1126 
1127       if (POINTER_TYPE_P (type))
1128 	{
1129 	  type = TREE_TYPE (type);
1130 
1131 	  if (TREE_CODE (type) == FUNCTION_TYPE
1132 	      || TREE_CODE (type) == METHOD_TYPE)
1133 	    {
1134 	      if (dump_file && (dump_flags & TDF_DETAILS))
1135 		fprintf (dump_file, " not a candidate, reference to "
1136 			 "a function\n");
1137 	      continue;
1138 	    }
1139 	  if (TYPE_VOLATILE (type))
1140 	    {
1141 	      if (dump_file && (dump_flags & TDF_DETAILS))
1142 		fprintf (dump_file, " not a candidate, reference to "
1143 			 "a volatile type\n");
1144 	      continue;
1145 	    }
1146 	  if (TREE_CODE (type) == ARRAY_TYPE
1147 	      && TYPE_NONALIASED_COMPONENT (type))
1148 	    {
1149 	      if (dump_file && (dump_flags & TDF_DETAILS))
1150 		fprintf (dump_file, " not a candidate, reference to "
1151 			 "a nonaliased component array\n");
1152 	      continue;
1153 	    }
1154 	  if (!is_gimple_reg (parm))
1155 	    {
1156 	      if (dump_file && (dump_flags & TDF_DETAILS))
1157 		fprintf (dump_file, " not a candidate, a reference which is "
1158 			 "not a gimple register (probably addressable)\n");
1159 	      continue;
1160 	    }
1161 	  if (is_va_list_type (type))
1162 	    {
1163 	      if (dump_file && (dump_flags & TDF_DETAILS))
1164 		fprintf (dump_file, " not a candidate, reference to "
1165 			 "a va list\n");
1166 	      continue;
1167 	    }
1168 	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1169 					&desc->ptr_pt_count))
1170 	    {
1171 	      if (dump_file && (dump_flags & TDF_DETAILS))
1172 		fprintf (dump_file, " not a candidate, reference has "
1173 			 "nonarg uses\n");
1174 	      continue;
1175 	    }
1176 	  desc->by_ref = true;
1177 	}
1178       else if (!AGGREGATE_TYPE_P (type))
1179 	{
1180 	  /* This is in an else branch because scalars passed by reference are
1181 	     still candidates to be passed by value.  */
1182 	  if (dump_file && (dump_flags & TDF_DETAILS))
1183 	    fprintf (dump_file, " not a candidate, not an aggregate\n");
1184 	  continue;
1185 	}
1186 
1187       if (!COMPLETE_TYPE_P (type))
1188 	{
1189 	  if (dump_file && (dump_flags & TDF_DETAILS))
1190 	    fprintf (dump_file, " not a candidate, not a complete type\n");
1191 	  continue;
1192 	}
1193       if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1194 	{
1195 	  if (dump_file && (dump_flags & TDF_DETAILS))
1196 	    fprintf (dump_file, " not a candidate, size not representable\n");
1197 	  continue;
1198 	}
1199       unsigned HOST_WIDE_INT type_size
1200 	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1201       if (type_size == 0
1202 	  || type_size >= ISRA_ARG_SIZE_LIMIT)
1203 	{
1204 	  if (dump_file && (dump_flags & TDF_DETAILS))
1205 	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
1206 	  continue;
1207 	}
1208       if (type_internals_preclude_sra_p (type, &msg))
1209 	{
1210 	  if (dump_file && (dump_flags & TDF_DETAILS))
1211 	      fprintf (dump_file, " not a candidate, %s\n", msg);
1212 	  continue;
1213 	}
1214 
1215       if (dump_file && (dump_flags & TDF_DETAILS))
1216 	fprintf (dump_file, " is a candidate\n");
1217 
1218       ret = true;
1219       desc->split_candidate = true;
1220       if (desc->by_ref)
1221 	desc->deref_index = by_ref_count++;
1222     }
1223   return ret;
1224 }
1225 
1226 /* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1227    found, which happens if DECL is for a static chain.  */
1228 
1229 static gensum_param_desc *
1230 get_gensum_param_desc (tree decl)
1231 {
1232   gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1233   gensum_param_desc **slot = decl2desc->get (decl);
1234   if (!slot)
1235     /* This can happen for static chains which we cannot handle so far.  */
1236     return NULL;
1237   gcc_checking_assert (*slot);
1238   return *slot;
1239 }
1240 
1241 
1242 /* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1243    write REASON to the dump file if there is one.  */
1244 
1245 static void
1246 disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1247 {
1248   if (!desc->split_candidate)
1249     return;
1250 
1251   if (dump_file && (dump_flags & TDF_DETAILS))
1252     fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1253 	     desc->param_number, reason);
1254 
1255   desc->split_candidate = false;
1256 }
1257 
1258 /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1259    there is one.  */
1260 
1261 static void
1262 disqualify_split_candidate (tree decl, const char *reason)
1263 {
1264   gensum_param_desc *desc = get_gensum_param_desc (decl);
1265   if (desc)
1266     disqualify_split_candidate (desc, reason);
1267 }
1268 
1269 /* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
1270    first, check that there are not too many of them already.  If so, do not
1271    allocate anything and return NULL.  */
1272 
1273 static gensum_param_access *
1274 allocate_access (gensum_param_desc *desc,
1275 		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1276 {
1277   if (desc->access_count
1278       == (unsigned) param_ipa_sra_max_replacements)
1279     {
1280       disqualify_split_candidate (desc, "Too many replacement candidates");
1281       return NULL;
1282     }
1283 
1284   gensum_param_access *access
1285     = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1286 					     sizeof (gensum_param_access));
1287   memset (access, 0, sizeof (*access));
1288   access->offset = offset;
1289   access->size = size;
1290   return access;
1291 }
1292 
1293 /* In what context scan_expr_access has been called, whether it deals with a
1294    load, a function argument, or a store.  Please note that in rare
1295    circumstances when it is not clear if the access is a load or store,
1296    ISRA_CTX_STORE is used too.  */
1297 
1298 enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1299 
1300 /* Return an access describing memory access to the variable described by DESC
1301    at OFFSET with SIZE in context CTX, starting at pointer to the linked list
1302    at a certain tree level FIRST.  Attempt to create it and put into the
1303    appropriate place in the access tree if does not exist, but fail and return
1304    NULL if there are already too many accesses, if it would create a partially
1305    overlapping access or if an access would end up within a pre-existing
1306    non-call access.  */
1307 
1308 static gensum_param_access *
1309 get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1310 	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1311 {
1312   gensum_param_access *access = *first, **ptr = first;
1313 
1314   if (!access)
1315     {
1316       /* No pre-existing access at this level, just create it.  */
1317       gensum_param_access *a = allocate_access (desc, offset, size);
1318       if (!a)
1319 	return NULL;
1320       *first = a;
1321       return *first;
1322     }
1323 
1324   if (access->offset >= offset + size)
1325     {
1326       /* We want to squeeze it in front of the very first access, just do
1327 	 it.  */
1328       gensum_param_access *r = allocate_access (desc, offset, size);
1329       if (!r)
1330 	return NULL;
1331       r->next_sibling = access;
1332       *first = r;
1333       return r;
1334     }
1335 
1336   /* Skip all accesses that have to come before us until the next sibling is
1337      already too far.  */
1338   while (offset >= access->offset + access->size
1339 	 && access->next_sibling
1340 	 && access->next_sibling->offset < offset + size)
1341     {
1342       ptr = &access->next_sibling;
1343       access = access->next_sibling;
1344     }
1345 
1346   /* At this point we know we do not belong before access.  */
1347   gcc_assert (access->offset < offset + size);
1348 
1349   if (access->offset == offset && access->size == size)
1350     /* We found what we were looking for.  */
1351     return access;
1352 
1353   if (access->offset <= offset
1354       && access->offset + access->size >= offset + size)
1355     {
1356       /* We fit into access which is larger than us.  We need to find/create
1357 	 something below access.  But we only allow nesting in call
1358 	 arguments.  */
1359       if (access->nonarg)
1360 	return NULL;
1361 
1362       return get_access_1 (desc, &access->first_child, offset, size, ctx);
1363     }
1364 
1365   if (offset <= access->offset
1366       && offset + size  >= access->offset + access->size)
1367     /* We are actually bigger than access, which fully fits into us, take its
1368        place and make all accesses fitting into it its children.  */
1369     {
1370       /* But first, we only allow nesting in call arguments so check if that is
1371 	 what we are trying to represent.  */
1372       if (ctx != ISRA_CTX_ARG)
1373 	return NULL;
1374 
1375       gensum_param_access *r = allocate_access (desc, offset, size);
1376       if (!r)
1377 	return NULL;
1378       r->first_child = access;
1379 
1380       while (access->next_sibling
1381 	     && access->next_sibling->offset < offset + size)
1382 	access = access->next_sibling;
1383       if (access->offset + access->size > offset + size)
1384 	{
1385 	  /* This must be a different access, which are sorted, so the
1386 	     following must be true and this signals a partial overlap.  */
1387 	  gcc_assert (access->offset > offset);
1388 	  return NULL;
1389 	}
1390 
1391       r->next_sibling = access->next_sibling;
1392       access->next_sibling = NULL;
1393       *ptr = r;
1394       return r;
1395     }
1396 
1397   if (offset >= access->offset + access->size)
1398     {
1399       /* We belong after access.  */
1400       gensum_param_access *r = allocate_access (desc, offset, size);
1401       if (!r)
1402 	return NULL;
1403       r->next_sibling = access->next_sibling;
1404       access->next_sibling = r;
1405       return r;
1406     }
1407 
1408   if (offset < access->offset)
1409     {
1410       /* We know the following, otherwise we would have created a
1411 	 super-access. */
1412       gcc_checking_assert (offset + size < access->offset + access->size);
1413       return NULL;
1414     }
1415 
1416   if (offset + size > access->offset + access->size)
1417     {
1418       /* Likewise.  */
1419       gcc_checking_assert (offset > access->offset);
1420       return NULL;
1421     }
1422 
1423   gcc_unreachable ();
1424 }
1425 
1426 /* Return an access describing memory access to the variable described by DESC
1427    at OFFSET with SIZE and mark it as used in context CTX.  Attempt to create
1428    it if it does not exist, but fail and return NULL if there are
1429    already too many accesses, if it would create a partially overlapping access
1430    or if an access would end up in a non-call access.  */
1431 
1432 static gensum_param_access *
1433 get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1434 	    isra_scan_context ctx)
1435 {
1436   gcc_checking_assert (desc->split_candidate);
1437 
1438   gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1439 					      size, ctx);
1440   if (!access)
1441     {
1442       disqualify_split_candidate (desc,
1443 				  "Bad access overlap or too many accesses");
1444       return NULL;
1445     }
1446 
1447   switch (ctx)
1448     {
1449     case ISRA_CTX_STORE:
1450       gcc_assert (!desc->by_ref);
1451       /* Fall-through */
1452     case ISRA_CTX_LOAD:
1453       access->nonarg = true;
1454       break;
1455     case ISRA_CTX_ARG:
1456       break;
1457     }
1458 
1459   return access;
1460 }
1461 
1462 /* Verify that parameter access tree starting with ACCESS is in good shape.
1463    PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
1464    ACCESS or zero if there is none.  */
1465 
1466 static bool
1467 verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1468 		      HOST_WIDE_INT parent_size)
1469 {
1470   while (access)
1471     {
1472       gcc_assert (access->offset >= 0 && access->size >= 0);
1473 
1474       if (parent_size != 0)
1475 	{
1476 	  if (access->offset < parent_offset)
1477 	    {
1478 	      error ("Access offset before parent offset");
1479 	      return true;
1480 	    }
1481 	  if (access->size >= parent_size)
1482 	    {
1483 	      error ("Access size greater or equal to its parent size");
1484 	      return true;
1485 	    }
1486 	  if (access->offset + access->size > parent_offset + parent_size)
1487 	    {
1488 	      error ("Access terminates outside of its parent");
1489 	      return true;
1490 	    }
1491 	}
1492 
1493       if (verify_access_tree_1 (access->first_child, access->offset,
1494 				access->size))
1495 	return true;
1496 
1497       if (access->next_sibling
1498 	  && (access->next_sibling->offset < access->offset + access->size))
1499 	{
1500 	  error ("Access overlaps with its sibling");
1501 	  return true;
1502 	}
1503 
1504       access = access->next_sibling;
1505     }
1506   return false;
1507 }
1508 
1509 /* Verify that parameter access tree starting with ACCESS is in good shape,
1510    halt compilation and dump the tree to stderr if not.  */
1511 
1512 DEBUG_FUNCTION void
1513 isra_verify_access_tree (gensum_param_access *access)
1514 {
1515   if (verify_access_tree_1 (access, 0, 0))
1516     {
1517       for (; access; access = access->next_sibling)
1518         dump_gensum_access (stderr, access, 2);
1519       internal_error ("IPA-SRA access verification failed");
1520     }
1521 }
1522 
1523 
1524 /* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1525    GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */
1526 
1527 static bool
1528 asm_visit_addr (gimple *, tree op, tree, void *)
1529 {
1530   op = get_base_address (op);
1531   if (op
1532       && TREE_CODE (op) == PARM_DECL)
1533     disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1534 
1535   return false;
1536 }
1537 
1538 /* Mark a dereference of parameter identified by DESC of distance DIST in a
1539    basic block BB, unless the BB has already been marked as potentially
1540    final.  */
1541 
1542 static void
1543 mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
1544 		       basic_block bb)
1545 {
1546   gcc_assert (desc->by_ref);
1547   gcc_checking_assert (desc->split_candidate);
1548 
1549   if (bitmap_bit_p (final_bbs, bb->index))
1550     return;
1551 
1552   int idx = bb->index * by_ref_count + desc->deref_index;
1553   if (bb_dereferences[idx] < dist)
1554     bb_dereferences[idx] = dist;
1555 }
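
/* Editorial note: bb_dereferences is a flattened two-dimensional array, so
   with, say, by_ref_count == 2, the entry for a parameter with deref_index 1
   in the basic block with index 7 lives at bb_dereferences[7 * 2 + 1].  The
   stored value is the largest offset + size (in bits) known to be
   dereferenced in that block.  */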
1556 
1557 /* Return true if any potential replacements should use NEW_TYPE as opposed
1558    to the previously recorded OLD_TYPE.  */
1559 
1560 static bool
1561 type_prevails_p (tree old_type, tree new_type)
1562 {
1563   if (old_type == new_type)
1564     return false;
1565 
1566   /* Non-aggregates are always better.  */
1567   if (!is_gimple_reg_type (old_type)
1568       && is_gimple_reg_type (new_type))
1569     return true;
1570   if (is_gimple_reg_type (old_type)
1571       && !is_gimple_reg_type (new_type))
1572     return false;
1573 
1574   /* Prefer any complex or vector type over any other scalar type.  */
1575   if (TREE_CODE (old_type) != COMPLEX_TYPE
1576       && TREE_CODE (old_type) != VECTOR_TYPE
1577       && (TREE_CODE (new_type) == COMPLEX_TYPE
1578 	  || TREE_CODE (new_type) == VECTOR_TYPE))
1579     return true;
1580   if ((TREE_CODE (old_type) == COMPLEX_TYPE
1581        || TREE_CODE (old_type) == VECTOR_TYPE)
1582       && TREE_CODE (new_type) != COMPLEX_TYPE
1583       && TREE_CODE (new_type) != VECTOR_TYPE)
1584     return false;
1585 
1586   /* Use the integral type with the bigger precision.  */
1587   if (INTEGRAL_TYPE_P (old_type)
1588       && INTEGRAL_TYPE_P (new_type))
1589     return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1590 
1591   /* Attempt to disregard any integral type with non-full precision.  */
1592   if (INTEGRAL_TYPE_P (old_type)
1593       && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1594 	  != TYPE_PRECISION (old_type)))
1595     return true;
1596   if (INTEGRAL_TYPE_P (new_type)
1597       && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1598 	  != TYPE_PRECISION (new_type)))
1599     return false;
1600   /* Stabilize the selection.  */
1601   return TYPE_UID (old_type) < TYPE_UID (new_type);
1602 }
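
/* Editorial examples of the ordering above (not from the original sources):
   given an OLD_TYPE of short and a NEW_TYPE of int, int prevails because of
   its bigger precision; a vector or complex type prevails over any other
   scalar; and an integral type whose precision does not fill its size tends
   to be disregarded in favor of the other type.  The final TYPE_UID
   comparison only makes the choice deterministic.  */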
1603 
1604 /* When scanning an expression which is a call argument, this structure
1605    specifies the call and the position of the argument.  */
1606 
1607 struct scan_call_info
1608 {
1609   /* Call graph edge representing the call. */
1610   cgraph_edge *cs;
1611   /* Total number of arguments in the call.  */
1612   unsigned argument_count;
1613   /* Number of the actual argument being scanned.  */
1614   unsigned arg_idx;
1615 };
1616 
1617 /* Record a use of UNIT_SIZE bytes at UNIT_OFFSET of a parameter described by
1618    DESC in a call argument described by CALL_INFO.  */
1619 
1620 static void
1621 record_nonregister_call_use (gensum_param_desc *desc,
1622 			     scan_call_info *call_info,
1623 			     unsigned unit_offset, unsigned unit_size)
1624 {
1625   isra_call_summary *csum = call_sums->get_create (call_info->cs);
1626   csum->init_inputs (call_info->argument_count);
1627 
1628   isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1629   param_flow->aggregate_pass_through = true;
1630   set_single_param_flow_source (param_flow, desc->param_number);
1631   param_flow->unit_offset = unit_offset;
1632   param_flow->unit_size = unit_size;
1633   desc->call_uses++;
1634 }
1635 
1636 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1637    modification.  */
1638 
1639 static bool
1640 mark_maybe_modified (ao_ref *, tree, void *data)
1641 {
1642   bool *maybe_modified = (bool *) data;
1643   *maybe_modified = true;
1644   return true;
1645 }
1646 
1647 /* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
1648    specifies whether EXPR is used in a load, a store or as a call argument.
1649    BB must be the basic block in which EXPR resides.  If CTX specifies call
1650    argument context, CALL_INFO must describe that call and argument position,
1651    otherwise it is ignored.  */
1652 
1653 static void
1654 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1655 		  basic_block bb, scan_call_info *call_info = NULL)
1656 {
1657   poly_int64 poffset, psize, pmax_size;
1658   HOST_WIDE_INT offset, size, max_size;
1659   tree base;
1660   bool deref = false;
1661   bool reverse;
1662 
1663   if (TREE_CODE (expr) == BIT_FIELD_REF
1664       || TREE_CODE (expr) == IMAGPART_EXPR
1665       || TREE_CODE (expr) == REALPART_EXPR)
1666     expr = TREE_OPERAND (expr, 0);
1667 
1668   base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1669 
1670   if (TREE_CODE (base) == MEM_REF)
1671     {
1672       tree op = TREE_OPERAND (base, 0);
1673       if (TREE_CODE (op) != SSA_NAME
1674 	  || !SSA_NAME_IS_DEFAULT_DEF (op))
1675 	return;
1676       base = SSA_NAME_VAR (op);
1677       if (!base)
1678 	return;
1679       deref = true;
1680     }
1681   if (TREE_CODE (base) != PARM_DECL)
1682     return;
1683 
1684   gensum_param_desc *desc = get_gensum_param_desc (base);
1685   if (!desc || !desc->split_candidate)
1686     return;
1687 
1688   if (!poffset.is_constant (&offset)
1689       || !psize.is_constant (&size)
1690       || !pmax_size.is_constant (&max_size))
1691     {
1692       disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1693 				  "access.");
1694       return;
1695     }
1696   if (size < 0 || size != max_size)
1697     {
1698       disqualify_split_candidate (desc, "Encountered a variable sized access.");
1699       return;
1700     }
1701   if (TREE_CODE (expr) == COMPONENT_REF
1702       && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1703     {
1704       disqualify_split_candidate (desc, "Encountered a bit-field access.");
1705       return;
1706     }
1707   if (offset < 0)
1708     {
1709       disqualify_split_candidate (desc, "Encountered an access at a "
1710 				  "negative offset.");
1711       return;
1712     }
1713   gcc_assert ((offset % BITS_PER_UNIT) == 0);
1714   gcc_assert ((size % BITS_PER_UNIT) == 0);
1715   if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1716       || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1717     {
1718       disqualify_split_candidate (desc, "Encountered an access with too big "
1719 				  "offset or size");
1720       return;
1721     }
1722 
1723   tree type = TREE_TYPE (expr);
1724   unsigned int exp_align = get_object_alignment (expr);
1725 
1726   if (exp_align < TYPE_ALIGN (type))
1727     {
1728       disqualify_split_candidate (desc, "Underaligned access.");
1729       return;
1730     }
1731 
1732   if (deref)
1733     {
1734       if (!desc->by_ref)
1735 	{
1736 	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1737 	  return;
1738 	}
1739       else if (ctx == ISRA_CTX_STORE)
1740 	{
1741 	  disqualify_split_candidate (desc, "Storing to data passed by "
1742 				      "reference.");
1743 	  return;
1744 	}
1745 
1746       if (!aa_walking_limit)
1747 	{
1748 	  disqualify_split_candidate (desc, "Out of alias analysis step "
1749 				      "limit.");
1750 	  return;
1751 	}
1752 
1753       gcc_checking_assert (gimple_vuse (stmt));
1754       bool maybe_modified = false;
1755       ao_ref ar;
1756 
1757       ao_ref_init (&ar, expr);
1758       bitmap visited = BITMAP_ALLOC (NULL);
1759       int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1760 				       mark_maybe_modified, &maybe_modified,
1761 				       &visited, NULL, aa_walking_limit);
1762       BITMAP_FREE (visited);
1763       if (walked > 0)
1764 	{
1765 	  gcc_assert (aa_walking_limit > walked);
1766 	  aa_walking_limit = aa_walking_limit - walked;
1767 	}
1768       if (walked < 0)
1769 	aa_walking_limit = 0;
1770       if (maybe_modified || walked < 0)
1771 	{
1772 	  disqualify_split_candidate (desc, "Data passed by reference possibly "
1773 				      "modified through an alias.");
1774 	  return;
1775 	}
1776       else
1777 	mark_param_dereference (desc, offset + size, bb);
1778     }
1779   else
1780     /* Pointer parameters with direct uses should have been ruled out by
1781        analyzing SSA default def when looking at the parameters.  */
1782     gcc_assert (!desc->by_ref);
1783 
1784   gensum_param_access *access = get_access (desc, offset, size, ctx);
1785   if (!access)
1786     return;
1787 
1788   if (ctx == ISRA_CTX_ARG)
1789     {
1790       gcc_checking_assert (call_info);
1791 
1792       if (!deref)
1793 	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1794 				     size / BITS_PER_UNIT);
1795       else
1796 	/* This is not a pass-through of a pointer, this is a use like any
1797 	   other.  */
1798 	access->nonarg = true;
1799     }
1800 
1801   if (!access->type)
1802     {
1803       access->type = type;
1804       access->alias_ptr_type = reference_alias_ptr_type (expr);
1805       access->reverse = reverse;
1806     }
1807   else
1808     {
1809       if (exp_align < TYPE_ALIGN (access->type))
1810 	{
1811 	  disqualify_split_candidate (desc, "Reference has lower alignment "
1812 				      "than a previous one.");
1813 	  return;
1814 	}
1815       if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1816 	{
1817 	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
1818 	  return;
1819 	}
1820       if (access->reverse != reverse)
1821 	{
1822 	  disqualify_split_candidate (desc, "Both normal and reverse "
1823 				      "scalar storage order.");
1824 	  return;
1825 	}
1826       if (!deref
1827 	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1828 	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1829 	{
1830 	  /* We need the same aggregate type on all accesses to be able to
1831 	     distinguish transformation spots from pass-through arguments in
1832 	     the transformation phase.  */
1833 	  disqualify_split_candidate (desc, "We do not support aggregate "
1834 				      "type punning.");
1835 	  return;
1836 	}
1837 
1838       if (type_prevails_p (access->type, type))
1839 	 access->type = type;
1840     }
1841 }
1842 
1843 /* Scan body function described by NODE and FUN and create access trees for
1844    parameters.  */
1845 
1846 static void
1847 scan_function (cgraph_node *node, struct function *fun)
1848 {
1849   basic_block bb;
1850 
1851   FOR_EACH_BB_FN (bb, fun)
1852     {
1853       gimple_stmt_iterator gsi;
1854       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1855 	{
1856 	  gimple *stmt = gsi_stmt (gsi);
1857 
1858 	  if (stmt_can_throw_external (fun, stmt))
1859 	    bitmap_set_bit (final_bbs, bb->index);
1860 	  switch (gimple_code (stmt))
1861 	    {
1862 	    case GIMPLE_RETURN:
1863 	      {
1864 		tree t = gimple_return_retval (as_a <greturn *> (stmt));
1865 		if (t != NULL_TREE)
1866 		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1867 		bitmap_set_bit (final_bbs, bb->index);
1868 	      }
1869 	      break;
1870 
1871 	    case GIMPLE_ASSIGN:
1872 	      if (gimple_assign_single_p (stmt)
1873 		  && !gimple_clobber_p (stmt))
1874 		{
1875 		  tree rhs = gimple_assign_rhs1 (stmt);
1876 		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1877 		  tree lhs = gimple_assign_lhs (stmt);
1878 		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1879 		}
1880 	      break;
1881 
1882 	    case GIMPLE_CALL:
1883 	      {
1884 		unsigned argument_count = gimple_call_num_args (stmt);
1885 		isra_scan_context ctx = ISRA_CTX_ARG;
1886 		scan_call_info call_info, *call_info_p = &call_info;
1887 		if (gimple_call_internal_p (stmt))
1888 		  {
1889 		    call_info_p = NULL;
1890 		    ctx = ISRA_CTX_LOAD;
1891 		    internal_fn ifn = gimple_call_internal_fn (stmt);
1892 		    if (internal_store_fn_p (ifn))
1893 		      ctx = ISRA_CTX_STORE;
1894 		  }
1895 		else
1896 		  {
1897 		    call_info.cs = node->get_edge (stmt);
1898 		    call_info.argument_count = argument_count;
1899 		  }
1900 
1901 		for (unsigned i = 0; i < argument_count; i++)
1902 		  {
1903 		    call_info.arg_idx = i;
1904 		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
1905 				      ctx, bb, call_info_p);
1906 		  }
1907 
1908 		tree lhs = gimple_call_lhs (stmt);
1909 		if (lhs)
1910 		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1911 		int flags = gimple_call_flags (stmt);
1912 		if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1913 		    || (flags & ECF_LOOPING_CONST_OR_PURE))
1914 		  bitmap_set_bit (final_bbs, bb->index);
1915 	      }
1916 	      break;
1917 
1918 	    case GIMPLE_ASM:
1919 	      {
1920 		gasm *asm_stmt = as_a <gasm *> (stmt);
1921 		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1922 					       asm_visit_addr);
1923 		bitmap_set_bit (final_bbs, bb->index);
1924 
1925 		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1926 		  {
1927 		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1928 		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1929 		  }
1930 		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1931 		  {
1932 		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1933 		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1934 		  }
1935 	      }
1936 	      break;
1937 
1938 	    default:
1939 	      break;
1940 	    }
1941 	}
1942     }
1943 }
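
/* Editorial summary of the scan above: a basic block ends up in final_bbs
   when it contains a return statement, a statement that can throw
   externally, a call that is not const/pure (or is a looping const/pure
   one), or an asm statement -- i.e. any point at which the function may
   terminate or data may escape, which later caps how far dereference
   distances may be propagated.  */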
1944 
1945 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1946    return statements, or if results of any operations it is involved in are
1947    only used in return statements.  ANALYZED is a bitmap that tracks which SSA
1948    names we have already started investigating.  */
1949 
1950 static bool
1951 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1952 {
1953   bool res = true;
1954   imm_use_iterator imm_iter;
1955   gimple *stmt;
1956 
1957   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1958     {
1959       if (is_gimple_debug (stmt))
1960 	continue;
1961 
1962       if (gimple_code (stmt) == GIMPLE_RETURN)
1963 	{
1964 	  tree t = gimple_return_retval (as_a <greturn *> (stmt));
1965 	  if (t != name)
1966 	    {
1967 	      res = false;
1968 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
1969 	    }
1970 	}
1971       else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1972 	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1973 		   || gimple_code (stmt) == GIMPLE_PHI))
1974 	{
1975 	  /* TODO: And perhaps for const function calls too?  */
1976 	  tree lhs;
1977 	  if (gimple_code (stmt) == GIMPLE_PHI)
1978 	    lhs = gimple_phi_result (stmt);
1979 	  else
1980 	    lhs = gimple_assign_lhs (stmt);
1981 
1982 	  if (TREE_CODE (lhs) != SSA_NAME)
1983 	    {
1984 	      res = false;
1985 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
1986 	    }
1987 	  gcc_assert (!gimple_vdef (stmt));
1988 	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1989 	      && !ssa_name_only_returned_p (fun, lhs, analyzed))
1990 	    {
1991 	      res = false;
1992 	      BREAK_FROM_IMM_USE_STMT (imm_iter);
1993 	    }
1994 	}
1995       else
1996 	{
1997 	  res = false;
1998 	  BREAK_FROM_IMM_USE_STMT (imm_iter);
1999 	}
2000     }
2001   return res;
2002 }
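
/* Editorial illustration (hypothetical GIMPLE, not from the original
   sources): ssa_name_only_returned_p holds for _1 in

     _1 = x_2(D) + 1;
     return _1;

   because the only transitive use of _1 is the return statement, whereas
   storing _1 to memory or passing it to a call would make it fail.  */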
2003 
2004 /* Inspect the uses of the return value of the call associated with CS, and if
2005    it is not used or if it is only used to construct the return value of the
2006    caller, mark it as such in call or caller summary.  Also check for
2007    misaligned arguments.  */
2008 
2009 static void
2010 isra_analyze_call (cgraph_edge *cs)
2011 {
2012   gcall *call_stmt = cs->call_stmt;
2013   unsigned count = gimple_call_num_args (call_stmt);
2014   isra_call_summary *csum = call_sums->get_create (cs);
2015 
2016   for (unsigned i = 0; i < count; i++)
2017     {
2018       tree arg = gimple_call_arg (call_stmt, i);
2019       if (is_gimple_reg (arg))
2020 	continue;
2021 
2022       tree offset;
2023       poly_int64 bitsize, bitpos;
2024       machine_mode mode;
2025       int unsignedp, reversep, volatilep = 0;
2026       get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2027 			   &unsignedp, &reversep, &volatilep);
2028       if (!multiple_p (bitpos, BITS_PER_UNIT))
2029 	{
2030 	  csum->m_bit_aligned_arg = true;
2031 	  break;
2032 	}
2033     }
2034 
2035   tree lhs = gimple_call_lhs (call_stmt);
2036   if (lhs)
2037     {
2038       /* TODO: Also detect aggregates on a LHS of a call that are only returned
2039 	 from this function (without being read anywhere).  */
2040       if (TREE_CODE (lhs) == SSA_NAME)
2041 	{
2042 	  bitmap analyzed = BITMAP_ALLOC (NULL);
2043 	  if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2044 					lhs, analyzed))
2045 	    csum->m_return_returned = true;
2046 	  BITMAP_FREE (analyzed);
2047 	}
2048     }
2049   else
2050     csum->m_return_ignored = true;
2051 }
2052 
2053 /* Look at all calls going out of NODE and perform all analyses necessary for
2054    IPA-SRA that are not done at body scan time or that must be done even when
2055    the body is not scanned because the function is not a candidate.  */
2056 
2057 static void
2058 isra_analyze_all_outgoing_calls (cgraph_node *node)
2059 {
2060   for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2061     isra_analyze_call (cs);
2062   for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2063     isra_analyze_call (cs);
2064 }
2065 
2066 /* Dump a dereferences table with heading STR to file F.  */
2067 
2068 static void
2069 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2070 {
2071   basic_block bb;
2072 
2073   fprintf (f, "%s", str);
2074   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2075 		  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2076     {
2077       fprintf (f, "%4i  %i   ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2078       if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2079 	{
2080 	  int i;
2081 	  for (i = 0; i < by_ref_count; i++)
2082 	    {
2083 	      int idx = bb->index * by_ref_count + i;
2084 	      fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2085 	    }
2086 	}
2087       fprintf (f, "\n");
2088     }
2089   fprintf (f, "\n");
2090 }
2091 
2092 /* Propagate distances in bb_dereferences in the opposite direction than the
2093    control flow edges, in each step storing the maximum of the current value
2094    and the minimum of all successors.  These steps are repeated until the table
2095    stabilizes.  Note that BBs which might terminate the function (according to
2096    the final_bbs bitmap) are never updated in this way.  */
2097 
2098 static void
2099 propagate_dereference_distances (struct function *fun)
2100 {
2101   basic_block bb;
2102 
2103   if (dump_file && (dump_flags & TDF_DETAILS))
2104     dump_dereferences_table (dump_file, fun,
2105 			     "Dereference table before propagation:\n");
2106 
2107   auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2108   queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2109   FOR_EACH_BB_FN (bb, fun)
2110     {
2111       queue.quick_push (bb);
2112       bb->aux = bb;
2113     }
2114 
2115   while (!queue.is_empty ())
2116     {
2117       edge_iterator ei;
2118       edge e;
2119       bool change = false;
2120       int i;
2121 
2122       bb = queue.pop ();
2123       bb->aux = NULL;
2124 
2125       if (bitmap_bit_p (final_bbs, bb->index))
2126 	continue;
2127 
2128       for (i = 0; i < by_ref_count; i++)
2129 	{
2130 	  int idx = bb->index * by_ref_count + i;
2131 	  bool first = true;
2132 	  HOST_WIDE_INT inh = 0;
2133 
2134 	  FOR_EACH_EDGE (e, ei, bb->succs)
2135 	  {
2136 	    int succ_idx = e->dest->index * by_ref_count + i;
2137 
2138 	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2139 	      continue;
2140 
2141 	    if (first)
2142 	      {
2143 		first = false;
2144 		inh = bb_dereferences [succ_idx];
2145 	      }
2146 	    else if (bb_dereferences [succ_idx] < inh)
2147 	      inh = bb_dereferences [succ_idx];
2148 	  }
2149 
2150 	  if (!first && bb_dereferences[idx] < inh)
2151 	    {
2152 	      bb_dereferences[idx] = inh;
2153 	      change = true;
2154 	    }
2155 	}
2156 
2157       if (change)
2158 	FOR_EACH_EDGE (e, ei, bb->preds)
2159 	  {
2160 	    if (e->src->aux)
2161 	      continue;
2162 
2163 	    e->src->aux = e->src;
2164 	    queue.quick_push (e->src);
2165 	  }
2166     }
2167 
2168   if (dump_file && (dump_flags & TDF_DETAILS))
2169     dump_dereferences_table (dump_file, fun,
2170 			     "Dereference table after propagation:\n");
2171 }
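
/* Editorial example of the propagation above (not from the original
   sources): if a block that is not in final_bbs has two successors whose
   recorded distances for a parameter are 64 and 32, it may inherit
   min (64, 32) = 32 if that is bigger than its own value, because only a
   dereference certain to happen on every outgoing path can safely be
   assumed by callers.  */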
2172 
2173 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2174    children; return true if the parameter cannot be split, otherwise return
2175    false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must
2176    be the index of the entry BB in the function of PARM.  */
2177 
2178 static bool
2179 check_gensum_access (tree parm, gensum_param_desc *desc,
2180 		     gensum_param_access *access,
2181 		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2182 		     int entry_bb_index)
2183 {
2184   if (access->nonarg)
2185     {
2186       *only_calls = false;
2187       *nonarg_acc_size += access->size;
2188 
2189       if (access->first_child)
2190 	{
2191 	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
2192 	  return true;
2193 	}
2194     }
2195   /* Do not decompose a non-BLKmode param in a way that would create
2196      BLKmode params.  Especially for by-reference passing (thus,
2197      pointer-type param) this is hardly worthwhile.  */
2198   if (DECL_MODE (parm) != BLKmode
2199       && TYPE_MODE (access->type) == BLKmode)
2200     {
2201       disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2202       return true;
2203     }
2204 
2205   if (desc->by_ref)
2206     {
2207       int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2208       if ((access->offset + access->size) > bb_dereferences[idx])
2209 	{
2210 	  disqualify_split_candidate (desc, "Would create a possibly "
2211 				      "illegal dereference in a caller.");
2212 	  return true;
2213 	}
2214     }
2215 
2216   for (gensum_param_access *ch = access->first_child;
2217        ch;
2218        ch = ch->next_sibling)
2219     if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2220 			     entry_bb_index))
2221       return true;
2222 
2223   return false;
2224 }
2225 
2226 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2227    descriptor DESC.  */
2228 
2229 static void
2230 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2231 {
2232   param_access *to = ggc_cleared_alloc<param_access> ();
2233   gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2234   gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2235   to->unit_offset = from->offset / BITS_PER_UNIT;
2236   to->unit_size = from->size / BITS_PER_UNIT;
2237   to->type = from->type;
2238   to->alias_ptr_type = from->alias_ptr_type;
2239   to->certain = from->nonarg;
2240   to->reverse = from->reverse;
2241   vec_safe_push (desc->accesses, to);
2242 
2243   for (gensum_param_access *ch = from->first_child;
2244        ch;
2245        ch = ch->next_sibling)
2246     copy_accesses_to_ipa_desc (ch, desc);
2247 }
2248 
2249 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2250    possible transformations and store information about them in the function
2251    summary.  NODE, FUN and IFS are all various structures describing the
2252    currently analyzed function.  */
2253 
2254 static void
2255 process_scan_results (cgraph_node *node, struct function *fun,
2256 		      isra_func_summary *ifs,
2257 		      vec<gensum_param_desc> *param_descriptions)
2258 {
2259   bool check_pass_throughs = false;
2260   bool dereferences_propagated = false;
2261   tree parm = DECL_ARGUMENTS (node->decl);
2262   unsigned param_count = param_descriptions->length();
2263 
2264   for (unsigned desc_index = 0;
2265        desc_index < param_count;
2266        desc_index++, parm = DECL_CHAIN (parm))
2267     {
2268       gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2269       if (!desc->split_candidate)
2270 	continue;
2271 
2272       if (flag_checking)
2273 	isra_verify_access_tree (desc->accesses);
2274 
2275       if (!dereferences_propagated
2276 	  && desc->by_ref
2277 	  && desc->accesses)
2278 	{
2279 	  propagate_dereference_distances (fun);
2280 	  dereferences_propagated = true;
2281 	}
2282 
2283       HOST_WIDE_INT nonarg_acc_size = 0;
2284       bool only_calls = true;
2285       bool check_failed = false;
2286 
2287       int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2288       for (gensum_param_access *acc = desc->accesses;
2289 	   acc;
2290 	   acc = acc->next_sibling)
2291 	if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2292 				 entry_bb_index))
2293 	  {
2294 	    check_failed = true;
2295 	    break;
2296 	  }
2297       if (check_failed)
2298 	continue;
2299 
2300       if (only_calls)
2301 	desc->locally_unused = true;
2302 
2303       HOST_WIDE_INT cur_param_size
2304 	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2305       HOST_WIDE_INT param_size_limit;
2306       if (!desc->by_ref || optimize_function_for_size_p (fun))
2307 	param_size_limit = cur_param_size;
2308       else
2309 	param_size_limit
2310 	  = opt_for_fn (node->decl,
2311 			param_ipa_sra_ptr_growth_factor) * cur_param_size;
2312       if (nonarg_acc_size > param_size_limit
2313 	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2314 	{
2315 	  disqualify_split_candidate (desc, "Would result into a too big set "
2316 				      "of replacements.");
2317 	}
2318       else
2319 	{
2320 	  /* create_parameter_descriptors makes sure unit sizes of all
2321 	     candidate parameters fit unsigned integers restricted to
2322 	     ISRA_ARG_SIZE_LIMIT.  */
2323 	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2324 	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2325 	  if (desc->split_candidate && desc->ptr_pt_count)
2326 	    {
2327 	      gcc_assert (desc->by_ref);
2328 	      check_pass_throughs = true;
2329 	    }
2330 	}
2331     }
2332 
2333   /* When a pointer parameter is passed through to a callee in which it is
2334      used only to read one or a few items, we can attempt to transform it
2335      to obtaining and passing through the items instead of the pointer.  But we
2336      must take extra care that 1) we do not introduce any segfault by moving
2337      dereferences above control flow and that 2) the data is not modified
2338      through an alias in this function.  The IPA analysis must not introduce
2339      any access candidates unless it can prove both.
2340 
2341      The current solution is very crude as it consists of ensuring that the
2342      call postdominates entry BB and that the definition of VUSE of the call is
2343      default definition.  TODO: For non-recursive callees in the same
2344      compilation unit we could do better by doing analysis in topological order
2345      and looking into access candidates of callees, using their alias_ptr_types
2346      to attempt real AA.  We could also use the maximum known dereferenced
2347      offset in this function at IPA level.
2348 
2349      TODO: Measure the overhead and the effect of just being pessimistic.
2350      Maybe this is only -O3 material?  */
2351 
2352   bool pdoms_calculated = false;
2353   if (check_pass_throughs)
2354     for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2355       {
2356 	gcall *call_stmt = cs->call_stmt;
2357 	tree vuse = gimple_vuse (call_stmt);
2358 
2359 	/* If the callee is a const function, we don't get a VUSE.  In such a
2360 	   case there will be no memory accesses in the called function (or the
2361 	   const attribute is wrong) and then we just don't care.  */
2362 	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2363 
2364 	unsigned count = gimple_call_num_args (call_stmt);
2365 	isra_call_summary *csum = call_sums->get_create (cs);
2366 	csum->init_inputs (count);
2367 	csum->m_before_any_store = uses_memory_as_obtained;
2368 	for (unsigned argidx = 0; argidx < count; argidx++)
2369 	  {
2370 	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
2371 	      continue;
2372 	    unsigned pidx
2373 	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2374 	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
2375 	    if (!desc->split_candidate)
2376 	      {
2377 		csum->m_arg_flow[argidx].pointer_pass_through = false;
2378 		continue;
2379 	      }
2380 	    if (!uses_memory_as_obtained)
2381 	      continue;
2382 
2383 	    /* Post-dominator check placed last, hoping that it usually won't
2384 	       be needed.  */
2385 	    if (!pdoms_calculated)
2386 	      {
2387 		gcc_checking_assert (cfun);
2388 		add_noreturn_fake_exit_edges ();
2389 		connect_infinite_loops_to_exit ();
2390 		calculate_dominance_info (CDI_POST_DOMINATORS);
2391 		pdoms_calculated = true;
2392 	      }
2393 	    if (dominated_by_p (CDI_POST_DOMINATORS,
2394 				gimple_bb (call_stmt),
2395 				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2396 	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2397 	  }
2398 
2399       }
2400   if (pdoms_calculated)
2401     {
2402       free_dominance_info (CDI_POST_DOMINATORS);
2403       remove_fake_exit_edges ();
2404     }
2405 
2406   /* TODO: Add early exit if we disqualified everything.  This also requires
2407      that we either relax the restriction that
2408      ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2409      or store the number of parameters to IPA-SRA function summary and use that
2410      when just removing params.  */
2411 
2412   vec_safe_reserve_exact (ifs->m_parameters, param_count);
2413   ifs->m_parameters->quick_grow_cleared (param_count);
2414   for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2415     {
2416       gensum_param_desc *s = &(*param_descriptions)[desc_index];
2417       isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2418 
2419       d->param_size_limit = s->param_size_limit;
2420       d->size_reached = s->nonarg_acc_size;
2421       d->locally_unused = s->locally_unused;
2422       d->split_candidate = s->split_candidate;
2423       d->by_ref = s->by_ref;
2424 
2425       for (gensum_param_access *acc = s->accesses;
2426 	   acc;
2427 	   acc = acc->next_sibling)
2428 	copy_accesses_to_ipa_desc (acc, d);
2429     }
2430 
2431   if (dump_file)
2432     dump_isra_param_descriptors (dump_file, node->decl, ifs);
2433 }
2434 
2435 /* Return true if there are any overlaps among certain accesses of DESC.  If
2436    non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2437    too.  DESC is assumed to be a split candidate that is not locally
2438    unused.  */
2439 
2440 static bool
2441 overlapping_certain_accesses_p (isra_param_desc *desc,
2442 				bool *certain_access_present_p)
2443 {
2444   unsigned pclen = vec_safe_length (desc->accesses);
2445   for (unsigned i = 0; i < pclen; i++)
2446     {
2447       param_access *a1 = (*desc->accesses)[i];
2448 
2449       if (!a1->certain)
2450 	continue;
2451       if (certain_access_present_p)
2452 	*certain_access_present_p = true;
2453       for (unsigned j = i + 1; j < pclen; j++)
2454 	{
2455 	  param_access *a2 = (*desc->accesses)[j];
2456 	  if (a2->certain
2457 	      && a1->unit_offset < a2->unit_offset + a2->unit_size
2458 	      && a1->unit_offset + a1->unit_size > a2->unit_offset)
2459 	    return true;
2460 	}
2461     }
2462   return false;
2463 }
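
/* Editorial note on the interval test above: with unit (byte) offsets and
   sizes, accesses (0, 4) and (4, 4) do not overlap since 0 + 4 <= 4, while
   (0, 8) and (4, 4) do, since 0 < 4 + 4 and 0 + 8 > 4.  */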
2464 
2465 /* Check for any overlaps of certain param accesses among splitting candidates
2466    and signal an ICE if there are any.  If CERTAIN_MUST_EXIST is set, also
2467    check that used splitting candidates have at least one certain access.  */
2468 
2469 static void
2470 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2471 {
2472   isra_func_summary *ifs = func_sums->get (node);
2473   if (!ifs || !ifs->m_candidate)
2474     return;
2475   unsigned param_count = vec_safe_length (ifs->m_parameters);
2476   for (unsigned pidx = 0; pidx < param_count; pidx++)
2477     {
2478       isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2479       if (!desc->split_candidate || desc->locally_unused)
2480 	continue;
2481 
2482       bool certain_access_present = !certain_must_exist;
2483       if (overlapping_certain_accesses_p (desc, &certain_access_present))
2484 	internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2485 			"which overlap", node->dump_name (), pidx);
2486       if (!certain_access_present)
2487 	internal_error ("Function %s, parameter %u, is used but does not "
2488 			"have any certain IPA-SRA access",
2489 			node->dump_name (), pidx);
2490     }
2491 }
2492 
2493 /* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
2494    create a summary structure describing IPA-SRA opportunities and constraints
2495    in it.  */
2496 
2497 static void
2498 ipa_sra_summarize_function (cgraph_node *node)
2499 {
2500   if (dump_file)
2501     fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
2502 	     node->order);
2503   if (!ipa_sra_preliminary_function_checks (node))
2504     return;
2505   gcc_obstack_init (&gensum_obstack);
2506   isra_func_summary *ifs = func_sums->get_create (node);
2507   ifs->m_candidate = true;
2508   tree ret = TREE_TYPE (TREE_TYPE (node->decl));
2509   ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
2510 
2511   decl2desc = new hash_map<tree, gensum_param_desc *>;
2512   unsigned count = 0;
2513   for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
2514     count++;
2515 
2516   if (count > 0)
2517     {
2518       auto_vec<gensum_param_desc, 16> param_descriptions (count);
2519       param_descriptions.reserve_exact (count);
2520       param_descriptions.quick_grow_cleared (count);
2521 
2522       bool cfun_pushed = false;
2523       struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
2524       if (create_parameter_descriptors (node, &param_descriptions))
2525 	{
2526 	  push_cfun (fun);
2527 	  cfun_pushed = true;
2528 	  final_bbs = BITMAP_ALLOC (NULL);
2529 	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
2530 				      by_ref_count
2531 				      * last_basic_block_for_fn (fun));
2532 	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2533 	  scan_function (node, fun);
2534 
2535 	  if (dump_file)
2536 	    {
2537 	      dump_gensum_param_descriptors (dump_file, node->decl,
2538 					     &param_descriptions);
2539 	      fprintf (dump_file, "----------------------------------------\n");
2540 	    }
2541 	}
2542       process_scan_results (node, fun, ifs, &param_descriptions);
2543 
2544       if (cfun_pushed)
2545 	pop_cfun ();
2546       if (bb_dereferences)
2547 	{
2548 	  free (bb_dereferences);
2549 	  bb_dereferences = NULL;
2550 	  BITMAP_FREE (final_bbs);
2551 	  final_bbs = NULL;
2552 	}
2553     }
2554   isra_analyze_all_outgoing_calls (node);
2555 
2556   delete decl2desc;
2557   decl2desc = NULL;
2558   obstack_free (&gensum_obstack, NULL);
2559   if (dump_file)
2560     fprintf (dump_file, "\n\n");
2561   if (flag_checking)
2562     verify_splitting_accesses (node, false);
2563   return;
2564 }
2565 
2566 /* Intraprocedural part of IPA-SRA analysis.  Scan bodies of all functions in
2567    this compilation unit and create summary structures describing IPA-SRA
2568    opportunities and constraints in them.  */
2569 
2570 static void
2571 ipa_sra_generate_summary (void)
2572 {
2573   struct cgraph_node *node;
2574 
2575   gcc_checking_assert (!func_sums);
2576   gcc_checking_assert (!call_sums);
2577   func_sums
2578     = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2579        ipa_sra_function_summaries (symtab, true));
2580   call_sums = new ipa_sra_call_summaries (symtab);
2581 
2582   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2583     ipa_sra_summarize_function (node);
2584   return;
2585 }
2586 
2587 /* Write intraprocedural analysis information about call graph edge E into a
2588    stream for LTO WPA.  */
2589 
2590 static void
2591 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2592 {
2593   isra_call_summary *csum = call_sums->get (e);
2594   unsigned input_count = csum->m_arg_flow.length ();
2595   streamer_write_uhwi (ob, input_count);
2596   for (unsigned i = 0; i < input_count; i++)
2597     {
2598       isra_param_flow *ipf = &csum->m_arg_flow[i];
2599       streamer_write_hwi (ob, ipf->length);
2600       bitpack_d bp = bitpack_create (ob->main_stream);
2601       for (int j = 0; j < ipf->length; j++)
2602 	bp_pack_value (&bp, ipf->inputs[j], 8);
2603       bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2604       bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2605       bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2606       streamer_write_bitpack (&bp);
2607       streamer_write_uhwi (ob, ipf->unit_offset);
2608       streamer_write_uhwi (ob, ipf->unit_size);
2609     }
2610   bitpack_d bp = bitpack_create (ob->main_stream);
2611   bp_pack_value (&bp, csum->m_return_ignored, 1);
2612   bp_pack_value (&bp, csum->m_return_returned, 1);
2613   bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2614   bp_pack_value (&bp, csum->m_before_any_store, 1);
2615   streamer_write_bitpack (&bp);
2616 }
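
/* Editorial note on the stream layout produced above: the argument count as
   an uhwi; then for each argument its input length as a hwi, a bitpack of
   that many 8-bit input indices plus the three pass-through/safety flags,
   and unit_offset and unit_size as uhwis; finally one bitpack with the four
   per-edge flags.  isra_read_edge_summary below unpacks in the same
   order.  */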
2617 
2618 /* Write intraprocedural analysis information about NODE and all of its outgoing
2619    edges into a stream for LTO WPA.  */
2620 
2621 static void
2622 isra_write_node_summary (output_block *ob, cgraph_node *node)
2623 {
2624   isra_func_summary *ifs = func_sums->get (node);
2625   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2626   int node_ref = lto_symtab_encoder_encode (encoder, node);
2627   streamer_write_uhwi (ob, node_ref);
2628 
2629   unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2630   streamer_write_uhwi (ob, param_desc_count);
2631   for (unsigned i = 0; i < param_desc_count; i++)
2632     {
2633       isra_param_desc *desc = &(*ifs->m_parameters)[i];
2634       unsigned access_count = vec_safe_length (desc->accesses);
2635       streamer_write_uhwi (ob, access_count);
2636       for (unsigned j = 0; j < access_count; j++)
2637 	{
2638 	  param_access *acc = (*desc->accesses)[j];
2639 	  stream_write_tree (ob, acc->type, true);
2640 	  stream_write_tree (ob, acc->alias_ptr_type, true);
2641 	  streamer_write_uhwi (ob, acc->unit_offset);
2642 	  streamer_write_uhwi (ob, acc->unit_size);
2643 	  bitpack_d bp = bitpack_create (ob->main_stream);
2644 	  bp_pack_value (&bp, acc->certain, 1);
2645 	  bp_pack_value (&bp, acc->reverse, 1);
2646 	  streamer_write_bitpack (&bp);
2647 	}
2648       streamer_write_uhwi (ob, desc->param_size_limit);
2649       streamer_write_uhwi (ob, desc->size_reached);
2650       bitpack_d bp = bitpack_create (ob->main_stream);
2651       bp_pack_value (&bp, desc->locally_unused, 1);
2652       bp_pack_value (&bp, desc->split_candidate, 1);
2653       bp_pack_value (&bp, desc->by_ref, 1);
2654       streamer_write_bitpack (&bp);
2655     }
2656   bitpack_d bp = bitpack_create (ob->main_stream);
2657   bp_pack_value (&bp, ifs->m_candidate, 1);
2658   bp_pack_value (&bp, ifs->m_returns_value, 1);
2659   bp_pack_value (&bp, ifs->m_return_ignored, 1);
2660   gcc_assert (!ifs->m_queued);
2661   streamer_write_bitpack (&bp);
2662 
2663   for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2664     isra_write_edge_summary (ob, e);
2665   for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2666     isra_write_edge_summary (ob, e);
2667 }
2668 
2669 /* Write intraprocedural analysis information into a stream for LTO WPA.  */
2670 
2671 static void
2672 ipa_sra_write_summary (void)
2673 {
2674   if (!func_sums || !call_sums)
2675     return;
2676 
2677   struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2678   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2679   ob->symbol = NULL;
2680 
2681   unsigned int count = 0;
2682   lto_symtab_encoder_iterator lsei;
2683   for (lsei = lsei_start_function_in_partition (encoder);
2684        !lsei_end_p (lsei);
2685        lsei_next_function_in_partition (&lsei))
2686     {
2687       cgraph_node *node = lsei_cgraph_node (lsei);
2688       if (node->has_gimple_body_p ()
2689 	  && func_sums->get (node) != NULL)
2690 	count++;
2691     }
2692   streamer_write_uhwi (ob, count);
2693 
2694   /* Process all of the functions.  */
2695   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2696        lsei_next_function_in_partition (&lsei))
2697     {
2698       cgraph_node *node = lsei_cgraph_node (lsei);
2699       if (node->has_gimple_body_p ()
2700 	  && func_sums->get (node) != NULL)
2701         isra_write_node_summary (ob, node);
2702     }
2703   streamer_write_char_stream (ob->main_stream, 0);
2704   produce_asm (ob, NULL);
2705   destroy_output_block (ob);
2706 }
2707 
2708 /* Read intraprocedural analysis information about call graph edge CS from a
2709    stream for LTO WPA.  */
2710 
2711 static void
2712 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2713 {
2714   isra_call_summary *csum = call_sums->get_create (cs);
2715   unsigned input_count = streamer_read_uhwi (ib);
2716   csum->init_inputs (input_count);
2717   for (unsigned i = 0; i < input_count; i++)
2718     {
2719       isra_param_flow *ipf = &csum->m_arg_flow[i];
2720       ipf->length = streamer_read_hwi (ib);
2721       bitpack_d bp = streamer_read_bitpack (ib);
2722       for (int j = 0; j < ipf->length; j++)
2723 	ipf->inputs[j] = bp_unpack_value (&bp, 8);
2724       ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2725       ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2726       ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2727       ipf->unit_offset = streamer_read_uhwi (ib);
2728       ipf->unit_size = streamer_read_uhwi (ib);
2729     }
2730   bitpack_d bp = streamer_read_bitpack (ib);
2731   csum->m_return_ignored = bp_unpack_value (&bp, 1);
2732   csum->m_return_returned = bp_unpack_value (&bp, 1);
2733   csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2734   csum->m_before_any_store = bp_unpack_value (&bp, 1);
2735 }
2736 
2737 /* Read intraprocedural analysis information about NODE and all of its outgoing
2738    edges from a stream for LTO WPA.  */
2739 
2740 static void
2741 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2742 		     struct data_in *data_in)
2743 {
2744   isra_func_summary *ifs = func_sums->get_create (node);
2745   unsigned param_desc_count = streamer_read_uhwi (ib);
2746   if (param_desc_count > 0)
2747     {
2748       vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2749       ifs->m_parameters->quick_grow_cleared (param_desc_count);
2750     }
2751   for (unsigned i = 0; i < param_desc_count; i++)
2752     {
2753       isra_param_desc *desc = &(*ifs->m_parameters)[i];
2754       unsigned access_count = streamer_read_uhwi (ib);
2755       for (unsigned j = 0; j < access_count; j++)
2756 	{
2757 	  param_access *acc = ggc_cleared_alloc<param_access> ();
2758 	  acc->type = stream_read_tree (ib, data_in);
2759 	  acc->alias_ptr_type = stream_read_tree (ib, data_in);
2760 	  acc->unit_offset = streamer_read_uhwi (ib);
2761 	  acc->unit_size = streamer_read_uhwi (ib);
2762 	  bitpack_d bp = streamer_read_bitpack (ib);
2763 	  acc->certain = bp_unpack_value (&bp, 1);
2764 	  acc->reverse = bp_unpack_value (&bp, 1);
2765 	  vec_safe_push (desc->accesses, acc);
2766 	}
2767       desc->param_size_limit = streamer_read_uhwi (ib);
2768       desc->size_reached = streamer_read_uhwi (ib);
2769       bitpack_d bp = streamer_read_bitpack (ib);
2770       desc->locally_unused = bp_unpack_value (&bp, 1);
2771       desc->split_candidate = bp_unpack_value (&bp, 1);
2772       desc->by_ref = bp_unpack_value (&bp, 1);
2773     }
2774   bitpack_d bp = streamer_read_bitpack (ib);
2775   ifs->m_candidate = bp_unpack_value (&bp, 1);
2776   ifs->m_returns_value = bp_unpack_value (&bp, 1);
2777   ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2778   ifs->m_queued = 0;
2779 
2780   for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2781     isra_read_edge_summary (ib, e);
2782   for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2783     isra_read_edge_summary (ib, e);
2784 }
2785 
2786 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2787    data DATA.  TODO: This function was copied almost verbatim from ipa-prop.c;
2788    it should be possible to unify them somehow.  */
2789 
2790 static void
2791 isra_read_summary_section (struct lto_file_decl_data *file_data,
2792 			   const char *data, size_t len)
2793 {
2794   const struct lto_function_header *header =
2795     (const struct lto_function_header *) data;
2796   const int cfg_offset = sizeof (struct lto_function_header);
2797   const int main_offset = cfg_offset + header->cfg_size;
2798   const int string_offset = main_offset + header->main_size;
2799   struct data_in *data_in;
2800   unsigned int i;
2801   unsigned int count;
2802 
2803   lto_input_block ib_main ((const char *) data + main_offset,
2804 			   header->main_size, file_data->mode_table);
2805 
2806   data_in =
2807     lto_data_in_create (file_data, (const char *) data + string_offset,
2808 			header->string_size, vNULL);
2809   count = streamer_read_uhwi (&ib_main);
2810 
2811   for (i = 0; i < count; i++)
2812     {
2813       unsigned int index;
2814       struct cgraph_node *node;
2815       lto_symtab_encoder_t encoder;
2816 
2817       index = streamer_read_uhwi (&ib_main);
2818       encoder = file_data->symtab_node_encoder;
2819       node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2820 								index));
2821       gcc_assert (node->definition);
2822       isra_read_node_info (&ib_main, node, data_in);
2823     }
2824   lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2825 			 len);
2826   lto_data_in_delete (data_in);
2827 }
2828 
2829 /* Read intraprocedural analysis information from a stream for LTO WPA.  */
2830 
2831 static void
2832 ipa_sra_read_summary (void)
2833 {
2834   struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2835   struct lto_file_decl_data *file_data;
2836   unsigned int j = 0;
2837 
2838   gcc_checking_assert (!func_sums);
2839   gcc_checking_assert (!call_sums);
2840   func_sums
2841     = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2842        ipa_sra_function_summaries (symtab, true));
2843   call_sums = new ipa_sra_call_summaries (symtab);
2844 
2845   while ((file_data = file_data_vec[j++]))
2846     {
2847       size_t len;
2848       const char *data
2849 	= lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2850       if (data)
2851         isra_read_summary_section (file_data, data, len);
2852     }
2853 }
2854 
2855 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F.  */
2856 
2857 static void
2858 ipa_sra_dump_all_summaries (FILE *f)
2859 {
2860   cgraph_node *node;
2861   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2862     {
2863       fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2864 
2865       isra_func_summary *ifs = func_sums->get (node);
2866       if (!ifs)
2867 	{
2868 	  fprintf (f, "  Function does not have any associated IPA-SRA "
2869 		   "summary\n");
2870 	  continue;
2871 	}
2872       if (!ifs->m_candidate)
2873 	{
2874 	  fprintf (f, "  Not a candidate function\n");
2875 	  continue;
2876 	}
2877       if (ifs->m_returns_value)
2878 	  fprintf (f, "  Returns value\n");
2879       if (vec_safe_is_empty (ifs->m_parameters))
2880 	fprintf (f, "  No parameter information. \n");
2881       else
2882 	for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2883 	  {
2884 	    fprintf (f, "  Descriptor for parameter %i:\n", i);
2885 	    dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2886 	  }
2887       fprintf (f, "\n");
2888 
2889       struct cgraph_edge *cs;
2890       for (cs = node->callees; cs; cs = cs->next_callee)
2891 	{
2892 	  fprintf (f, "  Summary for edge %s->%s:\n", cs->caller->dump_name (),
2893 		   cs->callee->dump_name ());
2894 	  isra_call_summary *csum = call_sums->get (cs);
2895 	  if (csum)
2896 	    csum->dump (f);
2897 	  else
2898 	    fprintf (f, "    Call summary is MISSING!\n");
2899 	}
2900 
2901     }
2902   fprintf (f, "\n\n");
2903 }
2904 
2905 /* Perform function-scope viability tests that can only be made at the IPA
2906    level and return false if the function is deemed unsuitable for IPA-SRA.  */
2907 
2908 static bool
2909 ipa_sra_ipa_function_checks (cgraph_node *node)
2910 {
2911   if (!node->can_be_local_p ())
2912     {
2913       if (dump_file)
2914 	fprintf (dump_file, "Function %s disqualified because it cannot be "
2915 		 "made local.\n", node->dump_name ());
2916       return false;
2917     }
2918   if (!node->can_change_signature)
2919     {
2920       if (dump_file)
2921 	fprintf (dump_file, "Function can not change signature.\n");
2922       return false;
2923     }
2924 
2925   return true;
2926 }
2927 
2928 /* Issues found out by check_callers_for_issues.  */
2929 
2930 struct caller_issues
2931 {
2932   /* The candidate being considered.  */
2933   cgraph_node *candidate;
2934   /* There is a thunk among callers.  */
2935   bool thunk;
2936   /* Call site with no available information.  */
2937   bool unknown_callsite;
2938   /* Call from outside the candidate's comdat group.  */
2939   bool call_from_outside_comdat;
2940   /* There is a bit-aligned load into one of the non-gimple-typed arguments. */
2941   bool bit_aligned_aggregate_argument;
2942 };
2943 
2944 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2945    that apply.  */
2946 
2947 static bool
2948 check_for_caller_issues (struct cgraph_node *node, void *data)
2949 {
2950   struct caller_issues *issues = (struct caller_issues *) data;
2951 
2952   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2953     {
2954       if (cs->caller->thunk.thunk_p)
2955 	{
2956 	  issues->thunk = true;
2957 	  /* TODO: We should be able to process at least some types of
2958 	     thunks.  */
2959 	  return true;
2960 	}
2961       if (issues->candidate->calls_comdat_local
2962 	  && issues->candidate->same_comdat_group
2963 	  && !issues->candidate->in_same_comdat_group_p (cs->caller))
2964 	{
2965 	  issues->call_from_outside_comdat = true;
2966 	  return true;
2967 	}
2968 
2969       isra_call_summary *csum = call_sums->get (cs);
2970       if (!csum)
2971 	{
2972 	  issues->unknown_callsite = true;
2973 	  return true;
2974 	}
2975 
2976       if (csum->m_bit_aligned_arg)
2977 	issues->bit_aligned_aggregate_argument = true;
2978     }
2979   return false;
2980 }
2981 
2982 /* Look at all incoming edges to NODE, including aliases and thunks and look
2983    for problems.  Return true if NODE type should not be modified at all.  */
2984 
2985 static bool
2986 check_all_callers_for_issues (cgraph_node *node)
2987 {
2988   struct caller_issues issues;
2989   memset (&issues, 0, sizeof (issues));
2990   issues.candidate = node;
2991 
2992   node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2993   if (issues.unknown_callsite)
2994     {
2995       if (dump_file && (dump_flags & TDF_DETAILS))
2996 	fprintf (dump_file, "A call of %s has not been analyzed.  Disabling "
2997 		 "all modifications.\n", node->dump_name ());
2998       return true;
2999     }
3000   /* TODO: We should be able to process at least some types of thunks.  */
3001   if (issues.thunk)
3002     {
3003       if (dump_file && (dump_flags & TDF_DETAILS))
3004 	fprintf (dump_file, "A call of %s is through thunk, which are not"
3005 		 " handled yet.  Disabling all modifications.\n",
3006 		 node->dump_name ());
3007       return true;
3008     }
3009   if (issues.call_from_outside_comdat)
3010     {
3011       if (dump_file)
3012 	fprintf (dump_file, "Function would become private comdat called "
3013 		 "outside of its comdat group.\n");
3014       return true;
3015     }
3016 
3017   if (issues.bit_aligned_aggregate_argument)
3018     {
3019       /* Let's only remove parameters/return values from such functions.
3020 	 TODO: We could only prevent splitting the problematic parameters if
3021 	 anybody thinks it is worth it.  */
3022       if (dump_file && (dump_flags & TDF_DETAILS))
3023 	fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
3024 		 " disabling parameter splitting.\n", node->dump_name ());
3025 
3026       isra_func_summary *ifs = func_sums->get (node);
3027       gcc_checking_assert (ifs);
3028       unsigned param_count = vec_safe_length (ifs->m_parameters);
3029       for (unsigned i = 0; i < param_count; i++)
3030 	(*ifs->m_parameters)[i].split_candidate = false;
3031     }
3032   return false;
3033 }
3034 
3035 /* Find the access with corresponding OFFSET and SIZE among accesses in
3036    PARAM_DESC and return it or NULL if such an access is not there.  */
3037 
3038 static param_access *
3039 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3040 {
3041   unsigned pclen = vec_safe_length (param_desc->accesses);
3042 
3043   /* The search is linear but the number of stored accesses is bound by
3044      PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8.  */
3045 
3046   for (unsigned i = 0; i < pclen; i++)
3047     if ((*param_desc->accesses)[i]->unit_offset == offset
3048 	&& (*param_desc->accesses)[i]->unit_size == size)
3049       return (*param_desc->accesses)[i];
3050 
3051   return NULL;
3052 }
3053 
3054 /* Return true iff the total size SIZE of definite replacements would violate
3055    the limit set for it in DESC.  */
3056 
3057 static bool
3058 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3059 {
3060   unsigned limit = desc->param_size_limit;
3061   if (size > limit
3062       || (!desc->by_ref && size == limit))
3063     return true;
3064   return false;
3065 }
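
/* Editorial note: the asymmetry above mirrors check_gensum_access -- a
   by-value parameter whose replacements reach exactly the limit (its own
   size) is rejected, presumably because splitting it then saves nothing,
   whereas a by-reference parameter may use the full limit, since removing
   the dereference is a win by itself.  */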
3066 
3067 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3068    the set limit.  IDX is the parameter number which is dumped when
3069    disqualifying.  */
3070 
3071 static void
3072 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3073 {
3074   unsigned after = desc->size_reached + size;
3075   if (size_would_violate_limit_p (desc, after))
3076     {
3077       if (dump_file && (dump_flags & TDF_DETAILS))
3078 	fprintf (dump_file, "    ...size limit reached, disqualifying "
3079 		 "candidate parameter %u\n", idx);
3080       desc->split_candidate = false;
3081       return;
3082     }
3083   desc->size_reached = after;
3084 }
3085 
3086 /* Take all actions required to deal with an edge CS that represents a call to
3087    an unknown or un-analyzed function, for both parameter removal and
3088    splitting.  */
3089 
3090 static void
3091 process_edge_to_unknown_caller (cgraph_edge *cs)
3092 {
3093   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3094   gcc_checking_assert (from_ifs);
3095   isra_call_summary *csum = call_sums->get (cs);
3096 
3097   if (dump_file && (dump_flags & TDF_DETAILS))
3098     fprintf (dump_file, "Processing an edge to an unknown callee from %s:\n",
3099 	     cs->caller->dump_name ());
3100 
3101   unsigned args_count = csum->m_arg_flow.length ();
3102   for (unsigned i = 0; i < args_count; i++)
3103     {
3104       isra_param_flow *ipf = &csum->m_arg_flow[i];
3105 
3106       if (ipf->pointer_pass_through)
3107 	{
3108 	  isra_param_desc *param_desc
3109 	    = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3110 	  param_desc->locally_unused = false;
3111 	  param_desc->split_candidate = false;
3112 	  continue;
3113 	}
3114       if (ipf->aggregate_pass_through)
3115 	{
3116 	  unsigned idx = get_single_param_flow_source (ipf);
3117 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3118 
3119 	  param_desc->locally_unused = false;
3120 	  if (!param_desc->split_candidate)
3121 	    continue;
3122 	  gcc_assert (!param_desc->by_ref);
3123 	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3124 						  ipf->unit_size);
3125 	  gcc_checking_assert (pacc);
3126 	  pacc->certain = true;
3127 	  if (overlapping_certain_accesses_p (param_desc, NULL))
3128 	    {
3129 	      if (dump_file && (dump_flags & TDF_DETAILS))
3130 		fprintf (dump_file, "    ...leading to overlap, "
3131 			 "disqualifying candidate parameter %u\n",
3132 			 idx);
3133 	      param_desc->split_candidate = false;
3134 	    }
3135 	  else
3136 	    bump_reached_size (param_desc, pacc->unit_size, idx);
3137 	  ipf->aggregate_pass_through = false;
3138 	  continue;
3139 	}
3140 
3141       for (int j = 0; j < ipf->length; j++)
3142 	{
3143 	  int input_idx = ipf->inputs[j];
3144 	  (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3145 	}
3146     }
3147 }
3148 
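/* An illustrative scenario (hypothetical code, extern_fn is made up): in

     void foo (int *p) { extern_fn (p); }

   the edge to extern_fn, whose body cannot be analyzed, is handled here:
   the pointer pass-through of P means P is conservatively marked as used
   in foo and removed from the set of splitting candidates, since nothing
   is known about what extern_fn does with it.  */
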
3149 /* Propagate parameter removal information through cross-SCC edge CS,
3150    i.e. mark any caller parameter as used if it feeds an argument which the
3151    callee does not consider locally unused.  */
3152 
3153 static void
3154 param_removal_cross_scc_edge (cgraph_edge *cs)
3155 {
3156   enum availability availability;
3157   cgraph_node *callee = cs->callee->function_symbol (&availability);
3158   isra_func_summary *to_ifs = func_sums->get (callee);
3159   if (!to_ifs || !to_ifs->m_candidate
3160       || (availability < AVAIL_AVAILABLE)
3161       || vec_safe_is_empty (to_ifs->m_parameters))
3162     {
3163       process_edge_to_unknown_caller (cs);
3164       return;
3165     }
3166   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3167   gcc_checking_assert (from_ifs);
3168 
3169   isra_call_summary *csum = call_sums->get (cs);
3170   unsigned args_count = csum->m_arg_flow.length ();
3171   unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3172 
3173   for (unsigned i = 0; i < args_count; i++)
3174     {
3175       bool unused_in_callee;
3176       if (i < param_count)
3177 	unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3178       else
3179 	unused_in_callee = false;
3180 
3181       if (!unused_in_callee)
3182 	{
3183 	  isra_param_flow *ipf = &csum->m_arg_flow[i];
3184 	  for (int j = 0; j < ipf->length; j++)
3185 	    {
3186 	      int input_idx = ipf->inputs[j];
3187 	      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3188 	    }
3189 	}
3190     }
3191 }
3192 
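/* For example (an informal sketch): if a caller passes its parameter A to
   a callee in a different SCC and the callee's summary says the
   corresponding formal parameter is locally unused, this call by itself
   does not make A used in the caller; otherwise every caller parameter
   feeding the argument is marked as used.  */
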
3193 /* Unless it is already there, push NODE, which is described by IFS, to
3194    STACK.  */
3195 
3196 static void
3197 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3198 			 vec<cgraph_node *> *stack)
3199 {
3200   if (!ifs->m_queued)
3201     {
3202       ifs->m_queued = true;
3203       stack->safe_push (node);
3204     }
3205 }
3206 
3207 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3208    used and push CALLER on STACK.  */
3209 
3210 static void
3211 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3212 			     cgraph_node *caller, vec<cgraph_node *> *stack)
3213 {
3214   if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3215     {
3216       (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3217       isra_push_node_to_stack (caller, from_ifs, stack);
3218     }
3219 }
3220 
3221 
3222 /* Propagate information that any parameter is not used only locally within
3223    an SCC across CS to the caller, which must be in the same SCC as the
3224    callee.  Push any callers that need to be re-processed to STACK.  */
3225 
3226 static void
3227 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3228 {
3229   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3230   if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3231     return;
3232 
3233   isra_call_summary *csum = call_sums->get (cs);
3234   gcc_checking_assert (csum);
3235   unsigned args_count = csum->m_arg_flow.length ();
3236   enum availability availability;
3237   cgraph_node *callee = cs->callee->function_symbol (&availability);
3238   isra_func_summary *to_ifs = func_sums->get (callee);
3239 
3240   unsigned param_count
3241     = (to_ifs && (availability >= AVAIL_AVAILABLE))
3242     ? vec_safe_length (to_ifs->m_parameters) : 0;
3243   for (unsigned i = 0; i < args_count; i++)
3244     {
3245       if (i < param_count
3246 	  && (*to_ifs->m_parameters)[i].locally_unused)
3247 	continue;
3248 
3249       /* The argument is needed in the callee, so we must mark the parameter
3250 	 as used also in the caller and its callers within this SCC.  */
3251       isra_param_flow *ipf = &csum->m_arg_flow[i];
3252       for (int j = 0; j < ipf->length; j++)
3253 	{
3254 	  int input_idx = ipf->inputs[j];
3255 	  isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3256 	}
3257     }
3258 }
3259 
3260 /* Propagate information that any parameter is not used only locally within
3261    an SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3262    same SCC.  Push any callers that need to be re-processed to STACK.  */
3263 
3264 static bool
3265 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3266 {
3267   vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3268   cgraph_edge *cs;
3269   for (cs = node->callers; cs; cs = cs->next_caller)
3270     if (ipa_edge_within_scc (cs))
3271       propagate_used_across_scc_edge (cs, stack);
3272   return false;
3273 }
3274 
3275 /* Return true iff all certain accesses in ARG_DESC are also present as
3276    certain accesses in PARAM_DESC.  */
3277 
3278 static bool
3279 all_callee_accesses_present_p (isra_param_desc *param_desc,
3280 			       isra_param_desc *arg_desc)
3281 {
3282   unsigned aclen = vec_safe_length (arg_desc->accesses);
3283   for (unsigned j = 0; j < aclen; j++)
3284     {
3285       param_access *argacc = (*arg_desc->accesses)[j];
3286       if (!argacc->certain)
3287 	continue;
3288       param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3289 					      argacc->unit_size);
3290       if (!pacc
3291 	  || !pacc->certain
3292 	  || !types_compatible_p (argacc->type, pacc->type))
3293 	return false;
3294     }
3295   return true;
3296 }
3297 
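/* In other words (informal restatement): if the callee certainly accesses
   units 0..3 and 8..11 of the pointed-to parameter, the predicate holds
   only when the caller's descriptor already contains certain accesses
   with exactly those offsets and sizes and with compatible types.  */
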
3298 /* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
3299    does not allow instantiating an auto_vec with a type defined within a
3300    function, so it is a global type.  */
3301 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3302 
3303 
3304 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3305    (which belongs to CALLER) if they would not violate some constraint there.
3306    If successful, return NULL, otherwise return the string reason for failure
3307    (which can be written to the dump file).  DELTA_OFFSET is the known offset
3308    of the actual argument within the formal parameter (so of ARG_DESC within
3309    PARAM_DESC), ARG_SIZE is the size of the actual argument, or zero if not
3310    known.  In case of success, set *CHANGE_P to true if propagation actually
3311    changed anything.  */
3312 
3313 static const char *
3314 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3315 			   isra_param_desc *arg_desc,
3316 			   unsigned delta_offset, unsigned arg_size,
3317 			   bool *change_p)
3318 {
3319   unsigned pclen = vec_safe_length (param_desc->accesses);
3320   unsigned aclen = vec_safe_length (arg_desc->accesses);
3321   unsigned prop_count = 0;
3322   unsigned prop_size = 0;
3323   bool change = false;
3324 
3325   auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3326   for (unsigned j = 0; j < aclen; j++)
3327     {
3328       param_access *argacc = (*arg_desc->accesses)[j];
3329       prop_kinds.safe_push (ACC_PROP_DONT);
3330 
3331       if (arg_size > 0
3332 	  && argacc->unit_offset + argacc->unit_size > arg_size)
3333 	return "callee access outside size boundary";
3334 
3335       if (!argacc->certain)
3336 	continue;
3337 
3338       unsigned offset = argacc->unit_offset + delta_offset;
3339       /* Given that accesses are initially stored according to increasing
3340 	 offset and decreasing size in case of equal offsets, the following
3341 	 searches could be written more efficiently if we kept the ordering
3342 	 when copying. But the number of accesses is capped at
3343 	 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3344 	 messy quickly, so let's improve on that only if necessary.  */
3345 
3346       bool exact_match = false;
3347       for (unsigned i = 0; i < pclen; i++)
3348 	{
3349 	  /* Check for overlaps.  */
3350 	  param_access *pacc = (*param_desc->accesses)[i];
3351 	  if (pacc->unit_offset == offset
3352 	      && pacc->unit_size == argacc->unit_size)
3353 	    {
3354 	      if (argacc->alias_ptr_type != pacc->alias_ptr_type
3355 		  || !types_compatible_p (argacc->type, pacc->type)
3356 		  || argacc->reverse != pacc->reverse)
3357 		return "propagated access types would not match existing ones";
3358 
3359 	      exact_match = true;
3360 	      if (!pacc->certain)
3361 		{
3362 		  prop_kinds[j] = ACC_PROP_CERTAIN;
3363 		  prop_size += argacc->unit_size;
3364 		  change = true;
3365 		}
3366 	      continue;
3367 	    }
3368 
3369 	  if (offset < pacc->unit_offset + pacc->unit_size
3370 	      && offset + argacc->unit_size > pacc->unit_offset)
3371 	    {
3372 	      /* No overlap with a certain caller access is permissible; an
3373 		 access may only fit entirely within an uncertain one.  */
3374 	      if (pacc->certain
3375 		  || offset < pacc->unit_offset
3376 		  || (offset + argacc->unit_size
3377 		      > pacc->unit_offset + pacc->unit_size))
3378 		return "a propagated access would conflict in caller";
3379 	    }
3380 	}
3381 
3382       if (!exact_match)
3383 	{
3384 	  prop_kinds[j] = ACC_PROP_COPY;
3385 	  prop_count++;
3386 	  prop_size += argacc->unit_size;
3387 	  change = true;
3388 	}
3389     }
3390 
3391   if (!change)
3392     return NULL;
3393 
3394   if ((prop_count + pclen
3395        > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3396       || size_would_violate_limit_p (param_desc,
3397 				     param_desc->size_reached + prop_size))
3398     return "propagating accesses would violate the count or size limit";
3399 
3400   *change_p = true;
3401   for (unsigned j = 0; j < aclen; j++)
3402     {
3403       if (prop_kinds[j] == ACC_PROP_COPY)
3404 	{
3405 	  param_access *argacc = (*arg_desc->accesses)[j];
3406 
3407 	  param_access *copy = ggc_cleared_alloc<param_access> ();
3408 	  copy->unit_offset = argacc->unit_offset + delta_offset;
3409 	  copy->unit_size = argacc->unit_size;
3410 	  copy->type = argacc->type;
3411 	  copy->alias_ptr_type = argacc->alias_ptr_type;
3412 	  copy->certain = true;
3413 	  copy->reverse = argacc->reverse;
3414 	  vec_safe_push (param_desc->accesses, copy);
3415 	}
3416       else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3417 	{
3418 	  param_access *argacc = (*arg_desc->accesses)[j];
3419 	  param_access *csp
3420 	    = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3421 				 argacc->unit_size);
3422 	  csp->certain = true;
3423 	}
3424     }
3425 
3426   param_desc->size_reached += prop_size;
3427 
3428   return NULL;
3429 }
3430 
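/* A worked example of the propagation above (hypothetical numbers): with
   DELTA_OFFSET of 8, a certain callee access of size 4 at unit offset 4
   maps to units 12..15 of the caller's parameter.  An exact match there
   is merely marked certain (ACC_PROP_CERTAIN), no overlap at all yields a
   fresh copy (ACC_PROP_COPY), and an overlap with a certain caller access,
   or one that does not fit entirely within an uncertain one, aborts the
   whole pull.  */
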
3431 /* Propagate parameter splitting information through call graph edge CS.
3432    Return true if any changes that might need to be propagated within SCCs have
3433    been made.  The function also clears the aggregate_pass_through and
3434    pointer_pass_through flags in call summaries which do not need to be
3435    processed again if this CS is revisited when iterating while changes are
3436    propagated within an SCC.  */
3437 
3438 static bool
3439 param_splitting_across_edge (cgraph_edge *cs)
3440 {
3441   bool res = false;
3442   bool cross_scc = !ipa_edge_within_scc (cs);
3443   enum availability availability;
3444   cgraph_node *callee = cs->callee->function_symbol (&availability);
3445   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3446   gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3447 
3448   isra_call_summary *csum = call_sums->get (cs);
3449   gcc_checking_assert (csum);
3450   unsigned args_count = csum->m_arg_flow.length ();
3451   isra_func_summary *to_ifs = func_sums->get (callee);
3452   unsigned param_count
3453     = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3454        ? vec_safe_length (to_ifs->m_parameters)
3455        : 0);
3456 
3457   if (dump_file && (dump_flags & TDF_DETAILS))
3458     fprintf (dump_file, "Splitting across %s->%s:\n",
3459 	     cs->caller->dump_name (), callee->dump_name ());
3460 
3461   unsigned i;
3462   for (i = 0; (i < args_count) && (i < param_count); i++)
3463     {
3464       isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3465       isra_param_flow *ipf = &csum->m_arg_flow[i];
3466 
3467       if (arg_desc->locally_unused)
3468 	{
3469 	  if (dump_file && (dump_flags & TDF_DETAILS))
3470 	    fprintf (dump_file, "    ->%u: unused in callee\n", i);
3471 	  ipf->pointer_pass_through = false;
3472 	  continue;
3473 	}
3474 
3475       if (ipf->pointer_pass_through)
3476 	{
3477 	  int idx = get_single_param_flow_source (ipf);
3478 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3479 	  if (!param_desc->split_candidate)
3480 	    continue;
3481 	  gcc_assert (param_desc->by_ref);
3482 
3483 	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
3484 	    {
3485 	      if (dump_file && (dump_flags & TDF_DETAILS))
3486 		fprintf (dump_file, "  %u->%u: not candidate or not by "
3487 			 "reference in callee\n", idx, i);
3488 	      param_desc->split_candidate = false;
3489 	      ipf->pointer_pass_through = false;
3490 	      res = true;
3491 	    }
3492 	  else if (!ipf->safe_to_import_accesses)
3493 	    {
3494 	      if (!csum->m_before_any_store
3495 		  || !all_callee_accesses_present_p (param_desc, arg_desc))
3496 		{
3497 		  if (dump_file && (dump_flags & TDF_DETAILS))
3498 		    fprintf (dump_file, "  %u->%u: cannot import accesses.\n",
3499 			     idx, i);
3500 		  param_desc->split_candidate = false;
3501 		  ipf->pointer_pass_through = false;
3502 		  res = true;
3503 
3504 		}
3505 	      else
3506 		{
3507 		  if (dump_file && (dump_flags & TDF_DETAILS))
3508 		    fprintf (dump_file, "  %u->%u: verified callee accesses "
3509 			     "present.\n", idx, i);
3510 		  if (cross_scc)
3511 		    ipf->pointer_pass_through = false;
3512 		}
3513 	    }
3514 	  else
3515 	    {
3516 	      const char *pull_failure
3517 		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3518 					     0, 0, &res);
3519 	      if (pull_failure)
3520 		{
3521 		  if (dump_file && (dump_flags & TDF_DETAILS))
3522 		    fprintf (dump_file, "  %u->%u: by_ref access pull "
3523 			     "failed: %s.\n", idx, i, pull_failure);
3524 		  param_desc->split_candidate = false;
3525 		  ipf->pointer_pass_through = false;
3526 		  res = true;
3527 		}
3528 	      else
3529 		{
3530 		  if (dump_file && (dump_flags & TDF_DETAILS))
3531 		    fprintf (dump_file, "  %u->%u: by_ref access pull "
3532 			     "succeeded.\n", idx, i);
3533 		  if (cross_scc)
3534 		    ipf->pointer_pass_through = false;
3535 		}
3536 	    }
3537 	}
3538       else if (ipf->aggregate_pass_through)
3539 	{
3540 	  int idx = get_single_param_flow_source (ipf);
3541 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3542 	  if (!param_desc->split_candidate)
3543 	    continue;
3544 	  gcc_assert (!param_desc->by_ref);
3545 	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3546 						  ipf->unit_size);
3547 	  gcc_checking_assert (pacc);
3548 
3549 	  if (pacc->certain)
3550 	    {
3551 	      if (dump_file && (dump_flags & TDF_DETAILS))
3552 		fprintf (dump_file, "  %u->%u: already certain\n", idx, i);
3553 	      ipf->aggregate_pass_through = false;
3554 	    }
3555 	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
3556 	    {
3557 	      if (dump_file && (dump_flags & TDF_DETAILS))
3558 		fprintf (dump_file, "  %u->%u: not candidate or by "
3559 			 "reference in callee\n", idx, i);
3560 
3561 	      pacc->certain = true;
3562 	      if (overlapping_certain_accesses_p (param_desc, NULL))
3563 		{
3564 		  if (dump_file && (dump_flags & TDF_DETAILS))
3565 		    fprintf (dump_file, "    ...leading to overlap, "
3566 			     "disqualifying candidate parameter %u\n",
3567 			     idx);
3568 		  param_desc->split_candidate = false;
3569 		}
3570 	      else
3571 		bump_reached_size (param_desc, pacc->unit_size, idx);
3572 
3573 	      ipf->aggregate_pass_through = false;
3574 	      res = true;
3575 	    }
3576 	  else
3577 	    {
3578 	      const char *pull_failure
3579 		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3580 					     ipf->unit_offset,
3581 					     ipf->unit_size, &res);
3582 	      if (pull_failure)
3583 		{
3584 		  if (dump_file && (dump_flags & TDF_DETAILS))
3585 		    fprintf (dump_file, "  %u->%u: arg access pull "
3586 			     "failed: %s.\n", idx, i, pull_failure);
3587 
3588 		  ipf->aggregate_pass_through = false;
3589 		  pacc->certain = true;
3590 
3591 		  if (overlapping_certain_accesses_p (param_desc, NULL))
3592 		    {
3593 		      if (dump_file && (dump_flags & TDF_DETAILS))
3594 			fprintf (dump_file, "    ...leading to overlap, "
3595 				 "disqualifying candidate parameter %u\n",
3596 				 idx);
3597 		      param_desc->split_candidate = false;
3598 		    }
3599 		  else
3600 		    bump_reached_size (param_desc, pacc->unit_size, idx);
3601 
3602 		  res = true;
3603 		}
3604 	      else
3605 		{
3606 		  if (dump_file && (dump_flags & TDF_DETAILS))
3607 		    fprintf (dump_file, "  %u->%u: arg access pull "
3608 			     "succeeded.\n", idx, i);
3609 		  if (cross_scc)
3610 		    ipf->aggregate_pass_through = false;
3611 		}
3612 	    }
3613 	}
3614     }
3615 
3616   /* Handle argument-parameter count mismatches.  */
3617   for (; (i < args_count); i++)
3618     {
3619       isra_param_flow *ipf = &csum->m_arg_flow[i];
3620 
3621       if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3622 	{
3623 	  int idx = get_single_param_flow_source (ipf);
3624 	  ipf->pointer_pass_through = false;
3625 	  ipf->aggregate_pass_through = false;
3626 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3627 	  if (!param_desc->split_candidate)
3628 	    continue;
3629 
3630 	  if (dump_file && (dump_flags & TDF_DETAILS))
3631 	    fprintf (dump_file, "  %u->%u: no corresponding formal parameter\n",
3632 		     idx, i);
3633 	  param_desc->split_candidate = false;
3634 	  res = true;
3635 	}
3636     }
3637   return res;
3638 }
3639 
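/* To recap the flag handling above (informal summary): on cross-SCC edges
   the pass-through flags are dropped as soon as their information has
   been consumed, whereas within an SCC they are kept after a successful
   pull so that later iterations can pull accesses added in the meantime.
   A failed by-reference pull disqualifies the caller parameter; a failed
   aggregate pull instead forces the accessed piece to become certain.  */
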
3640 /* Worker for call_for_symbol_and_aliases.  Look at all callers of NODE; if
3641    they all either ignore the return value, or come from the same SCC and use
3642    the return value only to compute their own return value, return false,
3643    otherwise return true.  */
3644 
3645 static bool
3646 retval_used_p (cgraph_node *node, void *)
3647 {
3648   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3649     {
3650       isra_call_summary *csum = call_sums->get (cs);
3651       gcc_checking_assert (csum);
3652       if (csum->m_return_ignored)
3653 	continue;
3654       if (!csum->m_return_returned)
3655 	return true;
3656 
3657       isra_func_summary *from_ifs = func_sums->get (cs->caller);
3658       if (!from_ifs || !from_ifs->m_candidate)
3659 	return true;
3660 
3661       if (!ipa_edge_within_scc (cs)
3662 	  && !from_ifs->m_return_ignored)
3663 	return true;
3664     }
3665 
3666   return false;
3667 }
3668 
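/* Illustration (hypothetical code): for

     static int f (int n) { if (n > 0) return f (n - 1); return 0; }

   where every external caller ignores the result, the recursive edge lies
   within the SCC and, assuming its call summary records the result as
   only being returned, retval_used_p returns false and the return value
   can be removed.  */
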
3669 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3670    modify the parameter which originally had index BASE_INDEX, had index
3671    PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any), and
3672    was described by PREV_ADJUSTMENT.  If the parent clone is the original
3673    function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX equals BASE_INDEX.  */
3674 
3675 static void
3676 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3677 				  unsigned prev_clone_index,
3678 				  ipa_adjusted_param *prev_adjustment,
3679 				  vec<ipa_adjusted_param, va_gc> **new_params)
3680 {
3681   isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3682   if (desc->locally_unused)
3683     {
3684       if (dump_file)
3685 	fprintf (dump_file, "  Will remove parameter %u\n", base_index);
3686       return;
3687     }
3688 
3689   if (!desc->split_candidate)
3690     {
3691       ipa_adjusted_param adj;
3692       if (prev_adjustment)
3693 	{
3694 	  adj = *prev_adjustment;
3695 	  adj.prev_clone_adjustment = true;
3696 	  adj.prev_clone_index = prev_clone_index;
3697 	}
3698       else
3699 	{
3700 	  memset (&adj, 0, sizeof (adj));
3701 	  adj.op = IPA_PARAM_OP_COPY;
3702 	  adj.base_index = base_index;
3703 	  adj.prev_clone_index = prev_clone_index;
3704 	}
3705       vec_safe_push ((*new_params), adj);
3706       return;
3707     }
3708 
3709   if (dump_file)
3710     fprintf (dump_file, "  Will split parameter %u\n", base_index);
3711 
3712   gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3713   unsigned aclen = vec_safe_length (desc->accesses);
3714   for (unsigned j = 0; j < aclen; j++)
3715     {
3716       param_access *pa = (*desc->accesses)[j];
3717       if (!pa->certain)
3718 	continue;
3719       if (dump_file)
3720 	fprintf (dump_file, "    - component at byte offset %u, "
3721 		 "size %u\n", pa->unit_offset, pa->unit_size);
3722 
3723       ipa_adjusted_param adj;
3724       memset (&adj, 0, sizeof (adj));
3725       adj.op = IPA_PARAM_OP_SPLIT;
3726       adj.base_index = base_index;
3727       adj.prev_clone_index = prev_clone_index;
3728       adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3729       adj.reverse = pa->reverse;
3730       adj.type = pa->type;
3731       adj.alias_ptr_type = pa->alias_ptr_type;
3732       adj.unit_offset = pa->unit_offset;
3733       vec_safe_push ((*new_params), adj);
3734     }
3735 }
3736 
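/* For instance (an informal sketch): a locally unused parameter
   contributes no entry at all, a surviving non-split parameter
   contributes a single IPA_PARAM_OP_COPY entry, and a split candidate
   with certain accesses at unit offsets 0 and 8 contributes two
   IPA_PARAM_OP_SPLIT entries, one per component.  */
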
3737 /* Worker for call_for_symbol_thunks_and_aliases.  Set calls_comdat_local
3738    flag of all callers of NODE.  */
3739 
3740 static bool
3741 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3742 {
3743   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3744     cs->caller->calls_comdat_local = true;
3745   return false;
3746 }
3747 
3748 
3749 /* Do final processing of results of IPA propagation regarding NODE, clone it
3750    if appropriate.  */
3751 
3752 static void
3753 process_isra_node_results (cgraph_node *node,
3754 			   hash_map<const char *, unsigned> *clone_num_suffixes)
3755 {
3756   isra_func_summary *ifs = func_sums->get (node);
3757   if (!ifs || !ifs->m_candidate)
3758     return;
3759 
3760   auto_vec<bool, 16> surviving_params;
3761   bool check_surviving = false;
3762   if (node->clone.param_adjustments)
3763     {
3764       check_surviving = true;
3765       node->clone.param_adjustments->get_surviving_params (&surviving_params);
3766     }
3767 
3768   unsigned param_count = vec_safe_length (ifs->m_parameters);
3769   bool will_change_function = false;
3770   if (ifs->m_returns_value && ifs->m_return_ignored)
3771     will_change_function = true;
3772   else
3773     for (unsigned i = 0; i < param_count; i++)
3774       {
3775 	isra_param_desc *desc = &(*ifs->m_parameters)[i];
3776 	if ((desc->locally_unused || desc->split_candidate)
3777 	    /* Make sure we do not clone just to attempt to remove an already
3778 	       removed unused argument.  */
3779 	    && (!check_surviving
3780 		|| (i < surviving_params.length ()
3781 		    && surviving_params[i])))
3782 	  {
3783 	    will_change_function = true;
3784 	    break;
3785 	  }
3786       }
3787   if (!will_change_function)
3788     return;
3789 
3790   if (dump_file)
3791     {
3792       fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3793 	       node->dump_name ());
3794       if (ifs->m_returns_value && ifs->m_return_ignored)
3795 	fprintf (dump_file, "  Will remove return value.\n");
3796     }
3797 
3798   vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3799   if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
3800     {
3801       unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3802       for (unsigned i = 0; i < old_adj_len; i++)
3803 	{
3804 	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3805 	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3806 					    old_adj, &new_params);
3807 	}
3808     }
3809   else
3810     for (unsigned i = 0; i < param_count; i++)
3811       push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3812 
3813   ipa_param_adjustments *new_adjustments
3814     = (new (ggc_alloc <ipa_param_adjustments> ())
3815        ipa_param_adjustments (new_params, param_count,
3816 			      ifs->m_returns_value && ifs->m_return_ignored));
3817 
3818   if (dump_file && (dump_flags & TDF_DETAILS))
3819     {
3820       fprintf (dump_file, "\n  Created adjustments:\n");
3821       new_adjustments->dump (dump_file);
3822     }
3823 
3824   unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3825 			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3826 				 node->decl)));
3827   vec<cgraph_edge *> callers = node->collect_callers ();
3828   cgraph_node *new_node
3829     = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3830 				  suffix_counter);
3831   suffix_counter++;
3832   if (node->calls_comdat_local && node->same_comdat_group)
3833     {
3834       new_node->add_to_same_comdat_group (node);
3835       new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3836 					     NULL, true);
3837     }
3838   new_node->calls_comdat_local = node->calls_comdat_local;
3839 
3840   if (dump_file)
3841     fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
3842   callers.release ();
3843 }
3844 
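/* Note (illustrative): the "isra" suffix combined with the per-assembler-
   name counter means repeated clones of the same function typically show
   up as foo.isra.0, foo.isra.1 and so on in dumps and symbol tables.  */
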
3845 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3846    and disable transformations for those which have not or which should not be
3847    transformed because the associated debug counter reached its limit.  Return
3848    true if none survived or if there were no candidates to begin with.  */
3849 
3850 static bool
3851 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3852 {
3853   bool ret = true;
3854   unsigned len = vec_safe_length (ifs->m_parameters);
3855   if (!len)
3856     return true;
3857 
3858   auto_vec<bool, 16> surviving_params;
3859   bool check_surviving = false;
3860   if (node->clone.param_adjustments)
3861     {
3862       check_surviving = true;
3863       node->clone.param_adjustments->get_surviving_params (&surviving_params);
3864     }
3865   bool dumped_first = false;
3866   for (unsigned i = 0; i < len; i++)
3867     {
3868       isra_param_desc *desc = &(*ifs->m_parameters)[i];
3869       if (!dbg_cnt (ipa_sra_params))
3870 	{
3871 	  desc->locally_unused = false;
3872 	  desc->split_candidate = false;
3873 	}
3874       else if (check_surviving
3875 	       && (i >= surviving_params.length ()
3876 		   || !surviving_params[i]))
3877 	{
3878 	  /* Even if the parameter was removed by a previous IPA pass, we do
3879 	     not clear locally_unused because if it really is unused, this
3880 	     information might be useful in callers.  */
3881 	  desc->split_candidate = false;
3882 
3883 	  if (dump_file && (dump_flags & TDF_DETAILS))
3884 	    {
3885 	      if (!dumped_first)
3886 		{
3887 		  fprintf (dump_file,
3888 			   "The following parameters of %s are dead on "
3889 			   "arrival:", node->dump_name ());
3890 		  dumped_first = true;
3891 		}
3892 	      fprintf (dump_file, " %u", i);
3893 	    }
3894 	}
3895       else if (desc->locally_unused || desc->split_candidate)
3896 	ret = false;
3897     }
3898 
3899   if (dumped_first)
3900     fprintf (dump_file, "\n");
3901 
3902   return ret;
3903 }
3904 
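/* Usage note (assumed option syntax, check the GCC documentation for the
   exact form): the ipa_sra_params debug counter consulted above can be
   capped from the command line, e.g. with something like
   -fdbg-cnt=ipa_sra_params:4, to bisect which parameter modification
   introduces a problem.  */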
3905 
3906 /* Run the interprocedural part of IPA-SRA.  */
3907 
3908 static unsigned int
3909 ipa_sra_analysis (void)
3910 {
3911   if (dump_file)
3912     {
3913       fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3914       ipa_sra_dump_all_summaries (dump_file);
3915     }
3916 
3917   gcc_checking_assert (func_sums);
3918   gcc_checking_assert (call_sums);
3919   cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3920   auto_vec <cgraph_node *, 16> stack;
3921   int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3922 
3923   /* One sweep from callees to callers for parameter removal and splitting.  */
3924   for (int i = 0; i < node_scc_count; i++)
3925     {
3926       cgraph_node *scc_rep = order[i];
3927       vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3928       unsigned j;
3929 
3930       /* Preliminary IPA function level checks and first step of parameter
3931 	 removal.  */
3932       cgraph_node *v;
3933       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3934 	{
3935 	  isra_func_summary *ifs = func_sums->get (v);
3936 	  if (!ifs || !ifs->m_candidate)
3937 	    continue;
3938 	  if (!ipa_sra_ipa_function_checks (v)
3939 	      || check_all_callers_for_issues (v))
3940 	    {
3941 	      ifs->zap ();
3942 	      continue;
3943 	    }
3944 	  if (disable_unavailable_parameters (v, ifs))
3945 	    continue;
3946 	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3947 	    process_edge_to_unknown_caller (cs);
3948 	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3949 	    if (!ipa_edge_within_scc (cs))
3950 	      param_removal_cross_scc_edge (cs);
3951 	}
3952 
3953       /* Look at edges within the current SCC and propagate used-ness across
3954 	 them, pushing onto the stack all nodes which might need to be
3955 	 revisited.  */
3956       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3957 	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3958 					       &stack, true);
3959 
3960       /* Keep revisiting and pushing until nothing changes.  */
3961       while (!stack.is_empty ())
3962 	{
3963 	  cgraph_node *v = stack.pop ();
3964 	  isra_func_summary *ifs = func_sums->get (v);
3965 	  gcc_checking_assert (ifs && ifs->m_queued);
3966 	  ifs->m_queued = false;
3967 
3968 	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3969 						 &stack, true);
3970 	}
3971 
3972       /* Parameter splitting.  */
3973       bool repeat_scc_access_propagation;
3974       do
3975 	{
3976 	  repeat_scc_access_propagation = false;
3977 	  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3978 	    {
3979 	      isra_func_summary *ifs = func_sums->get (v);
3980 	      if (!ifs
3981 		  || !ifs->m_candidate
3982 		  || vec_safe_is_empty (ifs->m_parameters))
3983 		continue;
3984 	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3985 		if (param_splitting_across_edge (cs))
3986 		  repeat_scc_access_propagation = true;
3987 	    }
3988 	}
3989       while (repeat_scc_access_propagation);
3990 
3991       if (flag_checking)
3992 	FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3993 	  verify_splitting_accesses (v, true);
3994 
3995       cycle_nodes.release ();
3996     }
3997 
3998   /* One sweep from caller to callees for result removal.  */
3999   for (int i = node_scc_count - 1; i >= 0 ; i--)
4000     {
4001       cgraph_node *scc_rep = order[i];
4002       vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4003       unsigned j;
4004 
4005       cgraph_node *v;
4006       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
4007 	{
4008 	  isra_func_summary *ifs = func_sums->get (v);
4009 	  if (!ifs || !ifs->m_candidate)
4010 	    continue;
4011 
4012 	  bool return_needed
4013 	    = (ifs->m_returns_value
4014 	       && (!dbg_cnt (ipa_sra_retvalues)
4015 		   || v->call_for_symbol_and_aliases (retval_used_p,
4016 						      NULL, true)));
4017 	  ifs->m_return_ignored = !return_needed;
4018 	  if (return_needed)
4019 	    isra_push_node_to_stack (v, ifs, &stack);
4020 	}
4021 
4022       while (!stack.is_empty ())
4023 	{
4024 	  cgraph_node *node = stack.pop ();
4025 	  isra_func_summary *ifs = func_sums->get (node);
4026 	  gcc_checking_assert (ifs && ifs->m_queued);
4027 	  ifs->m_queued = false;
4028 
4029 	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4030 	    if (ipa_edge_within_scc (cs)
4031 		&& call_sums->get (cs)->m_return_returned)
4032 	      {
4033 		enum availability av;
4034 		cgraph_node *callee = cs->callee->function_symbol (&av);
4035 		isra_func_summary *to_ifs = func_sums->get (callee);
4036 		if (to_ifs && to_ifs->m_return_ignored)
4037 		  {
4038 		    to_ifs->m_return_ignored = false;
4039 		    isra_push_node_to_stack (callee, to_ifs, &stack);
4040 		  }
4041 	      }
4042 	}
4043       cycle_nodes.release ();
4044     }
4045 
4046   ipa_free_postorder_info ();
4047   free (order);
4048 
4049   if (dump_file)
4050     {
4051       if (dump_flags & TDF_DETAILS)
4052 	{
4053 	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4054 		   "==========\n");
4055 	  ipa_sra_dump_all_summaries (dump_file);
4056 	}
4057       fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4058     }
4059 
4060   hash_map<const char *, unsigned> *clone_num_suffixes
4061     = new hash_map<const char *, unsigned>;
4062 
4063   cgraph_node *node;
4064   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4065     process_isra_node_results (node, clone_num_suffixes);
4066 
4067   delete clone_num_suffixes;
4068   ggc_delete (func_sums);
4069   func_sums = NULL;
4070   delete call_sums;
4071   call_sums = NULL;
4072 
4073   if (dump_file)
4074     fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
4075 	     "==========\n\n");
4076   return 0;
4077 }
4078 
4079 
4080 const pass_data pass_data_ipa_sra =
4081 {
4082   IPA_PASS, /* type */
4083   "sra", /* name */
4084   OPTGROUP_NONE, /* optinfo_flags */
4085   TV_IPA_SRA, /* tv_id */
4086   0, /* properties_required */
4087   0, /* properties_provided */
4088   0, /* properties_destroyed */
4089   0, /* todo_flags_start */
4090   ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4091 };
4092 
4093 class pass_ipa_sra : public ipa_opt_pass_d
4094 {
4095 public:
4096   pass_ipa_sra (gcc::context *ctxt)
4097     : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4098 		      ipa_sra_generate_summary, /* generate_summary */
4099 		      ipa_sra_write_summary, /* write_summary */
4100 		      ipa_sra_read_summary, /* read_summary */
4101 		      NULL , /* write_optimization_summary */
4102 		      NULL, /* read_optimization_summary */
4103 		      NULL, /* stmt_fixup */
4104 		      0, /* function_transform_todo_flags_start */
4105 		      NULL, /* function_transform */
4106 		      NULL) /* variable_transform */
4107   {}
4108 
4109   /* opt_pass methods: */
4110   virtual bool gate (function *)
4111     {
4112       /* TODO: We should remove the optimize check after we ensure we never run
4113 	 IPA passes when not optimizing.  */
4114       return (flag_ipa_sra && optimize);
4115     }
4116 
4117   virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4118 
4119 }; // class pass_ipa_sra
4120 
4121 } // anon namespace
4122 
4123 ipa_opt_pass_d *
4124 make_pass_ipa_sra (gcc::context *ctxt)
4125 {
4126   return new pass_ipa_sra (ctxt);
4127 }
4128 
4129 
4130 #include "gt-ipa-sra.h"
4131