/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and calls are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which then however
   does not use this parameter, all three parameters of the two functions can
   be eliminated.  Edge summaries also have flags saying whether the return
   value is used or whether it is itself only returned by the caller.  In LTO
   mode these summaries are then streamed to the object file in the
   compilation phase and streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
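
/* As an illustrative sketch (with hypothetical names, not code taken from any
   particular testcase), a local function such as

     static int
     foo (int unused, struct big *p)
     {
       return p->first;
     }

   can, when all its callers cooperate, be transformed into

     static int
     foo (int first)
     {
       return first;
     }

   with every call site changed to drop the unused argument and to pass
   p->first by value instead of passing the whole aggregate by reference.  */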

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"
#include "attribs.h"

static void ipa_sra_summarize_function (cgraph_node *);

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Smaller accesses within a
   function that fall within another access form a tree.  The pass cannot
   analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};
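
/* For example (an illustrative sketch using a hypothetical parameter type,
   with offsets and sizes given in bytes for brevity), accesses to a parameter
   declared as

     struct S { struct { int a; int b; } inner; int c; };

   could form the following tree, where the access to the whole of inner is
   allowed to have children only because it is used purely as a call
   argument:

     offset 0, size 8   (s.inner, passed to a callee)
       offset 0, size 4   (s.inner.a)
       offset 4, size 4   (s.inner.b)
     offset 8, size 4   (s.c)  */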

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* True if there have been writes to or reads from this exact location,
     other than as arguments to function calls that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameters, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a function as
     an argument.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference?  */
  bool by_ref;

  /* The number of this parameter as it is ordered in the function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the parameter index
     used to compute indices into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
    m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in the IPA-SRA stack during processing
     of call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}

/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
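
/* For instance (an illustrative sketch with hypothetical parameter numbers),
   for a call bar (a + b) in a function whose first two formal parameters are
   a and b, the isra_param_flow of the only actual argument would have length
   2 and inputs {0, 1}.  For a call bar (p), where p is the first formal
   parameter passed on verbatim as a pointer, length would be 1, inputs would
   be {0} and pointer_pass_through would be set.  */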

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in the
     caller.  */
  unsigned m_before_any_store : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
			  isra_func_summary *old_sum,
			  isra_func_summary *new_sum);
  virtual void insert (cgraph_node *, isra_func_summary *);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  to->reverse = from->reverse;
	  d->accesses->quick_push (to);
	}
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Hook that is called by summary when a new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
			  isra_call_summary *old_sum,
			  isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, "      Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, "      Aggregate pass through from the param given above, "
		 "unit offset: %u, unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, "      Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
				   isra_call_summary *old_sum,
				   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}


/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "    * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}

654 
655 static void
dump_isra_access(FILE * f,param_access * access)656 dump_isra_access (FILE *f, param_access *access)
657 {
658   fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
659   fprintf (f, ", unit size: %u", access->unit_size);
660   fprintf (f, ", type: ");
661   print_generic_expr (f, access->type);
662   fprintf (f, ", alias_ptr_type: ");
663   print_generic_expr (f, access->alias_ptr_type);
664   if (access->certain)
665     fprintf (f, ", certain");
666   else
667     fprintf (f, ", not certain");
668   if (access->reverse)
669     fprintf (f, ", reverse");
670   fprintf (f, "\n");
671 }
672 
673 /* Dump access tree starting at ACCESS to stderr.  */
674 
675 DEBUG_FUNCTION void
debug_isra_access(param_access * access)676 debug_isra_access (param_access *access)
677 {
678   dump_isra_access (stderr, access);
679 }
680 
/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe the parameters of FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}

/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
			     isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "  parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails return false, otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for anything other than an actual call argument, a simple arithmetic
   operation or a debug statement.  If there are no such uses, return the
   number of actual arguments that this parameter eventually feeds to (or zero
   if there is none).  For every actual argument fed this way, mark PARM_NUM
   as one of its sources.  ANALYZED is a bitmap that tracks which SSA names we
   have already started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
			      int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
	 operations below.  */
      if (is_gimple_call (stmt))
	{
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      res = -1;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      res = -1;
	      break;
	    }
	  res += all_uses;
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
						      analyzed);
	      if (tmp < 0)
		{
		  res = tmp;
		  break;
		}
	      res += tmp;
	    }
	}
      else
	{
	  res = -1;
	  break;
	}
    }
  return res;
}

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for anything other than an actual
   call argument, a simple arithmetic operation or a debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For every such argument, mark PARM_NUM as one of
   its sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
				    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
						analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any uses that are not actual call arguments or
   are otherwise infeasible.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and, if returning false, store their number into *PT_COUNT_P.  NODE and FUN
   must represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (!TREE_THIS_VOLATILE (rhs))
	    {
	      while (handled_component_p (rhs))
		rhs = TREE_OPERAND (rhs, 0);
	      if (TREE_CODE (rhs) == MEM_REF
		  && TREE_OPERAND (rhs, 0) == name
		  && integer_zerop (TREE_OPERAND (rhs, 1))
		  && types_compatible_p (TREE_TYPE (rhs),
					 TREE_TYPE (TREE_TYPE (name))))
		uses_ok++;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      if (!TREE_THIS_VOLATILE (arg))
		{
		  while (handled_component_p (arg))
		    arg = TREE_OPERAND (arg, 0);
		  if (TREE_CODE (arg) == MEM_REF
		      && TREE_OPERAND (arg, 0) == name
		      && integer_zerop (TREE_OPERAND (arg, 1))
		      && types_compatible_p (TREE_TYPE (arg),
					     TREE_TYPE (TREE_TYPE (name))))
		    uses_ok++;
		}
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  break;
	}
    }

  *pt_count_p = pt_count;
  return ret;
}

/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters) and function body must be scanned.  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num,
						  &scalar_call_uses))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses\n",
		     scalar_call_uses);

	  desc->locally_unused = true;
	  desc->call_uses = scalar_call_uses;
	}

      if (POINTER_TYPE_P (type))
	{
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
					&desc->ptr_pt_count))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	  desc->by_ref = true;
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
	desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
	     desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
					     sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer FIRST to the linked
   list at a certain tree level.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
    /* We fit into access which is larger than us.  We need to find/create
       something below access.  But we only allow nesting in call
       arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size  >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 super-access. */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE and mark it as used in context CTX.  Attempt to create
   it if it does not exist, but fail and return NULL if there are already too
   many accesses, if it would create a partially overlapping access or if an
   access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
	    isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
					      size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
				  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
		      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
	{
	  if (access->offset < parent_offset)
	    {
	      error ("Access offset before parent offset");
	      return true;
	    }
	  if (access->size >= parent_size)
	    {
	      error ("Access size greater or equal to its parent size");
	      return true;
	    }
	  if (access->offset + access->size > parent_offset + parent_size)
	    {
	      error ("Access terminates outside of its parent");
	      return true;
	    }
	}

      if (verify_access_tree_1 (access->first_child, access->offset,
				access->size))
	return true;

      if (access->next_sibling
	  && (access->next_sibling->offset < access->offset + access->size))
	{
	  error ("Access overlaps with its sibling");
	  return true;
	}

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
		       basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

1565   int idx = bb->index * by_ref_count + desc->deref_index;
1566   if (bb_dereferences[idx] < dist)
1567     bb_dereferences[idx] = dist;
1568 }
1569 
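/* [Editorial sketch, hypothetical names: bb_dereferences is a flattened
   2D table indexed by basic-block index and by-reference candidate
   index; each cell keeps the maximum distance (offset plus size)
   dereferenced so far, exactly as the function above does.  Guarded by
   #if 0 so it is not compiled.]  */
#if 0
#include <vector>
#include <algorithm>

struct toy_deref_table
{
  int by_ref_count;		/* Number of by-reference candidates.  */
  std::vector<long> cells;	/* n_blocks * by_ref_count entries.  */

  toy_deref_table (int n_blocks, int n_by_ref)
    : by_ref_count (n_by_ref),
      cells ((size_t) n_blocks * n_by_ref, 0) {}

  /* Record that block BB dereferences candidate PARAM up to DIST.  */
  void mark (int bb, int param, long dist)
  {
    long &cell = cells[(size_t) bb * by_ref_count + param];
    cell = std::max (cell, dist);
  }
};
#endif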
1570 /* Return true if any potential replacements should use NEW_TYPE as opposed to
1571    the previously recorded OLD_TYPE.  */
1572 
1573 static bool
1574 type_prevails_p (tree old_type, tree new_type)
1575 {
1576   if (old_type == new_type)
1577     return false;
1578 
1579   /* Non-aggregates are always better.  */
1580   if (!is_gimple_reg_type (old_type)
1581       && is_gimple_reg_type (new_type))
1582     return true;
1583   if (is_gimple_reg_type (old_type)
1584       && !is_gimple_reg_type (new_type))
1585     return false;
1586 
1587   /* Prefer any complex or vector type over any other scalar type.  */
1588   if (TREE_CODE (old_type) != COMPLEX_TYPE
1589       && TREE_CODE (old_type) != VECTOR_TYPE
1590       && (TREE_CODE (new_type) == COMPLEX_TYPE
1591 	  || TREE_CODE (new_type) == VECTOR_TYPE))
1592     return true;
1593   if ((TREE_CODE (old_type) == COMPLEX_TYPE
1594        || TREE_CODE (old_type) == VECTOR_TYPE)
1595       && TREE_CODE (new_type) != COMPLEX_TYPE
1596       && TREE_CODE (new_type) != VECTOR_TYPE)
1597     return false;
1598 
1599   /* Use the integral type with the bigger precision.  */
1600   if (INTEGRAL_TYPE_P (old_type)
1601       && INTEGRAL_TYPE_P (new_type))
1602     return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1603 
1604   /* Attempt to disregard any integral type with non-full precision.  */
1605   if (INTEGRAL_TYPE_P (old_type)
1606       && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1607 	  != TYPE_PRECISION (old_type)))
1608     return true;
1609   if (INTEGRAL_TYPE_P (new_type)
1610       && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1611 	  != TYPE_PRECISION (new_type)))
1612     return false;
1613   /* Stabilize the selection.  */
1614   return TYPE_UID (old_type) < TYPE_UID (new_type);
1615 }
1616 
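/* [Editorial sketch: the predicate above induces a preference order over
   the types seen at an access; folding it over all of them keeps a single
   representative, and the TYPE_UID tie-break makes the result independent
   of the order in which accesses are visited.  A simplified standalone
   model with hypothetical ranks, guarded by #if 0.]  */
#if 0
#include <vector>

/* Stand-in for a type: RANK encodes the preference classes tested above
   (register type, complex/vector, precision), UID breaks ties.  */
struct toy_type { int rank; int uid; };

static bool
toy_prevails (const toy_type &old_t, const toy_type &new_t)
{
  if (new_t.rank != old_t.rank)
    return new_t.rank > old_t.rank;	/* Higher preference class wins.  */
  return old_t.uid < new_t.uid;		/* Stabilize the selection.  */
}

static toy_type
toy_select (const std::vector<toy_type> &seen)
{
  toy_type best = seen.front ();
  for (size_t i = 1; i < seen.size (); i++)
    if (toy_prevails (best, seen[i]))
      best = seen[i];
  return best;
}
#endif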
1617 /* When scanning an expression which is a call argument, this structure
1618    specifies the call and the position of the argument.  */
1619 
1620 struct scan_call_info
1621 {
1622   /* Call graph edge representing the call. */
1623   cgraph_edge *cs;
1624   /* Total number of arguments in the call.  */
1625   unsigned argument_count;
1626   /* Number of the actual argument being scanned.  */
1627   unsigned arg_idx;
1628 };
1629 
1630 /* Record use of ACCESS which belongs to a parameter described by DESC in a
1631    call argument described by CALL_INFO.  */
1632 
1633 static void
1634 record_nonregister_call_use (gensum_param_desc *desc,
1635 			     scan_call_info *call_info,
1636 			     unsigned unit_offset, unsigned unit_size)
1637 {
1638   isra_call_summary *csum = call_sums->get_create (call_info->cs);
1639   csum->init_inputs (call_info->argument_count);
1640 
1641   isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1642   param_flow->aggregate_pass_through = true;
1643   set_single_param_flow_source (param_flow, desc->param_number);
1644   param_flow->unit_offset = unit_offset;
1645   param_flow->unit_size = unit_size;
1646   desc->call_uses++;
1647 }
1648 
1649 /* Callback of walk_aliased_vdefs, just mark that there was a possible
1650    modification.  */
1651 
1652 static bool
1653 mark_maybe_modified (ao_ref *, tree, void *data)
1654 {
1655   bool *maybe_modified = (bool *) data;
1656   *maybe_modified = true;
1657   return true;
1658 }
1659 
1660 /* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
1661    specifies whether EXPR is used in a load, a store or as a call argument.
1662    BB must be the basic block in which EXPR resides.  If CTX specifies call
1663    argument context, CALL_INFO must describe that call and argument position,
1664    otherwise it is ignored.  */
1665 
1666 static void
1667 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1668 		  basic_block bb, scan_call_info *call_info = NULL)
1669 {
1670   poly_int64 poffset, psize, pmax_size;
1671   HOST_WIDE_INT offset, size, max_size;
1672   tree base;
1673   bool deref = false;
1674   bool reverse;
1675 
1676   if (TREE_CODE (expr) == BIT_FIELD_REF
1677       || TREE_CODE (expr) == IMAGPART_EXPR
1678       || TREE_CODE (expr) == REALPART_EXPR)
1679     expr = TREE_OPERAND (expr, 0);
1680 
1681   base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1682 
1683   if (TREE_CODE (base) == MEM_REF)
1684     {
1685       tree op = TREE_OPERAND (base, 0);
1686       if (TREE_CODE (op) != SSA_NAME
1687 	  || !SSA_NAME_IS_DEFAULT_DEF (op))
1688 	return;
1689       base = SSA_NAME_VAR (op);
1690       if (!base)
1691 	return;
1692       deref = true;
1693     }
1694   if (TREE_CODE (base) != PARM_DECL)
1695     return;
1696 
1697   gensum_param_desc *desc = get_gensum_param_desc (base);
1698   if (!desc || !desc->split_candidate)
1699     return;
1700 
1701   if (!poffset.is_constant (&offset)
1702       || !psize.is_constant (&size)
1703       || !pmax_size.is_constant (&max_size))
1704     {
1705       disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1706 				  "access.");
1707       return;
1708     }
1709   if (size < 0 || size != max_size)
1710     {
1711       disqualify_split_candidate (desc, "Encountered a variable sized access.");
1712       return;
1713     }
1714   if (TREE_CODE (expr) == COMPONENT_REF
1715       && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1716     {
1717       disqualify_split_candidate (desc, "Encountered a bit-field access.");
1718       return;
1719     }
1720   if (offset < 0)
1721     {
1722       disqualify_split_candidate (desc, "Encountered an access at a "
1723 				  "negative offset.");
1724       return;
1725     }
1726   gcc_assert ((offset % BITS_PER_UNIT) == 0);
1727   gcc_assert ((size % BITS_PER_UNIT) == 0);
1728   if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1729       || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1730     {
1731       disqualify_split_candidate (desc, "Encountered an access with too big "
1732 				  "offset or size");
1733       return;
1734     }
1735 
1736   tree type = TREE_TYPE (expr);
1737   unsigned int exp_align = get_object_alignment (expr);
1738 
1739   if (exp_align < TYPE_ALIGN (type))
1740     {
1741       disqualify_split_candidate (desc, "Underaligned access.");
1742       return;
1743     }
1744 
1745   if (deref)
1746     {
1747       if (!desc->by_ref)
1748 	{
1749 	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1750 	  return;
1751 	}
1752       else if (ctx == ISRA_CTX_STORE)
1753 	{
1754 	  disqualify_split_candidate (desc, "Storing to data passed by "
1755 				      "reference.");
1756 	  return;
1757 	}
1758 
1759       if (!aa_walking_limit)
1760 	{
1761 	  disqualify_split_candidate (desc, "Out of alias analysis step "
1762 				      "limit.");
1763 	  return;
1764 	}
1765 
1766       gcc_checking_assert (gimple_vuse (stmt));
1767       bool maybe_modified = false;
1768       ao_ref ar;
1769 
1770       ao_ref_init (&ar, expr);
1771       bitmap visited = BITMAP_ALLOC (NULL);
1772       int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1773 				       mark_maybe_modified, &maybe_modified,
1774 				       &visited, NULL, aa_walking_limit);
1775       BITMAP_FREE (visited);
1776       if (walked > 0)
1777 	{
1778 	  gcc_assert (aa_walking_limit > walked);
1779 	  aa_walking_limit = aa_walking_limit - walked;
1780 	}
1781       if (walked < 0)
1782 	aa_walking_limit = 0;
1783       if (maybe_modified || walked < 0)
1784 	{
1785 	  disqualify_split_candidate (desc, "Data passed by reference possibly "
1786 				      "modified through an alias.");
1787 	  return;
1788 	}
1789       else
1790 	mark_param_dereference (desc, offset + size, bb);
1791     }
1792   else
1793     /* Pointer parameters with direct uses should have been ruled out by
1794        analyzing SSA default def when looking at the parameters.  */
1795     gcc_assert (!desc->by_ref);
1796 
1797   gensum_param_access *access = get_access (desc, offset, size, ctx);
1798   if (!access)
1799     return;
1800 
1801   if (ctx == ISRA_CTX_ARG)
1802     {
1803       gcc_checking_assert (call_info);
1804 
1805       if (!deref)
1806 	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1807 				     size / BITS_PER_UNIT);
1808       else
1809 	/* This is not a pass-through of a pointer, this is a use like any
1810 	   other.  */
1811 	access->nonarg = true;
1812     }
1813 
1814   if (!access->type)
1815     {
1816       access->type = type;
1817       access->alias_ptr_type = reference_alias_ptr_type (expr);
1818       access->reverse = reverse;
1819     }
1820   else
1821     {
1822       if (exp_align < TYPE_ALIGN (access->type))
1823 	{
1824 	  disqualify_split_candidate (desc, "Reference has lower alignment "
1825 				      "than a previous one.");
1826 	  return;
1827 	}
1828       if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1829 	{
1830 	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
1831 	  return;
1832 	}
1833       if (access->reverse != reverse)
1834 	{
1835 	  disqualify_split_candidate (desc, "Both normal and reverse "
1836 				      "scalar storage order.");
1837 	  return;
1838 	}
1839       if (!deref
1840 	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1841 	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1842 	{
1843 	  /* We need the same aggregate type on all accesses to be able to
1844 	     distinguish transformation spots from pass-through arguments in
1845 	     the transformation phase.  */
1846 	  disqualify_split_candidate (desc, "We do not support aggregate "
1847 				      "type punning.");
1848 	  return;
1849 	}
1850 
1851       if (type_prevails_p (access->type, type))
1852 	 access->type = type;
1853     }
1854 }
1855 
1856 /* Scan the body of the function described by NODE and FUN and create access
1857    trees for its parameters.  */
1858 
1859 static void
1860 scan_function (cgraph_node *node, struct function *fun)
1861 {
1862   basic_block bb;
1863 
1864   FOR_EACH_BB_FN (bb, fun)
1865     {
1866       gimple_stmt_iterator gsi;
1867       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1868 	{
1869 	  gimple *stmt = gsi_stmt (gsi);
1870 
1871 	  if (stmt_can_throw_external (fun, stmt))
1872 	    bitmap_set_bit (final_bbs, bb->index);
1873 	  switch (gimple_code (stmt))
1874 	    {
1875 	    case GIMPLE_RETURN:
1876 	      {
1877 		tree t = gimple_return_retval (as_a <greturn *> (stmt));
1878 		if (t != NULL_TREE)
1879 		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1880 		bitmap_set_bit (final_bbs, bb->index);
1881 	      }
1882 	      break;
1883 
1884 	    case GIMPLE_ASSIGN:
1885 	      if (gimple_assign_single_p (stmt)
1886 		  && !gimple_clobber_p (stmt))
1887 		{
1888 		  tree rhs = gimple_assign_rhs1 (stmt);
1889 		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1890 		  tree lhs = gimple_assign_lhs (stmt);
1891 		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1892 		}
1893 	      break;
1894 
1895 	    case GIMPLE_CALL:
1896 	      {
1897 		unsigned argument_count = gimple_call_num_args (stmt);
1898 		isra_scan_context ctx = ISRA_CTX_ARG;
1899 		scan_call_info call_info, *call_info_p = &call_info;
1900 		if (gimple_call_internal_p (stmt))
1901 		  {
1902 		    call_info_p = NULL;
1903 		    ctx = ISRA_CTX_LOAD;
1904 		    internal_fn ifn = gimple_call_internal_fn (stmt);
1905 		    if (internal_store_fn_p (ifn))
1906 		      ctx = ISRA_CTX_STORE;
1907 		  }
1908 		else
1909 		  {
1910 		    call_info.cs = node->get_edge (stmt);
1911 		    call_info.argument_count = argument_count;
1912 		  }
1913 
1914 		for (unsigned i = 0; i < argument_count; i++)
1915 		  {
1916 		    call_info.arg_idx = i;
1917 		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
1918 				      ctx, bb, call_info_p);
1919 		  }
1920 
1921 		tree lhs = gimple_call_lhs (stmt);
1922 		if (lhs)
1923 		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1924 		int flags = gimple_call_flags (stmt);
1925 		if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1926 		    || (flags & ECF_LOOPING_CONST_OR_PURE))
1927 		  bitmap_set_bit (final_bbs, bb->index);
1928 	      }
1929 	      break;
1930 
1931 	    case GIMPLE_ASM:
1932 	      {
1933 		gasm *asm_stmt = as_a <gasm *> (stmt);
1934 		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1935 					       asm_visit_addr);
1936 		bitmap_set_bit (final_bbs, bb->index);
1937 
1938 		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1939 		  {
1940 		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1941 		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1942 		  }
1943 		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1944 		  {
1945 		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1946 		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1947 		  }
1948 	      }
1949 	      break;
1950 
1951 	    default:
1952 	      break;
1953 	    }
1954 	}
1955     }
1956 }
1957 
1958 /* Return true if SSA_NAME NAME of the function described by FUN is only used in
1959    return statements, or if results of any operations it is involved in are
1960    only used in return statements.  ANALYZED is a bitmap that tracks which SSA
1961    names we have already started investigating.  */
1962 
1963 static bool
1964 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1965 {
1966   bool res = true;
1967   imm_use_iterator imm_iter;
1968   gimple *stmt;
1969 
1970   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1971     {
1972       if (is_gimple_debug (stmt))
1973 	continue;
1974 
1975       if (gimple_code (stmt) == GIMPLE_RETURN)
1976 	{
1977 	  tree t = gimple_return_retval (as_a <greturn *> (stmt));
1978 	  if (t != name)
1979 	    {
1980 	      res = false;
1981 	      break;
1982 	    }
1983 	}
1984       else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1985 	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1986 		   || gimple_code (stmt) == GIMPLE_PHI))
1987 	{
1988 	  /* TODO: And perhaps for const function calls too?  */
1989 	  tree lhs;
1990 	  if (gimple_code (stmt) == GIMPLE_PHI)
1991 	    lhs = gimple_phi_result (stmt);
1992 	  else
1993 	    lhs = gimple_assign_lhs (stmt);
1994 
1995 	  if (TREE_CODE (lhs) != SSA_NAME)
1996 	    {
1997 	      res = false;
1998 	      break;
1999 	    }
2000 	  gcc_assert (!gimple_vdef (stmt));
2001 	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2002 	      && !ssa_name_only_returned_p (fun, lhs, analyzed))
2003 	    {
2004 	      res = false;
2005 	      break;
2006 	    }
2007 	}
2008       else
2009 	{
2010 	  res = false;
2011 	  break;
2012 	}
2013     }
2014   return res;
2015 }
2016 
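/* [Editorial sketch: the walk above is a depth-first search over
   immediate uses with a visited set that cuts cycles through PHIs.  A
   standalone restatement over a hypothetical use graph, guarded by #if 0:
   USES[n] lists names computed from name N, ESCAPES[n] is true if N has
   any use that is neither a return nor such a computation.]  */
#if 0
#include <vector>

static bool
toy_only_returned_p (int n, const std::vector<std::vector<int>> &uses,
		     const std::vector<bool> &escapes,
		     std::vector<bool> &visited)
{
  if (escapes[n])
    return false;	/* Feeds a store, a call argument, ...  */
  for (int next : uses[n])
    {
      if (visited[next])
	continue;	/* Cycle through a PHI: already investigated.  */
      visited[next] = true;
      if (!toy_only_returned_p (next, uses, escapes, visited))
	return false;
    }
  return true;
}
#endif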
2017 /* Inspect the uses of the return value of the call associated with CS, and if
2018    it is not used or if it is only used to construct the return value of the
2019    caller, mark it as such in call or caller summary.  Also check for
2020    misaligned arguments.  */
2021 
2022 static void
2023 isra_analyze_call (cgraph_edge *cs)
2024 {
2025   gcall *call_stmt = cs->call_stmt;
2026   unsigned count = gimple_call_num_args (call_stmt);
2027   isra_call_summary *csum = call_sums->get_create (cs);
2028 
2029   for (unsigned i = 0; i < count; i++)
2030     {
2031       tree arg = gimple_call_arg (call_stmt, i);
2032       if (is_gimple_reg (arg))
2033 	continue;
2034 
2035       tree offset;
2036       poly_int64 bitsize, bitpos;
2037       machine_mode mode;
2038       int unsignedp, reversep, volatilep = 0;
2039       get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2040 			   &unsignedp, &reversep, &volatilep);
2041       if (!multiple_p (bitpos, BITS_PER_UNIT))
2042 	{
2043 	  csum->m_bit_aligned_arg = true;
2044 	  break;
2045 	}
2046     }
2047 
2048   tree lhs = gimple_call_lhs (call_stmt);
2049   if (lhs)
2050     {
2051       /* TODO: Also detect aggregates on a LHS of a call that are only returned
2052 	 from this function (without being read anywhere).  */
2053       if (TREE_CODE (lhs) == SSA_NAME)
2054 	{
2055 	  bitmap analyzed = BITMAP_ALLOC (NULL);
2056 	  if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2057 					lhs, analyzed))
2058 	    csum->m_return_returned = true;
2059 	  BITMAP_FREE (analyzed);
2060 	}
2061     }
2062   else
2063     csum->m_return_ignored = true;
2064 }
2065 
2066 /* Look at all calls going out of NODE and perform all analyses necessary
2067    for IPA-SRA that are not done at body scan time, including those done
2068    even when the body is not scanned because the function is not a candidate.  */
2069 
2070 static void
2071 isra_analyze_all_outgoing_calls (cgraph_node *node)
2072 {
2073   for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2074     isra_analyze_call (cs);
2075   for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2076     isra_analyze_call (cs);
2077 }
2078 
2079 /* Dump a dereferences table with heading STR to file F.  */
2080 
2081 static void
2082 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2083 {
2084   basic_block bb;
2085 
2086   fprintf (f, "%s", str);
2087   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2088 		  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2089     {
2090       fprintf (f, "%4i  %i   ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2091       if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2092 	{
2093 	  int i;
2094 	  for (i = 0; i < by_ref_count; i++)
2095 	    {
2096 	      int idx = bb->index * by_ref_count + i;
2097 	      fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2098 	    }
2099 	}
2100       fprintf (f, "\n");
2101     }
2102   fprintf (dump_file, "\n");
2103 }
2104 
2105 /* Propagate distances in bb_dereferences in the direction opposite to the
2106    control flow edges, in each step storing the maximum of the current value
2107    and the minimum over all successors.  These steps are repeated until the
2108    table stabilizes.  Note that BBs which might terminate the function
2109    (according to the final_bbs bitmap) are never updated in this way.  */
2110 
2111 static void
2112 propagate_dereference_distances (struct function *fun)
2113 {
2114   basic_block bb;
2115 
2116   if (dump_file && (dump_flags & TDF_DETAILS))
2117     dump_dereferences_table (dump_file, fun,
2118 			     "Dereference table before propagation:\n");
2119 
2120   auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2121   queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2122   FOR_EACH_BB_FN (bb, fun)
2123     {
2124       queue.quick_push (bb);
2125       bb->aux = bb;
2126     }
2127 
2128   while (!queue.is_empty ())
2129     {
2130       edge_iterator ei;
2131       edge e;
2132       bool change = false;
2133       int i;
2134 
2135       bb = queue.pop ();
2136       bb->aux = NULL;
2137 
2138       if (bitmap_bit_p (final_bbs, bb->index))
2139 	continue;
2140 
2141       for (i = 0; i < by_ref_count; i++)
2142 	{
2143 	  int idx = bb->index * by_ref_count + i;
2144 	  bool first = true;
2145 	  HOST_WIDE_INT inh = 0;
2146 
2147 	  FOR_EACH_EDGE (e, ei, bb->succs)
2148 	  {
2149 	    int succ_idx = e->dest->index * by_ref_count + i;
2150 
2151 	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2152 	      continue;
2153 
2154 	    if (first)
2155 	      {
2156 		first = false;
2157 		inh = bb_dereferences [succ_idx];
2158 	      }
2159 	    else if (bb_dereferences [succ_idx] < inh)
2160 	      inh = bb_dereferences [succ_idx];
2161 	  }
2162 
2163 	  if (!first && bb_dereferences[idx] < inh)
2164 	    {
2165 	      bb_dereferences[idx] = inh;
2166 	      change = true;
2167 	    }
2168 	}
2169 
2170       if (change)
2171 	FOR_EACH_EDGE (e, ei, bb->preds)
2172 	  {
2173 	    if (e->src->aux)
2174 	      continue;
2175 
2176 	    e->src->aux = e->src;
2177 	    queue.quick_push (e->src);
2178 	  }
2179     }
2180 
2181   if (dump_file && (dump_flags & TDF_DETAILS))
2182     dump_dereferences_table (dump_file, fun,
2183 			     "Dereference table after propagation:\n");
2184 }
2185 
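/* [Editorial sketch of the propagation above on a toy CFG with a single
   candidate: each block's value is raised to the minimum of its
   successors' values, "final" blocks are never raised, and a block whose
   value changed re-queues its predecessors until a fixed point is
   reached.  Hypothetical standalone code, guarded by #if 0.]  */
#if 0
#include <vector>
#include <deque>
#include <set>
#include <algorithm>
#include <climits>

static void
toy_propagate (const std::vector<std::vector<int>> &succs,
	       const std::vector<std::vector<int>> &preds,
	       const std::set<int> &final_blocks,
	       std::vector<long> &value)
{
  std::deque<int> queue;
  std::vector<bool> queued (value.size (), true);
  for (unsigned b = 0; b < value.size (); b++)
    queue.push_back (b);

  while (!queue.empty ())
    {
      int b = queue.front ();
      queue.pop_front ();
      queued[b] = false;
      if (final_blocks.count (b) || succs[b].empty ())
	continue;

      long inh = LONG_MAX;
      for (int s : succs[b])
	inh = std::min (inh, value[s]);
      if (value[b] < inh)
	{
	  value[b] = inh;	/* Raised; predecessors must be re-examined.  */
	  for (int p : preds[b])
	    if (!queued[p])
	      {
		queued[p] = true;
		queue.push_back (p);
	      }
	}
    }
}
#endif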
2186 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2187    children, return true if the parameter cannot be split, otherwise return
2188    false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be
2189    the index of the entry BB in the function of PARM.  */
2190 
2191 static bool
2192 check_gensum_access (tree parm, gensum_param_desc *desc,
2193 		     gensum_param_access *access,
2194 		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2195 		     int entry_bb_index)
2196 {
2197   if (access->nonarg)
2198     {
2199       *only_calls = false;
2200       *nonarg_acc_size += access->size;
2201 
2202       if (access->first_child)
2203 	{
2204 	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
2205 	  return true;
2206 	}
2207     }
2208   /* Do not decompose a non-BLKmode param in a way that would create
2209      BLKmode params.  Especially for by-reference passing (thus,
2210      pointer-type param) this is hardly worthwhile.  */
2211   if (DECL_MODE (parm) != BLKmode
2212       && TYPE_MODE (access->type) == BLKmode)
2213     {
2214       disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2215       return true;
2216     }
2217 
2218   if (desc->by_ref)
2219     {
2220       int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2221       if ((access->offset + access->size) > bb_dereferences[idx])
2222 	{
2223 	  disqualify_split_candidate (desc, "Would create a possibly "
2224 				      "illegal dereference in a caller.");
2225 	  return true;
2226 	}
2227     }
2228 
2229   for (gensum_param_access *ch = access->first_child;
2230        ch;
2231        ch = ch->next_sibling)
2232     if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2233 			     entry_bb_index))
2234       return true;
2235 
2236   return false;
2237 }
2238 
2239 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2240    descriptor DESC.  */
2241 
2242 static void
2243 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2244 {
2245   param_access *to = ggc_cleared_alloc<param_access> ();
2246   gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2247   gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2248   to->unit_offset = from->offset / BITS_PER_UNIT;
2249   to->unit_size = from->size / BITS_PER_UNIT;
2250   to->type = from->type;
2251   to->alias_ptr_type = from->alias_ptr_type;
2252   to->certain = from->nonarg;
2253   to->reverse = from->reverse;
2254   vec_safe_push (desc->accesses, to);
2255 
2256   for (gensum_param_access *ch = from->first_child;
2257        ch;
2258        ch = ch->next_sibling)
2259     copy_accesses_to_ipa_desc (ch, desc);
2260 }
2261 
2262 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2263    possible transformations and store information about them in the function
2264    summary.  NODE, FUN and IFS are all various structures describing the
2265    currently analyzed function.  */
2266 
2267 static void
2268 process_scan_results (cgraph_node *node, struct function *fun,
2269 		      isra_func_summary *ifs,
2270 		      vec<gensum_param_desc> *param_descriptions)
2271 {
2272   bool check_pass_throughs = false;
2273   bool dereferences_propagated = false;
2274   tree parm = DECL_ARGUMENTS (node->decl);
2275   unsigned param_count = param_descriptions->length();
2276 
2277   for (unsigned desc_index = 0;
2278        desc_index < param_count;
2279        desc_index++, parm = DECL_CHAIN (parm))
2280     {
2281       gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2282       if (!desc->split_candidate)
2283 	continue;
2284 
2285       if (flag_checking)
2286 	isra_verify_access_tree (desc->accesses);
2287 
2288       if (!dereferences_propagated
2289 	  && desc->by_ref
2290 	  && desc->accesses)
2291 	{
2292 	  propagate_dereference_distances (fun);
2293 	  dereferences_propagated = true;
2294 	}
2295 
2296       HOST_WIDE_INT nonarg_acc_size = 0;
2297       bool only_calls = true;
2298       bool check_failed = false;
2299 
2300       int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2301       for (gensum_param_access *acc = desc->accesses;
2302 	   acc;
2303 	   acc = acc->next_sibling)
2304 	if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2305 				 entry_bb_index))
2306 	  {
2307 	    check_failed = true;
2308 	    break;
2309 	  }
2310       if (check_failed)
2311 	continue;
2312 
2313       if (only_calls)
2314 	desc->locally_unused = true;
2315 
2316       HOST_WIDE_INT cur_param_size
2317 	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2318       HOST_WIDE_INT param_size_limit;
2319       if (!desc->by_ref || optimize_function_for_size_p (fun))
2320 	param_size_limit = cur_param_size;
2321       else
2322 	param_size_limit
2323 	  = opt_for_fn (node->decl,
2324 			param_ipa_sra_ptr_growth_factor) * cur_param_size;
2325       if (nonarg_acc_size > param_size_limit
2326 	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2327 	{
2328 	  disqualify_split_candidate (desc, "Would result in a too big set "
2329 				      "of replacements.");
2330 	}
2331       else
2332 	{
2333 	  /* create_parameter_descriptors makes sure unit sizes of all
2334 	     candidate parameters fit unsigned integers restricted to
2335 	     ISRA_ARG_SIZE_LIMIT.  */
2336 	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2337 	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2338 	  if (desc->split_candidate && desc->ptr_pt_count)
2339 	    {
2340 	      gcc_assert (desc->by_ref);
2341 	      check_pass_throughs = true;
2342 	    }
2343 	}
2344     }
2345 
2346   /* When a pointer parameter is passed through to a callee in which it is
2347      only used to read one or a few items, we can attempt to transform it
2348      to obtaining and passing through the items instead of the pointer.  But we
2349      must take extra care that 1) we do not introduce any segfault by moving
2350      dereferences above control flow and that 2) the data is not modified
2351      through an alias in this function.  The IPA analysis must not introduce
2352      any access candidates unless it can prove both.
2353 
2354      The current solution is very crude as it consists of ensuring that the
2355      call postdominates entry BB and that the definition of VUSE of the call is
2356      default definition.  TODO: For non-recursive callees in the same
2357      compilation unit we could do better by doing analysis in topological order
2358      an looking into access candidates of callees, using their alias_ptr_types
2359      to attempt real AA.  We could also use the maximum known dereferenced
2360      offset in this function at IPA level.
2361 
2362      TODO: Measure the overhead and the effect of just being pessimistic.
2363      Maybe this is only -O3 material?  */
2364 
2365   bool pdoms_calculated = false;
2366   if (check_pass_throughs)
2367     for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2368       {
2369 	gcall *call_stmt = cs->call_stmt;
2370 	tree vuse = gimple_vuse (call_stmt);
2371 
2372 	/* If the callee is a const function, we don't get a VUSE.  In such a
2373 	   case there will be no memory accesses in the called function (or the
2374 	   const attribute is wrong) and then we just don't care.  */
2375 	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2376 
2377 	unsigned count = gimple_call_num_args (call_stmt);
2378 	isra_call_summary *csum = call_sums->get_create (cs);
2379 	csum->init_inputs (count);
2380 	csum->m_before_any_store = uses_memory_as_obtained;
2381 	for (unsigned argidx = 0; argidx < count; argidx++)
2382 	  {
2383 	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
2384 	      continue;
2385 	    unsigned pidx
2386 	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2387 	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
2388 	    if (!desc->split_candidate)
2389 	      {
2390 		csum->m_arg_flow[argidx].pointer_pass_through = false;
2391 		continue;
2392 	      }
2393 	    if (!uses_memory_as_obtained)
2394 	      continue;
2395 
2396 	    /* Post-dominator check placed last, hoping that it usually won't
2397 	       be needed.  */
2398 	    if (!pdoms_calculated)
2399 	      {
2400 		gcc_checking_assert (cfun);
2401 		connect_infinite_loops_to_exit ();
2402 		calculate_dominance_info (CDI_POST_DOMINATORS);
2403 		pdoms_calculated = true;
2404 	      }
2405 	    if (dominated_by_p (CDI_POST_DOMINATORS,
2406 				gimple_bb (call_stmt),
2407 				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2408 	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2409 	  }
2410 
2411       }
2412   if (pdoms_calculated)
2413     {
2414       free_dominance_info (CDI_POST_DOMINATORS);
2415       remove_fake_exit_edges ();
2416     }
2417 
2418   /* TODO: Add early exit if we disqualified everything.  This also requires
2419      that we either relax the restriction that
2420      ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2421      or store the number of parameters to IPA-SRA function summary and use that
2422      when just removing params.  */
2423 
2424   vec_safe_reserve_exact (ifs->m_parameters, param_count);
2425   ifs->m_parameters->quick_grow_cleared (param_count);
2426   for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2427     {
2428       gensum_param_desc *s = &(*param_descriptions)[desc_index];
2429       isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2430 
2431       d->param_size_limit = s->param_size_limit;
2432       d->size_reached = s->nonarg_acc_size;
2433       d->locally_unused = s->locally_unused;
2434       d->split_candidate = s->split_candidate;
2435       d->by_ref = s->by_ref;
2436 
2437       for (gensum_param_access *acc = s->accesses;
2438 	   acc;
2439 	   acc = acc->next_sibling)
2440 	copy_accesses_to_ipa_desc (acc, d);
2441     }
2442 
2443   if (dump_file)
2444     dump_isra_param_descriptors (dump_file, node->decl, ifs);
2445 }
2446 
2447 /* Return true if there are any overlaps among certain accesses of DESC.  If
2448    non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2449    too.  DESC is assumed to be a split candidate that is not locally
2450    unused.  */
2451 
2452 static bool
2453 overlapping_certain_accesses_p (isra_param_desc *desc,
2454 				bool *certain_access_present_p)
2455 {
2456   unsigned pclen = vec_safe_length (desc->accesses);
2457   for (unsigned i = 0; i < pclen; i++)
2458     {
2459       param_access *a1 = (*desc->accesses)[i];
2460 
2461       if (!a1->certain)
2462 	continue;
2463       if (certain_access_present_p)
2464 	*certain_access_present_p = true;
2465       for (unsigned j = i + 1; j < pclen; j++)
2466 	{
2467 	  param_access *a2 = (*desc->accesses)[j];
2468 	  if (a2->certain
2469 	      && a1->unit_offset < a2->unit_offset + a2->unit_size
2470 	      && a1->unit_offset + a1->unit_size > a2->unit_offset)
2471 	    return true;
2472 	}
2473     }
2474   return false;
2475 }
2476 
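/* [Editorial sketch: the pairwise test above is the standard half-open
   interval overlap check; [o1, o1+s1) and [o2, o2+s2) overlap iff each
   starts before the other ends.  Standalone, guarded by #if 0.]  */
#if 0
#include <cassert>

static bool
toy_overlaps (unsigned o1, unsigned s1, unsigned o2, unsigned s2)
{
  return o1 < o2 + s2 && o2 < o1 + s1;
}

int
main ()
{
  assert (toy_overlaps (0, 4, 2, 4));	/* [0,4) and [2,6) overlap.  */
  assert (!toy_overlaps (0, 4, 4, 4));	/* [0,4) and [4,8) only touch.  */
  return 0;
}
#endif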
2477 /* Check for any overlaps of certain param accesses among splitting candidates
2478    and signal an ICE if there are any.  If CERTAIN_MUST_EXIST is set, also
2479    check that used splitting candidates have at least one certain access.  */
2480 
2481 static void
2482 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2483 {
2484   isra_func_summary *ifs = func_sums->get (node);
2485   if (!ifs || !ifs->m_candidate)
2486     return;
2487   unsigned param_count = vec_safe_length (ifs->m_parameters);
2488   for (unsigned pidx = 0; pidx < param_count; pidx++)
2489     {
2490       isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2491       if (!desc->split_candidate || desc->locally_unused)
2492 	continue;
2493 
2494       bool certain_access_present = !certain_must_exist;
2495       if (overlapping_certain_accesses_p (desc, &certain_access_present))
2496 	internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2497 			"which overlap", node->dump_name (), pidx);
2498       if (!certain_access_present)
2499 	internal_error ("function %qs, parameter %u, is used but does not "
2500 			"have any certain IPA-SRA access",
2501 			node->dump_name (), pidx);
2502     }
2503 }
2504 
2505 /* Intraprocedural part of IPA-SRA analysis.  Scan bodies of all functions in
2506    this compilation unit and create summary structures describing IPA-SRA
2507    opportunities and constraints in them.  */
2508 
2509 static void
2510 ipa_sra_generate_summary (void)
2511 {
2512   struct cgraph_node *node;
2513 
2514   gcc_checking_assert (!func_sums);
2515   gcc_checking_assert (!call_sums);
2516   func_sums
2517     = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2518        ipa_sra_function_summaries (symtab, true));
2519   call_sums = new ipa_sra_call_summaries (symtab);
2520 
2521   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2522     ipa_sra_summarize_function (node);
2523   return;
2524 }
2525 
2526 /* Write intraprocedural analysis information about the call edge E into a
2527    stream for LTO WPA.  */
2528 
2529 static void
2530 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2531 {
2532   isra_call_summary *csum = call_sums->get (e);
2533   unsigned input_count = csum->m_arg_flow.length ();
2534   streamer_write_uhwi (ob, input_count);
2535   for (unsigned i = 0; i < input_count; i++)
2536     {
2537       isra_param_flow *ipf = &csum->m_arg_flow[i];
2538       streamer_write_hwi (ob, ipf->length);
2539       bitpack_d bp = bitpack_create (ob->main_stream);
2540       for (int j = 0; j < ipf->length; j++)
2541 	bp_pack_value (&bp, ipf->inputs[j], 8);
2542       bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2543       bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2544       bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2545       streamer_write_bitpack (&bp);
2546       streamer_write_uhwi (ob, ipf->unit_offset);
2547       streamer_write_uhwi (ob, ipf->unit_size);
2548     }
2549   bitpack_d bp = bitpack_create (ob->main_stream);
2550   bp_pack_value (&bp, csum->m_return_ignored, 1);
2551   bp_pack_value (&bp, csum->m_return_returned, 1);
2552   bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2553   bp_pack_value (&bp, csum->m_before_any_store, 1);
2554   streamer_write_bitpack (&bp);
2555 }
2556 
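/* [Editorial sketch: the streaming above relies on flags being
   bit-packed in a fixed order and unpacked on the reading side with
   exactly the same order and widths.  A hypothetical miniature bitpack
   demonstrating that contract, guarded by #if 0.]  */
#if 0
#include <cstdint>
#include <cassert>

struct toy_bitpack
{
  uint64_t word = 0;
  unsigned pos = 0;

  void pack (uint64_t val, unsigned bits)
  {
    uint64_t mask = (((uint64_t) 1) << bits) - 1;
    word |= (val & mask) << pos;
    pos += bits;
  }
  uint64_t unpack (unsigned bits)
  {
    uint64_t mask = (((uint64_t) 1) << bits) - 1;
    uint64_t val = (word >> pos) & mask;
    pos += bits;
    return val;
  }
};

int
main ()
{
  toy_bitpack w;
  w.pack (1, 1);	/* E.g. m_return_ignored.  */
  w.pack (0, 1);	/* E.g. m_return_returned.  */
  w.pack (5, 8);	/* E.g. one 8-bit input index.  */

  toy_bitpack r;
  r.word = w.word;	/* "Stream" the word to the reader.  */
  assert (r.unpack (1) == 1);
  assert (r.unpack (1) == 0);
  assert (r.unpack (8) == 5);
  return 0;
}
#endif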
2557 /* Write intraprocedural analysis information about NODE and all of its outgoing
2558    edges into a stream for LTO WPA.  */
2559 
2560 static void
2561 isra_write_node_summary (output_block *ob, cgraph_node *node)
2562 {
2563   isra_func_summary *ifs = func_sums->get (node);
2564   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2565   int node_ref = lto_symtab_encoder_encode (encoder, node);
2566   streamer_write_uhwi (ob, node_ref);
2567 
2568   unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2569   streamer_write_uhwi (ob, param_desc_count);
2570   for (unsigned i = 0; i < param_desc_count; i++)
2571     {
2572       isra_param_desc *desc = &(*ifs->m_parameters)[i];
2573       unsigned access_count = vec_safe_length (desc->accesses);
2574       streamer_write_uhwi (ob, access_count);
2575       for (unsigned j = 0; j < access_count; j++)
2576 	{
2577 	  param_access *acc = (*desc->accesses)[j];
2578 	  stream_write_tree (ob, acc->type, true);
2579 	  stream_write_tree (ob, acc->alias_ptr_type, true);
2580 	  streamer_write_uhwi (ob, acc->unit_offset);
2581 	  streamer_write_uhwi (ob, acc->unit_size);
2582 	  bitpack_d bp = bitpack_create (ob->main_stream);
2583 	  bp_pack_value (&bp, acc->certain, 1);
2584 	  bp_pack_value (&bp, acc->reverse, 1);
2585 	  streamer_write_bitpack (&bp);
2586 	}
2587       streamer_write_uhwi (ob, desc->param_size_limit);
2588       streamer_write_uhwi (ob, desc->size_reached);
2589       bitpack_d bp = bitpack_create (ob->main_stream);
2590       bp_pack_value (&bp, desc->locally_unused, 1);
2591       bp_pack_value (&bp, desc->split_candidate, 1);
2592       bp_pack_value (&bp, desc->by_ref, 1);
2593       streamer_write_bitpack (&bp);
2594     }
2595   bitpack_d bp = bitpack_create (ob->main_stream);
2596   bp_pack_value (&bp, ifs->m_candidate, 1);
2597   bp_pack_value (&bp, ifs->m_returns_value, 1);
2598   bp_pack_value (&bp, ifs->m_return_ignored, 1);
2599   gcc_assert (!ifs->m_queued);
2600   streamer_write_bitpack (&bp);
2601 
2602   for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2603     isra_write_edge_summary (ob, e);
2604   for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2605     isra_write_edge_summary (ob, e);
2606 }
2607 
2608 /* Write intraprocedural analysis information into a stream for LTO WPA.  */
2609 
2610 static void
2611 ipa_sra_write_summary (void)
2612 {
2613   if (!func_sums || !call_sums)
2614     return;
2615 
2616   struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2617   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2618   ob->symbol = NULL;
2619 
2620   unsigned int count = 0;
2621   lto_symtab_encoder_iterator lsei;
2622   for (lsei = lsei_start_function_in_partition (encoder);
2623        !lsei_end_p (lsei);
2624        lsei_next_function_in_partition (&lsei))
2625     {
2626       cgraph_node *node = lsei_cgraph_node (lsei);
2627       if (node->has_gimple_body_p ()
2628 	  && func_sums->get (node) != NULL)
2629 	count++;
2630     }
2631   streamer_write_uhwi (ob, count);
2632 
2633   /* Process all of the functions.  */
2634   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2635        lsei_next_function_in_partition (&lsei))
2636     {
2637       cgraph_node *node = lsei_cgraph_node (lsei);
2638       if (node->has_gimple_body_p ()
2639 	  && func_sums->get (node) != NULL)
2640         isra_write_node_summary (ob, node);
2641     }
2642   streamer_write_char_stream (ob->main_stream, 0);
2643   produce_asm (ob, NULL);
2644   destroy_output_block (ob);
2645 }
2646 
2647 /* Read intraprocedural analysis information about the call edge CS from a
2648    stream for LTO WPA.  */
2649 
2650 static void
2651 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2652 {
2653   isra_call_summary *csum = call_sums->get_create (cs);
2654   unsigned input_count = streamer_read_uhwi (ib);
2655   csum->init_inputs (input_count);
2656   for (unsigned i = 0; i < input_count; i++)
2657     {
2658       isra_param_flow *ipf = &csum->m_arg_flow[i];
2659       ipf->length = streamer_read_hwi (ib);
2660       bitpack_d bp = streamer_read_bitpack (ib);
2661       for (int j = 0; j < ipf->length; j++)
2662 	ipf->inputs[j] = bp_unpack_value (&bp, 8);
2663       ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2664       ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2665       ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2666       ipf->unit_offset = streamer_read_uhwi (ib);
2667       ipf->unit_size = streamer_read_uhwi (ib);
2668     }
2669   bitpack_d bp = streamer_read_bitpack (ib);
2670   csum->m_return_ignored = bp_unpack_value (&bp, 1);
2671   csum->m_return_returned = bp_unpack_value (&bp, 1);
2672   csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2673   csum->m_before_any_store = bp_unpack_value (&bp, 1);
2674 }
2675 
2676 /* Read intraprocedural analysis information about NODE and all of its outgoing
2677    edges from a stream for LTO WPA.  */
2678 
2679 static void
2680 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2681 		     struct data_in *data_in)
2682 {
2683   isra_func_summary *ifs = func_sums->get_create (node);
2684   unsigned param_desc_count = streamer_read_uhwi (ib);
2685   if (param_desc_count > 0)
2686     {
2687       vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2688       ifs->m_parameters->quick_grow_cleared (param_desc_count);
2689     }
2690   for (unsigned i = 0; i < param_desc_count; i++)
2691     {
2692       isra_param_desc *desc = &(*ifs->m_parameters)[i];
2693       unsigned access_count = streamer_read_uhwi (ib);
2694       for (unsigned j = 0; j < access_count; j++)
2695 	{
2696 	  param_access *acc = ggc_cleared_alloc<param_access> ();
2697 	  acc->type = stream_read_tree (ib, data_in);
2698 	  acc->alias_ptr_type = stream_read_tree (ib, data_in);
2699 	  acc->unit_offset = streamer_read_uhwi (ib);
2700 	  acc->unit_size = streamer_read_uhwi (ib);
2701 	  bitpack_d bp = streamer_read_bitpack (ib);
2702 	  acc->certain = bp_unpack_value (&bp, 1);
2703 	  acc->reverse = bp_unpack_value (&bp, 1);
2704 	  vec_safe_push (desc->accesses, acc);
2705 	}
2706       desc->param_size_limit = streamer_read_uhwi (ib);
2707       desc->size_reached = streamer_read_uhwi (ib);
2708       bitpack_d bp = streamer_read_bitpack (ib);
2709       desc->locally_unused = bp_unpack_value (&bp, 1);
2710       desc->split_candidate = bp_unpack_value (&bp, 1);
2711       desc->by_ref = bp_unpack_value (&bp, 1);
2712     }
2713   bitpack_d bp = streamer_read_bitpack (ib);
2714   ifs->m_candidate = bp_unpack_value (&bp, 1);
2715   ifs->m_returns_value = bp_unpack_value (&bp, 1);
2716   ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2717   ifs->m_queued = 0;
2718 
2719   for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2720     isra_read_edge_summary (ib, e);
2721   for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2722     isra_read_edge_summary (ib, e);
2723 }
2724 
2725 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2726    data DATA.  TODO: This function was copied almost verbatim from ipa-prop.cc,
2727    it should be possible to unify them somehow.  */
2728 
2729 static void
2730 isra_read_summary_section (struct lto_file_decl_data *file_data,
2731 			   const char *data, size_t len)
2732 {
2733   const struct lto_function_header *header =
2734     (const struct lto_function_header *) data;
2735   const int cfg_offset = sizeof (struct lto_function_header);
2736   const int main_offset = cfg_offset + header->cfg_size;
2737   const int string_offset = main_offset + header->main_size;
2738   struct data_in *data_in;
2739   unsigned int i;
2740   unsigned int count;
2741 
2742   lto_input_block ib_main ((const char *) data + main_offset,
2743 			   header->main_size, file_data->mode_table);
2744 
2745   data_in =
2746     lto_data_in_create (file_data, (const char *) data + string_offset,
2747 			header->string_size, vNULL);
2748   count = streamer_read_uhwi (&ib_main);
2749 
2750   for (i = 0; i < count; i++)
2751     {
2752       unsigned int index;
2753       struct cgraph_node *node;
2754       lto_symtab_encoder_t encoder;
2755 
2756       index = streamer_read_uhwi (&ib_main);
2757       encoder = file_data->symtab_node_encoder;
2758       node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2759 								index));
2760       gcc_assert (node->definition);
2761       isra_read_node_info (&ib_main, node, data_in);
2762     }
2763   lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2764 			 len);
2765   lto_data_in_delete (data_in);
2766 }
2767 
2768 /* Read intraprocedural analysis information from a stream for LTO WPA.  */
2769 
2770 static void
2771 ipa_sra_read_summary (void)
2772 {
2773   struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2774   struct lto_file_decl_data *file_data;
2775   unsigned int j = 0;
2776 
2777   gcc_checking_assert (!func_sums);
2778   gcc_checking_assert (!call_sums);
2779   func_sums
2780     = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2781        ipa_sra_function_summaries (symtab, true));
2782   call_sums = new ipa_sra_call_summaries (symtab);
2783 
2784   while ((file_data = file_data_vec[j++]))
2785     {
2786       size_t len;
2787       const char *data
2788 	= lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2789       if (data)
2790         isra_read_summary_section (file_data, data, len);
2791     }
2792 }
2793 
2794 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F.  */
2795 
2796 static void
2797 ipa_sra_dump_all_summaries (FILE *f)
2798 {
2799   cgraph_node *node;
2800   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2801     {
2802       fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2803 
2804       isra_func_summary *ifs = func_sums->get (node);
2805       if (!ifs)
2806 	fprintf (f, "  Function does not have any associated IPA-SRA "
2807 		 "summary\n");
2808       else
2809 	{
2810 	  if (!ifs->m_candidate)
2811 	    {
2812 	      fprintf (f, "  Not a candidate function\n");
2813 	      continue;
2814 	    }
2815 	  if (ifs->m_returns_value)
2816 	    fprintf (f, "  Returns value\n");
2817 	  if (vec_safe_is_empty (ifs->m_parameters))
2818 	    fprintf (f, "  No parameter information.\n");
2819 	  else
2820 	    for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2821 	      {
2822 		fprintf (f, "  Descriptor for parameter %i:\n", i);
2823 		dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2824 	      }
2825 	  fprintf (f, "\n");
2826 	}
2827 
2828       struct cgraph_edge *cs;
2829       for (cs = node->callees; cs; cs = cs->next_callee)
2830 	{
2831 	  fprintf (f, "  Summary for edge %s->%s:\n", cs->caller->dump_name (),
2832 		   cs->callee->dump_name ());
2833 	  isra_call_summary *csum = call_sums->get (cs);
2834 	  if (csum)
2835 	    csum->dump (f);
2836 	  else
2837 	    fprintf (f, "    Call summary is MISSING!\n");
2838 	}
2839 
2840     }
2841   fprintf (f, "\n\n");
2842 }
2843 
2844 /* Perform function-scope viability tests that can be only made at IPA level
2845    and return false if the function is deemed unsuitable for IPA-SRA.  */
2846 
2847 static bool
2848 ipa_sra_ipa_function_checks (cgraph_node *node)
2849 {
2850   if (!node->can_be_local_p ())
2851     {
2852       if (dump_file)
2853 	fprintf (dump_file, "Function %s disqualified because it cannot be "
2854 		 "made local.\n", node->dump_name ());
2855       return false;
2856     }
2857   if (!node->can_change_signature)
2858     {
2859       if (dump_file)
2860 	fprintf (dump_file, "Function cannot change signature.\n");
2861       return false;
2862     }
2863 
2864   return true;
2865 }
2866 
2867 /* Issues found out by check_callers_for_issues.  */
2868 
2869 struct caller_issues
2870 {
2871   /* The candidate being considered.  */
2872   cgraph_node *candidate;
2873   /* There is a thunk among callers.  */
2874   bool thunk;
2875   /* Call site with no available information.  */
2876   bool unknown_callsite;
2877   /* Call from outside the candidate's comdat group.  */
2878   bool call_from_outside_comdat;
2879   /* There is a bit-aligned load into one of the non-gimple-typed arguments.  */
2880   bool bit_aligned_aggregate_argument;
2881 };
2882 
2883 /* Worker for call_for_symbol_and_aliases; set any flags of the passed
2884    caller_issues that apply.  */
2885 
2886 static bool
2887 check_for_caller_issues (struct cgraph_node *node, void *data)
2888 {
2889   struct caller_issues *issues = (struct caller_issues *) data;
2890 
2891   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2892     {
2893       if (cs->caller->thunk)
2894 	{
2895 	  issues->thunk = true;
2896 	  /* TODO: We should be able to process at least some types of
2897 	     thunks.  */
2898 	  return true;
2899 	}
2900       if (issues->candidate->calls_comdat_local
2901 	  && issues->candidate->same_comdat_group
2902 	  && !issues->candidate->in_same_comdat_group_p (cs->caller))
2903 	{
2904 	  issues->call_from_outside_comdat = true;
2905 	  return true;
2906 	}
2907 
2908       isra_call_summary *csum = call_sums->get (cs);
2909       if (!csum)
2910 	{
2911 	  issues->unknown_callsite = true;
2912 	  return true;
2913 	}
2914 
2915       if (csum->m_bit_aligned_arg)
2916 	issues->bit_aligned_aggregate_argument = true;
2917     }
2918   return false;
2919 }
2920 
2921 /* Look at all incoming edges to NODE, including aliases and thunks, and look
2922    for problems.  Return true if NODE's type should not be modified at all.  */
2923 
2924 static bool
2925 check_all_callers_for_issues (cgraph_node *node)
2926 {
2927   struct caller_issues issues;
2928   memset (&issues, 0, sizeof (issues));
2929   issues.candidate = node;
2930 
2931   node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2932   if (issues.unknown_callsite)
2933     {
2934       if (dump_file && (dump_flags & TDF_DETAILS))
2935 	fprintf (dump_file, "A call of %s has not been analyzed.  Disabling "
2936 		 "all modifications.\n", node->dump_name ());
2937       return true;
2938     }
2939   /* TODO: We should be able to process at least some types of thunks.  */
2940   if (issues.thunk)
2941     {
2942       if (dump_file && (dump_flags & TDF_DETAILS))
2943 	fprintf (dump_file, "A call of %s is through a thunk, which is not"
2944 		 " handled yet.  Disabling all modifications.\n",
2945 		 node->dump_name ());
2946       return true;
2947     }
2948   if (issues.call_from_outside_comdat)
2949     {
2950       if (dump_file)
2951 	fprintf (dump_file, "Function would become private comdat called "
2952 		 "outside of its comdat group.\n");
2953       return true;
2954     }
2955 
2956   if (issues.bit_aligned_aggregate_argument)
2957     {
2958       /* Let's only remove parameters/return values from such functions.
2959 	 TODO: We could only prevent splitting the problematic parameters if
2960 	 anybody thinks it is worth it.  */
2961       if (dump_file && (dump_flags & TDF_DETAILS))
2962 	fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2963 		 " disabling parameter splitting.\n", node->dump_name ());
2964 
2965       isra_func_summary *ifs = func_sums->get (node);
2966       gcc_checking_assert (ifs);
2967       unsigned param_count = vec_safe_length (ifs->m_parameters);
2968       for (unsigned i = 0; i < param_count; i++)
2969 	(*ifs->m_parameters)[i].split_candidate = false;
2970     }
2971   return false;
2972 }
2973 
2974 /* Find the access with corresponding OFFSET and SIZE among accesses in
2975    PARAM_DESC and return it or NULL if such an access is not there.  */
2976 
2977 static param_access *
2978 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2979 {
2980   unsigned pclen = vec_safe_length (param_desc->accesses);
2981 
2982   /* The search is linear, but the number of stored accesses is bounded by
2983      PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8.  */
2984 
2985   for (unsigned i = 0; i < pclen; i++)
2986     if ((*param_desc->accesses)[i]->unit_offset == offset
2987 	&& (*param_desc->accesses)[i]->unit_size == size)
2988       return (*param_desc->accesses)[i];
2989 
2990   return NULL;
2991 }
2992 
2993 /* Return true iff the total size SIZE of definite replacements would violate
2994    the limit set for it in DESC.  */
2995 
2996 static bool
2997 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
2998 {
2999   unsigned limit = desc->param_size_limit;
3000   if (size > limit
3001       || (!desc->by_ref && size == limit))
3002     return true;
3003   return false;
3004 }
3005 
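/* [Editorial sketch restating the budget rule above: a by-value
   parameter must strictly shrink, so reaching the limit already counts
   as a violation, while a by-reference one may use the whole budget.
   Guarded by #if 0.]  */
#if 0
static bool
toy_violates_limit (bool by_ref, unsigned size, unsigned limit)
{
  return size > limit || (!by_ref && size == limit);
}
#endif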
3006 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3007    the set limit.  IDX is the parameter number which is dumped when
3008    disqualifying.  */
3009 
3010 static void
3011 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3012 {
3013   unsigned after = desc->size_reached + size;
3014   if (size_would_violate_limit_p (desc, after))
3015     {
3016       if (dump_file && (dump_flags & TDF_DETAILS))
3017 	fprintf (dump_file, "    ...size limit reached, disqualifying "
3018 		 "candidate parameter %u\n", idx);
3019       desc->split_candidate = false;
3020       return;
3021     }
3022   desc->size_reached = after;
3023 }
3024 
3025 /* Take all actions required to deal with an edge CS that represents a call to
3026    an unknown or un-analyzed function, for both parameter removal and
3027    splitting.  */
3028 
3029 static void
3030 process_edge_to_unknown_caller (cgraph_edge *cs)
3031 {
3032   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3033   gcc_checking_assert (from_ifs);
3034   isra_call_summary *csum = call_sums->get (cs);
3035 
3036   if (dump_file && (dump_flags & TDF_DETAILS))
3037     fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3038 	     cs->caller->dump_name ());
3039 
3040   unsigned args_count = csum->m_arg_flow.length ();
3041   for (unsigned i = 0; i < args_count; i++)
3042     {
3043       isra_param_flow *ipf = &csum->m_arg_flow[i];
3044 
3045       if (ipf->pointer_pass_through)
3046 	{
3047 	  isra_param_desc *param_desc
3048 	    = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3049 	  param_desc->locally_unused = false;
3050 	  param_desc->split_candidate = false;
3051 	  continue;
3052 	}
3053       if (ipf->aggregate_pass_through)
3054 	{
3055 	  unsigned idx = get_single_param_flow_source (ipf);
3056 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3057 
3058 	  param_desc->locally_unused = false;
3059 	  if (!param_desc->split_candidate)
3060 	    continue;
3061 	  gcc_assert (!param_desc->by_ref);
3062 	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3063 						  ipf->unit_size);
3064 	  gcc_checking_assert (pacc);
3065 	  pacc->certain = true;
3066 	  if (overlapping_certain_accesses_p (param_desc, NULL))
3067 	    {
3068 	      if (dump_file && (dump_flags & TDF_DETAILS))
3069 		fprintf (dump_file, "    ...leading to overlap, "
3070 			 " disqualifying candidate parameter %u\n",
3071 			 idx);
3072 	      param_desc->split_candidate = false;
3073 	    }
3074 	  else
3075 	    bump_reached_size (param_desc, pacc->unit_size, idx);
3076 	  ipf->aggregate_pass_through = false;
3077 	  continue;
3078 	}
3079 
3080       for (int j = 0; j < ipf->length; j++)
3081 	{
3082 	  int input_idx = ipf->inputs[j];
3083 	  (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3084 	}
3085     }
3086 }
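
/* An illustrative example (hypothetical code): in

     static int foo (int a, struct S s)
     {
       return opaque (a, s.x);
     }

   where opaque has no analyzable summary, the loop above marks a as used
   (it is among the inputs of the first argument), records the bits of s
   backing s.x as a certain access, and clears the aggregate_pass_through
   flag so that flow need not be revisited.  */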
3087 
3088 /* Propagate parameter removal information through cross-SCC edge CS,
3089    i.e. mark as used in the caller parameter descriptors every parameter that
3090    feeds an argument which is used in the callee.  */
3091 
3092 static void
3093 param_removal_cross_scc_edge (cgraph_edge *cs)
3094 {
3095   enum availability availability;
3096   cgraph_node *callee = cs->callee->function_symbol (&availability);
3097   isra_func_summary *to_ifs = func_sums->get (callee);
3098   if (!to_ifs || !to_ifs->m_candidate
3099       || (availability < AVAIL_AVAILABLE)
3100       || vec_safe_is_empty (to_ifs->m_parameters))
3101     {
3102       process_edge_to_unknown_caller (cs);
3103       return;
3104     }
3105   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3106   gcc_checking_assert (from_ifs);
3107 
3108   isra_call_summary *csum = call_sums->get (cs);
3109   unsigned args_count = csum->m_arg_flow.length ();
3110   unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3111 
3112   for (unsigned i = 0; i < args_count; i++)
3113     {
3114       bool unused_in_callee;
3115       if (i < param_count)
3116 	unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3117       else
3118 	unused_in_callee = false;
3119 
3120       if (!unused_in_callee)
3121 	{
3122 	  isra_param_flow *ipf = &csum->m_arg_flow[i];
3123 	  for (int j = 0; j < ipf->length; j++)
3124 	    {
3125 	      int input_idx = ipf->inputs[j];
3126 	      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3127 	    }
3128 	}
3129     }
3130 }
3131 
3132 /* Unless it is already there, push NODE which is also described by IFS to
3133    STACK.  */
3134 
3135 static void
3136 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3137 			 vec<cgraph_node *> *stack)
3138 {
3139   if (!ifs->m_queued)
3140     {
3141       ifs->m_queued = true;
3142       stack->safe_push (node);
3143     }
3144 }
3145 
3146 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3147    used and push CALLER on STACK.  */
3148 
3149 static void
3150 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3151 			     cgraph_node *caller, vec<cgraph_node *> *stack)
3152 {
3153   if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3154     {
3155       (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3156       isra_push_node_to_stack (caller, from_ifs, stack);
3157     }
3158 }
3159 
3160 
3161 /* Propagate the information that a parameter is used not only locally within
3162    an SCC across CS to the caller, which must be in the same SCC as the
3163    callee.  Push any callers that need to be re-processed to STACK.  */
3164 
3165 static void
3166 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3167 {
3168   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3169   if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3170     return;
3171 
3172   isra_call_summary *csum = call_sums->get (cs);
3173   gcc_checking_assert (csum);
3174   unsigned args_count = csum->m_arg_flow.length ();
3175   enum availability availability;
3176   cgraph_node *callee = cs->callee->function_symbol (&availability);
3177   isra_func_summary *to_ifs = func_sums->get (callee);
3178 
3179   unsigned param_count
3180     = (to_ifs && (availability >= AVAIL_AVAILABLE))
3181     ? vec_safe_length (to_ifs->m_parameters) : 0;
3182   for (unsigned i = 0; i < args_count; i++)
3183     {
3184       if (i < param_count
3185 	  && (*to_ifs->m_parameters)[i].locally_unused)
3186 	continue;
3187 
3188       /* The argument is needed in the callee, so we must mark the parameter
3189 	 as used also in the caller and its callers within this SCC.  */
3190       isra_param_flow *ipf = &csum->m_arg_flow[i];
3191       for (int j = 0; j < ipf->length; j++)
3192 	{
3193 	  int input_idx = ipf->inputs[j];
3194 	  isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3195 	}
3196     }
3197 }
3198 
3199 /* Propagate information that a parameter is used not only locally within an
3200    SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3201    same SCC.  Push any callers that need to be re-processed to STACK.  */
3202 
3203 static bool
3204 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3205 {
3206   vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3207   cgraph_edge *cs;
3208   for (cs = node->callers; cs; cs = cs->next_caller)
3209     if (ipa_edge_within_scc (cs))
3210       propagate_used_across_scc_edge (cs, stack);
3211   return false;
3212 }
3213 
3214 /* Return true iff all certain accesses in ARG_DESC are also present as
3215    certain accesses in PARAM_DESC.  */
3216 
3217 static bool
3218 all_callee_accesses_present_p (isra_param_desc *param_desc,
3219 			       isra_param_desc *arg_desc)
3220 {
3221   unsigned aclen = vec_safe_length (arg_desc->accesses);
3222   for (unsigned j = 0; j < aclen; j++)
3223     {
3224       param_access *argacc = (*arg_desc->accesses)[j];
3225       if (!argacc->certain)
3226 	continue;
3227       param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3228 					      argacc->unit_size);
3229       if (!pacc
3230 	  || !pacc->certain
3231 	  || !types_compatible_p (argacc->type, pacc->type))
3232 	return false;
3233     }
3234   return true;
3235 }
3236 
3237 /* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
3238    does not allow instantiating an auto_vec with a type defined within a
3239    function so it is a global type.   */
3240 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
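
/* A sketch of the limitation mentioned above (hypothetical code):

     void f ()
     {
       enum local_kind { LK_A, LK_B };	/* type local to f */
       auto_vec<local_kind, 8> v;	/* rejected by gcc 4.8 */
     }

   whereas instantiating auto_vec with the namespace-scope acc_prop_kind is
   accepted.  */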
3241 
3242 
3243 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3244    (which belongs to CALLER) if they would not violate some constraint there.
3245    If successful, return NULL, otherwise return the string reason for failure
3246    (which can be written to the dump file).  DELTA_OFFSET is the known offset
3247    of the actual argument within the formal parameter (so of ARG_DESC within
3248    PARAM_DESC), ARG_SIZE is the size of the actual argument or zero if not
3249    known.  In case of success, set *CHANGE_P to true if propagation actually
3250    changed anything.  */
3251 
3252 static const char *
3253 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3254 			   isra_param_desc *arg_desc,
3255 			   unsigned delta_offset, unsigned arg_size,
3256 			   bool *change_p)
3257 {
3258   unsigned pclen = vec_safe_length (param_desc->accesses);
3259   unsigned aclen = vec_safe_length (arg_desc->accesses);
3260   unsigned prop_count = 0;
3261   unsigned prop_size = 0;
3262   bool change = false;
3263 
3264   auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3265   for (unsigned j = 0; j < aclen; j++)
3266     {
3267       param_access *argacc = (*arg_desc->accesses)[j];
3268       prop_kinds.safe_push (ACC_PROP_DONT);
3269 
3270       if (arg_size > 0
3271 	  && argacc->unit_offset + argacc->unit_size > arg_size)
3272 	return "callee access outside size boundary";
3273 
3274       if (!argacc->certain)
3275 	continue;
3276 
3277       unsigned offset = argacc->unit_offset + delta_offset;
3278       /* Given that accesses are initially stored according to increasing
3279 	 offset and decreasing size in case of equal offsets, the following
3280 	 searches could be written more efficiently if we kept the ordering
3281 	 when copying. But the number of accesses is capped at
3282 	 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3283 	 messy quickly, so let's improve on that only if necessary.  */
3284 
3285       bool exact_match = false;
3286       for (unsigned i = 0; i < pclen; i++)
3287 	{
3288 	  /* Check for overlaps.  */
3289 	  param_access *pacc = (*param_desc->accesses)[i];
3290 	  if (pacc->unit_offset == offset
3291 	      && pacc->unit_size == argacc->unit_size)
3292 	    {
3293 	      if (argacc->alias_ptr_type != pacc->alias_ptr_type
3294 		  || !types_compatible_p (argacc->type, pacc->type)
3295 		  || argacc->reverse != pacc->reverse)
3296 		return "propagated access types would not match existing ones";
3297 
3298 	      exact_match = true;
3299 	      if (!pacc->certain)
3300 		{
3301 		  prop_kinds[j] = ACC_PROP_CERTAIN;
3302 		  prop_size += argacc->unit_size;
3303 		  change = true;
3304 		}
3305 	      continue;
3306 	    }
3307 
3308 	  if (offset < pacc->unit_offset + pacc->unit_size
3309 	      && offset + argacc->unit_size > pacc->unit_offset)
3310 	    {
3311 	      /* No overlap is permissible with certain (load) accesses; the
3312 		 propagated access must fit within other argument ones.  */
3313 	      if (pacc->certain
3314 		  || offset < pacc->unit_offset
3315 		  || (offset + argacc->unit_size
3316 		      > pacc->unit_offset + pacc->unit_size))
3317 		return "a propagated access would conflict in caller";
3318 	    }
3319 	}
3320 
3321       if (!exact_match)
3322 	{
3323 	  prop_kinds[j] = ACC_PROP_COPY;
3324 	  prop_count++;
3325 	  prop_size += argacc->unit_size;
3326 	  change = true;
3327 	}
3328     }
3329 
3330   if (!change)
3331     return NULL;
3332 
3333   if ((prop_count + pclen
3334        > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3335       || size_would_violate_limit_p (param_desc,
3336 				     param_desc->size_reached + prop_size))
3337     return "propagating accesses would violate the count or size limit";
3338 
3339   *change_p = true;
3340   for (unsigned j = 0; j < aclen; j++)
3341     {
3342       if (prop_kinds[j] == ACC_PROP_COPY)
3343 	{
3344 	  param_access *argacc = (*arg_desc->accesses)[j];
3345 
3346 	  param_access *copy = ggc_cleared_alloc<param_access> ();
3347 	  copy->unit_offset = argacc->unit_offset + delta_offset;
3348 	  copy->unit_size = argacc->unit_size;
3349 	  copy->type = argacc->type;
3350 	  copy->alias_ptr_type = argacc->alias_ptr_type;
3351 	  copy->certain = true;
3352 	  copy->reverse = argacc->reverse;
3353 	  vec_safe_push (param_desc->accesses, copy);
3354 	}
3355       else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3356 	{
3357 	  param_access *argacc = (*arg_desc->accesses)[j];
3358 	  param_access *csp
3359 	    = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3360 				 argacc->unit_size);
3361 	  csp->certain = true;
3362 	}
3363     }
3364 
3365   param_desc->size_reached += prop_size;
3366 
3367   return NULL;
3368 }
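
/* A worked example with made-up numbers: assume DELTA_OFFSET is 4 (the
   argument starts 4 units into the caller's parameter) and the callee has a
   single certain access at unit offset 0 with unit size 4.  If the caller
   has no matching access at offset 4 of size 4 yet, the access is classified
   as ACC_PROP_COPY above and a new certain access at unit offset
   0 + 4 == 4 is pushed into PARAM_DESC, growing size_reached by 4, provided
   the count and size limits are not exceeded.  */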
3369 
3370 /* Propagate parameter splitting information through call graph edge CS.
3371    Return true if any changes that might need to be propagated within SCCs have
3372    been made.  The function also clears the aggregate_pass_through and
3373    pointer_pass_through in call summaries which do not need to be processed
3374    again if this CS is revisited when iterating while changes are propagated
3375    within an SCC.  */
3376 
3377 static bool
3378 param_splitting_across_edge (cgraph_edge *cs)
3379 {
3380   bool res = false;
3381   bool cross_scc = !ipa_edge_within_scc (cs);
3382   enum availability availability;
3383   cgraph_node *callee = cs->callee->function_symbol (&availability);
3384   isra_func_summary *from_ifs = func_sums->get (cs->caller);
3385   gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3386 
3387   isra_call_summary *csum = call_sums->get (cs);
3388   gcc_checking_assert (csum);
3389   unsigned args_count = csum->m_arg_flow.length ();
3390   isra_func_summary *to_ifs = func_sums->get (callee);
3391   unsigned param_count
3392     = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3393        ? vec_safe_length (to_ifs->m_parameters)
3394        : 0);
3395 
3396   if (dump_file && (dump_flags & TDF_DETAILS))
3397     fprintf (dump_file, "Splitting across %s->%s:\n",
3398 	     cs->caller->dump_name (), callee->dump_name ());
3399 
3400   unsigned i;
3401   for (i = 0; (i < args_count) && (i < param_count); i++)
3402     {
3403       isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3404       isra_param_flow *ipf = &csum->m_arg_flow[i];
3405 
3406       if (arg_desc->locally_unused)
3407 	{
3408 	  if (dump_file && (dump_flags & TDF_DETAILS))
3409 	    fprintf (dump_file, "    ->%u: unused in callee\n", i);
3410 	  ipf->pointer_pass_through = false;
3411 	  continue;
3412 	}
3413 
3414       if (ipf->pointer_pass_through)
3415 	{
3416 	  int idx = get_single_param_flow_source (ipf);
3417 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3418 	  if (!param_desc->split_candidate)
3419 	    continue;
3420 	  gcc_assert (param_desc->by_ref);
3421 
3422 	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
3423 	    {
3424 	      if (dump_file && (dump_flags & TDF_DETAILS))
3425 		fprintf (dump_file, "  %u->%u: not candidate or not by "
3426 			 "reference in callee\n", idx, i);
3427 	      param_desc->split_candidate = false;
3428 	      ipf->pointer_pass_through = false;
3429 	      res = true;
3430 	    }
3431 	  else if (!ipf->safe_to_import_accesses)
3432 	    {
3433 	      if (!csum->m_before_any_store
3434 		  || !all_callee_accesses_present_p (param_desc, arg_desc))
3435 		{
3436 		  if (dump_file && (dump_flags & TDF_DETAILS))
3437 		    fprintf (dump_file, "  %u->%u: cannot import accesses.\n",
3438 			     idx, i);
3439 		  param_desc->split_candidate = false;
3440 		  ipf->pointer_pass_through = false;
3441 		  res = true;
3442 
3443 		}
3444 	      else
3445 		{
3446 		  if (dump_file && (dump_flags & TDF_DETAILS))
3447 		    fprintf (dump_file, "  %u->%u: verified callee accesses "
3448 			     "present.\n", idx, i);
3449 		  if (cross_scc)
3450 		    ipf->pointer_pass_through = false;
3451 		}
3452 	    }
3453 	  else
3454 	    {
3455 	      const char *pull_failure
3456 		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3457 					     0, 0, &res);
3458 	      if (pull_failure)
3459 		{
3460 		  if (dump_file && (dump_flags & TDF_DETAILS))
3461 		    fprintf (dump_file, "  %u->%u: by_ref access pull "
3462 			     "failed: %s.\n", idx, i, pull_failure);
3463 		  param_desc->split_candidate = false;
3464 		  ipf->pointer_pass_through = false;
3465 		  res = true;
3466 		}
3467 	      else
3468 		{
3469 		  if (dump_file && (dump_flags & TDF_DETAILS))
3470 		    fprintf (dump_file, "  %u->%u: by_ref access pull "
3471 			     "succeeded.\n", idx, i);
3472 		  if (cross_scc)
3473 		    ipf->pointer_pass_through = false;
3474 		}
3475 	    }
3476 	}
3477       else if (ipf->aggregate_pass_through)
3478 	{
3479 	  int idx = get_single_param_flow_source (ipf);
3480 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3481 	  if (!param_desc->split_candidate)
3482 	    continue;
3483 	  gcc_assert (!param_desc->by_ref);
3484 	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3485 						  ipf->unit_size);
3486 	  gcc_checking_assert (pacc);
3487 
3488 	  if (pacc->certain)
3489 	    {
3490 	      if (dump_file && (dump_flags & TDF_DETAILS))
3491 		fprintf (dump_file, "  %u->%u: already certain\n", idx, i);
3492 	      ipf->aggregate_pass_through = false;
3493 	    }
3494 	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
3495 	    {
3496 	      if (dump_file && (dump_flags & TDF_DETAILS))
3497 		fprintf (dump_file, "  %u->%u: not candidate or by "
3498 			 "reference in callee\n", idx, i);
3499 
3500 	      pacc->certain = true;
3501 	      if (overlapping_certain_accesses_p (param_desc, NULL))
3502 		{
3503 		  if (dump_file && (dump_flags & TDF_DETAILS))
3504 		    fprintf (dump_file, "    ...leading to overlap, "
3505 			     "disqualifying candidate parameter %u\n",
3506 			     idx);
3507 		  param_desc->split_candidate = false;
3508 		}
3509 	      else
3510 		bump_reached_size (param_desc, pacc->unit_size, idx);
3511 
3512 	      ipf->aggregate_pass_through = false;
3513 	      res = true;
3514 	    }
3515 	  else
3516 	    {
3517 	      const char *pull_failure
3518 		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3519 					     ipf->unit_offset,
3520 					     ipf->unit_size, &res);
3521 	      if (pull_failure)
3522 		{
3523 		  if (dump_file && (dump_flags & TDF_DETAILS))
3524 		    fprintf (dump_file, "  %u->%u: arg access pull "
3525 			     "failed: %s.\n", idx, i, pull_failure);
3526 
3527 		  ipf->aggregate_pass_through = false;
3528 		  pacc->certain = true;
3529 
3530 		  if (overlapping_certain_accesses_p (param_desc, NULL))
3531 		    {
3532 		      if (dump_file && (dump_flags & TDF_DETAILS))
3533 			fprintf (dump_file, "    ...leading to overlap, "
3534 				 "disqualifying candidate parameter %u\n",
3535 				 idx);
3536 		      param_desc->split_candidate = false;
3537 		    }
3538 		  else
3539 		    bump_reached_size (param_desc, pacc->unit_size, idx);
3540 
3541 		  res = true;
3542 		}
3543 	      else
3544 		{
3545 		  if (dump_file && (dump_flags & TDF_DETAILS))
3546 		    fprintf (dump_file, "  %u->%u: arg access pull "
3547 			     "succeeded.\n", idx, i);
3548 		  if (cross_scc)
3549 		    ipf->aggregate_pass_through = false;
3550 		}
3551 	    }
3552 	}
3553     }
3554 
3555   /* Handle argument-parameter count mismatches. */
3556   for (; (i < args_count); i++)
3557     {
3558       isra_param_flow *ipf = &csum->m_arg_flow[i];
3559 
3560       if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3561 	{
3562 	  int idx = get_single_param_flow_source (ipf);
3563 	  ipf->pointer_pass_through = false;
3564 	  ipf->aggregate_pass_through = false;
3565 	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3566 	  if (!param_desc->split_candidate)
3567 	    continue;
3568 
3569 	  if (dump_file && (dump_flags & TDF_DETAILS))
3570 	    fprintf (dump_file, "  %u->%u: no corresponding formal parameter\n",
3571 		     idx, i);
3572 	  param_desc->split_candidate = false;
3573 	  res = true;
3574 	}
3575     }
3576   return res;
3577 }
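
/* An illustrative example of the pointer pass-through case (hypothetical
   code):

     static int callee (struct S *p) { return p->a + p->b; }
     static int caller (struct S *p) { return callee (p); }

   Here p flows through the edge unchanged, so if its accesses are safe to
   import, pull_accesses_from_callee copies the certain accesses covering
   p->a and p->b into caller's descriptor of p, and both functions can later
   be rewritten to take the two field values instead of the pointer.  */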
3578 
3579 /* Worker for call_for_symbol_and_aliases.  Look at all callers of NODE; if
3580    all of them ignore the return value, or come from the same SCC and use the
3581    return value only to compute their own return values, return false,
3582    otherwise return true.  */
3583 
3584 static bool
3585 retval_used_p (cgraph_node *node, void *)
3586 {
3587   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3588     {
3589       isra_call_summary *csum = call_sums->get (cs);
3590       gcc_checking_assert (csum);
3591       if (csum->m_return_ignored)
3592 	continue;
3593       if (!csum->m_return_returned)
3594 	return true;
3595 
3596       isra_func_summary *from_ifs = func_sums->get (cs->caller);
3597       if (!from_ifs || !from_ifs->m_candidate)
3598 	return true;
3599 
3600       if (!ipa_edge_within_scc (cs)
3601 	  && !from_ifs->m_return_ignored)
3602 	return true;
3603     }
3604 
3605   return false;
3606 }
3607 
3608 /* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3609    modify the parameter which originally had index BASE_INDEX, which had index
3610    PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
3611    was described by PREV_ADJUSTMENT.  If the parent clone is the original
3612    function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX equals BASE_INDEX.  */
3613 
3614 static void
3615 push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3616 				  unsigned prev_clone_index,
3617 				  ipa_adjusted_param *prev_adjustment,
3618 				  vec<ipa_adjusted_param, va_gc> **new_params)
3619 {
3620   isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3621   if (desc->locally_unused)
3622     {
3623       if (dump_file)
3624 	fprintf (dump_file, "  Will remove parameter %u\n", base_index);
3625       return;
3626     }
3627 
3628   if (!desc->split_candidate)
3629     {
3630       ipa_adjusted_param adj;
3631       if (prev_adjustment)
3632 	{
3633 	  adj = *prev_adjustment;
3634 	  adj.prev_clone_adjustment = true;
3635 	  adj.prev_clone_index = prev_clone_index;
3636 	}
3637       else
3638 	{
3639 	  memset (&adj, 0, sizeof (adj));
3640 	  adj.op = IPA_PARAM_OP_COPY;
3641 	  adj.base_index = base_index;
3642 	  adj.prev_clone_index = prev_clone_index;
3643 	}
3644       vec_safe_push ((*new_params), adj);
3645       return;
3646     }
3647 
3648   if (dump_file)
3649     fprintf (dump_file, "  Will split parameter %u\n", base_index);
3650 
3651   gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3652   unsigned aclen = vec_safe_length (desc->accesses);
3653   for (unsigned j = 0; j < aclen; j++)
3654     {
3655       param_access *pa = (*desc->accesses)[j];
3656       if (!pa->certain)
3657 	continue;
3658       if (dump_file)
3659 	fprintf (dump_file, "    - component at byte offset %u, "
3660 		 "size %u\n", pa->unit_offset, pa->unit_size);
3661 
3662       ipa_adjusted_param adj;
3663       memset (&adj, 0, sizeof (adj));
3664       adj.op = IPA_PARAM_OP_SPLIT;
3665       adj.base_index = base_index;
3666       adj.prev_clone_index = prev_clone_index;
3667       adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3668       adj.reverse = pa->reverse;
3669       adj.type = pa->type;
3670       adj.alias_ptr_type = pa->alias_ptr_type;
3671       adj.unit_offset = pa->unit_offset;
3672       vec_safe_push ((*new_params), adj);
3673     }
3674 }
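
/* An illustrative example with made-up values: a split candidate which was
   parameter 1 of the original function and has certain accesses at unit
   offsets 0 and 8 produces two IPA_PARAM_OP_SPLIT entries in NEW_PARAMS,
   both with base_index == 1, one with unit_offset == 0 and one with
   unit_offset == 8, each carrying the type and alias pointer type of the
   corresponding access.  */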
3675 
3676 /* Worker for all call_for_symbol_thunks_and_aliases.  Set calls_comdat_local
3677    flag of all callers of NODE.  */
3678 
3679 static bool
3680 mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
3681 {
3682   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3683     cs->caller->calls_comdat_local = true;
3684   return false;
3685 }
3686 
3687 
3688 /* Do final processing of results of IPA propagation regarding NODE, clone it
3689    if appropriate.  */
3690 
3691 static void
3692 process_isra_node_results (cgraph_node *node,
3693 			   hash_map<const char *, unsigned> *clone_num_suffixes)
3694 {
3695   isra_func_summary *ifs = func_sums->get (node);
3696   if (!ifs || !ifs->m_candidate)
3697     return;
3698 
3699   auto_vec<bool, 16> surviving_params;
3700   bool check_surviving = false;
3701   clone_info *cinfo = clone_info::get (node);
3702   if (cinfo && cinfo->param_adjustments)
3703     {
3704       check_surviving = true;
3705       cinfo->param_adjustments->get_surviving_params (&surviving_params);
3706     }
3707 
3708   unsigned param_count = vec_safe_length (ifs->m_parameters);
3709   bool will_change_function = false;
3710   if (ifs->m_returns_value && ifs->m_return_ignored)
3711     will_change_function = true;
3712   else
3713     for (unsigned i = 0; i < param_count; i++)
3714       {
3715 	isra_param_desc *desc = &(*ifs->m_parameters)[i];
3716 	if ((desc->locally_unused || desc->split_candidate)
3717 	    /* Make sure we do not clone just to attempt to remove an already
3718 	       removed unused argument.  */
3719 	    && (!check_surviving
3720 		|| (i < surviving_params.length ()
3721 		    && surviving_params[i])))
3722 	  {
3723 	    will_change_function = true;
3724 	    break;
3725 	  }
3726       }
3727   if (!will_change_function)
3728     return;
3729 
3730   if (dump_file)
3731     {
3732       fprintf (dump_file, "\nEvaluating analysis results for %s\n",
3733 	       node->dump_name ());
3734       if (ifs->m_returns_value && ifs->m_return_ignored)
3735 	fprintf (dump_file, "  Will remove return value.\n");
3736     }
3737 
3738   vec<ipa_adjusted_param, va_gc> *new_params = NULL;
3739   if (ipa_param_adjustments *old_adjustments
3740 	 = cinfo ? cinfo->param_adjustments : NULL)
3741     {
3742       unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
3743       for (unsigned i = 0; i < old_adj_len; i++)
3744 	{
3745 	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
3746 	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
3747 					    old_adj, &new_params);
3748 	}
3749     }
3750   else
3751     for (unsigned i = 0; i < param_count; i++)
3752       push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);
3753 
3754   ipa_param_adjustments *new_adjustments
3755     = (new (ggc_alloc <ipa_param_adjustments> ())
3756        ipa_param_adjustments (new_params, param_count,
3757 			      ifs->m_returns_value && ifs->m_return_ignored));
3758 
3759   if (dump_file && (dump_flags & TDF_DETAILS))
3760     {
3761       fprintf (dump_file, "\n  Created adjustments:\n");
3762       new_adjustments->dump (dump_file);
3763     }
3764 
3765   unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
3766 			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
3767 				 node->decl)));
3768   auto_vec<cgraph_edge *> callers = node->collect_callers ();
3769   cgraph_node *new_node
3770     = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
3771 				  suffix_counter);
3772   suffix_counter++;
3773   if (node->calls_comdat_local && node->same_comdat_group)
3774     {
3775       new_node->add_to_same_comdat_group (node);
3776       new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
3777 					     NULL, true);
3778     }
3779   new_node->calls_comdat_local = node->calls_comdat_local;
3780 
3781   if (dump_file)
3782     fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
3783   callers.release ();
3784 }
3785 
3786 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
3787    and disable transformations for those which have not or which should not be
3788    transformed because the associated debug counter reached its limit.  Return
3789    true if none survived or if there were no candidates to begin with.  */
3790 
3791 static bool
3792 disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
3793 {
3794   bool ret = true;
3795   unsigned len = vec_safe_length (ifs->m_parameters);
3796   if (!len)
3797     return true;
3798 
3799   auto_vec<bool, 16> surviving_params;
3800   bool check_surviving = false;
3801   clone_info *cinfo = clone_info::get (node);
3802   if (cinfo && cinfo->param_adjustments)
3803     {
3804       check_surviving = true;
3805       cinfo->param_adjustments->get_surviving_params (&surviving_params);
3806     }
3807   bool dumped_first = false;
3808   for (unsigned i = 0; i < len; i++)
3809     {
3810       isra_param_desc *desc = &(*ifs->m_parameters)[i];
3811       if (!dbg_cnt (ipa_sra_params))
3812 	{
3813 	  desc->locally_unused = false;
3814 	  desc->split_candidate = false;
3815 	}
3816       else if (check_surviving
3817 	       && (i >= surviving_params.length ()
3818 		   || !surviving_params[i]))
3819 	{
3820 	  /* Even if the parameter was removed by a previous IPA pass, we do
3821 	     not clear locally_unused because if it really is unused, this
3822 	     information might be useful in callers.  */
3823 	  desc->split_candidate = false;
3824 
3825 	  if (dump_file && (dump_flags & TDF_DETAILS))
3826 	    {
3827 	      if (!dumped_first)
3828 		{
3829 		  fprintf (dump_file,
3830 			   "The following parameters of %s are dead on "
3831 			   "arrival:", node->dump_name ());
3832 		  dumped_first = true;
3833 		}
3834 	      fprintf (dump_file, " %u", i);
3835 	    }
3836 	}
3837       else if (desc->locally_unused || desc->split_candidate)
3838 	ret = false;
3839     }
3840 
3841   if (dumped_first)
3842     fprintf (dump_file, "\n");
3843 
3844   return ret;
3845 }
3846 
3847 
3848 /* Run the interprocedural part of IPA-SRA. */
3849 
3850 static unsigned int
3851 ipa_sra_analysis (void)
3852 {
3853   if (dump_file)
3854     {
3855       fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
3856       ipa_sra_dump_all_summaries (dump_file);
3857     }
3858 
3859   gcc_checking_assert (func_sums);
3860   gcc_checking_assert (call_sums);
3861   cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
3862   auto_vec <cgraph_node *, 16> stack;
3863   int node_scc_count = ipa_reduced_postorder (order, true, NULL);
3864 
3865   /* One sweep from callees to callers for parameter removal and splitting.  */
3866   for (int i = 0; i < node_scc_count; i++)
3867     {
3868       cgraph_node *scc_rep = order[i];
3869       vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3870       unsigned j;
3871 
3872       /* Preliminary IPA function level checks and first step of parameter
3873 	 removal.  */
3874       cgraph_node *v;
3875       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3876 	{
3877 	  isra_func_summary *ifs = func_sums->get (v);
3878 	  if (!ifs || !ifs->m_candidate)
3879 	    continue;
3880 	  if (!ipa_sra_ipa_function_checks (v)
3881 	      || check_all_callers_for_issues (v))
3882 	    {
3883 	      ifs->zap ();
3884 	      continue;
3885 	    }
3886 	  if (disable_unavailable_parameters (v, ifs))
3887 	    continue;
3888 	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
3889 	    process_edge_to_unknown_caller (cs);
3890 	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3891 	    if (!ipa_edge_within_scc (cs))
3892 	      param_removal_cross_scc_edge (cs);
3893 	}
3894 
3895       /* Look at edges within the current SCC and propagate used-ness across
3896 	 them, pushing onto the stack all nodes which might need to be
3897 	 revisited.  */
3898       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3899 	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3900 					       &stack, true);
3901 
3902       /* Keep revisiting and pushing until nothing changes.  */
3903       while (!stack.is_empty ())
3904 	{
3905 	  cgraph_node *v = stack.pop ();
3906 	  isra_func_summary *ifs = func_sums->get (v);
3907 	  gcc_checking_assert (ifs && ifs->m_queued);
3908 	  ifs->m_queued = false;
3909 
3910 	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
3911 						 &stack, true);
3912 	}
3913 
3914       /* Parameter splitting.  */
3915       bool repeat_scc_access_propagation;
3916       do
3917 	{
3918 	  repeat_scc_access_propagation = false;
3919 	  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3920 	    {
3921 	      isra_func_summary *ifs = func_sums->get (v);
3922 	      if (!ifs
3923 		  || !ifs->m_candidate
3924 		  || vec_safe_is_empty (ifs->m_parameters))
3925 		continue;
3926 	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3927 		if (param_splitting_across_edge (cs))
3928 		  repeat_scc_access_propagation = true;
3929 	    }
3930 	}
3931       while (repeat_scc_access_propagation);
3932 
3933       if (flag_checking)
3934 	FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3935 	  verify_splitting_accesses (v, true);
3936 
3937       cycle_nodes.release ();
3938     }
3939 
3940   /* One sweep from callers to callees for result removal.  */
3941   for (int i = node_scc_count - 1; i >= 0 ; i--)
3942     {
3943       cgraph_node *scc_rep = order[i];
3944       vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
3945       unsigned j;
3946 
3947       cgraph_node *v;
3948       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3949 	{
3950 	  isra_func_summary *ifs = func_sums->get (v);
3951 	  if (!ifs || !ifs->m_candidate)
3952 	    continue;
3953 
3954 	  bool return_needed
3955 	    = (ifs->m_returns_value
3956 	       && (!dbg_cnt (ipa_sra_retvalues)
3957 		   || v->call_for_symbol_and_aliases (retval_used_p,
3958 						      NULL, true)));
3959 	  ifs->m_return_ignored = !return_needed;
3960 	  if (return_needed)
3961 	    isra_push_node_to_stack (v, ifs, &stack);
3962 	}
3963 
3964       while (!stack.is_empty ())
3965 	{
3966 	  cgraph_node *node = stack.pop ();
3967 	  isra_func_summary *ifs = func_sums->get (node);
3968 	  gcc_checking_assert (ifs && ifs->m_queued);
3969 	  ifs->m_queued = false;
3970 
3971 	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3972 	    if (ipa_edge_within_scc (cs)
3973 		&& call_sums->get (cs)->m_return_returned)
3974 	      {
3975 		enum availability av;
3976 		cgraph_node *callee = cs->callee->function_symbol (&av);
3977 		isra_func_summary *to_ifs = func_sums->get (callee);
3978 		if (to_ifs && to_ifs->m_return_ignored)
3979 		  {
3980 		    to_ifs->m_return_ignored = false;
3981 		    isra_push_node_to_stack (callee, to_ifs, &stack);
3982 		  }
3983 	      }
3984 	}
3985       cycle_nodes.release ();
3986     }
3987 
3988   ipa_free_postorder_info ();
3989   free (order);
3990 
3991   if (dump_file)
3992     {
3993       if (dump_flags & TDF_DETAILS)
3994 	{
3995 	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
3996 		   "==========\n");
3997 	  ipa_sra_dump_all_summaries (dump_file);
3998 	}
3999       fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4000     }
4001 
4002   hash_map<const char *, unsigned> *clone_num_suffixes
4003     = new hash_map<const char *, unsigned>;
4004 
4005   cgraph_node *node;
4006   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4007     process_isra_node_results (node, clone_num_suffixes);
4008 
4009   delete clone_num_suffixes;
4010   ggc_delete (func_sums);
4011   func_sums = NULL;
4012   delete call_sums;
4013   call_sums = NULL;
4014 
4015   if (dump_file)
4016     fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
4017 	     "==========\n\n");
4018   return 0;
4019 }
4020 
4021 
4022 const pass_data pass_data_ipa_sra =
4023 {
4024   IPA_PASS, /* type */
4025   "sra", /* name */
4026   OPTGROUP_NONE, /* optinfo_flags */
4027   TV_IPA_SRA, /* tv_id */
4028   0, /* properties_required */
4029   0, /* properties_provided */
4030   0, /* properties_destroyed */
4031   0, /* todo_flags_start */
4032   ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4033 };
4034 
4035 class pass_ipa_sra : public ipa_opt_pass_d
4036 {
4037 public:
4038   pass_ipa_sra (gcc::context *ctxt)
4039     : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4040 		      ipa_sra_generate_summary, /* generate_summary */
4041 		      ipa_sra_write_summary, /* write_summary */
4042 		      ipa_sra_read_summary, /* read_summary */
4043 		      NULL , /* write_optimization_summary */
4044 		      NULL, /* read_optimization_summary */
4045 		      NULL, /* stmt_fixup */
4046 		      0, /* function_transform_todo_flags_start */
4047 		      NULL, /* function_transform */
4048 		      NULL) /* variable_transform */
4049   {}
4050 
4051   /* opt_pass methods: */
4052   virtual bool gate (function *)
4053     {
4054       /* TODO: We should remove the optimize check after we ensure we never run
4055 	 IPA passes when not optimizing.  */
4056       return (flag_ipa_sra && optimize);
4057     }
4058 
4059   virtual unsigned int execute (function *) { return ipa_sra_analysis (); }
4060 
4061 }; // class pass_ipa_sra
4062 
4063 } // anon namespace
4064 
4065 /* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
4066    create a summary structure describing IPA-SRA opportunities and constraints
4067    in it.  */
4068 
4069 static void
4070 ipa_sra_summarize_function (cgraph_node *node)
4071 {
4072   if (dump_file)
4073     fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4074 	     node->order);
4075   if (!ipa_sra_preliminary_function_checks (node))
4076     {
4077       isra_analyze_all_outgoing_calls (node);
4078       return;
4079     }
4080   gcc_obstack_init (&gensum_obstack);
4081   isra_func_summary *ifs = func_sums->get_create (node);
4082   ifs->m_candidate = true;
4083   tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4084   ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4085 
4086   decl2desc = new hash_map<tree, gensum_param_desc *>;
4087   unsigned count = 0;
4088   for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
4089     count++;
4090 
4091   if (count > 0)
4092     {
4093       auto_vec<gensum_param_desc, 16> param_descriptions (count);
4094       param_descriptions.reserve_exact (count);
4095       param_descriptions.quick_grow_cleared (count);
4096 
4097       bool cfun_pushed = false;
4098       struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4099       if (create_parameter_descriptors (node, &param_descriptions))
4100 	{
4101 	  push_cfun (fun);
4102 	  cfun_pushed = true;
4103 	  final_bbs = BITMAP_ALLOC (NULL);
4104 	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
4105 				      by_ref_count
4106 				      * last_basic_block_for_fn (fun));
4107 	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
4108 	  scan_function (node, fun);
4109 
4110 	  if (dump_file)
4111 	    {
4112 	      dump_gensum_param_descriptors (dump_file, node->decl,
4113 					     &param_descriptions);
4114 	      fprintf (dump_file, "----------------------------------------\n");
4115 	    }
4116 	}
4117       process_scan_results (node, fun, ifs, &param_descriptions);
4118 
4119       if (cfun_pushed)
4120 	pop_cfun ();
4121       if (bb_dereferences)
4122 	{
4123 	  free (bb_dereferences);
4124 	  bb_dereferences = NULL;
4125 	  BITMAP_FREE (final_bbs);
4126 	  final_bbs = NULL;
4127 	}
4128     }
4129   isra_analyze_all_outgoing_calls (node);
4130 
4131   delete decl2desc;
4132   decl2desc = NULL;
4133   obstack_free (&gensum_obstack, NULL);
4134   if (dump_file)
4135     fprintf (dump_file, "\n\n");
4136   if (flag_checking)
4137     verify_splitting_accesses (node, false);
4138   return;
4139 }
4140 
4141 ipa_opt_pass_d *
4142 make_pass_ipa_sra (gcc::context *ctxt)
4143 {
4144   return new pass_ipa_sra (ctxt);
4145 }
4146 
4147 
4148 #include "gt-ipa-sra.h"
4149