/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and all calls are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might not be necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments, so that if two parameters are only
   used in a sum which is then passed to another function which then however
   does not use this parameter, all three parameters of the two functions can
   be eliminated.  Edge summaries also have flags indicating whether the
   return value is used and whether it is only returned in the caller too.  In
   LTO mode these summaries are then streamed to the object file in the
   compilation phase and streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The sweep from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this file but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
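
/* As an illustrative sketch of the transformations described above
   (hypothetical example code, not taken from any particular testcase):
   assuming all callers are known and can be adjusted, a function such as

     struct s { int a, b, c; };
     static int foo (struct s *p, int unused)
     {
       return p->a + p->c;
     }

   can be rewritten as

     static int foo (int a, int c)
     {
       return a + c;
     }

   with every call site changed to load and pass the two accessed fields
   directly and to drop the unused argument.  */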


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"
#include "attribs.h"

static void ipa_sra_summarize_function (cgraph_node *);

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Smaller accesses within a
   function that fall within another access form a tree of children of that
   access.  The pass cannot analyze parameters whose accesses only partially
   overlap each other.  */
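
/* For instance (an illustrative sketch only), accesses to an aggregate
   parameter at unit offsets/sizes 0/16, 0/8, 8/8 and 16/4 would form the
   tree

     [0, 16)      <- e.g. the whole first half passed to a callee
       [0, 8)     <- a load of one field
       [8, 8)     <- a load of another field
     [16, 4)      <- an unrelated separate access

   whereas an additional access at [4, 8) would only partially overlap
   [0, 8) and would therefore disqualify the parameter.  */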

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* True if there have been writes to or reads from this exact location,
     other than as arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing data by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* Number of uses of the parameter as an actual argument in call
     statements.  */
  int call_uses;
  /* Number of times this parameter has been directly passed on to a called
     function (pointer pass-throughs).  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing data by reference?  */
  bool by_ref;

  /* The number of this parameter as it is ordered in the function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the index used to
     compute offsets into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to the inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in the IPA-SRA stack during processing
     of call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}

/* Mark the function as not a candidate for any IPA-SRA transformation.  Return
   true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */
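
/* For example (an illustrative sketch): in a caller with formal parameters
   a (number 0) and b (number 1), the summary of a call

     foo (a + b, a);

   would contain two isra_param_flow structures, the first with length 2 and
   inputs {0, 1} and the second with length 1 and inputs {0}.  */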

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter of the given number.  Otherwise, the array contains indices of
     the caller's formal parameters which are used to calculate the value of
     this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through of
     an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to the inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in the
     caller.  */
  unsigned m_before_any_store : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
                          isra_func_summary *old_sum,
                          isra_func_summary *new_sum);
  virtual void insert (cgraph_node *, isra_func_summary *);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
                                       isra_func_summary *old_sum,
                                       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
        {
          param_access *from = (*s->accesses)[j];
          param_access *to = ggc_cleared_alloc<param_access> ();
          to->type = from->type;
          to->alias_ptr_type = from->alias_ptr_type;
          to->unit_offset = from->unit_offset;
          to->unit_size = from->unit_size;
          to->certain = from->certain;
          to->reverse = from->reverse;
          d->accesses->quick_push (to);
        }
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Hook that is called by summary when a new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
                          isra_call_summary *old_sum,
                          isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
        {
          bool first = true;
          fprintf (f, "      Scalar param sources: ");
          for (int j = 0; j < ipf->length; j++)
            {
              if (!first)
                fprintf (f, ", ");
              else
                first = false;
              fprintf (f, "%i", (int) ipf->inputs[j]);
            }
          fprintf (f, "\n");
        }
      if (ipf->aggregate_pass_through)
        fprintf (f, "      Aggregate pass through from the param given above, "
                 "unit offset: %u, unit size: %u\n",
                 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
        fprintf (f, "      Pointer pass through from the param given above, "
                 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                   isra_call_summary *old_sum,
                                   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}


/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
        fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
        fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
                 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
        fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Always inline function will be inlined "
                 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
           access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print a single access ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump a single access ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe the parameters of FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
                               vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
           desc->param_size_limit, desc->size_reached,
           desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
                             isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to the inputs of PARAM_FLOW; return false if that would exceed the
   available storage, true otherwise.  SRC must fit into an unsigned char.
   Used for purposes of transitive unused parameter removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for something other than either an actual call argument, a simple
   arithmetic operation or a debug statement.  If there are no such uses,
   return the number of actual arguments that this parameter eventually feeds
   to (or zero if there is none).  For any such parameter, mark PARM_NUM as one
   of its sources.  ANALYZED is a bitmap that tracks which SSA names we have
   already started investigating.  */
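
/* For instance (an illustrative sketch, with GIMPLE spelled as source code):
   if the default definition of parameter A is only used as in

     t = a + 1;
     foo (t, a);

   this function returns 2 for it because A feeds, directly and through T,
   into two actual call arguments.  Any other kind of use would make it
   return a negative number instead.  */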

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
                              int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
        continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
         operations below.  */
      if (is_gimple_call (stmt))
        {
          int all_uses = 0;
          use_operand_p use_p;
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
            all_uses++;

          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              res = -1;
              break;
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          int simple_uses = 0;
          for (unsigned i = 0; i < arg_count; i++)
            if (gimple_call_arg (call, i) == name)
              {
                if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
                  {
                    simple_uses = -1;
                    break;
                  }
                simple_uses++;
              }

          if (simple_uses < 0
              || all_uses != simple_uses)
            {
              res = -1;
              break;
            }
          res += all_uses;
        }
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
               && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
                   || gimple_code (stmt) == GIMPLE_PHI))
        {
          tree lhs;
          if (gimple_code (stmt) == GIMPLE_PHI)
            lhs = gimple_phi_result (stmt);
          else
            lhs = gimple_assign_lhs (stmt);

          if (TREE_CODE (lhs) != SSA_NAME)
            {
              res = -1;
              break;
            }
          gcc_assert (!gimple_vdef (stmt));
          if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
            {
              int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
                                                      analyzed);
              if (tmp < 0)
                {
                  res = tmp;
                  break;
                }
              res += tmp;
            }
        }
      else
        {
          res = -1;
          break;
        }
    }
  return res;
}

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for something other than either an
   actual call argument, a simple arithmetic operation or a debug statement.
   If there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For any such parameter, mark PARM_NUM as one of its
   sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
                                    int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
                                                analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and, if returning false, store their number into *PT_COUNT_P.  NODE and FUN
   must represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
                          int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          if (!TREE_THIS_VOLATILE (rhs))
            {
              while (handled_component_p (rhs))
                rhs = TREE_OPERAND (rhs, 0);
              if (TREE_CODE (rhs) == MEM_REF
                  && TREE_OPERAND (rhs, 0) == name
                  && integer_zerop (TREE_OPERAND (rhs, 1))
                  && types_compatible_p (TREE_TYPE (rhs),
                                         TREE_TYPE (TREE_TYPE (name))))
                uses_ok++;
            }
        }
      else if (is_gimple_call (stmt))
        {
          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              ret = true;
              break;
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          for (unsigned i = 0; i < arg_count; ++i)
            {
              tree arg = gimple_call_arg (stmt, i);

              if (arg == name)
                {
                  /* TODO: Allow &MEM_REF[name + offset] here,
                     ipa_param_body_adjustments::modify_call_stmt has to be
                     adjusted too.  */
                  csum->m_arg_flow[i].pointer_pass_through = true;
                  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
                  pt_count++;
                  uses_ok++;
                  continue;
                }

              if (!TREE_THIS_VOLATILE (arg))
                {
                  while (handled_component_p (arg))
                    arg = TREE_OPERAND (arg, 0);
                  if (TREE_CODE (arg) == MEM_REF
                      && TREE_OPERAND (arg, 0) == name
                      && integer_zerop (TREE_OPERAND (arg, 1))
                      && types_compatible_p (TREE_TYPE (arg),
                                             TREE_TYPE (TREE_TYPE (name))))
                    uses_ok++;
                }
            }
        }

      /* If the number of valid uses does not match the number of
         uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
        all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
        {
          ret = true;
          break;
        }
    }

  *pt_count_p = pt_count;
  return ret;
}

/* Initialize the vector of parameter descriptors of NODE.  Return true if
   there are any candidates for splitting or unused aggregate parameter removal
   and the function body therefore needs to be scanned (the function may return
   false even when there are candidates for removal of register
   parameters).  */

static bool
create_parameter_descriptors (cgraph_node *node,
                              vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is volatile\n");
          continue;
        }
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is a va_list type\n");
          continue;
        }
      if (TREE_ADDRESSABLE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is addressable\n");
          continue;
        }
      if (TREE_ADDRESSABLE (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, type cannot be split\n");
          continue;
        }

      if (is_gimple_reg (parm)
          && !isra_track_scalar_param_local_uses (fun, node, parm, num,
                                                  &scalar_call_uses))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " is a scalar with only %i call uses\n",
                     scalar_call_uses);

          desc->locally_unused = true;
          desc->call_uses = scalar_call_uses;
        }

      if (POINTER_TYPE_P (type))
        {
          type = TREE_TYPE (type);

          if (TREE_CODE (type) == FUNCTION_TYPE
              || TREE_CODE (type) == METHOD_TYPE)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a function\n");
              continue;
            }
          if (TYPE_VOLATILE (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a volatile type\n");
              continue;
            }
          if (TREE_CODE (type) == ARRAY_TYPE
              && TYPE_NONALIASED_COMPONENT (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a nonaliased component array\n");
              continue;
            }
          if (!is_gimple_reg (parm))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, a reference which is "
                         "not a gimple register (probably addressable)\n");
              continue;
            }
          if (is_va_list_type (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a va list\n");
              continue;
            }
          if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
                                        &desc->ptr_pt_count))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference has "
                         "nonarg uses\n");
              continue;
            }
          desc->by_ref = true;
        }
      else if (!AGGREGATE_TYPE_P (type))
        {
          /* This is in an else branch because scalars passed by reference are
             still candidates to be passed by value.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not an aggregate\n");
          continue;
        }

      if (!COMPLETE_TYPE_P (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not a complete type\n");
          continue;
        }
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, size not representable\n");
          continue;
        }
      unsigned HOST_WIDE_INT type_size
        = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
          || type_size >= ISRA_ARG_SIZE_LIMIT)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, has zero or huge size\n");
          continue;
        }
      if (type_internals_preclude_sra_p (type, &msg))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, %s\n", msg);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
        desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
             desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
                 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
                                             sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or a store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
              HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
        return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
         it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
         && access->next_sibling
         && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
         something below access.  But we only allow nesting in call
         arguments.  */
      if (access->nonarg)
        return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
         what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
        return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->first_child = access;

      while (access->next_sibling
             && access->next_sibling->offset < offset + size)
        access = access->next_sibling;
      if (access->offset + access->size > offset + size)
        {
          /* This must be a different access, which are sorted, so the
             following must be true and this signals a partial overlap.  */
          gcc_assert (access->offset > offset);
          return NULL;
        }

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
         super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE, and mark it as used in context CTX.  Attempt to create
   it if it does not exist, but fail and return NULL if there are already too
   many accesses, if it would create a partially overlapping access or if an
   access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
            isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
                                              size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
                                  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
                      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
        {
          if (access->offset < parent_offset)
            {
              error ("Access offset before parent offset");
              return true;
            }
          if (access->size >= parent_size)
            {
              error ("Access size greater or equal to its parent size");
              return true;
            }
          if (access->offset + access->size > parent_offset + parent_size)
            {
              error ("Access terminates outside of its parent");
              return true;
            }
        }

      if (verify_access_tree_1 (access->first_child, access->offset,
                                access->size))
        return true;

      if (access->next_sibling
          && (access->next_sibling->offset < access->offset + access->size))
        {
          error ("Access overlaps with its sibling");
          return true;
        }

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
                        basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
          || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
          != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
          != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record a use of the portion (given by UNIT_OFFSET and UNIT_SIZE) of a
   parameter described by DESC in a call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
                             scan_call_info *call_info,
                             unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}

/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, a store or as a call argument.
   BB must be the basic block in which EXPR resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */

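/* For example (an illustrative sketch): for a by-reference parameter P, a
   load such as x = p->fld is scanned as an access to the dereferenced
   parameter in ISRA_CTX_LOAD context, while passing *p as an actual argument
   to a callee is scanned in ISRA_CTX_ARG context with CALL_INFO describing
   the call.  */
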
static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
                  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
          || !SSA_NAME_IS_DEFAULT_DEF (op))
        return;
      base = SSA_NAME_VAR (op);
      if (!base)
        return;
      deref = true;
    }
  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
                                  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
                                  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
                                  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
        {
          disqualify_split_candidate (desc, "Dereferencing a non-reference.");
          return;
        }
      else if (ctx == ISRA_CTX_STORE)
        {
          disqualify_split_candidate (desc, "Storing to data passed by "
                                      "reference.");
          return;
        }

      if (!aa_walking_limit)
        {
          disqualify_split_candidate (desc, "Out of alias analysis step "
                                      "limit.");
          return;
        }

      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
                                       mark_maybe_modified, &maybe_modified,
                                       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
        {
          gcc_assert (aa_walking_limit > walked);
          aa_walking_limit = aa_walking_limit - walked;
        }
      if (walked < 0)
        aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
        {
          disqualify_split_candidate (desc, "Data passed by reference possibly "
                                      "modified through an alias.");
          return;
        }
      else
        mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);
1804
1805 if (!deref)
1806 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1807 size / BITS_PER_UNIT);
1808 else
1809 /* This is not a pass-through of a pointer, this is a use like any
1810 other. */
1811 access->nonarg = true;
1812 }
1813
1814 if (!access->type)
1815 {
1816 access->type = type;
1817 access->alias_ptr_type = reference_alias_ptr_type (expr);
1818 access->reverse = reverse;
1819 }
1820 else
1821 {
1822 if (exp_align < TYPE_ALIGN (access->type))
1823 {
1824 disqualify_split_candidate (desc, "Reference has lower alignment "
1825 "than a previous one.");
1826 return;
1827 }
1828 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1829 {
1830 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1831 return;
1832 }
1833 if (access->reverse != reverse)
1834 {
1835 disqualify_split_candidate (desc, "Both normal and reverse "
1836 "scalar storage order.");
1837 return;
1838 }
1839 if (!deref
1840 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1841 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1842 {
1843 /* We need the same aggregate type on all accesses to be able to
1844 distinguish transformation spots from pass-through arguments in
1845 the transformation phase. */
1846 disqualify_split_candidate (desc, "We do not support aggregate "
1847 "type punning.");
1848 return;
1849 }
1850
1851 if (type_prevails_p (access->type, type))
1852 access->type = type;
1853 }
1854 }
1855
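/* A hypothetical example of what the scan above records: given

     struct S { int a; int b; };
     static int f (struct S *p) { return p->a; }

   the load from p->a yields an access at offset 0 with the size of int in the
   descriptor of P, subject to the alias-analysis walk confirming that the
   pointed-to data cannot have been modified.  In contrast, a bit-field
   access, an access at a negative or polynomial offset, or an underaligned
   access would disqualify P from splitting altogether.  */
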
1856 /* Scan the body of the function described by NODE and FUN and create access
1857 trees for its parameters. */
1858
1859 static void
1860 scan_function (cgraph_node *node, struct function *fun)
1861 {
1862 basic_block bb;
1863
1864 FOR_EACH_BB_FN (bb, fun)
1865 {
1866 gimple_stmt_iterator gsi;
1867 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1868 {
1869 gimple *stmt = gsi_stmt (gsi);
1870
1871 if (stmt_can_throw_external (fun, stmt))
1872 bitmap_set_bit (final_bbs, bb->index);
1873 switch (gimple_code (stmt))
1874 {
1875 case GIMPLE_RETURN:
1876 {
1877 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1878 if (t != NULL_TREE)
1879 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1880 bitmap_set_bit (final_bbs, bb->index);
1881 }
1882 break;
1883
1884 case GIMPLE_ASSIGN:
1885 if (gimple_assign_single_p (stmt)
1886 && !gimple_clobber_p (stmt))
1887 {
1888 tree rhs = gimple_assign_rhs1 (stmt);
1889 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1890 tree lhs = gimple_assign_lhs (stmt);
1891 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1892 }
1893 break;
1894
1895 case GIMPLE_CALL:
1896 {
1897 unsigned argument_count = gimple_call_num_args (stmt);
1898 isra_scan_context ctx = ISRA_CTX_ARG;
1899 scan_call_info call_info, *call_info_p = &call_info;
1900 if (gimple_call_internal_p (stmt))
1901 {
1902 call_info_p = NULL;
1903 ctx = ISRA_CTX_LOAD;
1904 internal_fn ifn = gimple_call_internal_fn (stmt);
1905 if (internal_store_fn_p (ifn))
1906 ctx = ISRA_CTX_STORE;
1907 }
1908 else
1909 {
1910 call_info.cs = node->get_edge (stmt);
1911 call_info.argument_count = argument_count;
1912 }
1913
1914 for (unsigned i = 0; i < argument_count; i++)
1915 {
1916 call_info.arg_idx = i;
1917 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1918 ctx, bb, call_info_p);
1919 }
1920
1921 tree lhs = gimple_call_lhs (stmt);
1922 if (lhs)
1923 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1924 int flags = gimple_call_flags (stmt);
1925 if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1926 || (flags & ECF_LOOPING_CONST_OR_PURE))
1927 bitmap_set_bit (final_bbs, bb->index);
1928 }
1929 break;
1930
1931 case GIMPLE_ASM:
1932 {
1933 gasm *asm_stmt = as_a <gasm *> (stmt);
1934 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1935 asm_visit_addr);
1936 bitmap_set_bit (final_bbs, bb->index);
1937
1938 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1939 {
1940 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1941 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1942 }
1943 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1944 {
1945 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1946 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1947 }
1948 }
1949 break;
1950
1951 default:
1952 break;
1953 }
1954 }
1955 }
1956 }
1957
1958 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1959 return statements, or if results of any operations it is involved in are
1960 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1961 names we have already started investigating. */
1962
1963 static bool
1964 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1965 {
1966 bool res = true;
1967 imm_use_iterator imm_iter;
1968 gimple *stmt;
1969
1970 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1971 {
1972 if (is_gimple_debug (stmt))
1973 continue;
1974
1975 if (gimple_code (stmt) == GIMPLE_RETURN)
1976 {
1977 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1978 if (t != name)
1979 {
1980 res = false;
1981 break;
1982 }
1983 }
1984 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1985 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1986 || gimple_code (stmt) == GIMPLE_PHI))
1987 {
1988 /* TODO: And perhaps for const function calls too? */
1989 tree lhs;
1990 if (gimple_code (stmt) == GIMPLE_PHI)
1991 lhs = gimple_phi_result (stmt);
1992 else
1993 lhs = gimple_assign_lhs (stmt);
1994
1995 if (TREE_CODE (lhs) != SSA_NAME)
1996 {
1997 res = false;
1998 break;
1999 }
2000 gcc_assert (!gimple_vdef (stmt));
2001 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
2002 && !ssa_name_only_returned_p (fun, lhs, analyzed))
2003 {
2004 res = false;
2005 break;
2006 }
2007 }
2008 else
2009 {
2010 res = false;
2011 break;
2012 }
2013 }
2014 return res;
2015 }
2016
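/* A made-up example: in

     static int g (int i)
     {
       int t = h (i);
       return t + 1;
     }

   the SSA name holding the result of the call to h feeds only into the
   addition, whose result in turn feeds only into the return statement, so
   ssa_name_only_returned_p holds for it.  */
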
2017 /* Inspect the uses of the return value of the call associated with CS, and if
2018 it is not used or if it is only used to construct the return value of the
2019 caller, mark it as such in the call summary. Also check for
2020 misaligned arguments. */
2021
2022 static void
2023 isra_analyze_call (cgraph_edge *cs)
2024 {
2025 gcall *call_stmt = cs->call_stmt;
2026 unsigned count = gimple_call_num_args (call_stmt);
2027 isra_call_summary *csum = call_sums->get_create (cs);
2028
2029 for (unsigned i = 0; i < count; i++)
2030 {
2031 tree arg = gimple_call_arg (call_stmt, i);
2032 if (is_gimple_reg (arg))
2033 continue;
2034
2035 tree offset;
2036 poly_int64 bitsize, bitpos;
2037 machine_mode mode;
2038 int unsignedp, reversep, volatilep = 0;
2039 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2040 &unsignedp, &reversep, &volatilep);
2041 if (!multiple_p (bitpos, BITS_PER_UNIT))
2042 {
2043 csum->m_bit_aligned_arg = true;
2044 break;
2045 }
2046 }
2047
2048 tree lhs = gimple_call_lhs (call_stmt);
2049 if (lhs)
2050 {
2051 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2052 from this function (without being read anywhere). */
2053 if (TREE_CODE (lhs) == SSA_NAME)
2054 {
2055 bitmap analyzed = BITMAP_ALLOC (NULL);
2056 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2057 lhs, analyzed))
2058 csum->m_return_returned = true;
2059 BITMAP_FREE (analyzed);
2060 }
2061 }
2062 else
2063 csum->m_return_ignored = true;
2064 }
2065
2066 /* Look at all calls going out of NODE and perform all analyses necessary for
2067 IPA-SRA that are not done at body scan time or that must be done even when
2068 the body is not scanned because the function is not a candidate. */
2069
2070 static void
2071 isra_analyze_all_outgoing_calls (cgraph_node *node)
2072 {
2073 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2074 isra_analyze_call (cs);
2075 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2076 isra_analyze_call (cs);
2077 }
2078
2079 /* Dump a dereferences table with heading STR to file F. */
2080
2081 static void
2082 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2083 {
2084 basic_block bb;
2085
2086 fprintf (f, "%s", str);
2087 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2088 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2089 {
2090 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2091 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2092 {
2093 int i;
2094 for (i = 0; i < by_ref_count; i++)
2095 {
2096 int idx = bb->index * by_ref_count + i;
2097 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2098 }
2099 }
2100 fprintf (f, "\n");
2101 }
2102 fprintf (f, "\n");
2103 }
2104
2105 /* Propagate distances in bb_dereferences in the opposite direction to the
2106 control flow edges, in each step storing the maximum of the current value
2107 and the minimum of all successors. These steps are repeated until the table
2108 stabilizes. Note that BBs which might terminate the function (according to
2109 the final_bbs bitmap) are never updated in this way. */
2110
2111 static void
2112 propagate_dereference_distances (struct function *fun)
2113 {
2114 basic_block bb;
2115
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2117 dump_dereferences_table (dump_file, fun,
2118 "Dereference table before propagation:\n");
2119
2120 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2121 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2122 FOR_EACH_BB_FN (bb, fun)
2123 {
2124 queue.quick_push (bb);
2125 bb->aux = bb;
2126 }
2127
2128 while (!queue.is_empty ())
2129 {
2130 edge_iterator ei;
2131 edge e;
2132 bool change = false;
2133 int i;
2134
2135 bb = queue.pop ();
2136 bb->aux = NULL;
2137
2138 if (bitmap_bit_p (final_bbs, bb->index))
2139 continue;
2140
2141 for (i = 0; i < by_ref_count; i++)
2142 {
2143 int idx = bb->index * by_ref_count + i;
2144 bool first = true;
2145 HOST_WIDE_INT inh = 0;
2146
2147 FOR_EACH_EDGE (e, ei, bb->succs)
2148 {
2149 int succ_idx = e->dest->index * by_ref_count + i;
2150
2151 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2152 continue;
2153
2154 if (first)
2155 {
2156 first = false;
2157 inh = bb_dereferences [succ_idx];
2158 }
2159 else if (bb_dereferences [succ_idx] < inh)
2160 inh = bb_dereferences [succ_idx];
2161 }
2162
2163 if (!first && bb_dereferences[idx] < inh)
2164 {
2165 bb_dereferences[idx] = inh;
2166 change = true;
2167 }
2168 }
2169
2170 if (change)
2171 FOR_EACH_EDGE (e, ei, bb->preds)
2172 {
2173 if (e->src->aux)
2174 continue;
2175
2176 e->src->aux = e->src;
2177 queue.quick_push (e->src);
2178 }
2179 }
2180
2181 if (dump_file && (dump_flags & TDF_DETAILS))
2182 dump_dereferences_table (dump_file, fun,
2183 "Dereference table after propagation:\n");
2184 }
2185
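/* A small hypothetical example of the propagation above: if a by-ref
   parameter P is dereferenced only in the then-branch of

     if (cond)
       tmp = p->a;
     return tmp2;

   then the BB containing the return is final and keeps distance zero, the BB
   with the condition inherits the minimum over its two successors, which is
   zero, and so does the entry BB.  Consequently no dereference of P can be
   considered safe to move to the callers.  Only when every path from the
   entry dereferences P does the entry BB end up with a positive distance.  */
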
2186 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2187 children, return true if the parameter cannot be split, otherwise return
2188 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be the
2189 index of the entry BB in the function of PARM. */
2190
2191 static bool
2192 check_gensum_access (tree parm, gensum_param_desc *desc,
2193 gensum_param_access *access,
2194 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2195 int entry_bb_index)
2196 {
2197 if (access->nonarg)
2198 {
2199 *only_calls = false;
2200 *nonarg_acc_size += access->size;
2201
2202 if (access->first_child)
2203 {
2204 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2205 return true;
2206 }
2207 }
2208 /* Do not decompose a non-BLKmode param in a way that would create
2209 BLKmode params. Especially for by-reference passing (thus,
2210 pointer-type param) this is hardly worthwhile. */
2211 if (DECL_MODE (parm) != BLKmode
2212 && TYPE_MODE (access->type) == BLKmode)
2213 {
2214 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2215 return true;
2216 }
2217
2218 if (desc->by_ref)
2219 {
2220 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2221 if ((access->offset + access->size) > bb_dereferences[idx])
2222 {
2223 disqualify_split_candidate (desc, "Would create a possibly "
2224 "illegal dereference in a caller.");
2225 return true;
2226 }
2227 }
2228
2229 for (gensum_param_access *ch = access->first_child;
2230 ch;
2231 ch = ch->next_sibling)
2232 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2233 entry_bb_index))
2234 return true;
2235
2236 return false;
2237 }
2238
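/* A hypothetical illustration of the by-reference check above: if an access
   to *P extends up to offset + size of 128 bits but the dereference distance
   recorded for the entry BB is only 64, P is disqualified, because a caller
   performing the load on P's behalf could touch memory that not every
   execution of this function is guaranteed to dereference.  */
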
2239 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2240 descriptor DESC. */
2241
2242 static void
2243 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2244 {
2245 param_access *to = ggc_cleared_alloc<param_access> ();
2246 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2247 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2248 to->unit_offset = from->offset / BITS_PER_UNIT;
2249 to->unit_size = from->size / BITS_PER_UNIT;
2250 to->type = from->type;
2251 to->alias_ptr_type = from->alias_ptr_type;
2252 to->certain = from->nonarg;
2253 to->reverse = from->reverse;
2254 vec_safe_push (desc->accesses, to);
2255
2256 for (gensum_param_access *ch = from->first_child;
2257 ch;
2258 ch = ch->next_sibling)
2259 copy_accesses_to_ipa_desc (ch, desc);
2260 }
2261
2262 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2263 possible transformations and store information about them in the function
2264 summary. NODE, FUN and IFS are all various structures
2265 describing the currently analyzed function. */
2266
2267 static void
2268 process_scan_results (cgraph_node *node, struct function *fun,
2269 isra_func_summary *ifs,
2270 vec<gensum_param_desc> *param_descriptions)
2271 {
2272 bool check_pass_throughs = false;
2273 bool dereferences_propagated = false;
2274 tree parm = DECL_ARGUMENTS (node->decl);
2275 unsigned param_count = param_descriptions->length();
2276
2277 for (unsigned desc_index = 0;
2278 desc_index < param_count;
2279 desc_index++, parm = DECL_CHAIN (parm))
2280 {
2281 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2282 if (!desc->split_candidate)
2283 continue;
2284
2285 if (flag_checking)
2286 isra_verify_access_tree (desc->accesses);
2287
2288 if (!dereferences_propagated
2289 && desc->by_ref
2290 && desc->accesses)
2291 {
2292 propagate_dereference_distances (fun);
2293 dereferences_propagated = true;
2294 }
2295
2296 HOST_WIDE_INT nonarg_acc_size = 0;
2297 bool only_calls = true;
2298 bool check_failed = false;
2299
2300 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2301 for (gensum_param_access *acc = desc->accesses;
2302 acc;
2303 acc = acc->next_sibling)
2304 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2305 entry_bb_index))
2306 {
2307 check_failed = true;
2308 break;
2309 }
2310 if (check_failed)
2311 continue;
2312
2313 if (only_calls)
2314 desc->locally_unused = true;
2315
2316 HOST_WIDE_INT cur_param_size
2317 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2318 HOST_WIDE_INT param_size_limit;
2319 if (!desc->by_ref || optimize_function_for_size_p (fun))
2320 param_size_limit = cur_param_size;
2321 else
2322 param_size_limit
2323 = opt_for_fn (node->decl,
2324 param_ipa_sra_ptr_growth_factor) * cur_param_size;
2325 if (nonarg_acc_size > param_size_limit
2326 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2327 {
2328 disqualify_split_candidate (desc, "Would result into a too big set "
2329 "of replacements.");
2330 }
2331 else
2332 {
2333 /* create_parameter_descriptors makes sure unit sizes of all
2334 candidate parameters fit unsigned integers restricted to
2335 ISRA_ARG_SIZE_LIMIT. */
2336 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2337 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2338 if (desc->split_candidate && desc->ptr_pt_count)
2339 {
2340 gcc_assert (desc->by_ref);
2341 check_pass_throughs = true;
2342 }
2343 }
2344 }
2345
2346 /* When a pointer parameter is passed-through to a callee, in which it is
2347 only used to read one or a few items, we can attempt to transform it
2348 to obtaining and passing through the items instead of the pointer. But we
2349 must take extra care that 1) we do not introduce any segfault by moving
2350 dereferences above control flow and that 2) the data is not modified
2351 through an alias in this function. The IPA analysis must not introduce
2352 any access candidates unless it can prove both.
2353
2354 The current solution is very crude as it consists of ensuring that the
2355 call postdominates entry BB and that the definition of VUSE of the call is
2356 default definition. TODO: For non-recursive callees in the same
2357 compilation unit we could do better by doing analysis in topological order
2358 and looking into access candidates of callees, using their alias_ptr_types
2359 to attempt real AA. We could also use the maximum known dereferenced
2360 offset in this function at IPA level.
2361
2362 TODO: Measure the overhead and the effect of just being pessimistic.
2363 Maybe this is only -O3 material? */
2364
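/* A made-up example of hazard 1) above:

     void caller (int *p)
     {
       if (p)
         callee (p);
     }

   The call does not postdominate the entry BB, so loading *p in caller on
   behalf of callee could fault when p is NULL, and the edge must not get
   safe_to_import_accesses set.  */
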
2365 bool pdoms_calculated = false;
2366 if (check_pass_throughs)
2367 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2368 {
2369 gcall *call_stmt = cs->call_stmt;
2370 tree vuse = gimple_vuse (call_stmt);
2371
2372 /* If the callee is a const function, we don't get a VUSE. In such
2373 case there will be no memory accesses in the called function (or the
2374 const attribute is wrong) and then we just don't care. */
2375 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2376
2377 unsigned count = gimple_call_num_args (call_stmt);
2378 isra_call_summary *csum = call_sums->get_create (cs);
2379 csum->init_inputs (count);
2380 csum->m_before_any_store = uses_memory_as_obtained;
2381 for (unsigned argidx = 0; argidx < count; argidx++)
2382 {
2383 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2384 continue;
2385 unsigned pidx
2386 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2387 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2388 if (!desc->split_candidate)
2389 {
2390 csum->m_arg_flow[argidx].pointer_pass_through = false;
2391 continue;
2392 }
2393 if (!uses_memory_as_obtained)
2394 continue;
2395
2396 /* Post-dominator check placed last, hoping that it usually won't
2397 be needed. */
2398 if (!pdoms_calculated)
2399 {
2400 gcc_checking_assert (cfun);
2401 connect_infinite_loops_to_exit ();
2402 calculate_dominance_info (CDI_POST_DOMINATORS);
2403 pdoms_calculated = true;
2404 }
2405 if (dominated_by_p (CDI_POST_DOMINATORS,
2406 gimple_bb (call_stmt),
2407 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2408 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2409 }
2410
2411 }
2412 if (pdoms_calculated)
2413 {
2414 free_dominance_info (CDI_POST_DOMINATORS);
2415 remove_fake_exit_edges ();
2416 }
2417
2418 /* TODO: Add early exit if we disqualified everything. This also requires
2419 that we either relax the restriction that
2420 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2421 or store the number of parameters to IPA-SRA function summary and use that
2422 when just removing params. */
2423
2424 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2425 ifs->m_parameters->quick_grow_cleared (param_count);
2426 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2427 {
2428 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2429 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2430
2431 d->param_size_limit = s->param_size_limit;
2432 d->size_reached = s->nonarg_acc_size;
2433 d->locally_unused = s->locally_unused;
2434 d->split_candidate = s->split_candidate;
2435 d->by_ref = s->by_ref;
2436
2437 for (gensum_param_access *acc = s->accesses;
2438 acc;
2439 acc = acc->next_sibling)
2440 copy_accesses_to_ipa_desc (acc, d);
2441 }
2442
2443 if (dump_file)
2444 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2445 }
2446
2447 /* Return true if there are any overlaps among certain accesses of DESC. If
2448 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2449 too. DESC is assumed to be a split candidate that is not locally
2450 unused. */
2451
2452 static bool
2453 overlapping_certain_accesses_p (isra_param_desc *desc,
2454 bool *certain_access_present_p)
2455 {
2456 unsigned pclen = vec_safe_length (desc->accesses);
2457 for (unsigned i = 0; i < pclen; i++)
2458 {
2459 param_access *a1 = (*desc->accesses)[i];
2460
2461 if (!a1->certain)
2462 continue;
2463 if (certain_access_present_p)
2464 *certain_access_present_p = true;
2465 for (unsigned j = i + 1; j < pclen; j++)
2466 {
2467 param_access *a2 = (*desc->accesses)[j];
2468 if (a2->certain
2469 && a1->unit_offset < a2->unit_offset + a2->unit_size
2470 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2471 return true;
2472 }
2473 }
2474 return false;
2475 }
2476
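/* For example, certain accesses with unit offset/size pairs (0, 4) and (2, 4)
   overlap, because 0 < 2 + 4 and 0 + 4 > 2, which is exactly the test above,
   whereas (0, 4) and (4, 4) merely touch and do not conflict.  */
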
2477 /* Check for any overlaps of certain param accesses among splitting candidates
2478 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2479 check that used splitting candidates have at least one certain access. */
2480
2481 static void
2482 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2483 {
2484 isra_func_summary *ifs = func_sums->get (node);
2485 if (!ifs || !ifs->m_candidate)
2486 return;
2487 unsigned param_count = vec_safe_length (ifs->m_parameters);
2488 for (unsigned pidx = 0; pidx < param_count; pidx++)
2489 {
2490 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2491 if (!desc->split_candidate || desc->locally_unused)
2492 continue;
2493
2494 bool certain_access_present = !certain_must_exist;
2495 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2496 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
2497 "which overlap", node->dump_name (), pidx);
2498 if (!certain_access_present)
2499 internal_error ("function %qs, parameter %u, is used but does not "
2500 "have any certain IPA-SRA access",
2501 node->dump_name (), pidx);
2502 }
2503 }
2504
2505 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2506 this compilation unit and create summary structures describing IPA-SRA
2507 opportunities and constraints in them. */
2508
2509 static void
2510 ipa_sra_generate_summary (void)
2511 {
2512 struct cgraph_node *node;
2513
2514 gcc_checking_assert (!func_sums);
2515 gcc_checking_assert (!call_sums);
2516 func_sums
2517 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2518 ipa_sra_function_summaries (symtab, true));
2519 call_sums = new ipa_sra_call_summaries (symtab);
2520
2521 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2522 ipa_sra_summarize_function (node);
2523 return;
2524 }
2525
2526 /* Write intraprocedural analysis information about edge E into a stream for
2527 LTO WPA. */
2528
2529 static void
2530 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2531 {
2532 isra_call_summary *csum = call_sums->get (e);
2533 unsigned input_count = csum->m_arg_flow.length ();
2534 streamer_write_uhwi (ob, input_count);
2535 for (unsigned i = 0; i < input_count; i++)
2536 {
2537 isra_param_flow *ipf = &csum->m_arg_flow[i];
2538 streamer_write_hwi (ob, ipf->length);
2539 bitpack_d bp = bitpack_create (ob->main_stream);
2540 for (int j = 0; j < ipf->length; j++)
2541 bp_pack_value (&bp, ipf->inputs[j], 8);
2542 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2543 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2544 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2545 streamer_write_bitpack (&bp);
2546 streamer_write_uhwi (ob, ipf->unit_offset);
2547 streamer_write_uhwi (ob, ipf->unit_size);
2548 }
2549 bitpack_d bp = bitpack_create (ob->main_stream);
2550 bp_pack_value (&bp, csum->m_return_ignored, 1);
2551 bp_pack_value (&bp, csum->m_return_returned, 1);
2552 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2553 bp_pack_value (&bp, csum->m_before_any_store, 1);
2554 streamer_write_bitpack (&bp);
2555 }
2556
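/* For reference, the stream layout produced above, which
   isra_read_edge_summary below must read back in the same order, is: the
   input count, then for every input its length, a bit-pack of its 8-bit
   input indices and the three pass-through/safety flags, and its unit offset
   and size, and finally a bit-pack of the four per-call flags.  */
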
2557 /* Write intraprocedural analysis information about NODE and all of its outgoing
2558 edges into a stream for LTO WPA. */
2559
2560 static void
2561 isra_write_node_summary (output_block *ob, cgraph_node *node)
2562 {
2563 isra_func_summary *ifs = func_sums->get (node);
2564 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2565 int node_ref = lto_symtab_encoder_encode (encoder, node);
2566 streamer_write_uhwi (ob, node_ref);
2567
2568 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2569 streamer_write_uhwi (ob, param_desc_count);
2570 for (unsigned i = 0; i < param_desc_count; i++)
2571 {
2572 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2573 unsigned access_count = vec_safe_length (desc->accesses);
2574 streamer_write_uhwi (ob, access_count);
2575 for (unsigned j = 0; j < access_count; j++)
2576 {
2577 param_access *acc = (*desc->accesses)[j];
2578 stream_write_tree (ob, acc->type, true);
2579 stream_write_tree (ob, acc->alias_ptr_type, true);
2580 streamer_write_uhwi (ob, acc->unit_offset);
2581 streamer_write_uhwi (ob, acc->unit_size);
2582 bitpack_d bp = bitpack_create (ob->main_stream);
2583 bp_pack_value (&bp, acc->certain, 1);
2584 bp_pack_value (&bp, acc->reverse, 1);
2585 streamer_write_bitpack (&bp);
2586 }
2587 streamer_write_uhwi (ob, desc->param_size_limit);
2588 streamer_write_uhwi (ob, desc->size_reached);
2589 bitpack_d bp = bitpack_create (ob->main_stream);
2590 bp_pack_value (&bp, desc->locally_unused, 1);
2591 bp_pack_value (&bp, desc->split_candidate, 1);
2592 bp_pack_value (&bp, desc->by_ref, 1);
2593 streamer_write_bitpack (&bp);
2594 }
2595 bitpack_d bp = bitpack_create (ob->main_stream);
2596 bp_pack_value (&bp, ifs->m_candidate, 1);
2597 bp_pack_value (&bp, ifs->m_returns_value, 1);
2598 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2599 gcc_assert (!ifs->m_queued);
2600 streamer_write_bitpack (&bp);
2601
2602 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2603 isra_write_edge_summary (ob, e);
2604 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2605 isra_write_edge_summary (ob, e);
2606 }
2607
2608 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2609
2610 static void
2611 ipa_sra_write_summary (void)
2612 {
2613 if (!func_sums || !call_sums)
2614 return;
2615
2616 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2617 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2618 ob->symbol = NULL;
2619
2620 unsigned int count = 0;
2621 lto_symtab_encoder_iterator lsei;
2622 for (lsei = lsei_start_function_in_partition (encoder);
2623 !lsei_end_p (lsei);
2624 lsei_next_function_in_partition (&lsei))
2625 {
2626 cgraph_node *node = lsei_cgraph_node (lsei);
2627 if (node->has_gimple_body_p ()
2628 && func_sums->get (node) != NULL)
2629 count++;
2630 }
2631 streamer_write_uhwi (ob, count);
2632
2633 /* Process all of the functions. */
2634 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2635 lsei_next_function_in_partition (&lsei))
2636 {
2637 cgraph_node *node = lsei_cgraph_node (lsei);
2638 if (node->has_gimple_body_p ()
2639 && func_sums->get (node) != NULL)
2640 isra_write_node_summary (ob, node);
2641 }
2642 streamer_write_char_stream (ob->main_stream, 0);
2643 produce_asm (ob, NULL);
2644 destroy_output_block (ob);
2645 }
2646
2647 /* Read intraprocedural analysis information about edge E from a stream for
2648 LTO WPA. */
2649
2650 static void
2651 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2652 {
2653 isra_call_summary *csum = call_sums->get_create (cs);
2654 unsigned input_count = streamer_read_uhwi (ib);
2655 csum->init_inputs (input_count);
2656 for (unsigned i = 0; i < input_count; i++)
2657 {
2658 isra_param_flow *ipf = &csum->m_arg_flow[i];
2659 ipf->length = streamer_read_hwi (ib);
2660 bitpack_d bp = streamer_read_bitpack (ib);
2661 for (int j = 0; j < ipf->length; j++)
2662 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2663 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2664 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2665 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2666 ipf->unit_offset = streamer_read_uhwi (ib);
2667 ipf->unit_size = streamer_read_uhwi (ib);
2668 }
2669 bitpack_d bp = streamer_read_bitpack (ib);
2670 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2671 csum->m_return_returned = bp_unpack_value (&bp, 1);
2672 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2673 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2674 }
2675
2676 /* Read intraprocedural analysis information about NODE and all of its outgoing
2677 edges from a stream for LTO WPA. */
2678
2679 static void
2680 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2681 struct data_in *data_in)
2682 {
2683 isra_func_summary *ifs = func_sums->get_create (node);
2684 unsigned param_desc_count = streamer_read_uhwi (ib);
2685 if (param_desc_count > 0)
2686 {
2687 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2688 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2689 }
2690 for (unsigned i = 0; i < param_desc_count; i++)
2691 {
2692 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2693 unsigned access_count = streamer_read_uhwi (ib);
2694 for (unsigned j = 0; j < access_count; j++)
2695 {
2696 param_access *acc = ggc_cleared_alloc<param_access> ();
2697 acc->type = stream_read_tree (ib, data_in);
2698 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2699 acc->unit_offset = streamer_read_uhwi (ib);
2700 acc->unit_size = streamer_read_uhwi (ib);
2701 bitpack_d bp = streamer_read_bitpack (ib);
2702 acc->certain = bp_unpack_value (&bp, 1);
2703 acc->reverse = bp_unpack_value (&bp, 1);
2704 vec_safe_push (desc->accesses, acc);
2705 }
2706 desc->param_size_limit = streamer_read_uhwi (ib);
2707 desc->size_reached = streamer_read_uhwi (ib);
2708 bitpack_d bp = streamer_read_bitpack (ib);
2709 desc->locally_unused = bp_unpack_value (&bp, 1);
2710 desc->split_candidate = bp_unpack_value (&bp, 1);
2711 desc->by_ref = bp_unpack_value (&bp, 1);
2712 }
2713 bitpack_d bp = streamer_read_bitpack (ib);
2714 ifs->m_candidate = bp_unpack_value (&bp, 1);
2715 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2716 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2717 ifs->m_queued = 0;
2718
2719 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2720 isra_read_edge_summary (ib, e);
2721 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2722 isra_read_edge_summary (ib, e);
2723 }
2724
2725 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2726 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
2727 it should be possible to unify them somehow. */
2728
2729 static void
2730 isra_read_summary_section (struct lto_file_decl_data *file_data,
2731 const char *data, size_t len)
2732 {
2733 const struct lto_function_header *header =
2734 (const struct lto_function_header *) data;
2735 const int cfg_offset = sizeof (struct lto_function_header);
2736 const int main_offset = cfg_offset + header->cfg_size;
2737 const int string_offset = main_offset + header->main_size;
2738 struct data_in *data_in;
2739 unsigned int i;
2740 unsigned int count;
2741
2742 lto_input_block ib_main ((const char *) data + main_offset,
2743 header->main_size, file_data->mode_table);
2744
2745 data_in =
2746 lto_data_in_create (file_data, (const char *) data + string_offset,
2747 header->string_size, vNULL);
2748 count = streamer_read_uhwi (&ib_main);
2749
2750 for (i = 0; i < count; i++)
2751 {
2752 unsigned int index;
2753 struct cgraph_node *node;
2754 lto_symtab_encoder_t encoder;
2755
2756 index = streamer_read_uhwi (&ib_main);
2757 encoder = file_data->symtab_node_encoder;
2758 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2759 index));
2760 gcc_assert (node->definition);
2761 isra_read_node_info (&ib_main, node, data_in);
2762 }
2763 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2764 len);
2765 lto_data_in_delete (data_in);
2766 }
2767
2768 /* Read intraprocedural analysis information from a stream for LTO WPA. */
2769
2770 static void
2771 ipa_sra_read_summary (void)
2772 {
2773 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2774 struct lto_file_decl_data *file_data;
2775 unsigned int j = 0;
2776
2777 gcc_checking_assert (!func_sums);
2778 gcc_checking_assert (!call_sums);
2779 func_sums
2780 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2781 ipa_sra_function_summaries (symtab, true));
2782 call_sums = new ipa_sra_call_summaries (symtab);
2783
2784 while ((file_data = file_data_vec[j++]))
2785 {
2786 size_t len;
2787 const char *data
2788 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2789 if (data)
2790 isra_read_summary_section (file_data, data, len);
2791 }
2792 }
2793
2794 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2795
2796 static void
2797 ipa_sra_dump_all_summaries (FILE *f)
2798 {
2799 cgraph_node *node;
2800 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2801 {
2802 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2803
2804 isra_func_summary *ifs = func_sums->get (node);
2805 if (!ifs)
2806 fprintf (f, " Function does not have any associated IPA-SRA "
2807 "summary\n");
2808 else
2809 {
2810 if (!ifs->m_candidate)
2811 {
2812 fprintf (f, " Not a candidate function\n");
2813 continue;
2814 }
2815 if (ifs->m_returns_value)
2816 fprintf (f, " Returns value\n");
2817 if (vec_safe_is_empty (ifs->m_parameters))
2818 fprintf (f, " No parameter information.\n");
2819 else
2820 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2821 {
2822 fprintf (f, " Descriptor for parameter %i:\n", i);
2823 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2824 }
2825 fprintf (f, "\n");
2826 }
2827
2828 struct cgraph_edge *cs;
2829 for (cs = node->callees; cs; cs = cs->next_callee)
2830 {
2831 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2832 cs->callee->dump_name ());
2833 isra_call_summary *csum = call_sums->get (cs);
2834 if (csum)
2835 csum->dump (f);
2836 else
2837 fprintf (f, " Call summary is MISSING!\n");
2838 }
2839
2840 }
2841 fprintf (f, "\n\n");
2842 }
2843
2844 /* Perform function-scope viability tests that can only be made at IPA level
2845 and return false if the function is deemed unsuitable for IPA-SRA. */
2846
2847 static bool
2848 ipa_sra_ipa_function_checks (cgraph_node *node)
2849 {
2850 if (!node->can_be_local_p ())
2851 {
2852 if (dump_file)
2853 fprintf (dump_file, "Function %s disqualified because it cannot be "
2854 "made local.\n", node->dump_name ());
2855 return false;
2856 }
2857 if (!node->can_change_signature)
2858 {
2859 if (dump_file)
2860 fprintf (dump_file, "Function cannot change signature.\n");
2861 return false;
2862 }
2863
2864 return true;
2865 }
2866
2867 /* Issues found by check_for_caller_issues. */
2868
2869 struct caller_issues
2870 {
2871 /* The candidate being considered. */
2872 cgraph_node *candidate;
2873 /* There is a thunk among callers. */
2874 bool thunk;
2875 /* Call site with no available information. */
2876 bool unknown_callsite;
2877 /* Call from outside the candidate's comdat group. */
2878 bool call_from_outside_comdat;
2879 /* There is a bit-aligned load into one of the non-gimple-typed arguments. */
2880 bool bit_aligned_aggregate_argument;
2881 };
2882
2883 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2884 that apply. */
2885
2886 static bool
2887 check_for_caller_issues (struct cgraph_node *node, void *data)
2888 {
2889 struct caller_issues *issues = (struct caller_issues *) data;
2890
2891 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2892 {
2893 if (cs->caller->thunk)
2894 {
2895 issues->thunk = true;
2896 /* TODO: We should be able to process at least some types of
2897 thunks. */
2898 return true;
2899 }
2900 if (issues->candidate->calls_comdat_local
2901 && issues->candidate->same_comdat_group
2902 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2903 {
2904 issues->call_from_outside_comdat = true;
2905 return true;
2906 }
2907
2908 isra_call_summary *csum = call_sums->get (cs);
2909 if (!csum)
2910 {
2911 issues->unknown_callsite = true;
2912 return true;
2913 }
2914
2915 if (csum->m_bit_aligned_arg)
2916 issues->bit_aligned_aggregate_argument = true;
2917 }
2918 return false;
2919 }
2920
2921 /* Look at all incoming edges to NODE, including aliases and thunks, and look
2922 for problems. Return true if the type of NODE should not be modified at all. */
2923
2924 static bool
2925 check_all_callers_for_issues (cgraph_node *node)
2926 {
2927 struct caller_issues issues;
2928 memset (&issues, 0, sizeof (issues));
2929 issues.candidate = node;
2930
2931 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2932 if (issues.unknown_callsite)
2933 {
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2936 "all modifications.\n", node->dump_name ());
2937 return true;
2938 }
2939 /* TODO: We should be able to process at least some types of thunks. */
2940 if (issues.thunk)
2941 {
2942 if (dump_file && (dump_flags & TDF_DETAILS))
2943 fprintf (dump_file, "A call of %s is through a thunk, which is not"
2944 " handled yet. Disabling all modifications.\n",
2945 node->dump_name ());
2946 return true;
2947 }
2948 if (issues.call_from_outside_comdat)
2949 {
2950 if (dump_file)
2951 fprintf (dump_file, "Function would become private comdat called "
2952 "outside of its comdat group.\n");
2953 return true;
2954 }
2955
2956 if (issues.bit_aligned_aggregate_argument)
2957 {
2958 /* Let's only remove parameters/return values from such functions.
2959 TODO: We could only prevent splitting the problematic parameters if
2960 anybody thinks it is worth it. */
2961 if (dump_file && (dump_flags & TDF_DETAILS))
2962 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
2963 " disabling parameter splitting.\n", node->dump_name ());
2964
2965 isra_func_summary *ifs = func_sums->get (node);
2966 gcc_checking_assert (ifs);
2967 unsigned param_count = vec_safe_length (ifs->m_parameters);
2968 for (unsigned i = 0; i < param_count; i++)
2969 (*ifs->m_parameters)[i].split_candidate = false;
2970 }
2971 return false;
2972 }
2973
2974 /* Find the access with corresponding OFFSET and SIZE among accesses in
2975 PARAM_DESC and return it or NULL if such an access is not there. */
2976
2977 static param_access *
2978 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
2979 {
2980 unsigned pclen = vec_safe_length (param_desc->accesses);
2981
2982 /* The search is linear but the number of stored accesses is bound by
2983 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
2984
2985 for (unsigned i = 0; i < pclen; i++)
2986 if ((*param_desc->accesses)[i]->unit_offset == offset
2987 && (*param_desc->accesses)[i]->unit_size == size)
2988 return (*param_desc->accesses)[i];
2989
2990 return NULL;
2991 }
2992
2993 /* Return true iff the total size SIZE of the definite replacements would
2994 violate the limit set for it in DESC. */
2995
2996 static bool
2997 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
2998 {
2999 unsigned limit = desc->param_size_limit;
3000 if (size > limit
3001 || (!desc->by_ref && size == limit))
3002 return true;
3003 return false;
3004 }
3005
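/* For instance, with a limit of eight units, a by-reference parameter may
   accumulate replacements totaling up to eight units, whereas a by-value one
   is rejected already at eight, i.e. when the replacements would be as big
   as the parameter itself.  */
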
3006 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3007 the set limit. IDX is the parameter number which is dumped when
3008 disqualifying. */
3009
3010 static void
3011 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3012 {
3013 unsigned after = desc->size_reached + size;
3014 if (size_would_violate_limit_p (desc, after))
3015 {
3016 if (dump_file && (dump_flags & TDF_DETAILS))
3017 fprintf (dump_file, " ...size limit reached, disqualifying "
3018 "candidate parameter %u\n", idx);
3019 desc->split_candidate = false;
3020 return;
3021 }
3022 desc->size_reached = after;
3023 }
3024
3025 /* Take all actions required to deal with an edge CS that represents a call to
3026 an unknown or un-analyzed function, for both parameter removal and
3027 splitting. */
3028
3029 static void
3030 process_edge_to_unknown_caller (cgraph_edge *cs)
3031 {
3032 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3033 gcc_checking_assert (from_ifs);
3034 isra_call_summary *csum = call_sums->get (cs);
3035
3036 if (dump_file && (dump_flags & TDF_DETAILS))
3037 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3038 cs->caller->dump_name ());
3039
3040 unsigned args_count = csum->m_arg_flow.length ();
3041 for (unsigned i = 0; i < args_count; i++)
3042 {
3043 isra_param_flow *ipf = &csum->m_arg_flow[i];
3044
3045 if (ipf->pointer_pass_through)
3046 {
3047 isra_param_desc *param_desc
3048 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3049 param_desc->locally_unused = false;
3050 param_desc->split_candidate = false;
3051 continue;
3052 }
3053 if (ipf->aggregate_pass_through)
3054 {
3055 unsigned idx = get_single_param_flow_source (ipf);
3056 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3057
3058 param_desc->locally_unused = false;
3059 if (!param_desc->split_candidate)
3060 continue;
3061 gcc_assert (!param_desc->by_ref);
3062 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3063 ipf->unit_size);
3064 gcc_checking_assert (pacc);
3065 pacc->certain = true;
3066 if (overlapping_certain_accesses_p (param_desc, NULL))
3067 {
3068 if (dump_file && (dump_flags & TDF_DETAILS))
3069 fprintf (dump_file, " ...leading to overlap, "
3070 " disqualifying candidate parameter %u\n",
3071 idx);
3072 param_desc->split_candidate = false;
3073 }
3074 else
3075 bump_reached_size (param_desc, pacc->unit_size, idx);
3076 ipf->aggregate_pass_through = false;
3077 continue;
3078 }
3079
3080 for (int j = 0; j < ipf->length; j++)
3081 {
3082 int input_idx = ipf->inputs[j];
3083 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3084 }
3085 }
3086 }
3087
3088 /* Propagate parameter removal information through cross-SCC edge CS,
3089 i.e. for each argument that the callee actually uses, mark the caller
3090 parameters feeding into it as used. */
3091
3092 static void
3093 param_removal_cross_scc_edge (cgraph_edge *cs)
3094 {
3095 enum availability availability;
3096 cgraph_node *callee = cs->callee->function_symbol (&availability);
3097 isra_func_summary *to_ifs = func_sums->get (callee);
3098 if (!to_ifs || !to_ifs->m_candidate
3099 || (availability < AVAIL_AVAILABLE)
3100 || vec_safe_is_empty (to_ifs->m_parameters))
3101 {
3102 process_edge_to_unknown_caller (cs);
3103 return;
3104 }
3105 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3106 gcc_checking_assert (from_ifs);
3107
3108 isra_call_summary *csum = call_sums->get (cs);
3109 unsigned args_count = csum->m_arg_flow.length ();
3110 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3111
3112 for (unsigned i = 0; i < args_count; i++)
3113 {
3114 bool unused_in_callee;
3115 if (i < param_count)
3116 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3117 else
3118 unused_in_callee = false;
3119
3120 if (!unused_in_callee)
3121 {
3122 isra_param_flow *ipf = &csum->m_arg_flow[i];
3123 for (int j = 0; j < ipf->length; j++)
3124 {
3125 int input_idx = ipf->inputs[j];
3126 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3127 }
3128 }
3129 }
3130 }
3131
3132 /* Unless it is already there, push NODE which is also described by IFS to
3133 STACK. */
3134
3135 static void
3136 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3137 vec<cgraph_node *> *stack)
3138 {
3139 if (!ifs->m_queued)
3140 {
3141 ifs->m_queued = true;
3142 stack->safe_push (node);
3143 }
3144 }
3145
3146 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3147 used and push CALLER on STACK. */
3148
3149 static void
3150 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3151 cgraph_node *caller, vec<cgraph_node *> *stack)
3152 {
3153 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3154 {
3155 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3156 isra_push_node_to_stack (caller, from_ifs, stack);
3157 }
3158 }
3159
3160
3161 /* Propagate information that any parameter is not used only locally within a
3162 SCC across CS to the caller, which must be in the same SCC as the
3163 callee. Push any callers that need to be re-processed to STACK. */
3164
3165 static void
3166 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3167 {
3168 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3169 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3170 return;
3171
3172 isra_call_summary *csum = call_sums->get (cs);
3173 gcc_checking_assert (csum);
3174 unsigned args_count = csum->m_arg_flow.length ();
3175 enum availability availability;
3176 cgraph_node *callee = cs->callee->function_symbol (&availability);
3177 isra_func_summary *to_ifs = func_sums->get (callee);
3178
3179 unsigned param_count
3180 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3181 ? vec_safe_length (to_ifs->m_parameters) : 0;
3182 for (unsigned i = 0; i < args_count; i++)
3183 {
3184 if (i < param_count
3185 && (*to_ifs->m_parameters)[i].locally_unused)
3186 continue;
3187
3188 /* The argument is needed in the callee, so we must mark the parameter as
3189 used also in the caller and its callers within this SCC. */
3190 isra_param_flow *ipf = &csum->m_arg_flow[i];
3191 for (int j = 0; j < ipf->length; j++)
3192 {
3193 int input_idx = ipf->inputs[j];
3194 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3195 }
3196 }
3197 }
3198
3199 /* Propagate information that any parameter is not used only locally within a
3200 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3201 same SCC. Push any callers that need to be re-processed to STACK. */
3202
3203 static bool
3204 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3205 {
3206 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3207 cgraph_edge *cs;
3208 for (cs = node->callers; cs; cs = cs->next_caller)
3209 if (ipa_edge_within_scc (cs))
3210 propagate_used_across_scc_edge (cs, stack);
3211 return false;
3212 }
3213
3214 /* Return true iff all certain accesses in ARG_DESC are also present as
3215 certain accesses in PARAM_DESC. */
3216
3217 static bool
3218 all_callee_accesses_present_p (isra_param_desc *param_desc,
3219 isra_param_desc *arg_desc)
3220 {
3221 unsigned aclen = vec_safe_length (arg_desc->accesses);
3222 for (unsigned j = 0; j < aclen; j++)
3223 {
3224 param_access *argacc = (*arg_desc->accesses)[j];
3225 if (!argacc->certain)
3226 continue;
3227 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3228 argacc->unit_size);
3229 if (!pacc
3230 || !pacc->certain
3231 || !types_compatible_p (argacc->type, pacc->type))
3232 return false;
3233 }
3234 return true;
3235 }
3236
3237 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3238 does not allow instantiating an auto_vec with a type defined within a
3239 function so it is a global type. */
3240 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3241
3242
3243 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3244 (which belongs to CALLER) if they would not violate some constraint there.
3245 If successful, return NULL, otherwise return the string reason for failure
3246 (which can be written to the dump file). DELTA_OFFSET is the known offset
3247 of the actual argument within the formal parameter (so of ARG_DESC within
3248 PARAM_DESC), ARG_SIZE is the size of the actual argument or zero, if not
3249 known. In case of success, set *CHANGE_P to true if propagation actually
3250 changed anything. */
3251
3252 static const char *
3253 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3254 isra_param_desc *arg_desc,
3255 unsigned delta_offset, unsigned arg_size,
3256 bool *change_p)
3257 {
3258 unsigned pclen = vec_safe_length (param_desc->accesses);
3259 unsigned aclen = vec_safe_length (arg_desc->accesses);
3260 unsigned prop_count = 0;
3261 unsigned prop_size = 0;
3262 bool change = false;
3263
3264 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3265 for (unsigned j = 0; j < aclen; j++)
3266 {
3267 param_access *argacc = (*arg_desc->accesses)[j];
3268 prop_kinds.safe_push (ACC_PROP_DONT);
3269
3270 if (arg_size > 0
3271 && argacc->unit_offset + argacc->unit_size > arg_size)
3272 return "callee access outsize size boundary";
3273
3274 if (!argacc->certain)
3275 continue;
3276
3277 unsigned offset = argacc->unit_offset + delta_offset;
3278 /* Given that accesses are initially stored according to increasing
3279 offset and decreasing size in case of equal offsets, the following
3280 searches could be written more efficiently if we kept the ordering
3281 when copying. But the number of accesses is capped at
3282 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3283 messy quickly, so let's improve on that only if necessary. */
3284
3285 bool exact_match = false;
3286 for (unsigned i = 0; i < pclen; i++)
3287 {
3288 /* Check for overlaps. */
3289 param_access *pacc = (*param_desc->accesses)[i];
3290 if (pacc->unit_offset == offset
3291 && pacc->unit_size == argacc->unit_size)
3292 {
3293 if (argacc->alias_ptr_type != pacc->alias_ptr_type
3294 || !types_compatible_p (argacc->type, pacc->type)
3295 || argacc->reverse != pacc->reverse)
3296 return "propagated access types would not match existing ones";
3297
3298 exact_match = true;
3299 if (!pacc->certain)
3300 {
3301 prop_kinds[j] = ACC_PROP_CERTAIN;
3302 prop_size += argacc->unit_size;
3303 change = true;
3304 }
3305 continue;
3306 }
3307
3308 if (offset < pacc->unit_offset + pacc->unit_size
3309 && offset + argacc->unit_size > pacc->unit_offset)
3310 {
3311 /* No overlap is permissible with certain (load) accesses; an overlap
3312 with an argument-only access must fit entirely inside it. */
3313 if (pacc->certain
3314 || offset < pacc->unit_offset
3315 || (offset + argacc->unit_size
3316 > pacc->unit_offset + pacc->unit_size))
3317 return "a propagated access would conflict in caller";
3318 }
3319 }
3320
3321 if (!exact_match)
3322 {
3323 prop_kinds[j] = ACC_PROP_COPY;
3324 prop_count++;
3325 prop_size += argacc->unit_size;
3326 change = true;
3327 }
3328 }
3329
3330 if (!change)
3331 return NULL;
3332
3333 if ((prop_count + pclen
3334 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
3335 || size_would_violate_limit_p (param_desc,
3336 param_desc->size_reached + prop_size))
3337 return "propagating accesses would violate the count or size limit";
3338
3339 *change_p = true;
3340 for (unsigned j = 0; j < aclen; j++)
3341 {
3342 if (prop_kinds[j] == ACC_PROP_COPY)
3343 {
3344 param_access *argacc = (*arg_desc->accesses)[j];
3345
3346 param_access *copy = ggc_cleared_alloc<param_access> ();
3347 copy->unit_offset = argacc->unit_offset + delta_offset;
3348 copy->unit_size = argacc->unit_size;
3349 copy->type = argacc->type;
3350 copy->alias_ptr_type = argacc->alias_ptr_type;
3351 copy->certain = true;
3352 copy->reverse = argacc->reverse;
3353 vec_safe_push (param_desc->accesses, copy);
3354 }
3355 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3356 {
3357 param_access *argacc = (*arg_desc->accesses)[j];
3358 param_access *csp
3359 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3360 argacc->unit_size);
3361 csp->certain = true;
3362 }
3363 }
3364
3365 param_desc->size_reached += prop_size;
3366
3367 return NULL;
3368 }
3369
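/* An illustrative sketch of the situation the function above handles (the
   functions below are hypothetical examples, not code from GCC):

     struct S { int a, b; };

     static int
     callee (struct S *p)
     {
       return p->a + p->b;
     }

     static int
     caller (struct S *p)
     {
       return callee (p);
     }

   The callee has certain accesses to p->a and p->b while the caller only
   passes its own parameter through.  Pulling the callee's accesses into the
   caller's descriptor of P (with a zero DELTA_OFFSET) is what then allows
   both functions to be rewritten to take the two integers by value.  */
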
/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs have
   been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through flags in call summaries which do not need to be
   processed again if this CS is revisited when iterating while changes are
   propagated within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
	     cs->caller->dump_name (), callee->dump_name ());

  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (arg_desc->locally_unused)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " ->%u: unused in callee\n", i);
	  ipf->pointer_pass_through = false;
	  continue;
	}

      if (ipf->pointer_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (param_desc->by_ref);

	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or not by "
			 "reference in callee\n", idx, i);
	      param_desc->split_candidate = false;
	      ipf->pointer_pass_through = false;
	      res = true;
	    }
	  else if (!ipf->safe_to_import_accesses)
	    {
	      if (!csum->m_before_any_store
		  || !all_callee_accesses_present_p (param_desc, arg_desc))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: cannot import accesses.\n",
			     idx, i);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: verified callee accesses "
			     "present.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     0, 0, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "failed: %s.\n", idx, i, pull_failure);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	}
      else if (ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);

	  if (pacc->certain)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: already certain\n", idx, i);
	      ipf->aggregate_pass_through = false;
	    }
	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or by "
			 "reference in callee\n", idx, i);

	      pacc->certain = true;
	      if (overlapping_certain_accesses_p (param_desc, NULL))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " ...leading to overlap, "
			     "disqualifying candidate parameter %u\n",
			     idx);
		  param_desc->split_candidate = false;
		}
	      else
		bump_reached_size (param_desc, pacc->unit_size, idx);

	      ipf->aggregate_pass_through = false;
	      res = true;
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     ipf->unit_offset,
					     ipf->unit_size, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "failed: %s.\n", idx, i, pull_failure);

		  ipf->aggregate_pass_through = false;
		  pacc->certain = true;

		  if (overlapping_certain_accesses_p (param_desc, NULL))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, " ...leading to overlap, "
				 "disqualifying candidate parameter %u\n",
				 idx);
		      param_desc->split_candidate = false;
		    }
		  else
		    bump_reached_size (param_desc, pacc->unit_size, idx);

		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->aggregate_pass_through = false;
		}
	    }
	}
    }

  /* Handle argument-parameter count mismatches.  */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  ipf->pointer_pass_through = false;
	  ipf->aggregate_pass_through = false;
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
		     idx, i);
	  param_desc->split_candidate = false;
	  res = true;
	}
    }
  return res;
}

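/* A hypothetical illustration of the propagation above (not code from GCC
   itself): when a caller forwards a whole aggregate parameter to a callee
   which only uses part of it,

     struct S { int a, b, c; };

     static int
     get_b (struct S s)
     {
       return s.b;
     }

     static int
     caller (struct S s)
     {
       return get_b (s);
     }

   the callee has a certain access at the offset of field b and the caller
   has an aggregate pass-through of S.  Pulling the callee's access into the
   caller's descriptor allows IPA-SRA to rewrite both functions to take only
   the int at that offset.  */
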
/* Worker for call_for_symbol_and_aliases, look at all callers and if all of
   them either ignore the return value, or come from the same SCC and use the
   return value only to compute their own return value, return false,
   otherwise return true.  */

static bool
retval_used_p (cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      isra_call_summary *csum = call_sums->get (cs);
      gcc_checking_assert (csum);
      if (csum->m_return_ignored)
	continue;
      if (!csum->m_return_returned)
	return true;

      isra_func_summary *from_ifs = func_sums->get (cs->caller);
      if (!from_ifs || !from_ifs->m_candidate)
	return true;

      if (!ipa_edge_within_scc (cs)
	  && !from_ifs->m_return_ignored)
	return true;
    }

  return false;
}

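/* For example (a sketch, not part of this file), in

     static int
     counter (void)
     {
       static int n;
       return ++n;
     }

     void
     tick (void)
     {
       counter ();
     }

   every caller ignores the returned value, so retval_used_p returns false
   for counter and the pass can turn it into a void function.  */
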
/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
   modify the parameter which originally had index BASE_INDEX, which had index
   PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
   was described by PREV_ADJUSTMENT.  If the parent clone is the original
   function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to
   BASE_INDEX.  */

static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
				  unsigned prev_clone_index,
				  ipa_adjusted_param *prev_adjustment,
				  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  if (desc->locally_unused)
    {
      if (dump_file)
	fprintf (dump_file, " Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      ipa_adjusted_param adj;
      if (prev_adjustment)
	{
	  adj = *prev_adjustment;
	  adj.prev_clone_adjustment = true;
	  adj.prev_clone_index = prev_clone_index;
	}
      else
	{
	  memset (&adj, 0, sizeof (adj));
	  adj.op = IPA_PARAM_OP_COPY;
	  adj.base_index = base_index;
	  adj.prev_clone_index = prev_clone_index;
	}
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, " Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
	continue;
      if (dump_file)
	fprintf (dump_file, " - component at byte offset %u, "
		 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}

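/* A hypothetical illustration of what the loop above produces: for a split
   candidate with two certain accesses, at byte offsets 0 and 8, NEW_PARAMS
   receives two entries along these lines (other fields omitted):

     { .op = IPA_PARAM_OP_SPLIT, .base_index = 1, .unit_offset = 0, ... }
     { .op = IPA_PARAM_OP_SPLIT, .base_index = 1, .unit_offset = 8, ... }

   which clone materialization later turns into two new scalar replacement
   parameters.  */
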
/* Worker for call_for_symbol_thunks_and_aliases.  Set the calls_comdat_local
   flag of all callers of NODE.  */

static bool
mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    cs->caller->calls_comdat_local = true;
  return false;
}


/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
			   hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
	isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if ((desc->locally_unused || desc->split_candidate)
	    /* Make sure we do not clone just to attempt to remove an already
	       removed unused argument.  */
	    && (!check_surviving
		|| (i < surviving_params.length ()
		    && surviving_params[i])))
	  {
	    will_change_function = true;
	    break;
	  }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
	       node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
	fprintf (dump_file, " Will remove return value.\n");
    }

  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments
      = cinfo ? cinfo->param_adjustments : NULL)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
					    old_adj, &new_params);
	}
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
			      ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  auto_vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
				  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
					     NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
  callers.release ();
}

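/* For illustration (hypothetical names, not output copied from GCC): a
   function

     static int foo (struct S *p, int unused);

   whose second parameter is dead and whose first can be split might be
   replaced by a clone along the lines of

     static int foo.isra.0 (int ISRA.1);

   where the suffix counter above keeps the ".isra.N" clone names unique per
   assembler name and ISRA.1 is a scalar replacement parameter created with
   the IPA_PARAM_PREFIX_ISRA prefix.  */
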
/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not be
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.  */

static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
	{
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	}
      else if (check_surviving
	       && (i >= surviving_params.length ()
		   || !surviving_params[i]))
	{
	  /* Even if the parameter was removed by a previous IPA pass, we do
	     not clear locally_unused because if it really is unused, this
	     information might be useful in callers.  */
	  desc->split_candidate = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      if (!dumped_first)
		{
		  fprintf (dump_file,
			   "The following parameters of %s are dead on "
			   "arrival:", node->dump_name ());
		  dumped_first = true;
		}
	      fprintf (dump_file, " %u", i);
	    }
	}
      else if (desc->locally_unused || desc->split_candidate)
	ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}


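/* As a debugging aid (an illustration, not functionality of this file), the
   ipa_sra_params counter consulted above can be limited from the command
   line like any other debug counter, e.g.

     gcc -O2 -fdbg-cnt=ipa_sra_params:0-7 test.c

   which lets only the first few candidate parameters through and so helps
   bisect a miscompilation down to a single transformed parameter.  */
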
/* Run the interprocedural part of IPA-SRA.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and splitting.  */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Preliminary IPA function level checks and first step of parameter
	 removal.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (!ipa_sra_ipa_function_checks (v)
	      || check_all_callers_for_issues (v))
	    {
	      ifs->zap ();
	      continue;
	    }
	  if (disable_unavailable_parameters (v, ifs))
	    continue;
	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
	    process_edge_to_unknown_caller (cs);
	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
	    if (!ipa_edge_within_scc (cs))
	      param_removal_cross_scc_edge (cs);
	}

      /* Look at edges within the current SCC and propagate used-ness across
	 them, pushing onto the stack all nodes which might need to be
	 revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
					       &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *v = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (v);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
						 &stack, true);
	}

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
	{
	  repeat_scc_access_propagation = false;
	  FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	    {
	      isra_func_summary *ifs = func_sums->get (v);
	      if (!ifs
		  || !ifs->m_candidate
		  || vec_safe_is_empty (ifs->m_parameters))
		continue;
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		if (param_splitting_across_edge (cs))
		  repeat_scc_access_propagation = true;
	    }
	}
      while (repeat_scc_access_propagation);

      if (flag_checking)
	FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	  verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  /* One sweep from callers to callees for result removal.  */
  for (int i = node_scc_count - 1; i >= 0; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;

	  bool return_needed
	    = (ifs->m_returns_value
	       && (!dbg_cnt (ipa_sra_retvalues)
		   || v->call_for_symbol_and_aliases (retval_used_p,
						      NULL, true)));
	  ifs->m_return_ignored = !return_needed;
	  if (return_needed)
	    isra_push_node_to_stack (v, ifs, &stack);
	}

      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs)
		&& call_sums->get (cs)->m_return_returned)
	      {
		enum availability av;
		cgraph_node *callee = cs->callee->function_symbol (&av);
		isra_func_summary *to_ifs = func_sums->get (callee);
		if (to_ifs && to_ifs->m_return_ignored)
		  {
		    to_ifs->m_return_ignored = false;
		    isra_push_node_to_stack (callee, to_ifs, &stack);
		  }
	      }
	}
      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	{
	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
		   "==========\n");
	  ipa_sra_dump_all_summaries (dump_file);
	}
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
	     "==========\n\n");
  return 0;
}


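/* An illustrative note (not functionality of this file): the decisions
   printed above can be inspected by enabling the pass dump, e.g.

     gcc -O2 -fdump-ipa-sra-details test.c

   and looking for the "IPA-SRA decisions" section in the resulting dump
   file, whose name ends in ".sra".  */
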
const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
		      ipa_sra_generate_summary, /* generate_summary */
		      ipa_sra_write_summary, /* write_summary */
		      ipa_sra_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* TODO: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_sra && optimize);
  }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra

} // anon namespace

/* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
   create a summary structure describing IPA-SRA opportunities and constraints
   in it.  */

static void
ipa_sra_summarize_function (cgraph_node *node)
{
  if (dump_file)
    fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
	     node->order);
  if (!ipa_sra_preliminary_function_checks (node))
    {
      isra_analyze_all_outgoing_calls (node);
      return;
    }
  gcc_obstack_init (&gensum_obstack);
  isra_func_summary *ifs = func_sums->get_create (node);
  ifs->m_candidate = true;
  tree ret = TREE_TYPE (TREE_TYPE (node->decl));
  ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);

  decl2desc = new hash_map<tree, gensum_param_desc *>;
  unsigned count = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
    count++;

  if (count > 0)
    {
      auto_vec<gensum_param_desc, 16> param_descriptions (count);
      param_descriptions.reserve_exact (count);
      param_descriptions.quick_grow_cleared (count);

      bool cfun_pushed = false;
      struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
      if (create_parameter_descriptors (node, &param_descriptions))
	{
	  push_cfun (fun);
	  cfun_pushed = true;
	  final_bbs = BITMAP_ALLOC (NULL);
	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
				      by_ref_count
				      * last_basic_block_for_fn (fun));
	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
	  scan_function (node, fun);

	  if (dump_file)
	    {
	      dump_gensum_param_descriptors (dump_file, node->decl,
					     &param_descriptions);
	      fprintf (dump_file, "----------------------------------------\n");
	    }
	}
      process_scan_results (node, fun, ifs, &param_descriptions);

      if (cfun_pushed)
	pop_cfun ();
      if (bb_dereferences)
	{
	  free (bb_dereferences);
	  bb_dereferences = NULL;
	  BITMAP_FREE (final_bbs);
	  final_bbs = NULL;
	}
    }
  isra_analyze_all_outgoing_calls (node);

  delete decl2desc;
  decl2desc = NULL;
  obstack_free (&gensum_obstack, NULL);
  if (dump_file)
    fprintf (dump_file, "\n\n");
  if (flag_checking)
    verify_splitting_accesses (node, false);
  return;
}

ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}


#include "gt-ipa-sra.h"