/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about
   functions and each call are generated.  Function summaries (often called
   call graph node summaries) contain mainly information about which
   parameters are potential transformation candidates and which bits of
   candidates are accessed.  We differentiate between accesses done as a part
   of a call statement (which might not be necessary if the callee is also
   transformed) and others (which are mandatory).  Call summaries (often
   called call graph edge summaries) contain information about which function
   formal parameters feed into which actual call arguments so that if two
   parameters are only used in a sum which is then passed to another function
   which does not itself use the corresponding parameter, all three
   parameters of the two functions can be eliminated.  Edge summaries also
   have flags indicating whether the return value is used or whether it is
   only passed on as the return value of the caller.  In LTO mode these
   summaries are then streamed to the object file in the compilation phase
   and streamed back in during the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */
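
/* As a purely illustrative sketch (the functions below are hypothetical, not
   part of GCC): given

     struct S { int a, b, c; };
     static int foo (struct S *s) { return s->a + s->c; }
       ... foo (&s); ...

   IPA-SRA can create and call a clone along the lines of

     static int foo.isra (int a, int c) { return a + c; }
       ... foo.isra (s.a, s.c); ...

   removing the indirection and passing only the accessed parts by value.  */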


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
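
/* To illustrate the limits above on a hypothetical example: an aggregate
   parameter of 1 << 16 == 65536 bytes or more can never be tracked, and an
   actual argument such as f (p0 + p1 + p2 + p3 + p4 + p5 + p6 + p7), computed
   from eight formal parameters, exceeds IPA_SRA_MAX_PARAM_FLOW_LEN and so
   stops being tracked for transitive removal purposes.  */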

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that
   occur within a function and fall within another access form a tree.  The
   pass cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal
     parameters that are only used as actual function arguments passed to
     callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};

/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Set if there have been writes to or reads from this exact location other
     than as arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};
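
/* A hypothetical example of the access tree described above: for a parameter
   of type

     struct P { struct { int x; int y; } inner; int z; };

   whose inner member is passed to a callee while inner.y and z are also read
   directly, the tree could look like this (offsets and sizes in bits,
   assuming 32-bit int):

     offset 0, size 64 (p->inner, call argument only)
       offset 32, size 32 (p->inner.y, nonarg)
     offset 64, size 32 (p->z, nonarg)

   Only accesses that are call arguments may have children; partially
   overlapping accesses disqualify the parameter.  */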

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing data by reference?  */
  unsigned by_ref : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in the field above.  */
  unsigned access_count;

  /* If non-zero, the number of uses of the parameter as actual call
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee in
     its entirety.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing
     data by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing data by reference?  */
  bool by_ref;

  /* The index of this parameter in the order in which it appears in the
     function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the parameter index
     used to compute indices into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to the inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}

/* Mark the function as not a candidate for any IPA-SRA transformation.
   Return true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual
     argument.  If aggregate_pass_through or pointer_pass_through below are
     true, it must contain exactly one element which is passed through from a
     formal parameter of the given number.  Otherwise, the array contains
     indices of the caller's formal parameters which are used to calculate
     the value of this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* Size of the portion of the formal parameter that is being passed.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass through
     of an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee,
     which would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
};
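
/* For instance, in a hypothetical caller

     void f (int a, int b) { g (a + b); }

   the isra_param_flow of the single actual argument of the call to g would
   have length == 2 and inputs == {0, 1}, with none of the pass-through flags
   set: the argument is computed from formal parameters 0 and 1 of f but is
   not a verbatim copy of either.  */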

/* Structure used to convey information about calls from the intra-procedural
   analysis stage to the inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to
     compute individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in
     the caller.  */
  unsigned m_before_any_store : 1;
};
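
/* For example, given a hypothetical caller

     int f (int i) { return g (i); }

   the summary of the call graph edge to g would have m_return_returned set
   because the LHS of the call only feeds f's return statement.  If no caller
   of f uses its return value either, the IPA propagation can conclude that
   the value returned by g is never really needed.  */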

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  virtual void duplicate (cgraph_node *, cgraph_node *,
                          isra_func_summary *old_sum,
                          isra_func_summary *new_sum);
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
                                       isra_func_summary *old_sum,
                                       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
        {
          param_access *from = (*s->accesses)[j];
          param_access *to = ggc_cleared_alloc<param_access> ();
          to->type = from->type;
          to->alias_ptr_type = from->alias_ptr_type;
          to->unit_offset = from->unit_offset;
          to->unit_size = from->unit_size;
          to->certain = from->certain;
          to->reverse = from->reverse;
          d->accesses->quick_push (to);
        }
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  virtual void duplicate (cgraph_edge *, cgraph_edge *,
                          isra_call_summary *old_sum,
                          isra_call_summary *new_sum);
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
        {
          bool first = true;
          fprintf (f, "      Scalar param sources: ");
          for (int j = 0; j < ipf->length; j++)
            {
              if (!first)
                fprintf (f, ", ");
              else
                first = false;
              fprintf (f, "%i", (int) ipf->inputs[j]);
            }
          fprintf (f, "\n");
        }
      if (ipf->aggregate_pass_through)
        fprintf (f, "      Aggregate pass through from the param given above, "
                 "unit offset: %u , unit size: %u\n",
                 ipf->unit_offset, ipf->unit_size);
      if (ipf->pointer_pass_through)
        fprintf (f, "      Pointer pass through from the param given above, "
                 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                   isra_call_summary *old_sum,
                                   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}

/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
        fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
        fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
                 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
        fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "Function type has attributes.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
        fprintf (dump_file, "Always inline function will be inlined "
                 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
           access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "  * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    by_ref with %u pass throughs\n", desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
                               vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s\n",
           desc->param_size_limit, desc->size_reached,
           desc->by_ref ? ", by_ref" : "");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
   F.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl,
                             isra_func_summary *ifs)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "    parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "  Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
    }
}

/* Add SRC to the inputs of PARAM_FLOW, unless it would exceed storage.
   Return false if the function fails, otherwise return true.  SRC must fit
   into an unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN, represented with NODE, and return a negative number if any of them
   is used for anything other than an actual call argument, a simple
   arithmetic operation or a debug statement.  If there are no such uses,
   return the number of actual arguments that this parameter eventually feeds
   to (or zero if there is none).  For each such actual argument, mark
   PARM_NUM as one of its sources.  ANALYZED is a bitmap that tracks which SSA
   names we have already started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
                              int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
        continue;

      /* TODO: We could handle at least const builtin functions like
         arithmetic operations below.  */
      if (is_gimple_call (stmt))
        {
          int all_uses = 0;
          use_operand_p use_p;
          FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
            all_uses++;

          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          int simple_uses = 0;
          for (unsigned i = 0; i < arg_count; i++)
            if (gimple_call_arg (call, i) == name)
              {
                if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
                  {
                    simple_uses = -1;
                    break;
                  }
                simple_uses++;
              }

          if (simple_uses < 0
              || all_uses != simple_uses)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          res += all_uses;
        }
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
               && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
                   || gimple_code (stmt) == GIMPLE_PHI))
        {
          tree lhs;
          if (gimple_code (stmt) == GIMPLE_PHI)
            lhs = gimple_phi_result (stmt);
          else
            lhs = gimple_assign_lhs (stmt);

          if (TREE_CODE (lhs) != SSA_NAME)
            {
              res = -1;
              BREAK_FROM_IMM_USE_STMT (imm_iter);
            }
          gcc_assert (!gimple_vdef (stmt));
          if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
            {
              int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
                                                      analyzed);
              if (tmp < 0)
                {
                  res = tmp;
                  BREAK_FROM_IMM_USE_STMT (imm_iter);
                }
              res += tmp;
            }
        }
      else
        {
          res = -1;
          BREAK_FROM_IMM_USE_STMT (imm_iter);
        }
    }
  return res;
}
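
/* A hypothetical example of what the function above accepts: in

     void foo (int i) { int tmp = i + 1; bar (tmp); baz (i); }

   every use of the default definition of i is either directly an actual call
   argument or feeds a simple arithmetic operation whose result is again used
   only as an actual call argument, so the return value would be 2 and
   parameter 0 of foo would be recorded as a source of the first argument of
   both calls.  */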

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for anything other than an actual
   call argument, a simple arithmetic operation or a debug statement.  If
   there are no such uses, return false and store the number of actual
   arguments that this parameter eventually feeds to (or zero if there is
   none) to *CALL_USES_P.  For each such actual argument, mark PARM_NUM as
   one of its sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node,
                                    tree parm, int parm_num, int *call_uses_p)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      *call_uses_p = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256
     parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
                                                analyzed);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  *call_uses_p = call_uses;
  return false;
}

/* Scan immediate uses of a default definition SSA name of a parameter PARM
   and examine whether there are any uses that are not actual call arguments
   or are otherwise infeasible.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and, if returning false, store their number into *PT_COUNT_P.  NODE and
   FUN must represent the function that is currently analyzed, PARM_NUM must
   be the index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or
   turning them into bits passed by value, as opposed to generic unused
   parameter removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
                          int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256
     parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;

      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          if (!TREE_THIS_VOLATILE (rhs))
            {
              while (handled_component_p (rhs))
                rhs = TREE_OPERAND (rhs, 0);
              if (TREE_CODE (rhs) == MEM_REF
                  && TREE_OPERAND (rhs, 0) == name
                  && integer_zerop (TREE_OPERAND (rhs, 1))
                  && types_compatible_p (TREE_TYPE (rhs),
                                         TREE_TYPE (TREE_TYPE (name))))
                uses_ok++;
            }
        }
      else if (is_gimple_call (stmt))
        {
          gcall *call = as_a <gcall *> (stmt);
          unsigned arg_count;
          if (gimple_call_internal_p (call)
              || (arg_count = gimple_call_num_args (call)) == 0)
            {
              ret = true;
              BREAK_FROM_IMM_USE_STMT (ui);
            }

          cgraph_edge *cs = node->get_edge (stmt);
          gcc_checking_assert (cs);
          isra_call_summary *csum = call_sums->get_create (cs);
          csum->init_inputs (arg_count);

          for (unsigned i = 0; i < arg_count; ++i)
            {
              tree arg = gimple_call_arg (stmt, i);

              if (arg == name)
                {
                  /* TODO: Allow &MEM_REF[name + offset] here,
                     ipa_param_body_adjustments::modify_call_stmt has to be
                     adjusted too.  */
                  csum->m_arg_flow[i].pointer_pass_through = true;
                  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
                  pt_count++;
                  uses_ok++;
                  continue;
                }

              if (!TREE_THIS_VOLATILE (arg))
                {
                  while (handled_component_p (arg))
                    arg = TREE_OPERAND (arg, 0);
                  if (TREE_CODE (arg) == MEM_REF
                      && TREE_OPERAND (arg, 0) == name
                      && integer_zerop (TREE_OPERAND (arg, 1))
                      && types_compatible_p (TREE_TYPE (arg),
                                             TREE_TYPE (TREE_TYPE (name))))
                    uses_ok++;
                }
            }
        }

      /* If the number of valid uses does not match the number of
         uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
        all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
        {
          ret = true;
          BREAK_FROM_IMM_USE_STMT (ui);
        }
    }

  *pt_count_p = pt_count;
  return ret;
}

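/* A hypothetical illustration of the checks above: in

     int foo (int *p) { return *p + bar (p); }

   both the load *p and the passing of p itself to bar are acceptable (the
   latter is recorded as a pointer pass-through), whereas an additional use
   such as

     int *q = p + 1;

   would be unhandled and make the function return true.  */
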
/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false even when there are candidates for removal of
   register parameters) and the function body must be scanned.  */

static bool
create_parameter_descriptors (cgraph_node *node,
                              vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
        print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is volatile\n");
          continue;
        }
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is a va_list type\n");
          continue;
        }
      if (TREE_ADDRESSABLE (parm))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, is addressable\n");
          continue;
        }
      if (TREE_ADDRESSABLE (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, type cannot be split\n");
          continue;
        }

      if (is_gimple_reg (parm)
          && !isra_track_scalar_param_local_uses (fun, node, parm, num,
                                                  &scalar_call_uses))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " is a scalar with only %i call uses\n",
                     scalar_call_uses);

          desc->locally_unused = true;
          desc->call_uses = scalar_call_uses;
        }

      if (POINTER_TYPE_P (type))
        {
          type = TREE_TYPE (type);

          if (TREE_CODE (type) == FUNCTION_TYPE
              || TREE_CODE (type) == METHOD_TYPE)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a function\n");
              continue;
            }
          if (TYPE_VOLATILE (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a volatile type\n");
              continue;
            }
          if (TREE_CODE (type) == ARRAY_TYPE
              && TYPE_NONALIASED_COMPONENT (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a nonaliased component array\n");
              continue;
            }
          if (!is_gimple_reg (parm))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, a reference which is "
                         "not a gimple register (probably addressable)\n");
              continue;
            }
          if (is_va_list_type (type))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference to "
                         "a va list\n");
              continue;
            }
          if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
                                        &desc->ptr_pt_count))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, " not a candidate, reference has "
                         "nonarg uses\n");
              continue;
            }
          desc->by_ref = true;
        }
      else if (!AGGREGATE_TYPE_P (type))
        {
          /* This is in an else branch because scalars passed by reference are
             still candidates to be passed by value.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not an aggregate\n");
          continue;
        }

      if (!COMPLETE_TYPE_P (type))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, not a complete type\n");
          continue;
        }
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, size not representable\n");
          continue;
        }
      unsigned HOST_WIDE_INT type_size
        = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
          || type_size >= ISRA_ARG_SIZE_LIMIT)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, has zero or huge size\n");
          continue;
        }
      if (type_internals_preclude_sra_p (type, &msg))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " not a candidate, %s\n", msg);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref)
        desc->deref_index = by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting
   and write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
             desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file
   if there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
                 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
                                             sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  return access;
}

/* The context in which scan_expr_access has been called, whether it deals
   with a load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or a store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at the pointer to the linked
   list of a certain tree level FIRST.  Attempt to create it and put it into
   the appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
              HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
        return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
         it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
         && access->next_sibling
         && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
         something below access.  But we only allow nesting in call
         arguments.  */
      if (access->nonarg)
        return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that
         is what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
        return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->first_child = access;

      while (access->next_sibling
             && access->next_sibling->offset < offset + size)
        access = access->next_sibling;
      if (access->offset + access->size > offset + size)
        {
          /* This must be a different access, which are sorted, so the
             following must be true and this signals a partial overlap.  */
          gcc_assert (access->offset > offset);
          return NULL;
        }

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
        return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
         super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
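
/* To illustrate the cases above with hypothetical numbers: if the tree so far
   contains a single access at offset 0 with size 8, then a request for
   offset 0 and size 8 returns the existing node, offset 8 and size 4 creates
   a new sibling, offset 0 and size 16 in ISRA_CTX_ARG context creates a new
   parent of the existing node, while offset 4 and size 8 partially overlaps
   the existing access and therefore yields NULL.  */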

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, and mark it as used in context CTX.
   Attempt to create it if it does not exist, but fail and return NULL if
   there are already too many accesses, if it would create a partially
   overlapping access or if an access would end up in a non-call access.  */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
            isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
                                              size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
                                  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
                      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
        {
          if (access->offset < parent_offset)
            {
              error ("Access offset before parent offset");
              return true;
            }
          if (access->size >= parent_size)
            {
              error ("Access size greater or equal to its parent size");
              return true;
            }
          if (access->offset + access->size > parent_offset + parent_size)
            {
              error ("Access terminates outside of its parent");
              return true;
            }
        }

      if (verify_access_tree_1 (access->first_child, access->offset,
                                access->size))
        return true;

      if (access->next_sibling
          && (access->next_sibling->offset < access->offset + access->size))
        {
          error ("Access overlaps with its sibling");
          return true;
        }

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
        dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
                        basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}
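
/* The table filled in above is what later allows moving a dereference of a
   by-reference parameter from the callee to its callers.  As a hypothetical
   sketch, in

     void foo (int *p, int flag) { if (flag) use (*p); }

   the dereference of p does not happen on all paths through the function, so
   passing *p by value would introduce a load in the callers that the original
   program might never perform; bb_dereferences together with final_bbs serves
   to prove that a dereference is executed whenever the function is called
   before such a transformation is allowed.  */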

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
          || TREE_CODE (new_type) == VECTOR_TYPE))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || TREE_CODE (old_type) == VECTOR_TYPE)
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
          != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
          != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
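
/* A few hypothetical examples of the ordering implemented above: a scalar
   type prevails over a single-field aggregate wrapping it, a _Complex double
   prevails over a double, an int prevails over a short, and an int also
   prevails over an integral type whose TYPE_PRECISION is smaller than its
   TYPE_SIZE (such as a type of a bit-field).  */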

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record use of ACCESS which belongs to a parameter described by DESC in a
   call argument described by CALL_INFO.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
                             scan_call_info *call_info,
                             unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}
1646
1647 /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1648 specifies whether EXPR is used in a load, store or as an argument call. BB
1649 must be the basic block in which expr resides. If CTX specifies call
1650 argument context, CALL_INFO must describe that call and argument position,
1651 otherwise it is ignored. */
1652
1653 static void
1654 scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1655 basic_block bb, scan_call_info *call_info = NULL)
1656 {
1657 poly_int64 poffset, psize, pmax_size;
1658 HOST_WIDE_INT offset, size, max_size;
1659 tree base;
1660 bool deref = false;
1661 bool reverse;
1662
1663 if (TREE_CODE (expr) == BIT_FIELD_REF
1664 || TREE_CODE (expr) == IMAGPART_EXPR
1665 || TREE_CODE (expr) == REALPART_EXPR)
1666 expr = TREE_OPERAND (expr, 0);
1667
1668 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1669
1670 if (TREE_CODE (base) == MEM_REF)
1671 {
1672 tree op = TREE_OPERAND (base, 0);
1673 if (TREE_CODE (op) != SSA_NAME
1674 || !SSA_NAME_IS_DEFAULT_DEF (op))
1675 return;
1676 base = SSA_NAME_VAR (op);
1677 if (!base)
1678 return;
1679 deref = true;
1680 }
1681 if (TREE_CODE (base) != PARM_DECL)
1682 return;
1683
1684 gensum_param_desc *desc = get_gensum_param_desc (base);
1685 if (!desc || !desc->split_candidate)
1686 return;
1687
1688 if (!poffset.is_constant (&offset)
1689 || !psize.is_constant (&size)
1690 || !pmax_size.is_constant (&max_size))
1691 {
1692 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1693 "access.");
1694 return;
1695 }
1696 if (size < 0 || size != max_size)
1697 {
1698 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1699 return;
1700 }
1701 if (TREE_CODE (expr) == COMPONENT_REF
1702 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1703 {
1704 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1705 return;
1706 }
1707 if (offset < 0)
1708 {
1709 disqualify_split_candidate (desc, "Encountered an access at a "
1710 "negative offset.");
1711 return;
1712 }
1713 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1714 gcc_assert ((size % BITS_PER_UNIT) == 0);
1715 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1716 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1717 {
1718 disqualify_split_candidate (desc, "Encountered an access with too big "
1719 "offset or size");
1720 return;
1721 }
1722
1723 tree type = TREE_TYPE (expr);
1724 unsigned int exp_align = get_object_alignment (expr);
1725
1726 if (exp_align < TYPE_ALIGN (type))
1727 {
1728 disqualify_split_candidate (desc, "Underaligned access.");
1729 return;
1730 }
1731
1732 if (deref)
1733 {
1734 if (!desc->by_ref)
1735 {
1736 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1737 return;
1738 }
1739 else if (ctx == ISRA_CTX_STORE)
1740 {
1741 disqualify_split_candidate (desc, "Storing to data passed by "
1742 "reference.");
1743 return;
1744 }
1745
1746 if (!aa_walking_limit)
1747 {
1748 disqualify_split_candidate (desc, "Out of alias analysis step "
1749 "limit.");
1750 return;
1751 }
1752
1753 gcc_checking_assert (gimple_vuse (stmt));
1754 bool maybe_modified = false;
1755 ao_ref ar;
1756
1757 ao_ref_init (&ar, expr);
1758 bitmap visited = BITMAP_ALLOC (NULL);
1759 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1760 mark_maybe_modified, &maybe_modified,
1761 &visited, NULL, aa_walking_limit);
1762 BITMAP_FREE (visited);
1763 if (walked > 0)
1764 {
1765 gcc_assert (aa_walking_limit > walked);
1766 aa_walking_limit = aa_walking_limit - walked;
1767 }
1768 if (walked < 0)
1769 aa_walking_limit = 0;
1770 if (maybe_modified || walked < 0)
1771 {
1772 disqualify_split_candidate (desc, "Data passed by reference possibly "
1773 "modified through an alias.");
1774 return;
1775 }
1776 else
1777 mark_param_dereference (desc, offset + size, bb);
1778 }
1779 else
1780 /* Pointer parameters with direct uses should have been ruled out by
1781 analyzing SSA default def when looking at the parameters. */
1782 gcc_assert (!desc->by_ref);
1783
1784 gensum_param_access *access = get_access (desc, offset, size, ctx);
1785 if (!access)
1786 return;
1787
1788 if (ctx == ISRA_CTX_ARG)
1789 {
1790 gcc_checking_assert (call_info);
1791
1792 if (!deref)
1793 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1794 size / BITS_PER_UNIT);
1795 else
1796 /* This is not a pass-through of a pointer; it is a use like any
1797 other. */
1798 access->nonarg = true;
1799 }
1800
1801 if (!access->type)
1802 {
1803 access->type = type;
1804 access->alias_ptr_type = reference_alias_ptr_type (expr);
1805 access->reverse = reverse;
1806 }
1807 else
1808 {
1809 if (exp_align < TYPE_ALIGN (access->type))
1810 {
1811 disqualify_split_candidate (desc, "Reference has lower alignment "
1812 "than a previous one.");
1813 return;
1814 }
1815 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1816 {
1817 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1818 return;
1819 }
1820 if (access->reverse != reverse)
1821 {
1822 disqualify_split_candidate (desc, "Both normal and reverse "
1823 "scalar storage order.");
1824 return;
1825 }
1826 if (!deref
1827 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1828 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1829 {
1830 /* We need the same aggregate type on all accesses to be able to
1831 distinguish transformation spots from pass-through arguments in
1832 the transformation phase. */
1833 disqualify_split_candidate (desc, "We do not support aggregate "
1834 "type punning.");
1835 return;
1836 }
1837
1838 if (type_prevails_p (access->type, type))
1839 access->type = type;
1840 }
1841 }
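/* Hypothetical examples (not from any testcase) of accesses that the
   function above disqualifies:

     struct S { int a : 3; int b; int arr[4]; };

     int f (struct S s) { return s.a; }              // bit-field access
     int g (struct S s, int i) { return s.arr[i]; }  // size != max_size

   An access such as s.b, with a constant offset and size, remains a valid
   candidate.  */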
1842
1843 /* Scan the body of the function described by NODE and FUN and create access
1844 trees for its parameters. */
1845
1846 static void
1847 scan_function (cgraph_node *node, struct function *fun)
1848 {
1849 basic_block bb;
1850
1851 FOR_EACH_BB_FN (bb, fun)
1852 {
1853 gimple_stmt_iterator gsi;
1854 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1855 {
1856 gimple *stmt = gsi_stmt (gsi);
1857
1858 if (stmt_can_throw_external (fun, stmt))
1859 bitmap_set_bit (final_bbs, bb->index);
1860 switch (gimple_code (stmt))
1861 {
1862 case GIMPLE_RETURN:
1863 {
1864 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1865 if (t != NULL_TREE)
1866 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1867 bitmap_set_bit (final_bbs, bb->index);
1868 }
1869 break;
1870
1871 case GIMPLE_ASSIGN:
1872 if (gimple_assign_single_p (stmt)
1873 && !gimple_clobber_p (stmt))
1874 {
1875 tree rhs = gimple_assign_rhs1 (stmt);
1876 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1877 tree lhs = gimple_assign_lhs (stmt);
1878 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1879 }
1880 break;
1881
1882 case GIMPLE_CALL:
1883 {
1884 unsigned argument_count = gimple_call_num_args (stmt);
1885 isra_scan_context ctx = ISRA_CTX_ARG;
1886 scan_call_info call_info, *call_info_p = &call_info;
1887 if (gimple_call_internal_p (stmt))
1888 {
1889 call_info_p = NULL;
1890 ctx = ISRA_CTX_LOAD;
1891 internal_fn ifn = gimple_call_internal_fn (stmt);
1892 if (internal_store_fn_p (ifn))
1893 ctx = ISRA_CTX_STORE;
1894 }
1895 else
1896 {
1897 call_info.cs = node->get_edge (stmt);
1898 call_info.argument_count = argument_count;
1899 }
1900
1901 for (unsigned i = 0; i < argument_count; i++)
1902 {
1903 call_info.arg_idx = i;
1904 scan_expr_access (gimple_call_arg (stmt, i), stmt,
1905 ctx, bb, call_info_p);
1906 }
1907
1908 tree lhs = gimple_call_lhs (stmt);
1909 if (lhs)
1910 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1911 int flags = gimple_call_flags (stmt);
1912 if (((flags & (ECF_CONST | ECF_PURE)) == 0)
1913 || (flags & ECF_LOOPING_CONST_OR_PURE))
1914 bitmap_set_bit (final_bbs, bb->index);
1915 }
1916 break;
1917
1918 case GIMPLE_ASM:
1919 {
1920 gasm *asm_stmt = as_a <gasm *> (stmt);
1921 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
1922 asm_visit_addr);
1923 bitmap_set_bit (final_bbs, bb->index);
1924
1925 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
1926 {
1927 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
1928 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
1929 }
1930 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
1931 {
1932 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
1933 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
1934 }
1935 }
1936 break;
1937
1938 default:
1939 break;
1940 }
1941 }
1942 }
1943 }
1944
1945 /* Return true if SSA_NAME NAME of function described by FUN is only used in
1946 return statements, or if results of any operations it is involved in are
1947 only used in return statements. ANALYZED is a bitmap that tracks which SSA
1948 names we have already started investigating. */
1949
1950 static bool
1951 ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
1952 {
1953 bool res = true;
1954 imm_use_iterator imm_iter;
1955 gimple *stmt;
1956
1957 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
1958 {
1959 if (is_gimple_debug (stmt))
1960 continue;
1961
1962 if (gimple_code (stmt) == GIMPLE_RETURN)
1963 {
1964 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1965 if (t != name)
1966 {
1967 res = false;
1968 BREAK_FROM_IMM_USE_STMT (imm_iter);
1969 }
1970 }
1971 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
1972 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
1973 || gimple_code (stmt) == GIMPLE_PHI))
1974 {
1975 /* TODO: And perhaps for const function calls too? */
1976 tree lhs;
1977 if (gimple_code (stmt) == GIMPLE_PHI)
1978 lhs = gimple_phi_result (stmt);
1979 else
1980 lhs = gimple_assign_lhs (stmt);
1981
1982 if (TREE_CODE (lhs) != SSA_NAME)
1983 {
1984 res = false;
1985 BREAK_FROM_IMM_USE_STMT (imm_iter);
1986 }
1987 gcc_assert (!gimple_vdef (stmt));
1988 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
1989 && !ssa_name_only_returned_p (fun, lhs, analyzed))
1990 {
1991 res = false;
1992 BREAK_FROM_IMM_USE_STMT (imm_iter);
1993 }
1994 }
1995 else
1996 {
1997 res = false;
1998 BREAK_FROM_IMM_USE_STMT (imm_iter);
1999 }
2000 }
2001 return res;
2002 }
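/* For instance (a made-up sketch), in

     int foo (int x)
     {
       int r = bar (x);
       int s = r + 1;
       return s;
     }

   the predicate above holds for the SSA name holding the result of bar: it
   feeds only an addition whose result feeds only the return statement.
   Storing the value to memory or passing it to another call would make the
   predicate return false.  */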
2003
2004 /* Inspect the uses of the return value of the call associated with CS, and if
2005 it is not used or if it is only used to construct the return value of the
2006 caller, mark it as such in call or caller summary. Also check for
2007 misaligned arguments. */
2008
2009 static void
2010 isra_analyze_call (cgraph_edge *cs)
2011 {
2012 gcall *call_stmt = cs->call_stmt;
2013 unsigned count = gimple_call_num_args (call_stmt);
2014 isra_call_summary *csum = call_sums->get_create (cs);
2015
2016 for (unsigned i = 0; i < count; i++)
2017 {
2018 tree arg = gimple_call_arg (call_stmt, i);
2019 if (is_gimple_reg (arg))
2020 continue;
2021
2022 tree offset;
2023 poly_int64 bitsize, bitpos;
2024 machine_mode mode;
2025 int unsignedp, reversep, volatilep = 0;
2026 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2027 &unsignedp, &reversep, &volatilep);
2028 if (!multiple_p (bitpos, BITS_PER_UNIT))
2029 {
2030 csum->m_bit_aligned_arg = true;
2031 break;
2032 }
2033 }
2034
2035 tree lhs = gimple_call_lhs (call_stmt);
2036 if (lhs)
2037 {
2038 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2039 from this function (without being read anywhere). */
2040 if (TREE_CODE (lhs) == SSA_NAME)
2041 {
2042 bitmap analyzed = BITMAP_ALLOC (NULL);
2043 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2044 lhs, analyzed))
2045 csum->m_return_returned = true;
2046 BITMAP_FREE (analyzed);
2047 }
2048 }
2049 else
2050 csum->m_return_ignored = true;
2051 }
2052
2053 /* Look at all calls going out of NODE and perform all analyses necessary for
2054 IPA-SRA that are not done at body scan time or that must be done even when
2055 the body is not scanned because the function is not a candidate. */
2056
2057 static void
2058 isra_analyze_all_outgoing_calls (cgraph_node *node)
2059 {
2060 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2061 isra_analyze_call (cs);
2062 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2063 isra_analyze_call (cs);
2064 }
2065
2066 /* Dump a dereferences table with heading STR to file F. */
2067
2068 static void
2069 dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2070 {
2071 basic_block bb;
2072
2073 fprintf (f, "%s", str);
2074 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2075 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2076 {
2077 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2078 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2079 {
2080 int i;
2081 for (i = 0; i < by_ref_count; i++)
2082 {
2083 int idx = bb->index * by_ref_count + i;
2084 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2085 }
2086 }
2087 fprintf (f, "\n");
2088 }
2089 fprintf (f, "\n");
2090 }
2091
2092 /* Propagate distances in bb_dereferences in the opposite direction than the
2093 control flow edges, in each step storing the maximum of the current value
2094 and the minimum of all successors. These steps are repeated until the table
2095 stabilizes. Note that BBs which might terminate the function (according to
2096 the final_bbs bitmap) are never updated in this way. */
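/* A small made-up illustration of the propagation described above: if a
   parameter is dereferenced up to unit offset 8 in both arms of a
   conditional but only up to offset 4 in the BB before it, that BB ends up
   with max (4, min (8, 8)) = 8, and eventually the entry BB learns that a
   split covering offsets 0..8 cannot introduce an invalid dereference on
   any path.  BBs in final_bbs (e.g. ones containing statements that may
   throw externally) are skipped because execution may never reach their
   successors.  */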
2097
2098 static void
2099 propagate_dereference_distances (struct function *fun)
2100 {
2101 basic_block bb;
2102
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2104 dump_dereferences_table (dump_file, fun,
2105 "Dereference table before propagation:\n");
2106
2107 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2108 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2109 FOR_EACH_BB_FN (bb, fun)
2110 {
2111 queue.quick_push (bb);
2112 bb->aux = bb;
2113 }
2114
2115 while (!queue.is_empty ())
2116 {
2117 edge_iterator ei;
2118 edge e;
2119 bool change = false;
2120 int i;
2121
2122 bb = queue.pop ();
2123 bb->aux = NULL;
2124
2125 if (bitmap_bit_p (final_bbs, bb->index))
2126 continue;
2127
2128 for (i = 0; i < by_ref_count; i++)
2129 {
2130 int idx = bb->index * by_ref_count + i;
2131 bool first = true;
2132 HOST_WIDE_INT inh = 0;
2133
2134 FOR_EACH_EDGE (e, ei, bb->succs)
2135 {
2136 int succ_idx = e->dest->index * by_ref_count + i;
2137
2138 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2139 continue;
2140
2141 if (first)
2142 {
2143 first = false;
2144 inh = bb_dereferences [succ_idx];
2145 }
2146 else if (bb_dereferences [succ_idx] < inh)
2147 inh = bb_dereferences [succ_idx];
2148 }
2149
2150 if (!first && bb_dereferences[idx] < inh)
2151 {
2152 bb_dereferences[idx] = inh;
2153 change = true;
2154 }
2155 }
2156
2157 if (change)
2158 FOR_EACH_EDGE (e, ei, bb->preds)
2159 {
2160 if (e->src->aux)
2161 continue;
2162
2163 e->src->aux = e->src;
2164 queue.quick_push (e->src);
2165 }
2166 }
2167
2168 if (dump_file && (dump_flags & TDF_DETAILS))
2169 dump_dereferences_table (dump_file, fun,
2170 "Dereference table after propagation:\n");
2171 }
2172
2173 /* Perform basic checks on ACCESS to PARM described by DESC and all its
2174 children, return true if the parameter cannot be split, otherwise return
2175 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2176 the index of the entry BB in the function of PARM. */
2177
2178 static bool
2179 check_gensum_access (tree parm, gensum_param_desc *desc,
2180 gensum_param_access *access,
2181 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
2182 int entry_bb_index)
2183 {
2184 if (access->nonarg)
2185 {
2186 *only_calls = false;
2187 *nonarg_acc_size += access->size;
2188
2189 if (access->first_child)
2190 {
2191 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2192 return true;
2193 }
2194 }
2195 /* Do not decompose a non-BLKmode param in a way that would create
2196 BLKmode params. Especially for by-reference passing (thus,
2197 pointer-type param) this is hardly worthwhile. */
2198 if (DECL_MODE (parm) != BLKmode
2199 && TYPE_MODE (access->type) == BLKmode)
2200 {
2201 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2202 return true;
2203 }
2204
2205 if (desc->by_ref)
2206 {
2207 int idx = (entry_bb_index * by_ref_count + desc->deref_index);
2208 if ((access->offset + access->size) > bb_dereferences[idx])
2209 {
2210 disqualify_split_candidate (desc, "Would create a possibly "
2211 "illegal dereference in a caller.");
2212 return true;
2213 }
2214 }
2215
2216 for (gensum_param_access *ch = access->first_child;
2217 ch;
2218 ch = ch->next_sibling)
2219 if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls,
2220 entry_bb_index))
2221 return true;
2222
2223 return false;
2224 }
2225
2226 /* Copy data from FROM and all of its children to a vector of accesses in IPA
2227 descriptor DESC. */
2228
2229 static void
2230 copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2231 {
2232 param_access *to = ggc_cleared_alloc<param_access> ();
2233 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2234 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2235 to->unit_offset = from->offset / BITS_PER_UNIT;
2236 to->unit_size = from->size / BITS_PER_UNIT;
2237 to->type = from->type;
2238 to->alias_ptr_type = from->alias_ptr_type;
2239 to->certain = from->nonarg;
2240 to->reverse = from->reverse;
2241 vec_safe_push (desc->accesses, to);
2242
2243 for (gensum_param_access *ch = from->first_child;
2244 ch;
2245 ch = ch->next_sibling)
2246 copy_accesses_to_ipa_desc (ch, desc);
2247 }
2248
2249 /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect
2250 possible transformations and store information about them in the function
2251 summary. NODE, FUN and IFS are all various structures describing the
2252 currently analyzed function. */
2253
2254 static void
2255 process_scan_results (cgraph_node *node, struct function *fun,
2256 isra_func_summary *ifs,
2257 vec<gensum_param_desc> *param_descriptions)
2258 {
2259 bool check_pass_throughs = false;
2260 bool dereferences_propagated = false;
2261 tree parm = DECL_ARGUMENTS (node->decl);
2262 unsigned param_count = param_descriptions->length();
2263
2264 for (unsigned desc_index = 0;
2265 desc_index < param_count;
2266 desc_index++, parm = DECL_CHAIN (parm))
2267 {
2268 gensum_param_desc *desc = &(*param_descriptions)[desc_index];
2269 if (!desc->split_candidate)
2270 continue;
2271
2272 if (flag_checking)
2273 isra_verify_access_tree (desc->accesses);
2274
2275 if (!dereferences_propagated
2276 && desc->by_ref
2277 && desc->accesses)
2278 {
2279 propagate_dereference_distances (fun);
2280 dereferences_propagated = true;
2281 }
2282
2283 HOST_WIDE_INT nonarg_acc_size = 0;
2284 bool only_calls = true;
2285 bool check_failed = false;
2286
2287 int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
2288 for (gensum_param_access *acc = desc->accesses;
2289 acc;
2290 acc = acc->next_sibling)
2291 if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls,
2292 entry_bb_index))
2293 {
2294 check_failed = true;
2295 break;
2296 }
2297 if (check_failed)
2298 continue;
2299
2300 if (only_calls)
2301 desc->locally_unused = true;
2302
2303 HOST_WIDE_INT cur_param_size
2304 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
2305 HOST_WIDE_INT param_size_limit;
2306 if (!desc->by_ref || optimize_function_for_size_p (fun))
2307 param_size_limit = cur_param_size;
2308 else
2309 param_size_limit
2310 = opt_for_fn (node->decl,
2311 param_ipa_sra_ptr_growth_factor) * cur_param_size;
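/* For illustration (made-up numbers): with the default
   param_ipa_sra_ptr_growth_factor of 2 and a 64-bit pointer parameter,
   cur_param_size is 64 bits, so replacements totaling up to 16 bytes
   are allowed for a by-reference parameter when not optimizing for
   size. */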
2312 if (nonarg_acc_size > param_size_limit
2313 || (!desc->by_ref && nonarg_acc_size == param_size_limit))
2314 {
2315 disqualify_split_candidate (desc, "Would result in a too big set "
2316 "of replacements.");
2317 }
2318 else
2319 {
2320 /* create_parameter_descriptors makes sure unit sizes of all
2321 candidate parameters fit unsigned integers restricted to
2322 ISRA_ARG_SIZE_LIMIT. */
2323 desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
2324 desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
2325 if (desc->split_candidate && desc->ptr_pt_count)
2326 {
2327 gcc_assert (desc->by_ref);
2328 check_pass_throughs = true;
2329 }
2330 }
2331 }
2332
2333 /* When a pointer parameter is passed through to a callee in which it is
2334 only used to read one or a few items, we can attempt to transform it
2335 into obtaining and passing through the items instead of the pointer. But
2336 we must take extra care that 1) we do not introduce any segfault by moving
2337 dereferences above control flow and that 2) the data is not modified
2338 through an alias in this function. The IPA analysis must not introduce
2339 any access candidates unless it can prove both.
2340
2341 The current solution is very crude as it consists of ensuring that the
2342 call postdominates the entry BB and that the definition of VUSE of the
2343 call is a default definition. TODO: For non-recursive callees in the same
2344 compilation unit we could do better by doing analysis in topological order
2345 and looking into access candidates of callees, using their alias_ptr_types
2346 to attempt real AA. We could also use the maximum known dereferenced
2347 offset in this function at IPA level.
2348
2349 TODO: Measure the overhead and the effect of just being pessimistic.
2350 Maybe this is only -O3 material? */
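/* A made-up sketch of the two situations:

     void caller1 (int *p) { callee (p); }

   is safe to transform because the call postdominates the entry BB and its
   VUSE is the default definition, whereas in

     void caller2 (int *p, int flag) { if (flag) callee (p); }

   the call does not postdominate the entry BB, so loading *p in caller2
   could introduce a dereference the original program never performed.  */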
2351
2352 bool pdoms_calculated = false;
2353 if (check_pass_throughs)
2354 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2355 {
2356 gcall *call_stmt = cs->call_stmt;
2357 tree vuse = gimple_vuse (call_stmt);
2358
2359 /* If the callee is a const function, we don't get a VUSE. In such a
2360 case there will be no memory accesses in the called function (or the
2361 const attribute is wrong) and then we just don't care. */
2362 bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);
2363
2364 unsigned count = gimple_call_num_args (call_stmt);
2365 isra_call_summary *csum = call_sums->get_create (cs);
2366 csum->init_inputs (count);
2367 csum->m_before_any_store = uses_memory_as_obtained;
2368 for (unsigned argidx = 0; argidx < count; argidx++)
2369 {
2370 if (!csum->m_arg_flow[argidx].pointer_pass_through)
2371 continue;
2372 unsigned pidx
2373 = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
2374 gensum_param_desc *desc = &(*param_descriptions)[pidx];
2375 if (!desc->split_candidate)
2376 {
2377 csum->m_arg_flow[argidx].pointer_pass_through = false;
2378 continue;
2379 }
2380 if (!uses_memory_as_obtained)
2381 continue;
2382
2383 /* Post-dominator check placed last, hoping that it usually won't
2384 be needed. */
2385 if (!pdoms_calculated)
2386 {
2387 gcc_checking_assert (cfun);
2388 add_noreturn_fake_exit_edges ();
2389 connect_infinite_loops_to_exit ();
2390 calculate_dominance_info (CDI_POST_DOMINATORS);
2391 pdoms_calculated = true;
2392 }
2393 if (dominated_by_p (CDI_POST_DOMINATORS,
2394 gimple_bb (call_stmt),
2395 single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
2396 csum->m_arg_flow[argidx].safe_to_import_accesses = true;
2397 }
2398
2399 }
2400 if (pdoms_calculated)
2401 {
2402 free_dominance_info (CDI_POST_DOMINATORS);
2403 remove_fake_exit_edges ();
2404 }
2405
2406 /* TODO: Add early exit if we disqualified everything. This also requires
2407 that we either relax the restriction that
2408 ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
2409 or store the number of parameters to IPA-SRA function summary and use that
2410 when just removing params. */
2411
2412 vec_safe_reserve_exact (ifs->m_parameters, param_count);
2413 ifs->m_parameters->quick_grow_cleared (param_count);
2414 for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
2415 {
2416 gensum_param_desc *s = &(*param_descriptions)[desc_index];
2417 isra_param_desc *d = &(*ifs->m_parameters)[desc_index];
2418
2419 d->param_size_limit = s->param_size_limit;
2420 d->size_reached = s->nonarg_acc_size;
2421 d->locally_unused = s->locally_unused;
2422 d->split_candidate = s->split_candidate;
2423 d->by_ref = s->by_ref;
2424
2425 for (gensum_param_access *acc = s->accesses;
2426 acc;
2427 acc = acc->next_sibling)
2428 copy_accesses_to_ipa_desc (acc, d);
2429 }
2430
2431 if (dump_file)
2432 dump_isra_param_descriptors (dump_file, node->decl, ifs);
2433 }
2434
2435 /* Return true if there are any overlaps among certain accesses of DESC. If
2436 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
2437 too. DESC is assumed to be a split candidate that is not locally
2438 unused. */
2439
2440 static bool
2441 overlapping_certain_accesses_p (isra_param_desc *desc,
2442 bool *certain_access_present_p)
2443 {
2444 unsigned pclen = vec_safe_length (desc->accesses);
2445 for (unsigned i = 0; i < pclen; i++)
2446 {
2447 param_access *a1 = (*desc->accesses)[i];
2448
2449 if (!a1->certain)
2450 continue;
2451 if (certain_access_present_p)
2452 *certain_access_present_p = true;
2453 for (unsigned j = i + 1; j < pclen; j++)
2454 {
2455 param_access *a2 = (*desc->accesses)[j];
2456 if (a2->certain
2457 && a1->unit_offset < a2->unit_offset + a2->unit_size
2458 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2459 return true;
2460 }
2461 }
2462 return false;
2463 }
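/* For example (made-up offsets), two certain accesses covering unit offsets
   0..8 and 4..12 of the same parameter overlap and make the function above
   return true.  */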
2464
2465 /* Check for any overlaps of certain param accesses among splitting candidates
2466 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2467 check that used splitting candidates have at least one certain access. */
2468
2469 static void
2470 verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2471 {
2472 isra_func_summary *ifs = func_sums->get (node);
2473 if (!ifs || !ifs->m_candidate)
2474 return;
2475 unsigned param_count = vec_safe_length (ifs->m_parameters);
2476 for (unsigned pidx = 0; pidx < param_count; pidx++)
2477 {
2478 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2479 if (!desc->split_candidate || desc->locally_unused)
2480 continue;
2481
2482 bool certain_access_present = !certain_must_exist;
2483 if (overlapping_certain_accesses_p (desc, &certain_access_present))
2484 internal_error ("Function %qs, parameter %u, has IPA-SRA accesses "
2485 "which overlap", node->dump_name (), pidx);
2486 if (!certain_access_present)
2487 internal_error ("Function %s, parameter %u, is used but does not "
2488 "have any certain IPA-SRA access",
2489 node->dump_name (), pidx);
2490 }
2491 }
2492
2493 /* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
2494 create a summary structure describing IPA-SRA opportunities and constraints
2495 in it. */
2496
2497 static void
2498 ipa_sra_summarize_function (cgraph_node *node)
2499 {
2500 if (dump_file)
2501 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
2502 node->order);
2503 if (!ipa_sra_preliminary_function_checks (node))
2504 return;
2505 gcc_obstack_init (&gensum_obstack);
2506 isra_func_summary *ifs = func_sums->get_create (node);
2507 ifs->m_candidate = true;
2508 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
2509 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
2510
2511 decl2desc = new hash_map<tree, gensum_param_desc *>;
2512 unsigned count = 0;
2513 for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
2514 count++;
2515
2516 if (count > 0)
2517 {
2518 auto_vec<gensum_param_desc, 16> param_descriptions (count);
2519 param_descriptions.reserve_exact (count);
2520 param_descriptions.quick_grow_cleared (count);
2521
2522 bool cfun_pushed = false;
2523 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
2524 if (create_parameter_descriptors (node, &param_descriptions))
2525 {
2526 push_cfun (fun);
2527 cfun_pushed = true;
2528 final_bbs = BITMAP_ALLOC (NULL);
2529 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
2530 by_ref_count
2531 * last_basic_block_for_fn (fun));
2532 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2533 scan_function (node, fun);
2534
2535 if (dump_file)
2536 {
2537 dump_gensum_param_descriptors (dump_file, node->decl,
2538 &param_descriptions);
2539 fprintf (dump_file, "----------------------------------------\n");
2540 }
2541 }
2542 process_scan_results (node, fun, ifs, &param_descriptions);
2543
2544 if (cfun_pushed)
2545 pop_cfun ();
2546 if (bb_dereferences)
2547 {
2548 free (bb_dereferences);
2549 bb_dereferences = NULL;
2550 BITMAP_FREE (final_bbs);
2551 final_bbs = NULL;
2552 }
2553 }
2554 isra_analyze_all_outgoing_calls (node);
2555
2556 delete decl2desc;
2557 decl2desc = NULL;
2558 obstack_free (&gensum_obstack, NULL);
2559 if (dump_file)
2560 fprintf (dump_file, "\n\n");
2561 if (flag_checking)
2562 verify_splitting_accesses (node, false);
2563 return;
2564 }
2565
2566 /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2567 this compilation unit and create summary structures describing IPA-SRA
2568 opportunities and constraints in them. */
2569
2570 static void
2571 ipa_sra_generate_summary (void)
2572 {
2573 struct cgraph_node *node;
2574
2575 gcc_checking_assert (!func_sums);
2576 gcc_checking_assert (!call_sums);
2577 func_sums
2578 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2579 ipa_sra_function_summaries (symtab, true));
2580 call_sums = new ipa_sra_call_summaries (symtab);
2581
2582 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2583 ipa_sra_summarize_function (node);
2584 return;
2585 }
2586
2587 /* Write intraprocedural analysis information about call graph edge E into a
2588 stream for LTO WPA. */
2589
2590 static void
2591 isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2592 {
2593 isra_call_summary *csum = call_sums->get (e);
2594 unsigned input_count = csum->m_arg_flow.length ();
2595 streamer_write_uhwi (ob, input_count);
2596 for (unsigned i = 0; i < input_count; i++)
2597 {
2598 isra_param_flow *ipf = &csum->m_arg_flow[i];
2599 streamer_write_hwi (ob, ipf->length);
2600 bitpack_d bp = bitpack_create (ob->main_stream);
2601 for (int j = 0; j < ipf->length; j++)
2602 bp_pack_value (&bp, ipf->inputs[j], 8);
2603 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2604 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2605 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
2606 streamer_write_bitpack (&bp);
2607 streamer_write_uhwi (ob, ipf->unit_offset);
2608 streamer_write_uhwi (ob, ipf->unit_size);
2609 }
2610 bitpack_d bp = bitpack_create (ob->main_stream);
2611 bp_pack_value (&bp, csum->m_return_ignored, 1);
2612 bp_pack_value (&bp, csum->m_return_returned, 1);
2613 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
2614 bp_pack_value (&bp, csum->m_before_any_store, 1);
2615 streamer_write_bitpack (&bp);
2616 }
2617
2618 /* Write intraprocedural analysis information about NODE and all of its outgoing
2619 edges into a stream for LTO WPA. */
2620
2621 static void
2622 isra_write_node_summary (output_block *ob, cgraph_node *node)
2623 {
2624 isra_func_summary *ifs = func_sums->get (node);
2625 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2626 int node_ref = lto_symtab_encoder_encode (encoder, node);
2627 streamer_write_uhwi (ob, node_ref);
2628
2629 unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
2630 streamer_write_uhwi (ob, param_desc_count);
2631 for (unsigned i = 0; i < param_desc_count; i++)
2632 {
2633 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2634 unsigned access_count = vec_safe_length (desc->accesses);
2635 streamer_write_uhwi (ob, access_count);
2636 for (unsigned j = 0; j < access_count; j++)
2637 {
2638 param_access *acc = (*desc->accesses)[j];
2639 stream_write_tree (ob, acc->type, true);
2640 stream_write_tree (ob, acc->alias_ptr_type, true);
2641 streamer_write_uhwi (ob, acc->unit_offset);
2642 streamer_write_uhwi (ob, acc->unit_size);
2643 bitpack_d bp = bitpack_create (ob->main_stream);
2644 bp_pack_value (&bp, acc->certain, 1);
2645 bp_pack_value (&bp, acc->reverse, 1);
2646 streamer_write_bitpack (&bp);
2647 }
2648 streamer_write_uhwi (ob, desc->param_size_limit);
2649 streamer_write_uhwi (ob, desc->size_reached);
2650 bitpack_d bp = bitpack_create (ob->main_stream);
2651 bp_pack_value (&bp, desc->locally_unused, 1);
2652 bp_pack_value (&bp, desc->split_candidate, 1);
2653 bp_pack_value (&bp, desc->by_ref, 1);
2654 streamer_write_bitpack (&bp);
2655 }
2656 bitpack_d bp = bitpack_create (ob->main_stream);
2657 bp_pack_value (&bp, ifs->m_candidate, 1);
2658 bp_pack_value (&bp, ifs->m_returns_value, 1);
2659 bp_pack_value (&bp, ifs->m_return_ignored, 1);
2660 gcc_assert (!ifs->m_queued);
2661 streamer_write_bitpack (&bp);
2662
2663 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2664 isra_write_edge_summary (ob, e);
2665 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2666 isra_write_edge_summary (ob, e);
2667 }
2668
2669 /* Write intraprocedural analysis information into a stream for LTO WPA. */
2670
2671 static void
2672 ipa_sra_write_summary (void)
2673 {
2674 if (!func_sums || !call_sums)
2675 return;
2676
2677 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2678 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2679 ob->symbol = NULL;
2680
2681 unsigned int count = 0;
2682 lto_symtab_encoder_iterator lsei;
2683 for (lsei = lsei_start_function_in_partition (encoder);
2684 !lsei_end_p (lsei);
2685 lsei_next_function_in_partition (&lsei))
2686 {
2687 cgraph_node *node = lsei_cgraph_node (lsei);
2688 if (node->has_gimple_body_p ()
2689 && func_sums->get (node) != NULL)
2690 count++;
2691 }
2692 streamer_write_uhwi (ob, count);
2693
2694 /* Process all of the functions. */
2695 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2696 lsei_next_function_in_partition (&lsei))
2697 {
2698 cgraph_node *node = lsei_cgraph_node (lsei);
2699 if (node->has_gimple_body_p ()
2700 && func_sums->get (node) != NULL)
2701 isra_write_node_summary (ob, node);
2702 }
2703 streamer_write_char_stream (ob->main_stream, 0);
2704 produce_asm (ob, NULL);
2705 destroy_output_block (ob);
2706 }
2707
2708 /* Read intraprocedural analysis information about call graph edge CS from a
2709 stream for LTO WPA. */
2710
2711 static void
2712 isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2713 {
2714 isra_call_summary *csum = call_sums->get_create (cs);
2715 unsigned input_count = streamer_read_uhwi (ib);
2716 csum->init_inputs (input_count);
2717 for (unsigned i = 0; i < input_count; i++)
2718 {
2719 isra_param_flow *ipf = &csum->m_arg_flow[i];
2720 ipf->length = streamer_read_hwi (ib);
2721 bitpack_d bp = streamer_read_bitpack (ib);
2722 for (int j = 0; j < ipf->length; j++)
2723 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2724 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2725 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2726 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
2727 ipf->unit_offset = streamer_read_uhwi (ib);
2728 ipf->unit_size = streamer_read_uhwi (ib);
2729 }
2730 bitpack_d bp = streamer_read_bitpack (ib);
2731 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2732 csum->m_return_returned = bp_unpack_value (&bp, 1);
2733 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
2734 csum->m_before_any_store = bp_unpack_value (&bp, 1);
2735 }
2736
2737 /* Read intraprocedural analysis information about NODE and all of its
2738 outgoing edges from a stream for LTO WPA. */
2739
2740 static void
2741 isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2742 struct data_in *data_in)
2743 {
2744 isra_func_summary *ifs = func_sums->get_create (node);
2745 unsigned param_desc_count = streamer_read_uhwi (ib);
2746 if (param_desc_count > 0)
2747 {
2748 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2749 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2750 }
2751 for (unsigned i = 0; i < param_desc_count; i++)
2752 {
2753 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2754 unsigned access_count = streamer_read_uhwi (ib);
2755 for (unsigned j = 0; j < access_count; j++)
2756 {
2757 param_access *acc = ggc_cleared_alloc<param_access> ();
2758 acc->type = stream_read_tree (ib, data_in);
2759 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2760 acc->unit_offset = streamer_read_uhwi (ib);
2761 acc->unit_size = streamer_read_uhwi (ib);
2762 bitpack_d bp = streamer_read_bitpack (ib);
2763 acc->certain = bp_unpack_value (&bp, 1);
2764 acc->reverse = bp_unpack_value (&bp, 1);
2765 vec_safe_push (desc->accesses, acc);
2766 }
2767 desc->param_size_limit = streamer_read_uhwi (ib);
2768 desc->size_reached = streamer_read_uhwi (ib);
2769 bitpack_d bp = streamer_read_bitpack (ib);
2770 desc->locally_unused = bp_unpack_value (&bp, 1);
2771 desc->split_candidate = bp_unpack_value (&bp, 1);
2772 desc->by_ref = bp_unpack_value (&bp, 1);
2773 }
2774 bitpack_d bp = streamer_read_bitpack (ib);
2775 ifs->m_candidate = bp_unpack_value (&bp, 1);
2776 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2777 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2778 ifs->m_queued = 0;
2779
2780 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2781 isra_read_edge_summary (ib, e);
2782 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2783 isra_read_edge_summary (ib, e);
2784 }
2785
2786 /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
2787 data DATA. TODO: This function was copied almost verbatim from ipa-prop.c;
2788 it should be possible to unify them somehow. */
2789
2790 static void
2791 isra_read_summary_section (struct lto_file_decl_data *file_data,
2792 const char *data, size_t len)
2793 {
2794 const struct lto_function_header *header =
2795 (const struct lto_function_header *) data;
2796 const int cfg_offset = sizeof (struct lto_function_header);
2797 const int main_offset = cfg_offset + header->cfg_size;
2798 const int string_offset = main_offset + header->main_size;
2799 struct data_in *data_in;
2800 unsigned int i;
2801 unsigned int count;
2802
2803 lto_input_block ib_main ((const char *) data + main_offset,
2804 header->main_size, file_data->mode_table);
2805
2806 data_in =
2807 lto_data_in_create (file_data, (const char *) data + string_offset,
2808 header->string_size, vNULL);
2809 count = streamer_read_uhwi (&ib_main);
2810
2811 for (i = 0; i < count; i++)
2812 {
2813 unsigned int index;
2814 struct cgraph_node *node;
2815 lto_symtab_encoder_t encoder;
2816
2817 index = streamer_read_uhwi (&ib_main);
2818 encoder = file_data->symtab_node_encoder;
2819 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2820 index));
2821 gcc_assert (node->definition);
2822 isra_read_node_info (&ib_main, node, data_in);
2823 }
2824 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2825 len);
2826 lto_data_in_delete (data_in);
2827 }
2828
2829 /* Read intraprocedural analysis information from a stream for LTO WPA. */
2830
2831 static void
2832 ipa_sra_read_summary (void)
2833 {
2834 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2835 struct lto_file_decl_data *file_data;
2836 unsigned int j = 0;
2837
2838 gcc_checking_assert (!func_sums);
2839 gcc_checking_assert (!call_sums);
2840 func_sums
2841 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
2842 ipa_sra_function_summaries (symtab, true));
2843 call_sums = new ipa_sra_call_summaries (symtab);
2844
2845 while ((file_data = file_data_vec[j++]))
2846 {
2847 size_t len;
2848 const char *data
2849 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
2850 if (data)
2851 isra_read_summary_section (file_data, data, len);
2852 }
2853 }
2854
2855 /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */
2856
2857 static void
2858 ipa_sra_dump_all_summaries (FILE *f)
2859 {
2860 cgraph_node *node;
2861 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2862 {
2863 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2864
2865 isra_func_summary *ifs = func_sums->get (node);
2866 if (!ifs)
2867 {
2868 fprintf (f, " Function does not have any associated IPA-SRA "
2869 "summary\n");
2870 continue;
2871 }
2872 if (!ifs->m_candidate)
2873 {
2874 fprintf (f, " Not a candidate function\n");
2875 continue;
2876 }
2877 if (ifs->m_returns_value)
2878 fprintf (f, " Returns value\n");
2879 if (vec_safe_is_empty (ifs->m_parameters))
2880 fprintf (f, " No parameter information. \n");
2881 else
2882 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
2883 {
2884 fprintf (f, " Descriptor for parameter %i:\n", i);
2885 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]);
2886 }
2887 fprintf (f, "\n");
2888
2889 struct cgraph_edge *cs;
2890 for (cs = node->callees; cs; cs = cs->next_callee)
2891 {
2892 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
2893 cs->callee->dump_name ());
2894 isra_call_summary *csum = call_sums->get (cs);
2895 if (csum)
2896 csum->dump (f);
2897 else
2898 fprintf (f, " Call summary is MISSING!\n");
2899 }
2900
2901 }
2902 fprintf (f, "\n\n");
2903 }
2904
2905 /* Perform function-scope viability tests that can only be made at the IPA
2906 level and return false if the function is deemed unsuitable for IPA-SRA. */
2907
2908 static bool
2909 ipa_sra_ipa_function_checks (cgraph_node *node)
2910 {
2911 if (!node->can_be_local_p ())
2912 {
2913 if (dump_file)
2914 fprintf (dump_file, "Function %s disqualified because it cannot be "
2915 "made local.\n", node->dump_name ());
2916 return false;
2917 }
2918 if (!node->can_change_signature)
2919 {
2920 if (dump_file)
2921 fprintf (dump_file, "Function can not change signature.\n");
2922 return false;
2923 }
2924
2925 return true;
2926 }
2927
2928 /* Issues found out by check_for_caller_issues. */
2929
2930 struct caller_issues
2931 {
2932 /* The candidate being considered. */
2933 cgraph_node *candidate;
2934 /* There is a thunk among callers. */
2935 bool thunk;
2936 /* Call site with no available information. */
2937 bool unknown_callsite;
2938 /* Call from outside the candidate's comdat group. */
2939 bool call_from_outside_comdat;
2940 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
2941 bool bit_aligned_aggregate_argument;
2942 };
2943
2944 /* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
2945 that apply. */
2946
2947 static bool
2948 check_for_caller_issues (struct cgraph_node *node, void *data)
2949 {
2950 struct caller_issues *issues = (struct caller_issues *) data;
2951
2952 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
2953 {
2954 if (cs->caller->thunk.thunk_p)
2955 {
2956 issues->thunk = true;
2957 /* TODO: We should be able to process at least some types of
2958 thunks. */
2959 return true;
2960 }
2961 if (issues->candidate->calls_comdat_local
2962 && issues->candidate->same_comdat_group
2963 && !issues->candidate->in_same_comdat_group_p (cs->caller))
2964 {
2965 issues->call_from_outside_comdat = true;
2966 return true;
2967 }
2968
2969 isra_call_summary *csum = call_sums->get (cs);
2970 if (!csum)
2971 {
2972 issues->unknown_callsite = true;
2973 return true;
2974 }
2975
2976 if (csum->m_bit_aligned_arg)
2977 issues->bit_aligned_aggregate_argument = true;
2978 }
2979 return false;
2980 }
2981
2982 /* Look at all incoming edges to NODE, including aliases and thunks and look
2983 for problems. Return true if NODE's type should not be modified at all. */
2984
2985 static bool
2986 check_all_callers_for_issues (cgraph_node *node)
2987 {
2988 struct caller_issues issues;
2989 memset (&issues, 0, sizeof (issues));
2990 issues.candidate = node;
2991
2992 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
2993 if (issues.unknown_callsite)
2994 {
2995 if (dump_file && (dump_flags & TDF_DETAILS))
2996 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
2997 "all modifications.\n", node->dump_name ());
2998 return true;
2999 }
3000 /* TODO: We should be able to process at least some types of thunks. */
3001 if (issues.thunk)
3002 {
3003 if (dump_file && (dump_flags & TDF_DETAILS))
3004 fprintf (dump_file, "A call of %s is through thunk, which are not"
3005 " handled yet. Disabling all modifications.\n",
3006 node->dump_name ());
3007 return true;
3008 }
3009 if (issues.call_from_outside_comdat)
3010 {
3011 if (dump_file)
3012 fprintf (dump_file, "Function would become private comdat called "
3013 "outside of its comdat group.\n");
3014 return true;
3015 }
3016
3017 if (issues.bit_aligned_aggregate_argument)
3018 {
3019 /* Let's only remove parameters/return values from such functions.
3020 TODO: We could only prevent splitting the problematic parameters if
3021 anybody thinks it is worth it. */
3022 if (dump_file && (dump_flags & TDF_DETAILS))
3023 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
3024 " disabling parameter splitting.\n", node->dump_name ());
3025
3026 isra_func_summary *ifs = func_sums->get (node);
3027 gcc_checking_assert (ifs);
3028 unsigned param_count = vec_safe_length (ifs->m_parameters);
3029 for (unsigned i = 0; i < param_count; i++)
3030 (*ifs->m_parameters)[i].split_candidate = false;
3031 }
3032 return false;
3033 }
3034
3035 /* Find the access with corresponding OFFSET and SIZE among accesses in
3036 PARAM_DESC and return it or NULL if such an access is not there. */
3037
3038 static param_access *
3039 find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3040 {
3041 unsigned pclen = vec_safe_length (param_desc->accesses);
3042
3043 /* The search is linear but the number of stored accesses is bound by
3044 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3045
3046 for (unsigned i = 0; i < pclen; i++)
3047 if ((*param_desc->accesses)[i]->unit_offset == offset
3048 && (*param_desc->accesses)[i]->unit_size == size)
3049 return (*param_desc->accesses)[i];
3050
3051 return NULL;
3052 }
3053
3054 /* Return true iff the total size of definite replacements, SIZE, would
3055 violate the limit set for it in DESC. */
3056
3057 static bool
3058 size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3059 {
3060 unsigned limit = desc->param_size_limit;
3061 if (size > limit
3062 || (!desc->by_ref && size == limit))
3063 return true;
3064 return false;
3065 }
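/* A note on the asymmetry above: for a by-value parameter, replacements
   that reach the full limit (its own size) would be no smaller than
   passing the original aggregate, while a by-reference parameter may use
   its growth-factor-scaled limit completely.  */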
3066
3067 /* Increase reached size of DESC by SIZE or disqualify it if it would violate
3068 the set limit. IDX is the parameter number which is dumped when
3069 disqualifying. */
3070
3071 static void
3072 bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3073 {
3074 unsigned after = desc->size_reached + size;
3075 if (size_would_violate_limit_p (desc, after))
3076 {
3077 if (dump_file && (dump_flags & TDF_DETAILS))
3078 fprintf (dump_file, " ...size limit reached, disqualifying "
3079 "candidate parameter %u\n", idx);
3080 desc->split_candidate = false;
3081 return;
3082 }
3083 desc->size_reached = after;
3084 }
3085
3086 /* Take all actions required to deal with an edge CS that represents a call to
3087 an unknown or un-analyzed function, for both parameter removal and
3088 splitting. */
3089
3090 static void
3091 process_edge_to_unknown_caller (cgraph_edge *cs)
3092 {
3093 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3094 gcc_checking_assert (from_ifs);
3095 isra_call_summary *csum = call_sums->get (cs);
3096
3097 if (dump_file && (dump_flags & TDF_DETAILS))
3098 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3099 cs->caller->dump_name ());
3100
3101 unsigned args_count = csum->m_arg_flow.length ();
3102 for (unsigned i = 0; i < args_count; i++)
3103 {
3104 isra_param_flow *ipf = &csum->m_arg_flow[i];
3105
3106 if (ipf->pointer_pass_through)
3107 {
3108 isra_param_desc *param_desc
3109 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3110 param_desc->locally_unused = false;
3111 param_desc->split_candidate = false;
3112 continue;
3113 }
3114 if (ipf->aggregate_pass_through)
3115 {
3116 unsigned idx = get_single_param_flow_source (ipf);
3117 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3118
3119 param_desc->locally_unused = false;
3120 if (!param_desc->split_candidate)
3121 continue;
3122 gcc_assert (!param_desc->by_ref);
3123 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3124 ipf->unit_size);
3125 gcc_checking_assert (pacc);
3126 pacc->certain = true;
3127 if (overlapping_certain_accesses_p (param_desc, NULL))
3128 {
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fprintf (dump_file, " ...leading to overlap, "
3131 " disqualifying candidate parameter %u\n",
3132 idx);
3133 param_desc->split_candidate = false;
3134 }
3135 else
3136 bump_reached_size (param_desc, pacc->unit_size, idx);
3137 ipf->aggregate_pass_through = false;
3138 continue;
3139 }
3140
3141 for (int j = 0; j < ipf->length; j++)
3142 {
3143 int input_idx = ipf->inputs[j];
3144 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3145 }
3146 }
3147 }
3148
3149 /* Propagate parameter removal information through cross-SCC edge CS,
3150 i.e. decrease the use count in the caller parameter descriptor for each use
3151 in this call. */
3152
3153 static void
3154 param_removal_cross_scc_edge (cgraph_edge *cs)
3155 {
3156 enum availability availability;
3157 cgraph_node *callee = cs->callee->function_symbol (&availability);
3158 isra_func_summary *to_ifs = func_sums->get (callee);
3159 if (!to_ifs || !to_ifs->m_candidate
3160 || (availability < AVAIL_AVAILABLE)
3161 || vec_safe_is_empty (to_ifs->m_parameters))
3162 {
3163 process_edge_to_unknown_caller (cs);
3164 return;
3165 }
3166 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3167 gcc_checking_assert (from_ifs);
3168
3169 isra_call_summary *csum = call_sums->get (cs);
3170 unsigned args_count = csum->m_arg_flow.length ();
3171 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3172
3173 for (unsigned i = 0; i < args_count; i++)
3174 {
3175 bool unused_in_callee;
3176 if (i < param_count)
3177 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3178 else
3179 unused_in_callee = false;
3180
3181 if (!unused_in_callee)
3182 {
3183 isra_param_flow *ipf = &csum->m_arg_flow[i];
3184 for (int j = 0; j < ipf->length; j++)
3185 {
3186 int input_idx = ipf->inputs[j];
3187 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3188 }
3189 }
3190 }
3191 }
3192
3193 /* Unless it is already there, push NODE which is also described by IFS to
3194 STACK. */
3195
3196 static void
3197 isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3198 vec<cgraph_node *> *stack)
3199 {
3200 if (!ifs->m_queued)
3201 {
3202 ifs->m_queued = true;
3203 stack->safe_push (node);
3204 }
3205 }
3206
3207 /* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3208 used and push CALLER on STACK. */
3209
3210 static void
3211 isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3212 cgraph_node *caller, vec<cgraph_node *> *stack)
3213 {
3214 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3215 {
3216 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3217 isra_push_node_to_stack (caller, from_ifs, stack);
3218 }
3219 }
3220
3221
3222 /* Propagate information that any parameter is not used only locally within a
3223 SCC across CS to the caller, which must be in the same SCC as the
3224 callee. Push any callers that need to be re-processed to STACK. */
3225
3226 static void
3227 propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3228 {
3229 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3230 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3231 return;
3232
3233 isra_call_summary *csum = call_sums->get (cs);
3234 gcc_checking_assert (csum);
3235 unsigned args_count = csum->m_arg_flow.length ();
3236 enum availability availability;
3237 cgraph_node *callee = cs->callee->function_symbol (&availability);
3238 isra_func_summary *to_ifs = func_sums->get (callee);
3239
3240 unsigned param_count
3241 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3242 ? vec_safe_length (to_ifs->m_parameters) : 0;
3243 for (unsigned i = 0; i < args_count; i++)
3244 {
3245 if (i < param_count
3246 && (*to_ifs->m_parameters)[i].locally_unused)
3247 continue;
3248
3249 /* The argument is needed in the callee, so we must mark the parameter as
3250 used also in the caller and its callers within this SCC. */
3251 isra_param_flow *ipf = &csum->m_arg_flow[i];
3252 for (int j = 0; j < ipf->length; j++)
3253 {
3254 int input_idx = ipf->inputs[j];
3255 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3256 }
3257 }
3258 }
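/* For example (a made-up SCC): if functions A and B call each other
   recursively and B also passes the value it received on to an
   unanalyzable function, the "used" property reaches A's parameter
   descriptor here and A is pushed onto the work-list stack, so that its
   own callers within the SCC are revisited.  */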
3259
3260 /* Propagate information that any parameter is not used only locally within a
3261 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
3262 same SCC. Push any callers that need to be re-processed to STACK. */
3263
3264 static bool
3265 propagate_used_to_scc_callers (cgraph_node *node, void *data)
3266 {
3267 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3268 cgraph_edge *cs;
3269 for (cs = node->callers; cs; cs = cs->next_caller)
3270 if (ipa_edge_within_scc (cs))
3271 propagate_used_across_scc_edge (cs, stack);
3272 return false;
3273 }
3274
3275 /* Return true iff all certain accesses in ARG_DESC are also present as
3276 certain accesses in PARAM_DESC. */
3277
3278 static bool
3279 all_callee_accesses_present_p (isra_param_desc *param_desc,
3280 isra_param_desc *arg_desc)
3281 {
3282 unsigned aclen = vec_safe_length (arg_desc->accesses);
3283 for (unsigned j = 0; j < aclen; j++)
3284 {
3285 param_access *argacc = (*arg_desc->accesses)[j];
3286 if (!argacc->certain)
3287 continue;
3288 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3289 argacc->unit_size);
3290 if (!pacc
3291 || !pacc->certain
3292 || !types_compatible_p (argacc->type, pacc->type))
3293 return false;
3294 }
3295 return true;
3296 }
3297
3298 /* Type internal to function pull_accesses_from_callee. Unfortunately gcc 4.8
3299 does not allow instantiating an auto_vec with a type defined within a
3300 function so it is a global type. */
3301 enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3302
3303
3304 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
3305 (which belongs to CALLER) if they would not violate some constraint there.
3306 If successful, return NULL, otherwise return the string reason for failure
3307 (which can be written to the dump file). DELTA_OFFSET is the known offset
3308 of the actual argument within the formal parameter (so of ARG_DESC within
3309 PARAM_DESC), ARG_SIZE is the size of the actual argument or zero, if not
3310 known. In case of success, set *CHANGE_P to true if propagation actually
3311 changed anything. */
3312
3313 static const char *
3314 pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
3315 isra_param_desc *arg_desc,
3316 unsigned delta_offset, unsigned arg_size,
3317 bool *change_p)
3318 {
3319 unsigned pclen = vec_safe_length (param_desc->accesses);
3320 unsigned aclen = vec_safe_length (arg_desc->accesses);
3321 unsigned prop_count = 0;
3322 unsigned prop_size = 0;
3323 bool change = false;
3324
3325 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3326 for (unsigned j = 0; j < aclen; j++)
3327 {
3328 param_access *argacc = (*arg_desc->accesses)[j];
3329 prop_kinds.safe_push (ACC_PROP_DONT);
3330
      if (arg_size > 0
          && argacc->unit_offset + argacc->unit_size > arg_size)
        return "callee access outside size boundary";

      if (!argacc->certain)
        continue;

      unsigned offset = argacc->unit_offset + delta_offset;
      /* Given that accesses are initially stored according to increasing
         offset and decreasing size in case of equal offsets, the following
         searches could be written more efficiently if we kept the ordering
         when copying.  But the number of accesses is capped at
         PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
         messy quickly, so let's improve on that only if necessary.  */

      bool exact_match = false;
      for (unsigned i = 0; i < pclen; i++)
        {
          /* Check for overlaps.  */
          param_access *pacc = (*param_desc->accesses)[i];
          if (pacc->unit_offset == offset
              && pacc->unit_size == argacc->unit_size)
            {
              if (argacc->alias_ptr_type != pacc->alias_ptr_type
                  || !types_compatible_p (argacc->type, pacc->type)
                  || argacc->reverse != pacc->reverse)
                return "propagated access types would not match existing ones";

              exact_match = true;
              if (!pacc->certain)
                {
                  prop_kinds[j] = ACC_PROP_CERTAIN;
                  prop_size += argacc->unit_size;
                  change = true;
                }
              continue;
            }

          if (offset < pacc->unit_offset + pacc->unit_size
              && offset + argacc->unit_size > pacc->unit_offset)
            {
              /* An overlap is never acceptable when the existing caller
                 access is certain (a load); with an uncertain one it is
                 tolerable only if the propagated access fits entirely within
                 it.  */
              if (pacc->certain
                  || offset < pacc->unit_offset
                  || (offset + argacc->unit_size
                      > pacc->unit_offset + pacc->unit_size))
                return "a propagated access would conflict in caller";
            }
        }

      if (!exact_match)
        {
          prop_kinds[j] = ACC_PROP_COPY;
          prop_count++;
          prop_size += argacc->unit_size;
          change = true;
        }
    }

  if (!change)
    return NULL;

  if ((prop_count + pclen
       > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
      || size_would_violate_limit_p (param_desc,
                                     param_desc->size_reached + prop_size))
    return "propagating accesses would violate the count or size limit";

  *change_p = true;
  for (unsigned j = 0; j < aclen; j++)
    {
      if (prop_kinds[j] == ACC_PROP_COPY)
        {
          param_access *argacc = (*arg_desc->accesses)[j];

          param_access *copy = ggc_cleared_alloc<param_access> ();
          copy->unit_offset = argacc->unit_offset + delta_offset;
          copy->unit_size = argacc->unit_size;
          copy->type = argacc->type;
          copy->alias_ptr_type = argacc->alias_ptr_type;
          copy->certain = true;
          copy->reverse = argacc->reverse;
          vec_safe_push (param_desc->accesses, copy);
        }
      else if (prop_kinds[j] == ACC_PROP_CERTAIN)
        {
          param_access *argacc = (*arg_desc->accesses)[j];
          param_access *csp
            = find_param_access (param_desc,
                                 argacc->unit_offset + delta_offset,
                                 argacc->unit_size);
          csp->certain = true;
        }
    }

  param_desc->size_reached += prop_size;

  return NULL;
}
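
/* An illustration with made-up types of what the function above does:

     struct In { int x; int y; };
     struct Out { int tag; struct In in; };
     static void callee (struct In *p) { use (p->x); }
     static void caller (struct Out *o) { callee (&o->in); }

   If the callee's certain access to *P is {offset 0, size 4} and &o->in lies
   at unit offset 4 within *O, the access is entered into the caller's
   descriptor at offset 0 + DELTA_OFFSET = 4, unless it would conflict with an
   existing certain access there or push the descriptor over the
   param_ipa_sra_max_replacements count or the size limit.  */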

/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs
   have been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through flags in call summaries which do not need to be
   processed again if this CS is revisited when iterating while changes are
   propagated within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
             cs->caller->dump_name (), callee->dump_name ());

  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (arg_desc->locally_unused)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  ->%u: unused in callee\n", i);
          ipf->pointer_pass_through = false;
          continue;
        }

      if (ipf->pointer_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;
          gcc_assert (param_desc->by_ref);

          if (!arg_desc->split_candidate || !arg_desc->by_ref)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  %u->%u: not candidate or not by "
                         "reference in callee\n", idx, i);
              param_desc->split_candidate = false;
              ipf->pointer_pass_through = false;
              res = true;
            }
          else if (!ipf->safe_to_import_accesses)
            {
              if (!csum->m_before_any_store
                  || !all_callee_accesses_present_p (param_desc, arg_desc))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: cannot import accesses.\n",
                             idx, i);
                  param_desc->split_candidate = false;
                  ipf->pointer_pass_through = false;
                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: verified callee accesses "
                             "present.\n", idx, i);
                  if (cross_scc)
                    ipf->pointer_pass_through = false;
                }
            }
          else
            {
              const char *pull_failure
                = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
                                             0, 0, &res);
              if (pull_failure)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: by_ref access pull "
                             "failed: %s.\n", idx, i, pull_failure);
                  param_desc->split_candidate = false;
                  ipf->pointer_pass_through = false;
                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: by_ref access pull "
                             "succeeded.\n", idx, i);
                  if (cross_scc)
                    ipf->pointer_pass_through = false;
                }
            }
        }
      else if (ipf->aggregate_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;
          gcc_assert (!param_desc->by_ref);
          param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
                                                  ipf->unit_size);
          gcc_checking_assert (pacc);

          if (pacc->certain)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  %u->%u: already certain\n", idx, i);
              ipf->aggregate_pass_through = false;
            }
          else if (!arg_desc->split_candidate || arg_desc->by_ref)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  %u->%u: not candidate or by "
                         "reference in callee\n", idx, i);

              pacc->certain = true;
              if (overlapping_certain_accesses_p (param_desc, NULL))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "    ...leading to overlap, "
                             " disqualifying candidate parameter %u\n",
                             idx);
                  param_desc->split_candidate = false;
                }
              else
                bump_reached_size (param_desc, pacc->unit_size, idx);

              ipf->aggregate_pass_through = false;
              res = true;
            }
          else
            {
              const char *pull_failure
                = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
                                             ipf->unit_offset,
                                             ipf->unit_size, &res);
              if (pull_failure)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: arg access pull "
                             "failed: %s.\n", idx, i, pull_failure);

                  ipf->aggregate_pass_through = false;
                  pacc->certain = true;

                  if (overlapping_certain_accesses_p (param_desc, NULL))
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file, "    ...leading to overlap, "
                                 " disqualifying candidate parameter %u\n",
                                 idx);
                      param_desc->split_candidate = false;
                    }
                  else
                    bump_reached_size (param_desc, pacc->unit_size, idx);

                  res = true;
                }
              else
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "  %u->%u: arg access pull "
                             "succeeded.\n", idx, i);
                  if (cross_scc)
                    ipf->aggregate_pass_through = false;
                }
            }
        }
    }

  /* Handle argument-parameter count mismatches.  */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
        {
          int idx = get_single_param_flow_source (ipf);
          ipf->pointer_pass_through = false;
          ipf->aggregate_pass_through = false;
          isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
          if (!param_desc->split_candidate)
            continue;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  %u->%u: no corresponding formal "
                     "parameter\n", idx, i);
          param_desc->split_candidate = false;
          res = true;
        }
    }
  return res;
}
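
/* A minimal hypothetical scenario for the propagation above:

     static int callee (int *p) { return *p; }
     static int caller (int *p) { return callee (p); }

   CALLER merely passes P on, so its call summary records a
   pointer_pass_through for the argument.  Pulling the callee's certain access
   {offset 0, size 4} into the caller's descriptor of P allows both functions
   to be rewritten to take the pointed-to int by value.  */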

/* Worker for call_for_symbol_and_aliases: look at all callers of NODE and
   return false if each of them either ignores the return value, or comes from
   the same SCC and uses the return value only to compute its own return
   value; otherwise return true.  */

static bool
retval_used_p (cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      isra_call_summary *csum = call_sums->get (cs);
      gcc_checking_assert (csum);
      if (csum->m_return_ignored)
        continue;
      if (!csum->m_return_returned)
        return true;

      isra_func_summary *from_ifs = func_sums->get (cs->caller);
      if (!from_ifs || !from_ifs->m_candidate)
        return true;

      if (!ipa_edge_within_scc (cs)
          && !from_ifs->m_return_ignored)
        return true;
    }

  return false;
}
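
/* For illustration (hypothetical code, not from the source): in

     static int f (int n)
     {
       if (n > 0)
         return f (n - 1);
       return 0;
     }
     void entry (int n) { f (n); }

   the recursive edge only returns the callee's value further and the edge
   from ENTRY ignores it, so this predicate returns false and the return
   value of F can be removed.  */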

/* Push into NEW_PARAMS all parameter adjustment entries required to copy or
   modify the parameter which originally had index BASE_INDEX, which had index
   PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
   was described there by PREV_ADJUSTMENT.  If the parent clone is the
   original function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to
   BASE_INDEX.  */

static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
                                  unsigned prev_clone_index,
                                  ipa_adjusted_param *prev_adjustment,
                                  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  if (desc->locally_unused)
    {
      if (dump_file)
        fprintf (dump_file, "  Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      ipa_adjusted_param adj;
      if (prev_adjustment)
        {
          adj = *prev_adjustment;
          adj.prev_clone_adjustment = true;
          adj.prev_clone_index = prev_clone_index;
        }
      else
        {
          memset (&adj, 0, sizeof (adj));
          adj.op = IPA_PARAM_OP_COPY;
          adj.base_index = base_index;
          adj.prev_clone_index = prev_clone_index;
        }
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "  Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
        continue;
      if (dump_file)
        fprintf (dump_file, "    - component at byte offset %u, "
                 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}
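
/* A hypothetical sketch of the entries this produces: for a split candidate

     static long f (struct S *p, int u);

   where only p->a and p->b are accessed and U is locally unused, invoking
   this function for parameter 0 emits one IPA_PARAM_OP_SPLIT entry per
   certain access (each carrying its unit_offset and type), while invoking it
   for parameter 1 emits nothing, so the eventual clone takes roughly the
   form

     static long f.isra (int a, long b);  */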

/* Worker for call_for_symbol_thunks_and_aliases.  Set the calls_comdat_local
   flag of all callers of NODE.  */

static bool
mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    cs->caller->calls_comdat_local = true;
  return false;
}


/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
                           hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
        isra_param_desc *desc = &(*ifs->m_parameters)[i];
        if ((desc->locally_unused || desc->split_candidate)
            /* Make sure we do not clone just to attempt to remove an already
               removed unused argument.  */
            && (!check_surviving
                || (i < surviving_params.length ()
                    && surviving_params[i])))
          {
            will_change_function = true;
            break;
          }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
               node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
        fprintf (dump_file, "  Will remove return value.\n");
    }

  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments = node->clone.param_adjustments)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
        {
          ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
          push_param_adjustments_for_index (ifs, old_adj->base_index, i,
                                            old_adj, &new_params);
        }
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
                              ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n  Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
                               IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
                                 node->decl)));
  vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
                                  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
                                             NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
  callers.release ();
}
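
/* Illustration (hypothetical): for a function foo with a removable second
   parameter, the code above creates a clone along the lines of foo.isra.0,
   the numeric suffix coming from CLONE_NUM_SUFFIXES keyed by the assembler
   name, and redirects the collected callers to it; the original node can
   later be removed by TODO_remove_functions if no other references
   remain.  */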

/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not, or which should not
   be transformed because the associated debug counter reached its limit.
   Return true if none survived or if there were no candidates to begin
   with.  */

static bool
disable_unavailable_parameters (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  if (node->clone.param_adjustments)
    {
      check_surviving = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);
    }
  bool dumped_first = false;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
        {
          desc->locally_unused = false;
          desc->split_candidate = false;
        }
      else if (check_surviving
               && (i >= surviving_params.length ()
                   || !surviving_params[i]))
        {
          /* Even if the parameter was removed by a previous IPA pass, we do
             not clear locally_unused because if it really is unused, this
             information might be useful in callers.  */
          desc->split_candidate = false;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              if (!dumped_first)
                {
                  fprintf (dump_file,
                           "The following parameters of %s are dead on "
                           "arrival:", node->dump_name ());
                  dumped_first = true;
                }
              fprintf (dump_file, " %u", i);
            }
        }
      else if (desc->locally_unused || desc->split_candidate)
        ret = false;
    }

  if (dumped_first)
    fprintf (dump_file, "\n");

  return ret;
}


/* Run the interprocedural part of IPA-SRA.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callees to callers for parameter removal and
     splitting.  */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      /* Preliminary IPA function level checks and first step of parameter
         removal.  */
      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;
          if (!ipa_sra_ipa_function_checks (v)
              || check_all_callers_for_issues (v))
            {
              ifs->zap ();
              continue;
            }
          if (disable_unavailable_parameters (v, ifs))
            continue;
          for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
            process_edge_to_unknown_caller (cs);
          for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
            if (!ipa_edge_within_scc (cs))
              param_removal_cross_scc_edge (cs);
        }

      /* Look at edges within the current SCC and propagate used-ness across
         them, pushing onto the stack all nodes which might need to be
         revisited.  */
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                               &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
        {
          cgraph_node *v = stack.pop ();
          isra_func_summary *ifs = func_sums->get (v);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
                                                 &stack, true);
        }

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
        {
          repeat_scc_access_propagation = false;
          FOR_EACH_VEC_ELT (cycle_nodes, j, v)
            {
              isra_func_summary *ifs = func_sums->get (v);
              if (!ifs
                  || !ifs->m_candidate
                  || vec_safe_is_empty (ifs->m_parameters))
                continue;
              for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
                if (param_splitting_across_edge (cs))
                  repeat_scc_access_propagation = true;
            }
        }
      while (repeat_scc_access_propagation);

      if (flag_checking)
        FOR_EACH_VEC_ELT (cycle_nodes, j, v)
          verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  /* One sweep from callers to callees for result removal.  */
  for (int i = node_scc_count - 1; i >= 0; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
      unsigned j;

      cgraph_node *v;
      FOR_EACH_VEC_ELT (cycle_nodes, j, v)
        {
          isra_func_summary *ifs = func_sums->get (v);
          if (!ifs || !ifs->m_candidate)
            continue;

          bool return_needed
            = (ifs->m_returns_value
               && (!dbg_cnt (ipa_sra_retvalues)
                   || v->call_for_symbol_and_aliases (retval_used_p,
                                                      NULL, true)));
          ifs->m_return_ignored = !return_needed;
          if (return_needed)
            isra_push_node_to_stack (v, ifs, &stack);
        }

      while (!stack.is_empty ())
        {
          cgraph_node *node = stack.pop ();
          isra_func_summary *ifs = func_sums->get (node);
          gcc_checking_assert (ifs && ifs->m_queued);
          ifs->m_queued = false;

          for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
            if (ipa_edge_within_scc (cs)
                && call_sums->get (cs)->m_return_returned)
              {
                enum availability av;
                cgraph_node *callee = cs->callee->function_symbol (&av);
                isra_func_summary *to_ifs = func_sums->get (callee);
                if (to_ifs && to_ifs->m_return_ignored)
                  {
                    to_ifs->m_return_ignored = false;
                    isra_push_node_to_stack (callee, to_ifs, &stack);
                  }
              }
        }
      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
        {
          fprintf (dump_file, "\n========== IPA-SRA propagation final state "
                   "==========\n");
          ipa_sra_dump_all_summaries (dump_file);
        }
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
             "==========\n\n");
  return 0;
}
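
/* Usage note added for orientation: this analysis runs under -fipa-sra
   (enabled by default at higher optimization levels), and the dump output
   written to dump_file throughout this file can be requested with
   -fdump-ipa-sra or, for the detailed traces, -fdump-ipa-sra-details.  */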


const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
                      ipa_sra_generate_summary, /* generate_summary */
                      ipa_sra_write_summary, /* write_summary */
                      ipa_sra_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* TODO: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_sra && optimize);
  }

  virtual unsigned int execute (function *) { return ipa_sra_analysis (); }

}; // class pass_ipa_sra

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}


#include "gt-ipa-sra.h"