1 /* Top-level LTO routines.
2 Copyright (C) 2009-2022 Free Software Foundation, Inc.
3 Contributed by CodeSourcery, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "function.h"
26 #include "bitmap.h"
27 #include "basic-block.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "alloc-pool.h"
32 #include "tree-pass.h"
33 #include "tree-streamer.h"
34 #include "cgraph.h"
35 #include "opts.h"
36 #include "toplev.h"
37 #include "stor-layout.h"
38 #include "symbol-summary.h"
39 #include "tree-vrp.h"
40 #include "ipa-prop.h"
41 #include "debug.h"
42 #include "lto.h"
43 #include "lto-section-names.h"
44 #include "splay-tree.h"
45 #include "lto-partition.h"
46 #include "context.h"
47 #include "pass_manager.h"
48 #include "ipa-fnsummary.h"
49 #include "ipa-utils.h"
50 #include "gomp-constants.h"
51 #include "lto-symtab.h"
52 #include "stringpool.h"
53 #include "fold-const.h"
54 #include "attribs.h"
55 #include "builtins.h"
56 #include "lto-common.h"
57
58
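/* Number of parallel streaming tasks to run.  It is computed from FLAG_WPA in
   do_whole_program_analysis: a "jobserver" request currently maps to
   param_max_lto_streaming_parallelism, and non-positive values become 0.  */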
60 static int lto_parallelism;
61
62 /* Return true when NODE has a clone that is analyzed (i.e. we need
63 to load its body even if the node itself is not needed). */
64
65 static bool
has_analyzed_clone_p(struct cgraph_node * node)66 has_analyzed_clone_p (struct cgraph_node *node)
67 {
68 struct cgraph_node *orig = node;
69 node = node->clones;
70 if (node)
71 while (node != orig)
72 {
73 if (node->analyzed)
74 return true;
75 if (node->clones)
76 node = node->clones;
77 else if (node->next_sibling_clone)
78 node = node->next_sibling_clone;
79 else
80 {
81 while (node != orig && !node->next_sibling_clone)
82 node = node->clone_of;
83 if (node != orig)
84 node = node->next_sibling_clone;
85 }
86 }
87 return false;
88 }
89
90 /* Read the function body for the function associated with NODE. */
91
92 static void
lto_materialize_function(struct cgraph_node * node)93 lto_materialize_function (struct cgraph_node *node)
94 {
95 tree decl;
96
97 decl = node->decl;
98 /* Read in functions with body (analyzed nodes)
99 and also functions that are needed to produce virtual clones. */
100 if ((node->has_gimple_body_p () && node->analyzed)
101 || node->used_as_abstract_origin
102 || has_analyzed_clone_p (node))
103 {
104 /* Clones don't need to be read. */
105 if (node->clone_of)
106 return;
107 if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
108 first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
109 /* If the file contains a function with a language specific EH
110 personality set or with EH enabled initialize the backend EH
111 machinery. */
112 if (DECL_FUNCTION_PERSONALITY (decl)
113 || opt_for_fn (decl, flag_exceptions))
114 lto_init_eh ();
115 }
116
117 /* Let the middle end know about the function. */
118 rest_of_decl_compilation (decl, 1, 0);
119 }
120
121 /* Materialize all the bodies for all the nodes in the callgraph. */
122
123 static void
materialize_cgraph(void)124 materialize_cgraph (void)
125 {
126 struct cgraph_node *node;
127 timevar_id_t lto_timer;
128
129 if (!quiet_flag)
130 fprintf (stderr,
131 flag_wpa ? "Materializing decls:" : "Reading function bodies:");
132
133
134 FOR_EACH_FUNCTION (node)
135 {
136 if (node->lto_file_data)
137 {
138 lto_materialize_function (node);
139 lto_stats.num_input_cgraph_nodes++;
140 }
141 }
142
143
144 /* Start the appropriate timer depending on the mode that we are
145 operating in. */
146 lto_timer = (flag_wpa) ? TV_WHOPR_WPA
147 : (flag_ltrans) ? TV_WHOPR_LTRANS
148 : TV_LTO;
149 timevar_push (lto_timer);
150
151 current_function_decl = NULL;
152 set_cfun (NULL);
153
154 if (!quiet_flag)
155 fprintf (stderr, "\n");
156
157 timevar_pop (lto_timer);
158 }
159
160 /* Actually stream out ENCODER into TEMP_FILENAME. */
161
162 static void
stream_out(char * temp_filename,lto_symtab_encoder_t encoder,int part)163 stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
164 {
165 lto_file *file = lto_obj_file_open (temp_filename, true);
166 if (!file)
167 fatal_error (input_location, "%<lto_obj_file_open()%> failed");
168 lto_set_current_out_file (file);
169
170 gcc_assert (!dump_file);
171 streamer_dump_file = dump_begin (TDI_lto_stream_out, NULL, part);
172 ipa_write_optimization_summaries (encoder);
173
174 free (CONST_CAST (char *, file->filename));
175
176 lto_set_current_out_file (NULL);
177 lto_obj_file_close (file);
178 free (file);
179 if (streamer_dump_file)
180 {
181 dump_end (TDI_lto_stream_out, streamer_dump_file);
182 streamer_dump_file = NULL;
183 }
184 }
185
/* Block until a forked streaming process has exited or was killed, and
   turn a waitpid failure, a non-zero exit status or death by signal into
   a fatal error.  */
#ifdef HAVE_WORKING_FORK
/* Hosts without WCONTINUED simply do not request that status.  */
#ifndef WCONTINUED
#define WCONTINUED 0
#endif
static void
wait_for_child ()
{
  int status;

  for (;;)
    {
      int w = waitpid (0, &status, WUNTRACED | WCONTINUED);
      if (w == -1)
        fatal_error (input_location, "waitpid failed");

      if (WIFEXITED (status) && WEXITSTATUS (status))
        fatal_error (input_location, "streaming subprocess failed");
      else if (WIFSIGNALED (status))
        fatal_error (input_location,
                     "streaming subprocess was killed by signal");

      /* Stop/continue notifications are not terminal states; keep
         waiting until the child is really gone.  */
      if (WIFEXITED (status) || WIFSIGNALED (status))
        break;
    }
}
#endif
210
211 static void
stream_out_partitions_1(char * temp_filename,int blen,int min,int max)212 stream_out_partitions_1 (char *temp_filename, int blen, int min, int max)
213 {
214 /* Write all the nodes in SET. */
215 for (int p = min; p < max; p ++)
216 {
217 sprintf (temp_filename + blen, "%u.o", p);
218 stream_out (temp_filename, ltrans_partitions[p]->encoder, p);
219 ltrans_partitions[p]->encoder = NULL;
220 }
221 }
222
223 /* Stream out ENCODER into TEMP_FILENAME
224 Fork if that seems to help. */
225
226 static void
stream_out_partitions(char * temp_filename,int blen,int min,int max,bool ARG_UNUSED (last))227 stream_out_partitions (char *temp_filename, int blen, int min, int max,
228 bool ARG_UNUSED (last))
229 {
230 #ifdef HAVE_WORKING_FORK
231 static int nruns;
232
233 if (lto_parallelism <= 1)
234 {
235 stream_out_partitions_1 (temp_filename, blen, min, max);
236 return;
237 }
238
239 /* Do not run more than LTO_PARALLELISM streamings
240 FIXME: we ignore limits on jobserver. */
241 if (lto_parallelism > 0 && nruns >= lto_parallelism)
242 {
243 wait_for_child ();
244 nruns --;
245 }
246 /* If this is not the last parallel partition, execute new
247 streaming process. */
248 if (!last)
249 {
250 pid_t cpid = fork ();
251
252 if (!cpid)
253 {
254 setproctitle ("lto1-wpa-streaming");
255 stream_out_partitions_1 (temp_filename, blen, min, max);
256 exit (0);
257 }
258 /* Fork failed; lets do the job ourseleves. */
259 else if (cpid == -1)
260 stream_out_partitions_1 (temp_filename, blen, min, max);
261 else
262 nruns++;
263 }
264 /* Last partition; stream it and wait for all children to die. */
265 else
266 {
267 int i;
268 stream_out_partitions_1 (temp_filename, blen, min, max);
269 for (i = 0; i < nruns; i++)
270 wait_for_child ();
271 }
272 asm_nodes_output = true;
273 #else
274 stream_out_partitions_1 (temp_filename, blen, min, max);
275 #endif
276 }
277
278 /* Write all output files in WPA mode and the file with the list of
279 LTRANS units. */
280
281 static void
lto_wpa_write_files(void)282 lto_wpa_write_files (void)
283 {
284 unsigned i, n_sets;
285 ltrans_partition part;
286 FILE *ltrans_output_list_stream;
287 char *temp_filename;
288 auto_vec <char *>temp_filenames;
289 auto_vec <int>temp_priority;
290 size_t blen;
291
292 /* Open the LTRANS output list. */
293 if (!ltrans_output_list)
294 fatal_error (input_location, "no LTRANS output list filename provided");
295
296 timevar_push (TV_WHOPR_WPA);
297
298 FOR_EACH_VEC_ELT (ltrans_partitions, i, part)
299 lto_stats.num_output_symtab_nodes
300 += lto_symtab_encoder_size (part->encoder);
301
302 timevar_pop (TV_WHOPR_WPA);
303
304 timevar_push (TV_WHOPR_WPA_IO);
305
306 cgraph_node *node;
307 /* Do body modifications needed for streaming before we fork out
308 worker processes. */
309 FOR_EACH_FUNCTION (node)
310 if (!node->clone_of && gimple_has_body_p (node->decl))
311 lto_prepare_function_for_streaming (node);
312
313 ggc_trim ();
314 report_heap_memory_use ();
315
316 /* Generate a prefix for the LTRANS unit files. */
317 blen = strlen (ltrans_output_list);
318 temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
319 strcpy (temp_filename, ltrans_output_list);
320 if (blen > sizeof (".out")
321 && strcmp (temp_filename + blen - sizeof (".out") + 1,
322 ".out") == 0)
323 temp_filename[blen - sizeof (".out") + 1] = '\0';
324 blen = strlen (temp_filename);
325
326 n_sets = ltrans_partitions.length ();
327 unsigned sets_per_worker = n_sets;
328 if (lto_parallelism > 1)
329 {
330 if (lto_parallelism > (int)n_sets)
331 lto_parallelism = n_sets;
332 sets_per_worker = (n_sets + lto_parallelism - 1) / lto_parallelism;
333 }
334
335 for (i = 0; i < n_sets; i++)
336 {
337 ltrans_partition part = ltrans_partitions[i];
338
339 /* Write all the nodes in SET. */
340 sprintf (temp_filename + blen, "%u.o", i);
341
342 if (!quiet_flag)
343 fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name,
344 part->insns);
345 if (symtab->dump_file)
346 {
347 lto_symtab_encoder_iterator lsei;
348
349 fprintf (symtab->dump_file,
350 "Writing partition %s to file %s, %i insns\n",
351 part->name, temp_filename, part->insns);
352 fprintf (symtab->dump_file, " Symbols in partition: ");
353 for (lsei = lsei_start_in_partition (part->encoder);
354 !lsei_end_p (lsei);
355 lsei_next_in_partition (&lsei))
356 {
357 symtab_node *node = lsei_node (lsei);
358 fprintf (symtab->dump_file, "%s ", node->dump_asm_name ());
359 }
360 fprintf (symtab->dump_file, "\n Symbols in boundary: ");
361 for (lsei = lsei_start (part->encoder); !lsei_end_p (lsei);
362 lsei_next (&lsei))
363 {
364 symtab_node *node = lsei_node (lsei);
365 if (!lto_symtab_encoder_in_partition_p (part->encoder, node))
366 {
367 fprintf (symtab->dump_file, "%s ", node->dump_asm_name ());
368 cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
369 if (cnode
370 && lto_symtab_encoder_encode_body_p (part->encoder,
371 cnode))
372 fprintf (symtab->dump_file, "(body included)");
373 else
374 {
375 varpool_node *vnode = dyn_cast <varpool_node *> (node);
376 if (vnode
377 && lto_symtab_encoder_encode_initializer_p (part->encoder,
378 vnode))
379 fprintf (symtab->dump_file, "(initializer included)");
380 }
381 }
382 }
383 fprintf (symtab->dump_file, "\n");
384 }
385 gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
386
387 temp_priority.safe_push (part->insns);
388 temp_filenames.safe_push (xstrdup (temp_filename));
389 }
390 memory_block_pool::trim (0);
391
392 for (int set = 0; set < MAX (lto_parallelism, 1); set++)
393 {
394 stream_out_partitions (temp_filename, blen, set * sets_per_worker,
395 MIN ((set + 1) * sets_per_worker, n_sets),
396 set == MAX (lto_parallelism, 1) - 1);
397 }
398
399 ltrans_output_list_stream = fopen (ltrans_output_list, "w");
400 if (ltrans_output_list_stream == NULL)
401 fatal_error (input_location,
402 "opening LTRANS output list %s: %m", ltrans_output_list);
403 for (i = 0; i < n_sets; i++)
404 {
405 unsigned int len = strlen (temp_filenames[i]);
406 if (fprintf (ltrans_output_list_stream, "%i\n", temp_priority[i]) < 0
407 || fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
408 || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
409 fatal_error (input_location, "writing to LTRANS output list %s: %m",
410 ltrans_output_list);
411 free (temp_filenames[i]);
412 }
413
414 lto_stats.num_output_files += n_sets;
415
416 /* Close the LTRANS output list. */
417 if (fclose (ltrans_output_list_stream))
418 fatal_error (input_location,
419 "closing LTRANS output list %s: %m", ltrans_output_list);
420
421 free_ltrans_partitions ();
422 free (temp_filename);
423
424 timevar_pop (TV_WHOPR_WPA_IO);
425 }
426
/* For every variable carrying the "omp declare target link" attribute,
   create an artificial static pointer variable and make the original
   variable's value expression a dereference of that pointer.  Does
   nothing unless this is an accelerator compiler.  */

static void
offload_handle_link_vars (void)
{
#ifdef ACCEL_COMPILER
  varpool_node *var;
  FOR_EACH_VARIABLE (var)
    {
      if (!lookup_attribute ("omp declare target link",
                             DECL_ATTRIBUTES (var->decl)))
        continue;

      /* Build "<type> *<name>.linkptr..." for the link variable.  */
      tree ptr_type = build_pointer_type (TREE_TYPE (var->decl));
      tree ptr_name = clone_function_name (var->decl, "linkptr");
      tree link_ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                      ptr_name, ptr_type);

      TREE_USED (link_ptr_var) = 1;
      TREE_STATIC (link_ptr_var) = 1;
      TREE_PUBLIC (link_ptr_var) = TREE_PUBLIC (var->decl);
      DECL_ARTIFICIAL (link_ptr_var) = 1;
      SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var));

      /* Accesses to the variable now go through the pointer.  */
      SET_DECL_VALUE_EXPR (var->decl, build_simple_mem_ref (link_ptr_var));
      DECL_HAS_VALUE_EXPR_P (var->decl) = 1;
    }
#endif
}
452
453 /* Perform whole program analysis (WPA) on the callgraph and write out the
454 optimization plan. */
455
456 static void
do_whole_program_analysis(void)457 do_whole_program_analysis (void)
458 {
459 symtab_node *node;
460
461 lto_parallelism = 1;
462
463 /* TODO: jobserver communication is not supported, yet. */
464 if (!strcmp (flag_wpa, "jobserver"))
465 lto_parallelism = param_max_lto_streaming_parallelism;
466 else
467 {
468 lto_parallelism = atoi (flag_wpa);
469 if (lto_parallelism <= 0)
470 lto_parallelism = 0;
471 if (lto_parallelism >= param_max_lto_streaming_parallelism)
472 lto_parallelism = param_max_lto_streaming_parallelism;
473 }
474
475 timevar_start (TV_PHASE_OPT_GEN);
476
477 /* Note that since we are in WPA mode, materialize_cgraph will not
478 actually read in all the function bodies. It only materializes
479 the decls and cgraph nodes so that analysis can be performed. */
480 materialize_cgraph ();
481
482 /* Reading in the cgraph uses different timers, start timing WPA now. */
483 timevar_push (TV_WHOPR_WPA);
484
485 if (pre_ipa_mem_report)
486 dump_memory_report ("Memory consumption before IPA");
487
488 symtab->function_flags_ready = true;
489
490 if (symtab->dump_file)
491 symtab->dump (symtab->dump_file);
492 bitmap_obstack_initialize (NULL);
493 symtab->state = IPA_SSA;
494
495 execute_ipa_pass_list (g->get_passes ()->all_regular_ipa_passes);
496
497 /* When WPA analysis raises errors, do not bother to output anything. */
498 if (seen_error ())
499 return;
500
501 /* We are about to launch the final LTRANS phase, stop the WPA timer. */
502 timevar_pop (TV_WHOPR_WPA);
503
504 /* We are no longer going to stream in anything. Free some memory. */
505 lto_free_file_name_hash ();
506
507
508 timevar_push (TV_WHOPR_PARTITIONING);
509
510 gcc_assert (!dump_file);
511 dump_file = dump_begin (partition_dump_id, NULL);
512
513 if (dump_file)
514 symtab->dump (dump_file);
515
516 symtab_node::checking_verify_symtab_nodes ();
517 bitmap_obstack_release (NULL);
518 if (flag_lto_partition == LTO_PARTITION_1TO1)
519 lto_1_to_1_map ();
520 else if (flag_lto_partition == LTO_PARTITION_MAX)
521 lto_max_map ();
522 else if (flag_lto_partition == LTO_PARTITION_ONE)
523 lto_balanced_map (1, INT_MAX);
524 else if (flag_lto_partition == LTO_PARTITION_BALANCED)
525 lto_balanced_map (param_lto_partitions,
526 param_max_partition_size);
527 else
528 gcc_unreachable ();
529
530 /* Size summaries are needed for balanced partitioning. Free them now so
531 the memory can be used for streamer caches. */
532 ipa_free_size_summary ();
533
534 /* AUX pointers are used by partitioning code to bookkeep number of
535 partitions symbol is in. This is no longer needed. */
536 FOR_EACH_SYMBOL (node)
537 node->aux = NULL;
538
539 lto_stats.num_cgraph_partitions += ltrans_partitions.length ();
540
541 /* Find out statics that need to be promoted
542 to globals with hidden visibility because they are accessed from multiple
543 partitions. */
544 lto_promote_cross_file_statics ();
545 offload_handle_link_vars ();
546 if (dump_file)
547 dump_end (partition_dump_id, dump_file);
548 dump_file = NULL;
549 timevar_pop (TV_WHOPR_PARTITIONING);
550
551 timevar_stop (TV_PHASE_OPT_GEN);
552
553 /* Collect a last time - in lto_wpa_write_files we may end up forking
554 with the idea that this doesn't increase memory usage. So we
555 absoultely do not want to collect after that. */
556 ggc_collect ();
557
558 timevar_start (TV_PHASE_STREAM_OUT);
559 if (!quiet_flag)
560 {
561 fprintf (stderr, "\nStreaming out");
562 fflush (stderr);
563 }
564 lto_wpa_write_files ();
565 if (!quiet_flag)
566 fprintf (stderr, "\n");
567 timevar_stop (TV_PHASE_STREAM_OUT);
568
569 if (post_ipa_mem_report)
570 dump_memory_report ("Memory consumption after IPA");
571
572 /* Show the LTO report before launching LTRANS. */
573 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
574 print_lto_report_1 ();
575 if (mem_report_wpa)
576 dump_memory_report ("Final");
577 }
578
579 unsigned int
lto_option_lang_mask(void)580 lto_option_lang_mask (void)
581 {
582 return CL_LTO;
583 }
584
585 /* Main entry point for the GIMPLE front end. This front end has
586 three main personalities:
587
588 - LTO (-flto). All the object files on the command line are
589 loaded in memory and processed as a single translation unit.
590 This is the traditional link-time optimization behavior.
591
592 - WPA (-fwpa). Only the callgraph and summary information for
593 files in the command file are loaded. A single callgraph
594 (without function bodies) is instantiated for the whole set of
595 files. IPA passes are only allowed to analyze the call graph
596 and make transformation decisions. The callgraph is
597 partitioned, each partition is written to a new object file
598 together with the transformation decisions.
599
600 - LTRANS (-fltrans). Similar to -flto but it prevents the IPA
601 summary files from running again. Since WPA computed summary
602 information and decided what transformations to apply, LTRANS
603 simply applies them. */
604
605 void
lto_main(void)606 lto_main (void)
607 {
608 /* LTO is called as a front end, even though it is not a front end.
609 Because it is called as a front end, TV_PHASE_PARSING and
610 TV_PARSE_GLOBAL are active, and we need to turn them off while
611 doing LTO. Later we turn them back on so they are active up in
612 toplev.cc. */
613 timevar_pop (TV_PARSE_GLOBAL);
614 timevar_stop (TV_PHASE_PARSING);
615
616 timevar_start (TV_PHASE_SETUP);
617
618 /* Initialize the LTO front end. */
619 lto_fe_init ();
620
621 timevar_stop (TV_PHASE_SETUP);
622 timevar_start (TV_PHASE_STREAM_IN);
623
624 /* Read all the symbols and call graph from all the files in the
625 command line. */
626 read_cgraph_and_symbols (num_in_fnames, in_fnames);
627
628 timevar_stop (TV_PHASE_STREAM_IN);
629
630 if (!seen_error ())
631 {
632 offload_handle_link_vars ();
633
634 /* If WPA is enabled analyze the whole call graph and create an
635 optimization plan. Otherwise, read in all the function
636 bodies and continue with optimization. */
637 if (flag_wpa)
638 do_whole_program_analysis ();
639 else
640 {
641 timevar_start (TV_PHASE_OPT_GEN);
642
643 materialize_cgraph ();
644 if (!flag_ltrans)
645 {
646 lto_promote_statics_nonwpa ();
647 offload_handle_link_vars ();
648 }
649
650 /* Annotate the CU DIE and mark the early debug phase as finished. */
651 debuginfo_early_start ();
652 debug_hooks->early_finish ("<artificial>");
653 debuginfo_early_stop ();
654
655 /* Let the middle end know that we have read and merged all of
656 the input files. */
657 symtab->compile ();
658
659 timevar_stop (TV_PHASE_OPT_GEN);
660
661 /* FIXME lto, if the processes spawned by WPA fail, we miss
662 the chance to print WPA's report, so WPA will call
663 print_lto_report before launching LTRANS. If LTRANS was
664 launched directly by the driver we would not need to do
665 this. */
666 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
667 print_lto_report_1 ();
668 }
669 }
670
671 /* Here we make LTO pretend to be a parser. */
672 timevar_start (TV_PHASE_PARSING);
673 timevar_push (TV_PARSE_GLOBAL);
674 }
675