1 /* LTO partitioning logic routines.
2    Copyright (C) 2009-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "target.h"
24 #include "function.h"
25 #include "basic-block.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "stringpool.h"
30 #include "cgraph.h"
31 #include "lto-streamer.h"
32 #include "symbol-summary.h"
33 #include "tree-vrp.h"
34 #include "ipa-prop.h"
35 #include "ipa-fnsummary.h"
36 #include "lto-partition.h"
37 #include "sreal.h"
38 
39 vec<ltrans_partition> ltrans_partitions;
40 
41 static void add_symbol_to_partition (ltrans_partition part, symtab_node *node);
42 
43 
44 /* Helper for qsort; compare partitions by the order of their first symbol.  */
45 
46 static int
47 cmp_partitions_order (const void *a, const void *b)
48 {
49   const struct ltrans_partition_def *pa
50      = *(struct ltrans_partition_def *const *)a;
51   const struct ltrans_partition_def *pb
52      = *(struct ltrans_partition_def *const *)b;
53   int ordera = -1, orderb = -1;
54 
55   if (lto_symtab_encoder_size (pa->encoder))
56     ordera = lto_symtab_encoder_deref (pa->encoder, 0)->order;
57   if (lto_symtab_encoder_size (pb->encoder))
58     orderb = lto_symtab_encoder_deref (pb->encoder, 0)->order;
59   return orderb - ordera;
60 }
61 
62 /* Create new partition with name NAME.  */
63 
64 static ltrans_partition
65 new_partition (const char *name)
66 {
67   ltrans_partition part = XCNEW (struct ltrans_partition_def);
68   part->encoder = lto_symtab_encoder_new (false);
69   part->name = name;
70   part->insns = 0;
71   part->symbols = 0;
72   ltrans_partitions.safe_push (part);
73   return part;
74 }
75 
76 /* Free memory used by ltrans datastructures.  */
77 
78 void
79 free_ltrans_partitions (void)
80 {
81   unsigned int idx;
82   ltrans_partition part;
83   for (idx = 0; ltrans_partitions.iterate (idx, &part); idx++)
84     {
85       if (part->initializers_visited)
86 	delete part->initializers_visited;
87       /* Symtab encoder is freed after streaming.  */
88       free (part);
89     }
90   ltrans_partitions.release ();
91 }
92 
93 /* Return true if symbol is already in some partition.  */
94 
95 static inline bool
96 symbol_partitioned_p (symtab_node *node)
97 {
98   return node->aux;
99 }
100 
101 /* Add references into the partition.  */
102 static void
103 add_references_to_partition (ltrans_partition part, symtab_node *node)
104 {
105   int i;
106   struct ipa_ref *ref = NULL;
107 
108   /* Add all duplicated references to the partition.  */
109   for (i = 0; node->iterate_reference (i, ref); i++)
110     if (ref->referred->get_partitioning_class () == SYMBOL_DUPLICATE)
111       add_symbol_to_partition (part, ref->referred);
112    /* References to a readonly variable may be constant folded into its value.
113        Recursively look into the initializers of the constant variable and add
114        references, too.  */
115     else if (is_a <varpool_node *> (ref->referred)
116 	     && (dyn_cast <varpool_node *> (ref->referred)
117 		 ->ctor_useable_for_folding_p ())
118 	     && !lto_symtab_encoder_in_partition_p (part->encoder, ref->referred))
119       {
120 	if (!part->initializers_visited)
121 	  part->initializers_visited = new hash_set<symtab_node *>;
122 	if (!part->initializers_visited->add (ref->referred))
123 	  add_references_to_partition (part, ref->referred);
124       }
125 }
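
/* A sketch of the behaviour above, using hypothetical symbols that are not
   part of the original sources: if a partitioned function references a
   read-only variable TBL whose initializer is usable for constant folding,
   TBL itself is not added here, but the references made by TBL's initializer
   are added (recursively), so constant folding of TBL's value inside the
   ltrans unit can still resolve the symbols it needs.  */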
126 
127 /* Helper function for add_symbol_to_partition doing the actual dirty work
128    of adding NODE to PART.  */
129 
130 static bool
131 add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
132 {
133   enum symbol_partitioning_class c = node->get_partitioning_class ();
134   struct ipa_ref *ref;
135   symtab_node *node1;
136 
137   /* If NODE is already there, we have nothing to do.  */
138   if (lto_symtab_encoder_in_partition_p (part->encoder, node))
139     return true;
140 
141   /* Non-duplicated aliases or thunks of a duplicated symbol need to be output
142      just once.
143 
144      Be lax about comdats; they may or may not be duplicated and we may
145      end up needing to duplicate a keyed comdat because it has an unkeyed alias.  */
146   if (c == SYMBOL_PARTITION && !DECL_COMDAT (node->decl)
147       && symbol_partitioned_p (node))
148     return false;
149 
150   /* Be sure that we never try to duplicate partitioned symbol
151      or add external symbol.  */
152   gcc_assert (c != SYMBOL_EXTERNAL
153 	      && (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));
154 
155   part->symbols++;
156 
157   lto_set_symtab_encoder_in_partition (part->encoder, node);
158 
159   if (symbol_partitioned_p (node))
160     {
161       node->in_other_partition = 1;
162       if (dump_file)
163 	fprintf (dump_file,
164 		 "Symbol node %s now used in multiple partitions\n",
165 		 node->dump_name ());
166     }
167   node->aux = (void *)((size_t)node->aux + 1);
168 
169   if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
170     {
171       struct cgraph_edge *e;
172       if (!node->alias && c == SYMBOL_PARTITION)
173 	part->insns += ipa_size_summaries->get (cnode)->size;
174 
175       /* Add all inline clones and callees that are duplicated.  */
176       for (e = cnode->callees; e; e = e->next_callee)
177 	if (!e->inline_failed)
178 	  add_symbol_to_partition_1 (part, e->callee);
179 	else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE)
180 	  add_symbol_to_partition (part, e->callee);
181 
182       /* Add all thunks associated with the function.  */
183       for (e = cnode->callers; e; e = e->next_caller)
184 	if (e->caller->thunk && !e->caller->inlined_to)
185 	  add_symbol_to_partition_1 (part, e->caller);
186     }
187 
188   add_references_to_partition (part, node);
189 
190   /* Add all aliases associated with the symbol.  */
191 
192   FOR_EACH_ALIAS (node, ref)
193     if (!ref->referring->transparent_alias)
194       add_symbol_to_partition_1 (part, ref->referring);
195     else
196       {
197 	struct ipa_ref *ref2;
198 	/* We do not need to add transparent aliases if they are not used.
199 	   However we must add aliases of transparent aliases if they exist.  */
200 	FOR_EACH_ALIAS (ref->referring, ref2)
201 	  {
202 	    /* Nested transparent aliases are not permitted.  */
203 	    gcc_checking_assert (!ref2->referring->transparent_alias);
204 	    add_symbol_to_partition_1 (part, ref2->referring);
205 	  }
206       }
207 
208   /* Ensure that all members of a SAME_COMDAT_GROUP list are always added as a group.  */
209   if (node->same_comdat_group)
210     for (node1 = node->same_comdat_group;
211 	 node1 != node; node1 = node1->same_comdat_group)
212       if (!node->alias)
213 	{
214 	  bool added = add_symbol_to_partition_1 (part, node1);
215 	  gcc_assert (added);
216 	}
217   return true;
218 }
219 
220 /* If symbol NODE is really part of other symbol's definition (i.e. it is
221    internal label, thunk, alias or so), return the outer symbol.
222    When add_symbol_to_partition_1 is called on the outer symbol it must
223    eventually add NODE, too.  */
224 static symtab_node *
225 contained_in_symbol (symtab_node *node)
226 {
227   /* There is no need to consider transparent aliases to be part of the
228      definition: they are only useful inside the partition in which they are
229      output and thus we will always see an explicit reference to them.  */
230   if (node->transparent_alias)
231     return node;
232   if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
233     {
234       cnode = cnode->function_symbol ();
235       if (cnode->inlined_to)
236 	cnode = cnode->inlined_to;
237       return cnode;
238     }
239   else if (varpool_node *vnode = dyn_cast <varpool_node *> (node))
240     return vnode->ultimate_alias_target ();
241   return node;
242 }
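
/* A sketch of contained_in_symbol, using hypothetical symbols: an inline
   clone of BAR that was inlined into FOO maps to FOO, a thunk or
   non-transparent alias of FOO resolves through function_symbol to FOO, and
   a variable alias maps to its ultimate alias target, so partitioning always
   starts from the outermost symbol that owns the definition.  */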
243 
244 /* Add symbol NODE to partition.  When definition of NODE is part
245    of other symbol definition, add the other symbol, too.  */
246 
247 static void
248 add_symbol_to_partition (ltrans_partition part, symtab_node *node)
249 {
250   symtab_node *node1;
251 
252   /* Verify that we do not try to duplicate something that cannot be.  */
253   gcc_checking_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
254 		       || !symbol_partitioned_p (node));
255 
256   while ((node1 = contained_in_symbol (node)) != node)
257     node = node1;
258 
259   /* If we have duplicated symbol contained in something we cannot duplicate,
260      we are very badly screwed.  The other way is possible, so we do not
261      assert this in add_symbol_to_partition_1.
262 
263      Be lax about comdats; they may or may not be duplicated and we may
264      end up needing to duplicate a keyed comdat because it has an unkeyed alias.  */
265 
266   gcc_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
267 	      || DECL_COMDAT (node->decl)
268 	      || !symbol_partitioned_p (node));
269 
270   add_symbol_to_partition_1 (part, node);
271 }
272 
273 /* Undo all additions until the number of symbols in PARTITION
274    is N_NODES.  */
275 
276 static void
277 undo_partition (ltrans_partition partition, unsigned int n_nodes)
278 {
279   while (lto_symtab_encoder_size (partition->encoder) > (int)n_nodes)
280     {
281       symtab_node *node = lto_symtab_encoder_deref (partition->encoder,
282 						   n_nodes);
283       partition->symbols--;
284       cgraph_node *cnode;
285 
286       /* After UNDO we no longer know what was visited.  */
287       if (partition->initializers_visited)
288 	delete partition->initializers_visited;
289       partition->initializers_visited = NULL;
290 
291       if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
292           && node->get_partitioning_class () == SYMBOL_PARTITION)
293 	partition->insns -= ipa_size_summaries->get (cnode)->size;
294       lto_symtab_encoder_delete_node (partition->encoder, node);
295       node->aux = (void *)((size_t)node->aux - 1);
296     }
297 }
298 
299 /* Group cgraph nodes by input files.  This is used mainly for testing
300    right now.  */
301 
302 void
303 lto_1_to_1_map (void)
304 {
305   symtab_node *node;
306   struct lto_file_decl_data *file_data;
307   hash_map<lto_file_decl_data *, ltrans_partition> pmap;
308   ltrans_partition partition;
309   int npartitions = 0;
310 
311   FOR_EACH_SYMBOL (node)
312     {
313       if (node->get_partitioning_class () != SYMBOL_PARTITION
314 	  || symbol_partitioned_p (node))
315 	continue;
316 
317       file_data = node->lto_file_data;
318 
319       if (file_data)
320 	{
321           ltrans_partition *slot = &pmap.get_or_insert (file_data);
322           if (*slot)
323 	    partition = *slot;
324 	  else
325 	    {
326 	      partition = new_partition (file_data->file_name);
327 	      *slot = partition;
328 	      npartitions++;
329 	    }
330 	}
331       else if (!file_data && ltrans_partitions.length ())
332 	partition = ltrans_partitions[0];
333       else
334 	{
335 	  partition = new_partition ("");
336 	  pmap.put (NULL, partition);
337 	  npartitions++;
338 	}
339 
340       add_symbol_to_partition (partition, node);
341     }
342 
343   /* If the cgraph is empty, create one cgraph node set so that there is still
344      an output file for any variables that need to be exported in a DSO.  */
345   if (!npartitions)
346     new_partition ("empty");
347 
348   /* Order partitions by order of symbols because they are linked into binary
349      that way.  */
350   ltrans_partitions.qsort (cmp_partitions_order);
351 }
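
/* A sketch of lto_1_to_1_map above, using hypothetical input files: with
   objects a.o and b.o each contributing partitioned symbols, one partition is
   created per file (named after it), so every symbol stays with the
   translation unit it came from; symbols lacking lto_file_data fall back to
   the first partition created.  */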
352 
353 /* Maximal partitioning.  Put every new symbol into new partition if possible.  */
354 
355 void
356 lto_max_map (void)
357 {
358   symtab_node *node;
359   ltrans_partition partition;
360   int npartitions = 0;
361 
362   FOR_EACH_SYMBOL (node)
363     {
364       if (node->get_partitioning_class () != SYMBOL_PARTITION
365 	  || symbol_partitioned_p (node))
366 	continue;
367       partition = new_partition (node->asm_name ());
368       add_symbol_to_partition (partition, node);
369       npartitions++;
370     }
371   if (!npartitions)
372     new_partition ("empty");
373 }
374 
375 /* Helper function for qsort; sort nodes by order.  */
376 static int
377 node_cmp (const void *pa, const void *pb)
378 {
379   const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
380   const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
381   return b->order - a->order;
382 }
383 
384 /* Add all symtab nodes from NEXT_NODES to PARTITION in order.  */
385 
386 static void
387 add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
388 {
389   unsigned i;
390   symtab_node *node;
391 
392   next_nodes.qsort (node_cmp);
393   FOR_EACH_VEC_ELT (next_nodes, i, node)
394     if (!symbol_partitioned_p (node))
395       add_symbol_to_partition (partition, node);
396 }
397 
398 /* Return true if we should account reference from N1 to N2 in cost
399    of partition boundary.  */
400 
401 bool
402 account_reference_p (symtab_node *n1, symtab_node *n2)
403 {
404   if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
405     n1 = cnode;
406   /* Do not account references from aliases - they are never split across
407      partitions.  */
408   if (n1->alias)
409     return false;
410   /* Do not account recursion - the code below will handle it incorrectly
411      otherwise.  Do not account references to external symbols: they will
412      never become local.  Finally do not account references to duplicated
413      symbols: they will always be local.  */
414   if (n1 == n2
415       || !n2->definition
416       || n2->get_partitioning_class () != SYMBOL_PARTITION)
417     return false;
418   /* If the referring node is an external symbol, do not account it to the
419      boundary cost.  Those are added into units only to enable possible
420      constant folding and devirtualization.
421 
422      First, we do not know whether it will ever be added to some partition
423      (this is decided by compute_ltrans_boundary) and, second, it is not
424      that likely that constant folding will actually use the reference.  */
425   if (contained_in_symbol (n1)
426 	->get_partitioning_class () == SYMBOL_EXTERNAL)
427     return false;
428   return true;
429 }
430 
431 
432 /* Group cgraph nodes into equally-sized partitions.
433 
434    The partitioning algorithm is simple: nodes are taken in predefined order.
435    The order corresponds to the order we want functions to have in the final
436    output.  In the future this will be given by function reordering pass, but
437    at the moment we use the topological order, which is a good approximation.
438 
439    The goal is to partition this linear order into intervals (partitions) so
440    that all the partitions have approximately the same size and the number of
441    callgraph or IPA reference edges crossing boundaries is minimal.
442 
443    This is a lot faster (O(n) in size of callgraph) than algorithms doing
444    priority-based graph clustering that are generally O(n^2) and, since
445    WHOPR is designed to make things go well across partitions, it leads
446    to good results.
447 
448    We compute the expected size of a partition as:
449 
450      max (total_size / lto_partitions, min_partition_size)
451 
452    We use dynamic expected size of partition so small programs are partitioned
453    into enough partitions to allow use of multiple CPUs, while large programs
454    are not partitioned too much.  Creating too many partitions significantly
455    increases the streaming overhead.
456 
457    In the future, we would like to bound the maximal size of partitions so as
458    to prevent the LTRANS stage from consuming too much memory.  At the moment,
459    however, the WPA stage is the most memory intensive for large benchmarks,
460    since too many types and declarations are read into memory.
461 
462    The function implements a simple greedy algorithm.  Nodes are added to the
463    current partition until 7/8 of the expected partition size is reached.  Past
464    this threshold, we keep track of the boundary size (the number of edges going
465    to other partitions) and continue adding functions until the current partition
466    has grown to 9/8 of the expected partition size (or the maximal partition
467    size is exceeded).  Then the process is undone back to the point where the
468    minimal ratio of boundary size to in-partition calls was reached.  */
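
/* A worked example with hypothetical numbers (not measured from any real
   build): with total_size == 1000000, n_lto_partitions == 32 and
   param_min_partition_size == 10000, the expected partition size is

     max (1000000 / 32, 10000) = max (31250, 10000) = 31250

   so the loop below starts searching for the cheapest boundary once a
   partition reaches 7/8 * 31250 ~= 27343 insns, stops growing it at
   9/8 * 31250 ~= 35156 insns (or at max_partition_size, whichever comes
   first), and then unwinds to the best split point seen in between.  */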
469 
470 void
471 lto_balanced_map (int n_lto_partitions, int max_partition_size)
472 {
473   int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
474   int best_noreorder_pos = 0;
475   auto_vec <cgraph_node *> order (symtab->cgraph_count);
476   auto_vec<cgraph_node *> noreorder;
477   auto_vec<varpool_node *> varpool_order;
478   struct cgraph_node *node;
479   int64_t original_total_size, total_size = 0;
480   int64_t partition_size;
481   ltrans_partition partition;
482   int last_visited_node = 0;
483   varpool_node *vnode;
484   int64_t cost = 0, internal = 0;
485   unsigned int best_n_nodes = 0, best_i = 0;
486   int64_t best_cost = -1, best_internal = 0, best_size = 0;
487   int npartitions;
488   int current_order = -1;
489   int noreorder_pos = 0;
490 
491   FOR_EACH_VARIABLE (vnode)
492     gcc_assert (!vnode->aux);
493 
494   FOR_EACH_DEFINED_FUNCTION (node)
495     if (node->get_partitioning_class () == SYMBOL_PARTITION)
496       {
497 	if (node->no_reorder)
498 	  noreorder.safe_push (node);
499 	else
500 	  order.safe_push (node);
501 	if (!node->alias)
502 	  total_size += ipa_size_summaries->get (node)->size;
503       }
504 
505   original_total_size = total_size;
506 
507   /* Streaming works best when the source units do not cross partition
508      boundaries much.  This is because importing a function from a source
509      unit tends to import a lot of global trees defined there.  We should
510      get better about minimizing the function boundary, but until then
511      things work more smoothly if we keep symbols in source order.  */
512   order.qsort (tp_first_run_node_cmp);
513   noreorder.qsort (node_cmp);
514 
515   if (dump_file)
516     {
517       for (unsigned i = 0; i < order.length (); i++)
518 	fprintf (dump_file, "Balanced map symbol order:%s:%u\n",
519 		 order[i]->dump_name (), order[i]->tp_first_run);
520       for (unsigned i = 0; i < noreorder.length (); i++)
521 	fprintf (dump_file, "Balanced map symbol no_reorder:%s:%u\n",
522 		 noreorder[i]->dump_name (), noreorder[i]->tp_first_run);
523     }
524 
525   /* Collect all variables that should not be reordered.  */
526   FOR_EACH_VARIABLE (vnode)
527     if (vnode->get_partitioning_class () == SYMBOL_PARTITION
528 	&& vnode->no_reorder)
529       varpool_order.safe_push (vnode);
530   n_varpool_nodes = varpool_order.length ();
531   varpool_order.qsort (node_cmp);
532 
533   /* Compute partition size and create the first partition.  */
534   if (param_min_partition_size > max_partition_size)
535     fatal_error (input_location, "min partition size cannot be greater "
536 		 "than max partition size");
537 
538   partition_size = total_size / n_lto_partitions;
539   if (partition_size < param_min_partition_size)
540     partition_size = param_min_partition_size;
541   npartitions = 1;
542   partition = new_partition ("");
543   if (dump_file)
544     fprintf (dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
545 	     total_size, partition_size);
546 
547   auto_vec<symtab_node *> next_nodes;
548 
549   for (unsigned i = 0; i < order.length (); i++)
550     {
551       if (symbol_partitioned_p (order[i]))
552 	continue;
553 
554       current_order = order[i]->order;
555 
556       /* Output noreorder and varpool in program order first.  */
557       next_nodes.truncate (0);
558       while (varpool_pos < n_varpool_nodes
559 	     && varpool_order[varpool_pos]->order < current_order)
560 	next_nodes.safe_push (varpool_order[varpool_pos++]);
561       while (noreorder_pos < (int)noreorder.length ()
562 	     && noreorder[noreorder_pos]->order < current_order)
563 	next_nodes.safe_push (noreorder[noreorder_pos++]);
564       add_sorted_nodes (next_nodes, partition);
565 
566       if (!symbol_partitioned_p (order[i]))
567         add_symbol_to_partition (partition, order[i]);
568 
569 
570       /* Once we add a new node to the partition, we also want to add
571          all referenced variables unless they were already added to some
572          earlier partition.
573          add_symbol_to_partition possibly adds multiple nodes and
574          variables that are needed to satisfy the needs of ORDER[i].
575          We remember the last visited cgraph and varpool node from the last
576          iteration of the outer loop, which allows us to process every new addition.
577 
578          At the same time we compute the size of the boundary into COST.  Every
579          callgraph or IPA reference edge leaving the partition contributes to
580          COST.  Every edge inside the partition was earlier counted as one leaving
581          it and thus we need to subtract it from COST.  */
582       while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
583 	{
584 	  int j;
585 	  struct ipa_ref *ref = NULL;
586 	  symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
587 							last_visited_node);
588 
589 	  if (cgraph_node *node = dyn_cast <cgraph_node *> (snode))
590 	    {
591 	      struct cgraph_edge *edge;
592 
593 
594 	      last_visited_node++;
595 
596 	      gcc_assert (node->definition || node->weakref
597 			  || node->declare_variant_alt);
598 
599 	      /* Compute boundary cost of callgraph edges.  */
600 	      for (edge = node->callees; edge; edge = edge->next_callee)
601 		/* Inline edges will always end up local.  */
602 		if (edge->inline_failed
603 		    && account_reference_p (node, edge->callee))
604 		  {
605 		    int edge_cost = edge->frequency ();
606 		    int index;
607 
608 		    if (!edge_cost)
609 		      edge_cost = 1;
610 		    gcc_assert (edge_cost > 0);
611 		    index = lto_symtab_encoder_lookup (partition->encoder,
612 						       edge->callee);
613 		    if (index != LCC_NOT_FOUND
614 		        && index < last_visited_node - 1)
615 		      cost -= edge_cost, internal += edge_cost;
616 		    else
617 		      cost += edge_cost;
618 		  }
619 	      for (edge = node->callers; edge; edge = edge->next_caller)
620 		if (edge->inline_failed
621 		    && account_reference_p (edge->caller, node))
622 		{
623 		  int edge_cost = edge->frequency ();
624 		  int index;
625 
626 		  gcc_assert (edge->caller->definition);
627 		  if (!edge_cost)
628 		    edge_cost = 1;
629 		  gcc_assert (edge_cost > 0);
630 		  index = lto_symtab_encoder_lookup (partition->encoder,
631 						     edge->caller);
632 		  if (index != LCC_NOT_FOUND
633 		      && index < last_visited_node - 1)
634 		    cost -= edge_cost, internal += edge_cost;
635 		  else
636 		    cost += edge_cost;
637 		}
638 	    }
639 	  else
640 	    last_visited_node++;
641 
642 	  /* Compute boundary cost of IPA REF edges and at the same time look into
643 	     variables referenced from current partition and try to add them.  */
644 	  for (j = 0; snode->iterate_reference (j, ref); j++)
645 	    if (!account_reference_p (snode, ref->referred))
646 	      ;
647 	    else if (is_a <varpool_node *> (ref->referred))
648 	      {
649 		int index;
650 
651 		vnode = dyn_cast <varpool_node *> (ref->referred);
652 		if (!symbol_partitioned_p (vnode)
653 		    && !vnode->no_reorder
654 		    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
655 		  add_symbol_to_partition (partition, vnode);
656 		index = lto_symtab_encoder_lookup (partition->encoder,
657 						   vnode);
658 		if (index != LCC_NOT_FOUND
659 		    && index < last_visited_node - 1)
660 		  cost--, internal++;
661 		else
662 		  cost++;
663 	      }
664 	    else
665 	      {
666 		int index;
667 
668 		node = dyn_cast <cgraph_node *> (ref->referred);
669 		index = lto_symtab_encoder_lookup (partition->encoder,
670 						   node);
671 		if (index != LCC_NOT_FOUND
672 		    && index < last_visited_node - 1)
673 		  cost--, internal++;
674 		else
675 		  cost++;
676 	      }
677 	  for (j = 0; snode->iterate_referring (j, ref); j++)
678 	    if (!account_reference_p (ref->referring, snode))
679 	      ;
680 	    else if (is_a <varpool_node *> (ref->referring))
681 	      {
682 		int index;
683 
684 		vnode = dyn_cast <varpool_node *> (ref->referring);
685 		gcc_assert (vnode->definition);
686 		/* It is better to couple variables with their users,
687 		   because it allows them to be removed.  Coupling
688 		   with objects they refer to only helps to reduce
689 		   number of symbols promoted to hidden.  */
690 		if (!symbol_partitioned_p (vnode)
691 		    && !vnode->no_reorder
692 		    && !vnode->can_remove_if_no_refs_p ()
693 		    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
694 		  add_symbol_to_partition (partition, vnode);
695 		index = lto_symtab_encoder_lookup (partition->encoder,
696 						   vnode);
697 		if (index != LCC_NOT_FOUND
698 		    && index < last_visited_node - 1)
699 		  cost--, internal++;
700 		else
701 		  cost++;
702 	      }
703 	    else
704 	      {
705 		int index;
706 
707 		node = dyn_cast <cgraph_node *> (ref->referring);
708 		gcc_assert (node->definition || node->declare_variant_alt);
709 		index = lto_symtab_encoder_lookup (partition->encoder,
710 						   node);
711 		if (index != LCC_NOT_FOUND
712 		    && index < last_visited_node - 1)
713 		  cost--, internal++;
714 		else
715 		  cost++;
716 	      }
717 	}
718 
719       gcc_assert (cost >= 0 && internal >= 0);
720 
721       /* If the partition is large enough, start looking for the smallest boundary
722 	 cost.  If the partition still seems too small (less than 7/8 of the target
723 	 weight), accept any cost.  If it has the right size, optimize for the highest
724 	 internal/cost ratio.  We later stop building it at 9/8 of the target weight.  */
725       if (partition->insns < partition_size * 7 / 8
726 	  || best_cost == -1
727 	  || (!cost
728 	      || ((sreal)best_internal * (sreal) cost
729 		  < ((sreal) internal * (sreal)best_cost))))
730 	{
731 	  best_cost = cost;
732 	  best_internal = internal;
733 	  best_size = partition->insns;
734 	  best_i = i;
735 	  best_n_nodes = lto_symtab_encoder_size (partition->encoder);
736 	  best_varpool_pos = varpool_pos;
737 	  best_noreorder_pos = noreorder_pos;
738 	}
739       if (dump_file)
740 	fprintf (dump_file, "Step %i: added %s, size %i, "
741 		 "cost %" PRId64 "/%" PRId64 " "
742 		 "best %" PRId64 "/%" PRId64", step %i\n", i,
743 		 order[i]->dump_name (),
744 		 partition->insns, cost, internal,
745 		 best_cost, best_internal, best_i);
746       /* The partition is too large; unwind to the step where the best cost was
747 	 reached and start a new partition.  */
748       if (partition->insns > 9 * partition_size / 8
749 	  || partition->insns > max_partition_size)
750 	{
751 	  if (best_i != i)
752 	    {
753 	      if (dump_file)
754 		fprintf (dump_file, "Unwinding %i insertions to step %i\n",
755 			 i - best_i, best_i);
756 	      undo_partition (partition, best_n_nodes);
757 	      varpool_pos = best_varpool_pos;
758 	      noreorder_pos = best_noreorder_pos;
759 	    }
760 	  gcc_assert (best_size == partition->insns);
761 	  i = best_i;
762 	  if (dump_file)
763 	    fprintf (dump_file,
764 		     "Partition insns: %i (want %" PRId64 ")\n",
765 		     partition->insns, partition_size);
766  	  /* When we are finished, avoid creating empty partition.  */
767 	  while (i < order.length () - 1 && symbol_partitioned_p (order[i + 1]))
768 	    i++;
769 	  if (i == order.length () - 1)
770 	    break;
771 	  total_size -= partition->insns;
772 	  partition = new_partition ("");
773 	  last_visited_node = 0;
774 	  cost = 0;
775 
776 	  if (dump_file)
777 	    fprintf (dump_file, "New partition\n");
778 	  best_n_nodes = 0;
779 	  best_cost = -1;
780 
781 	  /* Since the size of partitions is just approximate, update the size after
782 	     we finish the current one.  */
783 	  if (npartitions < n_lto_partitions)
784 	    partition_size = total_size / (n_lto_partitions - npartitions);
785 	  else
786 	    /* Watch for overflow.  */
787 	    partition_size = INT_MAX / 16;
788 
789 	  if (dump_file)
790 	    fprintf (dump_file,
791 		     "Total size: %" PRId64 " partition_size: %" PRId64 "\n",
792 		     total_size, partition_size);
793 	  if (partition_size < param_min_partition_size)
794 	    partition_size = param_min_partition_size;
795 	  npartitions ++;
796 	}
797     }
798 
799   next_nodes.truncate (0);
800 
801   /* Variables that are not reachable from the code go into the last partition.  */
802   FOR_EACH_VARIABLE (vnode)
803     if (vnode->get_partitioning_class () == SYMBOL_PARTITION
804 	&& !symbol_partitioned_p (vnode))
805       next_nodes.safe_push (vnode);
806 
807   /* Output remaining ordered symbols.  */
808   while (varpool_pos < n_varpool_nodes)
809     next_nodes.safe_push (varpool_order[varpool_pos++]);
810   while (noreorder_pos < (int)noreorder.length ())
811     next_nodes.safe_push (noreorder[noreorder_pos++]);
812   /* For a single partition the boundary cost should be 0 unless we added final
813      symbols here (these are not accounted) or there is an accounting bug.  */
814   gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost || best_cost == -1);
815   add_sorted_nodes (next_nodes, partition);
816 
817   if (dump_file)
818     {
819       fprintf (dump_file, "\nPartition sizes:\n");
820       unsigned partitions = ltrans_partitions.length ();
821 
822       for (unsigned i = 0; i < partitions ; i++)
823 	{
824 	  ltrans_partition p = ltrans_partitions[i];
825 	  fprintf (dump_file, "partition %d contains %d (%2.2f%%)"
826 		   " symbols and %d (%2.2f%%) insns\n", i, p->symbols,
827 		   100.0 * p->symbols / order.length (), p->insns,
828 		   100.0 * p->insns / original_total_size);
829 	}
830 
831       fprintf (dump_file, "\n");
832     }
833 }
834 
835 /* Return true if we must not change the name of the NODE.  The name as
836    extracted from the corresponding decl should be passed in NAME.  */
837 
838 static bool
must_not_rename(symtab_node * node,const char * name)839 must_not_rename (symtab_node *node, const char *name)
840 {
841   /* Our renaming machinery does not handle more than one change of assembler
842      name.  We should not need more than one anyway.  */
843   if (node->lto_file_data
844       && lto_get_decl_name_mapping (node->lto_file_data, name) != name)
845     {
846       if (dump_file)
847 	fprintf (dump_file,
848 		 "Not privatizing symbol name: %s. It was privatized already.\n",
849 		 name);
850       return true;
851     }
852   /* Avoid mangling of already mangled clones.
853      ???  We should have a flag saying whether a symbol already has a
854      'private' name, since we produce some symbols like that, e.g. for
855      global constructors, that are not really clones.
856      ???  That is what unique_name means.  We only need to set it when
857      producing private symbols.  */
858   if (node->unique_name)
859     {
860       if (dump_file)
861 	fprintf (dump_file,
862 		 "Not privatizing symbol name: %s. Has unique name.\n",
863 		 name);
864       return true;
865     }
866   return false;
867 }
868 
869 /* If we are an offload compiler, we may have to rewrite symbols to be
870    valid on this target.  Return either PTR or a modified version of it.  */
871 
872 static const char *
873 maybe_rewrite_identifier (const char *ptr)
874 {
875 #if defined ACCEL_COMPILER && (defined NO_DOT_IN_LABEL || defined NO_DOLLAR_IN_LABEL)
876 #ifndef NO_DOT_IN_LABEL
877   char valid = '.';
878   const char reject[] = "$";
879 #elif !defined NO_DOLLAR_IN_LABEL
880   char valid = '$';
881   const char reject[] = ".";
882 #else
883   char valid = '_';
884   const char reject[] = ".$";
885 #endif
886 
887   char *copy = NULL;
888   const char *match = ptr;
889   for (;;)
890     {
891       size_t off = strcspn (match, reject);
892       if (match[off] == '\0')
893 	break;
894       if (copy == NULL)
895 	{
896 	  copy = xstrdup (ptr);
897 	  match = copy;
898 	}
899       copy[off] = valid;
900     }
901   if (copy)
902     {
903       match = IDENTIFIER_POINTER (get_identifier (copy));
904       free (copy);
905     }
906   return match;
907 #else
908   return ptr;
909 #endif
910 }
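
/* A sketch of the rewriting above, using a hypothetical symbol name: on an
   offload target that defines NO_DOT_IN_LABEL but still accepts '$',
   "foo.constprop.0" becomes "foo$constprop$0"; on a target rejecting both
   '.' and '$', the offending characters become underscores instead.  */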
911 
912 /* Ensure that the symbol in NODE is valid for the target, and if not,
913    rewrite it.  */
914 
915 static void
916 validize_symbol_for_target (symtab_node *node)
917 {
918   tree decl = node->decl;
919   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
920 
921   if (must_not_rename (node, name))
922     return;
923 
924   const char *name2 = maybe_rewrite_identifier (name);
925   if (name2 != name)
926     {
927       symtab->change_decl_assembler_name (decl, get_identifier (name2));
928       if (node->lto_file_data)
929 	lto_record_renamed_decl (node->lto_file_data, name, name2);
930     }
931 }
932 
933 /* Maps symbol names to unique lto clone counters.  */
934 static hash_map<const char *, unsigned> *lto_clone_numbers;
935 
936 /* Helper for privatize_symbol_name.  Mangle NODE symbol name
937    represented by DECL.  */
938 
939 static bool
940 privatize_symbol_name_1 (symtab_node *node, tree decl)
941 {
942   const char *name0 = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
943 
944   if (must_not_rename (node, name0))
945     return false;
946 
947   const char *name = maybe_rewrite_identifier (name0);
948   unsigned &clone_number = lto_clone_numbers->get_or_insert (name);
949   symtab->change_decl_assembler_name (decl,
950 				      clone_function_name (
951 					  name, "lto_priv", clone_number));
952   clone_number++;
953 
954   if (node->lto_file_data)
955     lto_record_renamed_decl (node->lto_file_data, name0,
956 			     IDENTIFIER_POINTER
957 			     (DECL_ASSEMBLER_NAME (decl)));
958 
959   if (dump_file)
960     fprintf (dump_file,
961 	     "Privatizing symbol name: %s -> %s\n",
962 	     name, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
963 
964   return true;
965 }
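
/* A sketch of the renaming above, using a hypothetical symbol name: the first
   privatization of a symbol "foo" yields a clone_function_name result of the
   form "foo.lto_priv.0" (the exact separator is target dependent), a second
   conflicting "foo" becomes "foo.lto_priv.1", and each rename is recorded via
   lto_record_renamed_decl so the original name can still be mapped to the
   new one.  */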
966 
967 /* Mangle NODE symbol name into a local name.
968    This is necessary when
969    1) two or more static vars with the same assembler name
970       are merged into a single ltrans unit, or
971    2) a previously static var was promoted hidden to avoid a possible conflict
972       with symbols defined outside the LTO world.  */
973 
974 static bool
975 privatize_symbol_name (symtab_node *node)
976 {
977   if (!privatize_symbol_name_1 (node, node->decl))
978     return false;
979 
980   return true;
981 }
982 
983 /* Promote symbol NODE to be public with hidden visibility.  */
984 
985 static void
986 promote_symbol (symtab_node *node)
987 {
988   /* We already promoted ... */
989   if (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
990       && DECL_VISIBILITY_SPECIFIED (node->decl)
991       && TREE_PUBLIC (node->decl))
992     {
993       validize_symbol_for_target (node);
994       return;
995     }
996 
997   gcc_checking_assert (!TREE_PUBLIC (node->decl)
998 		       && !DECL_EXTERNAL (node->decl));
999   /* Be sure that newly public symbol does not conflict with anything already
1000      defined by the non-LTO part.  */
1001   privatize_symbol_name (node);
1002   TREE_PUBLIC (node->decl) = 1;
1003   /* After privatization the node should not conflict with any other symbol,
1004      so it is prevailing.  This is important to keep binds_to_current_def_p
1005      to work across partitions.  */
1006   node->resolution = LDPR_PREVAILING_DEF_IRONLY;
1007   node->semantic_interposition = false;
1008   DECL_VISIBILITY (node->decl) = VISIBILITY_HIDDEN;
1009   DECL_VISIBILITY_SPECIFIED (node->decl) = true;
1010   if (dump_file)
1011     fprintf (dump_file,
1012 	     "Promoting as hidden: %s (%s)\n", node->dump_name (),
1013 	     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
1014 
1015   /* Promoting a symbol also promotes all transparent aliases, with the
1016      exception of weakrefs, where the visibility flags are always wrong and
1017      set to !PUBLIC.  */
1018   ipa_ref *ref;
1019   for (unsigned i = 0; node->iterate_direct_aliases (i, ref); i++)
1020     {
1021       struct symtab_node *alias = ref->referring;
1022       if (alias->transparent_alias && !alias->weakref)
1023 	{
1024 	  TREE_PUBLIC (alias->decl) = 1;
1025 	  DECL_VISIBILITY (alias->decl) = VISIBILITY_HIDDEN;
1026 	  DECL_VISIBILITY_SPECIFIED (alias->decl) = true;
1027 	  if (dump_file)
1028 	    fprintf (dump_file,
1029 		     "Promoting alias as hidden: %s\n",
1030 		     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
1031 	}
1032       gcc_assert (!alias->weakref || TREE_PUBLIC (alias->decl));
1033     }
1034 }
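
/* A sketch of promotion, using a hypothetical variable: a file-local
   "static int counter" that ends up referenced from another partition is
   first renamed by privatize_symbol_name (e.g. to "counter.lto_priv.0"),
   then made TREE_PUBLIC with hidden visibility and marked
   LDPR_PREVAILING_DEF_IRONLY, so the ltrans units can link against a single
   definition without exporting it from the final binary.  */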
1035 
1036 /* Return true if NODE needs named section even if it won't land in
1037    the partition symbol table.
1038 
1039    FIXME: we should really not use named sections for inline clones
1040    and master clones.  */
1041 
1042 static bool
1043 may_need_named_section_p (lto_symtab_encoder_t encoder, symtab_node *node)
1044 {
1045   struct cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
1046   if (!cnode)
1047     return false;
1048   if (node->real_symbol_p ())
1049     return false;
1050   return (!encoder
1051 	  || (lto_symtab_encoder_lookup (encoder, node) != LCC_NOT_FOUND
1052               && lto_symtab_encoder_encode_body_p (encoder,
1053 				                   cnode)));
1054 }
1055 
1056 /* If NODE represents a static variable, see if there are other variables
1057    of the same name in partition ENCODER (or in the whole compilation unit if
1058    ENCODER is NULL) and if so, mangle the statics.  Always mangle all
1059    conflicting statics, so we reduce the chances of silently miscompiling
1060    asm statements referring to them by symbol name.  */
1061 
1062 static void
1063 rename_statics (lto_symtab_encoder_t encoder, symtab_node *node)
1064 {
1065   tree decl = node->decl;
1066   symtab_node *s;
1067   tree name = DECL_ASSEMBLER_NAME (decl);
1068 
1069   /* See if this is static symbol. */
1070   if (((node->externally_visible && !node->weakref)
1071       /* FIXME: externally_visible is somewhat illogically not set for
1072 	 external symbols (i.e. those not defined).  Remove this test
1073 	 once this is fixed.  */
1074         || DECL_EXTERNAL (node->decl)
1075 	|| !node->real_symbol_p ())
1076        && !may_need_named_section_p (encoder, node))
1077     return;
1078 
1079   /* Now walk symbols sharing the same name and see if there are any conflicts.
1080      (All types of symbols count here, since we cannot have a static with the
1081      same name as an external or public symbol.)  */
1082   for (s = symtab_node::get_for_asmname (name);
1083        s; s = s->next_sharing_asm_name)
1084     if ((s->real_symbol_p () || may_need_named_section_p (encoder, s))
1085 	&& s->decl != node->decl
1086 	&& (!encoder
1087 	    || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
1088        break;
1089 
1090   /* OK, no conflict, so we have nothing to do.  */
1091   if (!s)
1092     return;
1093 
1094   if (dump_file)
1095     fprintf (dump_file,
1096 	    "Renaming statics with asm name: %s\n", node->dump_name ());
1097 
1098   /* Assign every symbol in the set that shares the same ASM name a unique
1099      mangled name.  */
1100   for (s = symtab_node::get_for_asmname (name); s;)
1101     if ((!s->externally_visible || s->weakref)
1102 	/* Transparent aliases having the same name as their target are renamed
1103 	   at the time their target gets a new name.  Transparent aliases that
1104 	   use a separate assembler name require the name to be unique.  */
1105 	&& (!s->transparent_alias || !s->definition || s->weakref
1106 	    || !symbol_table::assembler_names_equal_p
1107 		 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (s->decl)),
1108 		  IDENTIFIER_POINTER
1109 		    (DECL_ASSEMBLER_NAME (s->get_alias_target()->decl))))
1110 	&& ((s->real_symbol_p ()
1111              && !DECL_EXTERNAL (s->decl)
1112 	     && !TREE_PUBLIC (s->decl))
1113  	    || may_need_named_section_p (encoder, s))
1114 	&& (!encoder
1115 	    || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
1116       {
1117         if (privatize_symbol_name (s))
1118 	  /* Re-start from beginning since we do not know how many
1119 	     symbols changed a name.  */
1120 	  s = symtab_node::get_for_asmname (name);
1121         else s = s->next_sharing_asm_name;
1122       }
1123     else s = s->next_sharing_asm_name;
1124 }
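
/* A sketch of rename_statics, using hypothetical inputs: if a.c and b.c each
   define "static int state" and both land in the same ltrans partition, the
   two decls share the assembler name "state"; both are then privatized
   (e.g. to "state.lto_priv.0" and "state.lto_priv.1"), so an asm statement
   naming "state" fails to resolve loudly instead of silently binding to the
   wrong definition.  */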
1125 
1126 /* Find out all static decls that need to be promoted to global because
1127    of cross file sharing.  This function must be run in the WPA mode after
1128    all inlinees are added.  */
1129 
1130 void
1131 lto_promote_cross_file_statics (void)
1132 {
1133   unsigned i, n_sets;
1134 
1135   gcc_assert (flag_wpa);
1136 
1137   lto_stream_offload_p = false;
1138   select_what_to_stream ();
1139 
1140   /* First compute boundaries.  */
1141   n_sets = ltrans_partitions.length ();
1142   for (i = 0; i < n_sets; i++)
1143     {
1144       ltrans_partition part
1145 	= ltrans_partitions[i];
1146       part->encoder = compute_ltrans_boundary (part->encoder);
1147     }
1148 
1149   lto_clone_numbers = new hash_map<const char *, unsigned>;
1150 
1151   /* Look at boundaries and promote symbols as needed.  */
1152   for (i = 0; i < n_sets; i++)
1153     {
1154       lto_symtab_encoder_iterator lsei;
1155       lto_symtab_encoder_t encoder = ltrans_partitions[i]->encoder;
1156 
1157       for (lsei = lsei_start (encoder); !lsei_end_p (lsei);
1158 	   lsei_next (&lsei))
1159         {
1160           symtab_node *node = lsei_node (lsei);
1161 
1162 	  /* If symbol is static, rename it if its assembler name
1163 	     clashes with anything else in this unit.  */
1164 	  rename_statics (encoder, node);
1165 
1166 	  /* No need to promote if symbol already is externally visible ... */
1167 	  if (node->externally_visible
1168  	      /* ... or if it is part of current partition ... */
1169 	      || lto_symtab_encoder_in_partition_p (encoder, node)
1170 	      /* ... or if we do not partition it. This means that it will
1171 		 appear in every partition referencing it.  */
1172 	      || node->get_partitioning_class () != SYMBOL_PARTITION)
1173 	    {
1174 	      validize_symbol_for_target (node);
1175 	      continue;
1176 	    }
1177 
1178           promote_symbol (node);
1179         }
1180     }
1181   delete lto_clone_numbers;
1182 }
1183 
1184 /* Rename statics in the whole unit in the case that
1185    we use -flto-partition=none.  */
1186 
1187 void
1188 lto_promote_statics_nonwpa (void)
1189 {
1190   symtab_node *node;
1191 
1192   lto_clone_numbers = new hash_map<const char *, unsigned>;
1193   FOR_EACH_SYMBOL (node)
1194     {
1195       rename_statics (NULL, node);
1196       validize_symbol_for_target (node);
1197     }
1198   delete lto_clone_numbers;
1199 }
1200