xref: /netbsd-src/external/gpl3/binutils/dist/bfd/merge.c (revision cb63e24e8d6aae7ddac1859a9015f48b1d8bd90e)
1 /* SEC_MERGE support.
2    Copyright (C) 2001-2024 Free Software Foundation, Inc.
3    Written by Jakub Jelinek <jakub@redhat.com>.
4 
5    This file is part of BFD, the Binary File Descriptor library.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20    MA 02110-1301, USA.  */
21 
22 
23 /* This file contains support for merging duplicate entities within sections,
24    as used in ELF SHF_MERGE.  */
25 
26 #include "sysdep.h"
27 #include <limits.h>
28 #include "bfd.h"
29 #include "elf-bfd.h"
30 #include "libbfd.h"
31 #include "objalloc.h"
32 #include "libiberty.h"
33 
34 /* We partition all mergeable input sections into sets of similar
35    characteristics.  These sets are the unit of merging.  All content
36    of the input sections is scanned and inserted into a hash table.
37    We also remember an input-offset to entry mapping per input section, but
38    the content itself is removed.  After everything is read in we assign
39    output offsets to all hash entries, and when relocations are processed we
40    look up the given input offset per input section, get the matching entry
41    and its output offset (possibly adjusted for an offset pointing into the
42    middle of an entry).
43 
44    The input-offset-to-entry mapping (in map_ofs/map) is sorted, so in principle
45    we could binary search it, but that's not cache-friendly and it's faster
46    to add another lookup structure that gets us very near the correct
47    entry in just one step (that's what ofstolowbound is for) and do a linear
48    search from there.  */
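
/* For illustration: merging two SEC_STRINGS input sections holding
   "foo\0bar\0" and "bar\0baz\0" inserts foo, bar and baz into one hash
   table; "bar" gets a single entry, so the merged output contains
   "foo\0bar\0baz\0" and the per-input offset maps translate both inputs'
   local offsets of "bar" to its one output offset.  */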
49 
50 /* An entry in the section merge hash table.  */
51 
52 struct sec_merge_hash_entry
53 {
54   /* Length of this entry.  This includes the zero terminator.  */
55   unsigned int len;
56   /* Start of this string needs to be aligned to
57      alignment octets (not 1 << align).  */
58   unsigned int alignment;
59   union
60   {
61     /* Index within the merged section.  */
62     bfd_size_type index;
63     /* Entry this is a suffix of (if alignment is 0).  */
64     struct sec_merge_hash_entry *suffix;
65   } u;
66   /* Next entity in the hash table (in order of entering).  */
67   struct sec_merge_hash_entry *next;
68   char str[1];
69 };
70 
71 /* The section merge hash table.  */
72 
73 struct sec_merge_hash
74 {
75   struct bfd_hash_table table;
76   /* Next available index.  */
77   bfd_size_type size;
78   /* First entity in the SEC_MERGE sections of this type.  */
79   struct sec_merge_hash_entry *first;
80   /* Last entity in the SEC_MERGE sections of this type.  */
81   struct sec_merge_hash_entry *last;
82   /* Entity size.  */
83   unsigned int entsize;
84   /* Are entries fixed size or zero terminated strings?  */
85   bool strings;
86   /* struct-of-array variant of all entries in the hash-table: */
87   unsigned int nbuckets;
88   /* We keep the hash code and length of each entry together in a separate
89      array, in such a way that both can be checked with just a single memory
90      reference.  In this way we don't need indirect access to the entries
91      in the normal case.  key_lens[i] is (hashcode << 32) | len for entry
92      i (which is pointed to by values[i]).  */
93   uint64_t *key_lens;
94   struct sec_merge_hash_entry **values;
95 };
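
/* For illustration: an entry of length 17 whose 32-bit hash is 0xdeadbeef
   is recorded above as key_lens[i] == 0xdeadbeef00000011; a slot whose low
   32 bits (the length) are zero marks an empty bucket.  */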
96 
97 /* True when the given NEWCOUNT and NBUCKETS indicate that the hash table needs
98    resizing.  */
99 #define NEEDS_RESIZE(newcount, nbuckets) ((newcount) > (nbuckets) / 3 * 2)
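/* I.e. the table is resized once it would become more than about two thirds
   full; e.g. NEEDS_RESIZE (5461, 0x2000) is true, NEEDS_RESIZE (5460, 0x2000)
   is not, so the initial 0x2000-bucket table is grown as soon as it would
   hold more than 5460 entries.  */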
100 
101 struct sec_merge_sec_info;
102 
103 /* Information per merged blob.  This is the unit of merging and is
104    related to (multiple) input sections of similar characteristics
105    (alignment, entity size, strings or blobs).  */
106 struct sec_merge_info
107 {
108   /* Chain of sec_merge_infos.  */
109   struct sec_merge_info *next;
110   /* Chain of sec_merge_sec_infos.  The first one will be the representative
111      section that conceptually collects all merged content.  */
112   struct sec_merge_sec_info *chain;
113   struct sec_merge_sec_info **last;
114   /* A hash table used to hold section content.  */
115   struct sec_merge_hash *htab;
116 };
117 
118 /* Offsets into input mergeable sections are represented by this type.
119    Note that it doesn't support crazy large mergeable sections.  */
120 typedef uint32_t mapofs_type;
121 
122 /* Given a sec_merge_sec_info S, this gives the input offset of the IDX'th
123    recorded entry.  */
124 #define MAP_OFS(S,IDX) (S)->map_ofs[IDX]
125 /* And this gives the output offset (in the merged blob representing
126    this S).  */
127 #define MAP_IDX(S,IDX) (S)->map[IDX].idx
128 /* For quick lookup of the output offset given an input offset we store
129    an array mapping input-offset / OFSDIV to entry index.
130    16 is better than 8; 32 is roughly the same as 16 but uses less memory,
131    so we use that.  */
132 #define OFSDIV 32
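/* E.g. with OFSDIV 32, a lookup of input offset 100 starts from
   ofstolowbound[100 / 32] == ofstolowbound[3] and scans map_ofs[] linearly
   from that index onwards.  */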
133 
134 /* Information per input merge section.  */
135 struct sec_merge_sec_info
136 {
137   /* Chain of sec_merge_sec_infos.  */
138   struct sec_merge_sec_info *next;
139   /* The corresponding section.  */
140   asection *sec;
141   /* Pointer to merge_info pointing to us.  */
142   void **psecinfo;
143   /* The merge entity this is a part of.  */
144   struct sec_merge_info *sinfo;
145   /* The section associated with sinfo (i.e. the representative section).
146      Same as sinfo->chain->sec, but faster to access in the hot function.  */
147   asection *reprsec;
148   /* First string in this section.  */
149   struct sec_merge_hash_entry *first_str;
150   /* Sparse mapping from input offset to entry covering that offset:  */
151   unsigned int noffsetmap;  /* Number of these mappings.  */
152   mapofs_type *map_ofs;     /* Input offset.  */
153   union {
154       struct sec_merge_hash_entry *entry;  /* Covering hash entry ... */
155       bfd_size_type idx;                   /* ... or destination offset.  */
156   } *map;
157   /* Quick access: index into map_ofs[].  ofstolowbound[o / OFSDIV]=I is
158      such that map_ofs[I] is the smallest offset higher than
159      rounddown(o, OFSDIV) (and hence I-1 indexes the last entry whose offset
160      is smaller than or equal to o/OFSDIV*OFSDIV).  */
161   unsigned int *ofstolowbound;
162   int fast_state;
163 };
164 
165 
166 /* Given a merge hash table TABLE and a number of entries to be
167    ADDED, possibly resize the table so that they fit without further
168    resizing.  */
169 
170 static bool
171 sec_merge_maybe_resize (struct sec_merge_hash *table, unsigned added)
172 {
173   struct bfd_hash_table *bfdtab = &table->table;
174   if (NEEDS_RESIZE (bfdtab->count + added, table->nbuckets))
175     {
176       unsigned i;
177       unsigned long newnb = table->nbuckets * 2;
178       struct sec_merge_hash_entry **newv;
179       uint64_t *newl;
180       unsigned long alloc;
181 
182       while (NEEDS_RESIZE (bfdtab->count + added, newnb))
183 	{
184 	  newnb *= 2;
185 	  if (!newnb)
186 	    return false;
187 	}
188 
189       alloc = newnb * sizeof (newl[0]);
190       if (alloc / sizeof (newl[0]) != newnb)
191 	return false;
192       newl = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
193       if (newl == NULL)
194 	return false;
195       memset (newl, 0, alloc);
196       alloc = newnb * sizeof (newv[0]);
197       if (alloc / sizeof (newv[0]) != newnb)
198 	return false;
199       newv = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
200       if (newv == NULL)
201 	return false;
202       memset (newv, 0, alloc);
203 
204       for (i = 0; i < table->nbuckets; i++)
205 	{
206 	  struct sec_merge_hash_entry *v = table->values[i];
207 	  if (v)
208 	    {
209 	      uint32_t thishash = table->key_lens[i] >> 32;
210 	      unsigned idx = thishash & (newnb - 1);
211 	      while (newv[idx])
212 		idx = (idx + 1) & (newnb - 1);
213 	      newl[idx] = table->key_lens[i];
214 	      newv[idx] = v;
215 	    }
216 	}
217 
218       table->key_lens = newl;
219       table->values = newv;
220       table->nbuckets = newnb;
221     }
222   return true;
223 }
224 
225 /* Insert STRING (actually a byte blob of length LEN, with pre-computed
226    HASH and bucket _INDEX) into our hash TABLE.  */
227 
228 static struct sec_merge_hash_entry *
229 sec_merge_hash_insert (struct sec_merge_hash *table,
230 		 const char *string,
231 		 uint64_t hash, unsigned int len, unsigned int _index)
232 {
233   struct bfd_hash_table *bfdtab = &table->table;
234   struct sec_merge_hash_entry *hashp;
235 
236   hashp = (struct sec_merge_hash_entry *)
237       bfd_hash_allocate (bfdtab, len + sizeof (struct sec_merge_hash_entry));
238   if (hashp == NULL)
239     return NULL;
240 
241   memcpy (hashp->str, string, len);
242   hashp->len = len;
243   hashp->alignment = 0;
244   hashp->u.suffix = NULL;
245   hashp->next = NULL;
246   /* We must not need resizing here, otherwise the size estimate was wrong.  */
247   BFD_ASSERT (!NEEDS_RESIZE (bfdtab->count + 1, table->nbuckets));
248   bfdtab->count++;
249   table->key_lens[_index] = (hash << 32) | (uint32_t)len;
250   table->values[_index] = hashp;
251 
252   return hashp;
253 }
254 
255 /* Read four bytes from *STR, interpret them as a 32-bit unsigned
256    little-endian value and return that.  */
257 
258 static inline uint32_t
259 hash_read32 (const char *str)
260 {
261   uint32_t i;
262   /* All reasonable compilers will inline this memcpy and generate optimal
263      code on architectures that support unaligned (4-byte) accesses.  */
264   memcpy(&i, str, 4);
265 #ifdef WORDS_BIGENDIAN
266   i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24);
267 #endif
268   return i;
269 }
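
/* E.g. hash_read32 ("\001\002\003\004") yields 0x04030201 on both little-
   and big-endian hosts.  */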
270 
271 /* Calculate and return a hash value of the bytes at STR[0..LEN-1].
272    All non-zero lengths and all alignments are supported.
273 
274    This is somewhat similar to xxh3 (of xxhash), but restricted to 32 bits.
275    On cc1 strings this has quite similar statistical properties, and we
276    don't need to jump through hoops to get fast 64x64->128 multiplies,
277    or 64-bit arithmetic on 32-bit hosts.  We also don't care about seeds
278    or secrets.  They improve mixing very little.  */
279 
280 static uint32_t
281 hash_blob (const char *str, unsigned int len)
282 {
283   uint32_t ret = 0;
284   uint32_t mul = (1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5) + (1 << 7);
285   mul += (1 << 11) + (1 << 13) + (1 << 17) + (0 << 19) + (1 << 23) + (1 << 29);
286   mul += (1u << 31);
287   if (len >= 8)
288     {
289       uint32_t acc = len * 0x9e3779b1;
290       while (len >= 8)
291 	{
292 	  uint32_t i1 = hash_read32  (str) ^ (0x396cfeb8 + 1*len);
293 	  uint32_t i2 = hash_read32  (str + 4) ^ (0xbe4ba423 + 1*len);
294 	  str += 8;
295 	  len -= 8;
296 	  uint64_t m = (uint64_t)i1 * i2;
297 	  acc += (uint32_t)m ^ (uint32_t)(m >> 32);
298 	}
299       acc = acc ^ (acc >> 7);
300       uint64_t r = (uint64_t)mul * acc;
301       ret = (uint32_t)r ^ (uint32_t)(r >> 32);
302       if (len == 0)
303 	goto end;
304     }
305   if (len >= 4)
306     {
307       uint32_t i1 = hash_read32  (str);
308       uint32_t i2 = hash_read32  (str + len - 4);
309       i1 = ((i1 + len) ^ (i1 >> 7));
310       i2 = i2 ^ (i2 >> 7);
311       uint64_t r = (uint64_t)mul * i1 + i2;
312       ret += r ^ (r >> 32);
313     }
314   else
315     {
316       /* Cleverly read in 1 to 3 bytes without further conditionals.  */
317       unsigned char c1 = str[0];
318       unsigned char c2 = str[len >> 1];
319       unsigned char c3 = str[len - 1];
320       uint32_t i1 = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
321 		     | ((uint32_t) c3) | (len << 8);
322       i1 = i1 ^ (i1 >> 7);
323       uint64_t r = (uint64_t)mul * i1;
324       ret += r ^ (r >> 32);
325     }
326 end:
327   return ret;
328 }
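
/* E.g. hashing the 5-byte blob "hello" above skips the 8-byte loop and
   takes the len >= 4 branch, which reads the two overlapping words "hell"
   and "ello" so that every input byte influences the result.  */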
329 
330 /* Given a hash TABLE, return the hash of STRING (a blob described
331    according to info in TABLE, either a character string, or some fixed
332    size entity) and set *PLEN to the length of this blob.  */
333 
334 static uint32_t
335 hashit (struct sec_merge_hash *table, const char *string, unsigned int *plen)
336 {
337   const unsigned char *s;
338   uint32_t hash;
339   unsigned int len, i;
340 
341   s = (const unsigned char *) string;
342   if (table->strings)
343     {
344       if (table->entsize == 1)
345 	len = strlen (string) + 1;
346       else
347 	{
348 	  len = 0;
349 	  for (;;)
350 	    {
351 	      for (i = 0; i < table->entsize; ++i)
352 		if (s[i] != '\0')
353 		  break;
354 	      if (i == table->entsize)
355 		break;
356 	      s += table->entsize;
357 	      ++len;
358 	    }
359 	  len *= table->entsize;
360 	  len += table->entsize;
361 	}
362     }
363   else
364     len = table->entsize;
365   hash = hash_blob (string, len);
366   *plen = len;
367   return hash;
368 }
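
/* E.g. for a string section with entsize == 2, hashit scans 2-byte units
   until it finds an all-zero unit and includes that terminator in the
   length, so a 2-character UCS-2 string comes back with *PLEN == 6.  */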
369 
370 /* Look up or insert a blob STRING (of length LEN, precomputed HASH and
371    input ALIGNMENT) into TABLE.  Return the found or new hash table entry.  */
372 
373 static struct sec_merge_hash_entry *
374 sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
375 		       unsigned int len, uint64_t hash,
376 		       unsigned int alignment)
377 {
378   struct sec_merge_hash_entry *hashp;
379   unsigned int _index;
380 
381   /*printf ("YYY insert 0x%x into %u buckets (%s)\n",
382 	  (unsigned)hash, (unsigned)table->nbuckets, string);*/
383   uint64_t *key_lens = table->key_lens;
384   struct sec_merge_hash_entry **values = table->values;
385   uint64_t hlen = (hash << 32) | (uint32_t)len;
386   unsigned int nbuckets = table->nbuckets;
387   _index = hash & (nbuckets - 1);
388   while (1)
389     {
390       uint64_t candlen = key_lens[_index];
391       if (candlen == hlen
392 	  && !memcmp (values[_index]->str, string, len))
393 	{
394 	  hashp = values[_index];
395 	  if (hashp->alignment < alignment)
396 	    hashp->alignment = alignment;
397 	  return hashp;
398 	}
399       if (!(candlen & (uint32_t)-1))
400 	break;
401       _index = (_index + 1) & (nbuckets - 1);
402     }
403 
404   hashp = sec_merge_hash_insert (table, string, hash, len, _index);
405   if (hashp == NULL)
406     return NULL;
407   hashp->alignment = alignment;
408 
409   table->size++;
410   BFD_ASSERT (table->size == table->table.count);
411   if (table->first == NULL)
412     table->first = hashp;
413   else
414     table->last->next = hashp;
415   table->last = hashp;
416 
417   return hashp;
418 }
419 
420 /* Create a new hash table.  */
421 
422 static struct sec_merge_hash *
423 sec_merge_init (unsigned int entsize, bool strings)
424 {
425   struct sec_merge_hash *table;
426 
427   table = (struct sec_merge_hash *) bfd_malloc (sizeof (struct sec_merge_hash));
428   if (table == NULL)
429     return NULL;
430 
431   if (! bfd_hash_table_init_n (&table->table, NULL,
432 			       sizeof (struct sec_merge_hash_entry), 0x2000))
433     {
434       free (table);
435       return NULL;
436     }
437 
438   table->size = 0;
439   table->first = NULL;
440   table->last = NULL;
441   table->entsize = entsize;
442   table->strings = strings;
443 
444   table->nbuckets = 0x2000;
445   table->key_lens = objalloc_alloc ((struct objalloc *) table->table.memory,
446 				table->nbuckets * sizeof (table->key_lens[0]));
447   memset (table->key_lens, 0, table->nbuckets * sizeof (table->key_lens[0]));
448   table->values = objalloc_alloc ((struct objalloc *) table->table.memory,
449 				table->nbuckets * sizeof (table->values[0]));
450   memset (table->values, 0, table->nbuckets * sizeof (table->values[0]));
451 
452   return table;
453 }
454 
455 /* Append the tuple of input offset O and the corresponding
456    hash table ENTRY to SECINFO, so that we can later look up the
457    entry just by O.  */
458 
459 static bool
460 append_offsetmap (struct sec_merge_sec_info *secinfo,
461 		  mapofs_type o,
462 		  struct sec_merge_hash_entry *entry)
463 {
464   if ((secinfo->noffsetmap & 2047) == 0)
465     {
466       bfd_size_type amt;
467       amt = (secinfo->noffsetmap + 2048);
468       secinfo->map_ofs = bfd_realloc (secinfo->map_ofs,
469 				      amt * sizeof(secinfo->map_ofs[0]));
470       if (!secinfo->map_ofs)
471 	return false;
472       secinfo->map = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
473       if (!secinfo->map)
474 	return false;
475     }
476   unsigned int i = secinfo->noffsetmap++;
477   MAP_OFS(secinfo, i) = o;
478   secinfo->map[i].entry = entry;
479   return true;
480 }
481 
482 /* Prepare the input-offset-to-entry tables after output offsets are
483    determined.  */
484 
485 static void
486 prepare_offsetmap (struct sec_merge_sec_info *secinfo)
487 {
488   unsigned int noffsetmap = secinfo->noffsetmap;
489   unsigned int i, lbi;
490   bfd_size_type l, sz, amt;
491 
492   secinfo->fast_state = 1;
493 
494   for (i = 0; i < noffsetmap; i++)
495     MAP_IDX(secinfo, i) = secinfo->map[i].entry->u.index;
496 
497   sz = secinfo->sec->rawsize;
498   amt = (sz / OFSDIV + 1) * sizeof (secinfo->ofstolowbound[0]);
499   secinfo->ofstolowbound = bfd_zmalloc (amt);
500   if (!secinfo->ofstolowbound)
501     return;
502   for (l = lbi = 0; l < sz; l += OFSDIV)
503     {
504       /* No need for bounds checking on lbi, as we've added a sentinel that's
505 	 larger than any offset.  */
506       while (MAP_OFS(secinfo, lbi) <= l)
507 	lbi++;
508       //BFD_ASSERT ((l / OFSDIV) <= (i / OFSDIV));
509       secinfo->ofstolowbound[l / OFSDIV] = lbi;
510     }
511   secinfo->fast_state = 2;
512 }
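
/* Worked example for the above: with rawsize 64 and input offsets
   map_ofs[] == {0, 6, 20, 40} (plus the sentinel 64), the loop sets
   ofstolowbound[0] = 1 and ofstolowbound[1] = 3.  A later lookup of
   offset 25 then starts its linear scan at index 1 and settles on index 2,
   the entry starting at input offset 20.  */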
513 
514 static bool
515 sec_merge_emit (bfd *abfd, struct sec_merge_sec_info *secinfo,
516 		unsigned char *contents)
517 {
518   struct sec_merge_hash_entry *entry = secinfo->first_str;
519   asection *sec = secinfo->sec;
520   file_ptr offset = sec->output_offset;
521   char *pad = NULL;
522   bfd_size_type off = 0;
523   unsigned int opb = bfd_octets_per_byte (abfd, sec);
524   int alignment_power = sec->output_section->alignment_power * opb;
525   bfd_size_type pad_len;  /* Octets.  */
526 
527   /* FIXME: If alignment_power is 0 then really we should scan the
528      entry list for the largest required alignment and use that.  */
529   pad_len = alignment_power ? ((bfd_size_type) 1 << alignment_power) : 16;
530 
531   pad = (char *) bfd_zmalloc (pad_len);
532   if (pad == NULL)
533     return false;
534 
535   for (; entry != NULL; entry = entry->next)
536     {
537       const char *str;
538       bfd_size_type len;
539 
540       if (!entry->len)
541 	continue;
542       BFD_ASSERT (entry->alignment);
543       len = -off & (entry->alignment - 1);
544       if (len != 0)
545 	{
546 	  BFD_ASSERT (len <= pad_len);
547 	  if (contents)
548 	    {
549 	      memcpy (contents + offset, pad, len);
550 	      offset += len;
551 	    }
552 	  else if (bfd_write (pad, len, abfd) != len)
553 	    goto err;
554 	  off += len;
555 	}
556 
557       str = entry->str;
558       len = entry->len;
559 
560       if (contents)
561 	{
562 	  memcpy (contents + offset, str, len);
563 	  offset += len;
564 	}
565       else if (bfd_write (str, len, abfd) != len)
566 	goto err;
567 
568       off += len;
569     }
570   BFD_ASSERT (!entry);
571 
572   /* Trailing alignment needed?  */
573   off = sec->size - off;
574   if (1 && off != 0)
575     {
576       BFD_ASSERT (off <= pad_len);
577       if (contents)
578 	memcpy (contents + offset, pad, off);
579       else if (bfd_write (pad, off, abfd) != off)
580 	goto err;
581     }
582 
583   free (pad);
584   return true;
585 
586  err:
587   free (pad);
588   return false;
589 }
590 
591 /* Register a SEC_MERGE section as a candidate for merging.
592    This function is called for all non-dynamic SEC_MERGE input sections.  */
593 
594 bool
595 _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
596 			void **psecinfo)
597 {
598   struct sec_merge_info *sinfo;
599   struct sec_merge_sec_info *secinfo;
600   asection *repr;
601   unsigned int alignment_power;  /* Octets.  */
602   unsigned int align;            /* Octets.  */
603   unsigned int opb = bfd_octets_per_byte (abfd, sec);
604 
605   if ((abfd->flags & DYNAMIC) != 0
606       || (sec->flags & SEC_MERGE) == 0)
607     abort ();
608 
609   if (sec->size == 0
610       || (sec->flags & SEC_EXCLUDE) != 0
611       || sec->entsize == 0)
612     return true;
613 
614   if (sec->size % sec->entsize != 0)
615     return true;
616 
617   if ((sec->flags & SEC_RELOC) != 0)
618     {
619       /* We aren't prepared to handle relocations in merged sections.  */
620       return true;
621     }
622 
623   if (sec->size > (mapofs_type)-1)
624     {
625       /* Input offsets must be representable by mapofs_type.  */
626       return true;
627     }
628 
629 #ifndef CHAR_BIT
630 #define CHAR_BIT 8
631 #endif
632   alignment_power = sec->alignment_power * opb;
633   if (alignment_power >= sizeof (align) * CHAR_BIT)
634     return true;
635 
636   align = 1u << alignment_power;
637   if ((sec->entsize < align
638        && ((sec->entsize & (sec->entsize - 1))
639 	   || !(sec->flags & SEC_STRINGS)))
640       || (sec->entsize > align
641 	  && (sec->entsize & (align - 1))))
642     {
643       /* Sanity check.  If string character size is smaller than
644 	 alignment, then we require character size to be a power
645 	 of 2, otherwise character size must be integer multiple
646 	 of alignment.  For non-string constants, alignment must
647 	 be smaller than or equal to entity size and entity size
648 	 must be integer multiple of alignment.  */
649       return true;
650     }
651 
652   /* Initialize the descriptor for this input section.  */
653 
654   *psecinfo = secinfo = bfd_zalloc (abfd, sizeof (*secinfo));
655   if (*psecinfo == NULL)
656     goto error_return;
657 
658   secinfo->sec = sec;
659   secinfo->psecinfo = psecinfo;
660 
661   /* Search for a matching output merged section.  */
662   for (sinfo = (struct sec_merge_info *) *psinfo; sinfo; sinfo = sinfo->next)
663     if (sinfo->chain
664 	&& (repr = sinfo->chain->sec)
665 	&& ! ((repr->flags ^ sec->flags) & (SEC_MERGE | SEC_STRINGS))
666 	&& repr->entsize == sec->entsize
667 	&& repr->alignment_power == sec->alignment_power
668 	&& repr->output_section == sec->output_section)
669       break;
670 
671   if (sinfo == NULL)
672     {
673       /* Initialize the information we need to keep track of.  */
674       sinfo = (struct sec_merge_info *)
675 	  bfd_alloc (abfd, sizeof (struct sec_merge_info));
676       if (sinfo == NULL)
677 	goto error_return;
678       sinfo->next = (struct sec_merge_info *) *psinfo;
679       sinfo->chain = NULL;
680       sinfo->last = &sinfo->chain;
681       *psinfo = sinfo;
682       sinfo->htab = sec_merge_init (sec->entsize, (sec->flags & SEC_STRINGS));
683       if (sinfo->htab == NULL)
684 	goto error_return;
685     }
686 
687   *sinfo->last = secinfo;
688   sinfo->last = &secinfo->next;
689 
690   secinfo->sinfo = sinfo;
691   secinfo->reprsec = sinfo->chain->sec;
692 
693   return true;
694 
695  error_return:
696   *psecinfo = NULL;
697   return false;
698 }
699 
700 /* Record one whole input section (described by SECINFO) into the hash table
701    of SINFO.  */
702 
703 static bool
704 record_section (struct sec_merge_info *sinfo,
705 		struct sec_merge_sec_info *secinfo)
706 {
707   asection *sec = secinfo->sec;
708   struct sec_merge_hash_entry *entry;
709   unsigned char *p, *end;
710   bfd_vma mask, eltalign;
711   unsigned int align;
712   bfd_size_type amt;
713   bfd_byte *contents;
714   void *tmpptr;
715 
716   amt = sec->size;
717   if (sec->flags & SEC_STRINGS)
718     /* Some versions of gcc may emit a string without a zero terminator.
719        See http://gcc.gnu.org/ml/gcc-patches/2006-06/msg01004.html
720        Allocate space for an extra zero.  */
721     amt += sec->entsize;
722   contents = bfd_malloc (amt);
723   if (!contents)
724     goto error_return;
725 
726   /* Slurp in all section contents (possibly decompressing it).  */
727   sec->rawsize = sec->size;
728   if (sec->flags & SEC_STRINGS)
729     memset (contents + sec->size, 0, sec->entsize);
730   if (! bfd_get_full_section_contents (sec->owner, sec, &contents))
731     goto error_return;
732 
733   /* Now populate the hash table and offset mapping.  */
734 
735   /* Presize the hash table for what we're going to add.  We overestimate
736      quite a bit, but if it turns out to be too much then other sections
737      merged into this area will make use of that as well.  */
738   if (!sec_merge_maybe_resize (sinfo->htab, 1 + sec->size / 2))
739     {
740       bfd_set_error (bfd_error_no_memory);
741       goto error_return;
742     }
743 
744   /* Walk through the contents, calculate hashes and length of all
745      blobs (strings or fixed-size entries) we find and fill the
746      hash and offset tables.  */
747   align = sec->alignment_power;
748   mask = ((bfd_vma) 1 << align) - 1;
749   end = contents + sec->size;
750   for (p = contents; p < end;)
751     {
752       unsigned len;
753       uint32_t hash = hashit (sinfo->htab, (char*) p, &len);
754       unsigned int ofs = p - contents;
755       eltalign = ofs;
756       eltalign = ((eltalign ^ (eltalign - 1)) + 1) >> 1;
757       if (!eltalign || eltalign > mask)
758 	eltalign = mask + 1;
759       entry = sec_merge_hash_lookup (sinfo->htab, (char *) p, len, hash,
760 				     (unsigned) eltalign);
761       if (! entry)
762 	goto error_return;
763       if (! append_offsetmap (secinfo, ofs, entry))
764 	goto error_return;
765       p += len;
766     }
767 
768   /* Add a sentinel element that's conceptually behind all others.  */
769   append_offsetmap (secinfo, sec->size, NULL);
770   /* But don't count it.  */
771   secinfo->noffsetmap--;
772 
773   free (contents);
774   contents = NULL;
775 
776   /* We allocate the ofsmap arrays in blocks of 2048 elements.
777      In case we have very many small input files/sections,
778      this might waste large amounts of memory, so reallocate these
779      arrays here to their true size.  */
780   amt = secinfo->noffsetmap + 1;
781   tmpptr = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
782   if (tmpptr)
783     secinfo->map = tmpptr;
784   tmpptr = bfd_realloc (secinfo->map_ofs, amt * sizeof(secinfo->map_ofs[0]));
785   if (tmpptr)
786     secinfo->map_ofs = tmpptr;
787 
788   /*printf ("ZZZ %s:%s %u entries\n", sec->owner->filename, sec->name,
789 	  (unsigned)secinfo->noffsetmap);*/
790 
791   return true;
792 
793  error_return:
794   free (contents);
795   contents = NULL;
796   for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
797     *secinfo->psecinfo = NULL;
798   return false;
799 }
800 
801 /* qsort comparison function.  Won't ever return zero as all entries
802    differ, so there is no issue with qsort stability here.  */
803 
804 static int
805 strrevcmp (const void *a, const void *b)
806 {
807   struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
808   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
809   unsigned int lenA = A->len;
810   unsigned int lenB = B->len;
811   const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
812   const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
813   int l = lenA < lenB ? lenA : lenB;
814 
815   while (l)
816     {
817       if (*s != *t)
818 	return (int) *s - (int) *t;
819       s--;
820       t--;
821       l--;
822     }
823   return lenA - lenB;
824 }
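
/* E.g. under strrevcmp the strings "bar" and "foobar", compared from their
   last characters backwards, sort next to each other, so the single
   backwards pass in merge_strings can spot that one is the tail of the
   other.  */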
825 
826 /* Like strrevcmp, but for the case where all strings have the same
827    alignment > entsize.  */
828 
829 static int
830 strrevcmp_align (const void *a, const void *b)
831 {
832   struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
833   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
834   unsigned int lenA = A->len;
835   unsigned int lenB = B->len;
836   const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
837   const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
838   int l = lenA < lenB ? lenA : lenB;
839   int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
840 
841   if (tail_align != 0)
842     return tail_align;
843 
844   while (l)
845     {
846       if (*s != *t)
847 	return (int) *s - (int) *t;
848       s--;
849       t--;
850       l--;
851     }
852   return lenA - lenB;
853 }
854 
855 static inline int
856 is_suffix (const struct sec_merge_hash_entry *A,
857 	   const struct sec_merge_hash_entry *B)
858 {
859   if (A->len <= B->len)
860     /* B cannot be a suffix of A unless A is equal to B, and the hash
861        table guarantees that A and B are not equal.  */
862     return 0;
863 
864   return memcmp (A->str + (A->len - B->len),
865 		 B->str, B->len) == 0;
866 }
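
/* E.g. is_suffix returns true for A == "foobar" and B == "bar" (their
   lengths include the zero terminators here), letting merge_strings emit
   "bar" as the tail of "foobar" instead of as a separate copy.  */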
867 
868 /* This is a helper function for _bfd_merge_sections.  It attempts to
869    merge strings matching suffixes of longer strings.  */
870 static struct sec_merge_sec_info *
871 merge_strings (struct sec_merge_info *sinfo)
872 {
873   struct sec_merge_hash_entry **array, **a, *e;
874   struct sec_merge_sec_info *secinfo;
875   bfd_size_type size, amt;
876   unsigned int alignment = 0;
877 
878   /* Now sort the strings */
879   amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
880   array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
881   if (array == NULL)
882     return NULL;
883 
884   for (e = sinfo->htab->first, a = array; e; e = e->next)
885     if (e->alignment)
886       {
887 	*a++ = e;
888 	/* Adjust the length to not include the zero terminator.  */
889 	e->len -= sinfo->htab->entsize;
890 	if (alignment != e->alignment)
891 	  {
892 	    if (alignment == 0)
893 	      alignment = e->alignment;
894 	    else
895 	      alignment = (unsigned) -1;
896 	  }
897       }
898 
899   sinfo->htab->size = a - array;
900   if (sinfo->htab->size != 0)
901     {
902       qsort (array, (size_t) sinfo->htab->size,
903 	     sizeof (struct sec_merge_hash_entry *),
904 	     (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize
905 	      ? strrevcmp_align : strrevcmp));
906 
907       /* Loop over the sorted array and merge suffixes */
908       e = *--a;
909       e->len += sinfo->htab->entsize;
910       while (--a >= array)
911 	{
912 	  struct sec_merge_hash_entry *cmp = *a;
913 
914 	  cmp->len += sinfo->htab->entsize;
915 	  if (e->alignment >= cmp->alignment
916 	      && !((e->len - cmp->len) & (cmp->alignment - 1))
917 	      && is_suffix (e, cmp))
918 	    {
919 	      cmp->u.suffix = e;
920 	      cmp->alignment = 0;
921 	    }
922 	  else
923 	    e = cmp;
924 	}
925     }
926 
927   free (array);
928 
929   /* Now assign positions to the strings we want to keep.  */
930   size = 0;
931   secinfo = sinfo->chain;
932   for (e = sinfo->htab->first; e; e = e->next)
933     {
934       if (e->alignment)
935 	{
936 	  size = (size + e->alignment - 1) & ~((bfd_vma) e->alignment - 1);
937 	  e->u.index = size;
938 	  size += e->len;
939 	}
940     }
941   secinfo->sec->size = size;
942 
943   /* And now adjust the rest, removing them from the chain (but not hashtable)
944      at the same time.  */
945   for (a = &sinfo->htab->first, e = *a; e; e = e->next)
946     if (e->alignment)
947       a = &e->next;
948     else
949       {
950 	*a = e->next;
951 	if (e->len)
952 	  {
953 	    e->alignment = e->u.suffix->alignment;
954 	    e->u.index = e->u.suffix->u.index + (e->u.suffix->len - e->len);
955 	  }
956       }
957 
958   BFD_ASSERT (!secinfo->first_str);
959   secinfo->first_str = sinfo->htab->first;
960 
961   return secinfo;
962 }
963 
964 /* This function is called once after all SEC_MERGE sections are registered
965    with _bfd_add_merge_section.  */
966 
967 bool
968 _bfd_merge_sections (bfd *abfd,
969 		     struct bfd_link_info *info ATTRIBUTE_UNUSED,
970 		     void *xsinfo,
971 		     void (*remove_hook) (bfd *, asection *))
972 {
973   struct sec_merge_info *sinfo;
974 
975   for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
976     {
977       struct sec_merge_sec_info *secinfo;
978       bfd_size_type align;  /* Bytes.  */
979 
980       if (! sinfo->chain)
981 	continue;
982 
983       /* Record the sections into the hash table.  */
984       align = 1;
985       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
986 	if (secinfo->sec->flags & SEC_EXCLUDE)
987 	  {
988 	    *secinfo->psecinfo = NULL;
989 	    if (remove_hook)
990 	      (*remove_hook) (abfd, secinfo->sec);
991 	  }
992 	else
993 	  {
994 	    if (!record_section (sinfo, secinfo))
995 	      return false;
996 	    if (align)
997 	      {
998 		unsigned int opb = bfd_octets_per_byte (abfd, secinfo->sec);
999 
1000 		align = (bfd_size_type) 1 << secinfo->sec->alignment_power;
1001 		if (((secinfo->sec->size / opb) & (align - 1)) != 0)
1002 		  align = 0;
1003 	      }
1004 	  }
1005 
1006       if (sinfo->htab->first == NULL)
1007 	continue;
1008 
1009       if (sinfo->htab->strings)
1010 	{
1011 	  secinfo = merge_strings (sinfo);
1012 	  if (!secinfo)
1013 	    return false;
1014 	}
1015       else
1016 	{
1017 	  struct sec_merge_hash_entry *e = sinfo->htab->first;
1018 	  bfd_size_type size = 0;  /* Octets.  */
1019 
1020 	  /* Things are much simpler for non-strings.
1021 	     Just assign them slots in the section.  */
1022 	  secinfo = sinfo->chain;
1023 	  BFD_ASSERT (!secinfo->first_str);
1024 	  secinfo->first_str = e;
1025 	  for (e = sinfo->htab->first; e; e = e->next)
1026 	    {
1027 	      if (e->alignment)
1028 		{
1029 		  size = (size + e->alignment - 1)
1030 			 & ~((bfd_vma) e->alignment - 1);
1031 		  e->u.index = size;
1032 		  size += e->len;
1033 		}
1034 	    }
1035 	  secinfo->sec->size = size;
1036 	}
1037 
1038       /* If the input sections were padded according to their alignments,
1039 	 then pad the output too.  */
1040       if (align)
1041 	secinfo->sec->size = (secinfo->sec->size + align - 1) & -align;
1042 
1043       /* Finally remove all input sections which have not made it into
1044 	 the hash table at all.  */
1045       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1046 	if (secinfo->first_str == NULL)
1047 	  secinfo->sec->flags |= SEC_EXCLUDE | SEC_KEEP;
1048     }
1049 
1050   return true;
1051 }
1052 
1053 /* Write out the merged section.  */
1054 
1055 bool
1056 _bfd_write_merged_section (bfd *output_bfd, asection *sec, void *psecinfo)
1057 {
1058   struct sec_merge_sec_info *secinfo;
1059   file_ptr pos;
1060   unsigned char *contents;
1061   Elf_Internal_Shdr *hdr;
1062 
1063   secinfo = (struct sec_merge_sec_info *) psecinfo;
1064 
1065   if (!secinfo)
1066     return false;
1067 
1068   if (secinfo->first_str == NULL)
1069     return true;
1070 
1071   /* FIXME: octets_per_byte.  */
1072   hdr = &elf_section_data (sec->output_section)->this_hdr;
1073   if (hdr->sh_offset == (file_ptr) -1)
1074     {
1075       /* We must compress this section.  Write output to the
1076 	 buffer.  */
1077       contents = hdr->contents;
1078       if (contents == NULL)
1079 	abort ();
1080     }
1081   else
1082     {
1083       contents = NULL;
1084       pos = sec->output_section->filepos + sec->output_offset;
1085       if (bfd_seek (output_bfd, pos, SEEK_SET) != 0)
1086 	return false;
1087     }
1088 
1089   BFD_ASSERT (sec == secinfo->sec);
1090   BFD_ASSERT (secinfo == secinfo->sinfo->chain);
1091   if (! sec_merge_emit (output_bfd, secinfo, contents))
1092     return false;
1093 
1094   return true;
1095 }
1096 
1097 /* Adjust an address in the SEC_MERGE section.  Given OFFSET within
1098    *PSEC, this returns the new offset in the adjusted SEC_MERGE
1099    section and writes the new section back into *PSEC.  */
1100 
1101 bfd_vma
1102 _bfd_merged_section_offset (bfd *output_bfd ATTRIBUTE_UNUSED, asection **psec,
1103 			    void *psecinfo, bfd_vma offset)
1104 {
1105   struct sec_merge_sec_info *secinfo;
1106   asection *sec = *psec;
1107 
1108   secinfo = (struct sec_merge_sec_info *) psecinfo;
1109 
1110   if (!secinfo)
1111     return offset;
1112 
1113   if (offset >= sec->rawsize)
1114     {
1115       if (offset > sec->rawsize)
1116 	_bfd_error_handler
1117 	  /* xgettext:c-format */
1118 	  (_("%pB: access beyond end of merged section (%" PRId64 ")"),
1119 	   sec->owner, (int64_t) offset);
1120       return secinfo->first_str ? sec->size : 0;
1121     }
1122 
1123   if (secinfo->fast_state != 2)
1124     {
1125       if (!secinfo->fast_state)
1126 	prepare_offsetmap (secinfo);
1127       if (secinfo->fast_state != 2)
1128 	return offset;
1129     }
1130 
1131   long lb = secinfo->ofstolowbound[offset / OFSDIV];
1132   *psec = secinfo->reprsec;
1133 
1134   /* No need for bounds checking on lb, as we've added a sentinel that's
1135      larger than any offset.  */
1136   while (MAP_OFS(secinfo, lb) <= offset)
1137     lb++;
1138   lb--;
1139 
1140   /*printf ("YYY (%s:%s):%u -> (%s):%u\n",
1141 	  sec->owner->filename, sec->name, (unsigned)offset,
1142 	  (*psec)->name, (unsigned)lb);*/
1143   return MAP_IDX(secinfo, lb) + offset - MAP_OFS(secinfo, lb);
1144 }
1145 
1146 /* Tidy up when done.  */
1147 
1148 void
1149 _bfd_merge_sections_free (void *xsinfo)
1150 {
1151   struct sec_merge_info *sinfo;
1152 
1153   for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
1154     {
1155       struct sec_merge_sec_info *secinfo;
1156       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1157 	{
1158 	  free (secinfo->ofstolowbound);
1159 	  free (secinfo->map);
1160 	  free (secinfo->map_ofs);
1161 	}
1162       bfd_hash_table_free (&sinfo->htab->table);
1163       free (sinfo->htab);
1164     }
1165 }
1166