xref: /netbsd-src/external/gpl3/gdb/dist/libctf/ctf-string.c (revision 12989c96ee862c63521a9ead8c44629b7a2ba9b1)
18dffb485Schristos /* CTF string table management.
2*12989c96Schristos    Copyright (C) 2019-2024 Free Software Foundation, Inc.
38dffb485Schristos 
48dffb485Schristos    This file is part of libctf.
58dffb485Schristos 
68dffb485Schristos    libctf is free software; you can redistribute it and/or modify it under
78dffb485Schristos    the terms of the GNU General Public License as published by the Free
88dffb485Schristos    Software Foundation; either version 3, or (at your option) any later
98dffb485Schristos    version.
108dffb485Schristos 
118dffb485Schristos    This program is distributed in the hope that it will be useful, but
128dffb485Schristos    WITHOUT ANY WARRANTY; without even the implied warranty of
138dffb485Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
148dffb485Schristos    See the GNU General Public License for more details.
158dffb485Schristos 
168dffb485Schristos    You should have received a copy of the GNU General Public License
178dffb485Schristos    along with this program; see the file COPYING.  If not see
188dffb485Schristos    <http://www.gnu.org/licenses/>.  */
198dffb485Schristos 
20*12989c96Schristos #include <assert.h>
218dffb485Schristos #include <ctf-impl.h>
228dffb485Schristos #include <string.h>
238dffb485Schristos 
24*12989c96Schristos static ctf_str_atom_t *
25*12989c96Schristos ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
26*12989c96Schristos 			  int flags, uint32_t *ref);
27*12989c96Schristos 
28*12989c96Schristos /* Convert an encoded CTF string name into a pointer to a C string, possibly
29*12989c96Schristos   using an explicit internal provisional strtab rather than the fp-based
30*12989c96Schristos   one.  */
318dffb485Schristos const char *
324b169a6bSchristos ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
338dffb485Schristos {
348dffb485Schristos   ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
358dffb485Schristos 
368dffb485Schristos   if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
378dffb485Schristos     ctsp = strtab;
388dffb485Schristos 
39*12989c96Schristos   /* If this name is in the external strtab, and there is a synthetic
40*12989c96Schristos      strtab, use it in preference.  (This is used to add the set of strings
41*12989c96Schristos      -- symbol names, etc -- the linker knows about before the strtab is
42*12989c96Schristos      written out.)  */
438dffb485Schristos 
448dffb485Schristos   if (CTF_NAME_STID (name) == CTF_STRTAB_1
458dffb485Schristos       && fp->ctf_syn_ext_strtab != NULL)
468dffb485Schristos     return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
478dffb485Schristos 			       (void *) (uintptr_t) name);
488dffb485Schristos 
49*12989c96Schristos   /* If the name is in the internal strtab, and the name offset is beyond
50*12989c96Schristos      the end of the ctsp->cts_len but below the ctf_str_prov_offset, this is
51*12989c96Schristos      a provisional string added by ctf_str_add*() but not yet built into a
52*12989c96Schristos      real strtab: get the value out of the ctf_prov_strtab.  */
538dffb485Schristos 
548dffb485Schristos   if (CTF_NAME_STID (name) == CTF_STRTAB_0
558dffb485Schristos       && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
568dffb485Schristos       return ctf_dynhash_lookup (fp->ctf_prov_strtab,
578dffb485Schristos 				 (void *) (uintptr_t) name);
588dffb485Schristos 
598dffb485Schristos   if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len)
608dffb485Schristos     return (ctsp->cts_strs + CTF_NAME_OFFSET (name));
618dffb485Schristos 
628dffb485Schristos   /* String table not loaded or corrupt offset.  */
638dffb485Schristos   return NULL;
648dffb485Schristos }
658dffb485Schristos 
668dffb485Schristos /* Convert an encoded CTF string name into a pointer to a C string by looking
678dffb485Schristos   up the appropriate string table buffer and then adding the offset.  */
688dffb485Schristos const char *
694b169a6bSchristos ctf_strraw (ctf_dict_t *fp, uint32_t name)
708dffb485Schristos {
718dffb485Schristos   return ctf_strraw_explicit (fp, name, NULL);
728dffb485Schristos }
738dffb485Schristos 
748dffb485Schristos /* Return a guaranteed-non-NULL pointer to the string with the given CTF
758dffb485Schristos    name.  */
768dffb485Schristos const char *
774b169a6bSchristos ctf_strptr (ctf_dict_t *fp, uint32_t name)
788dffb485Schristos {
798dffb485Schristos   const char *s = ctf_strraw (fp, name);
808dffb485Schristos   return (s != NULL ? s : "(?)");
818dffb485Schristos }
828dffb485Schristos 
83*12989c96Schristos /* As above, but return info on what is wrong in more detail.
84*12989c96Schristos    (Used for type lookups.) */
85*12989c96Schristos 
86*12989c96Schristos const char *
87*12989c96Schristos ctf_strptr_validate (ctf_dict_t *fp, uint32_t name)
88*12989c96Schristos {
89*12989c96Schristos   const char *str = ctf_strraw (fp, name);
90*12989c96Schristos 
91*12989c96Schristos   if (str == NULL)
92*12989c96Schristos     {
93*12989c96Schristos       if (CTF_NAME_STID (name) == CTF_STRTAB_1
94*12989c96Schristos 	  && fp->ctf_syn_ext_strtab == NULL
95*12989c96Schristos 	  && fp->ctf_str[CTF_NAME_STID (name)].cts_strs == NULL)
96*12989c96Schristos 	{
97*12989c96Schristos 	  ctf_set_errno (fp, ECTF_STRTAB);
98*12989c96Schristos 	  return NULL;
99*12989c96Schristos 	}
100*12989c96Schristos 
101*12989c96Schristos       ctf_set_errno (fp, ECTF_BADNAME);
102*12989c96Schristos       return NULL;
103*12989c96Schristos     }
104*12989c96Schristos   return str;
105*12989c96Schristos }
106*12989c96Schristos 
1078dffb485Schristos /* Remove all refs to a given atom.  */
1088dffb485Schristos static void
1098dffb485Schristos ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
1108dffb485Schristos {
1118dffb485Schristos   ctf_str_atom_ref_t *ref, *next;
1128dffb485Schristos 
1138dffb485Schristos   for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next)
1148dffb485Schristos     {
1158dffb485Schristos       next = ctf_list_next (ref);
1168dffb485Schristos       ctf_list_delete (&atom->csa_refs, ref);
117*12989c96Schristos       if (atom->csa_flags & CTF_STR_ATOM_MOVABLE)
118*12989c96Schristos 	{
119*12989c96Schristos 	  ctf_str_atom_ref_movable_t *movref;
120*12989c96Schristos 	  movref = (ctf_str_atom_ref_movable_t *) ref;
121*12989c96Schristos 	  ctf_dynhash_remove (movref->caf_movable_refs, ref);
122*12989c96Schristos 	}
123*12989c96Schristos 
1248dffb485Schristos       free (ref);
1258dffb485Schristos     }
1268dffb485Schristos }
1278dffb485Schristos 
128*12989c96Schristos /* Free an atom.  */
1298dffb485Schristos static void
1308dffb485Schristos ctf_str_free_atom (void *a)
1318dffb485Schristos {
1328dffb485Schristos   ctf_str_atom_t *atom = a;
1338dffb485Schristos 
1348dffb485Schristos   ctf_str_purge_atom_refs (atom);
135*12989c96Schristos 
136*12989c96Schristos   if (atom->csa_flags & CTF_STR_ATOM_FREEABLE)
137*12989c96Schristos     free (atom->csa_str);
138*12989c96Schristos 
1398dffb485Schristos   free (atom);
1408dffb485Schristos }
1418dffb485Schristos 
1428dffb485Schristos /* Create the atoms table.  There is always at least one atom in it, the null
143*12989c96Schristos    string: but also pull in atoms from the internal strtab.  (We rely on
144*12989c96Schristos    calls to ctf_str_add_external to populate external strtab entries, since
145*12989c96Schristos    these are often not quite the same as what appears in any external
146*12989c96Schristos    strtab, and the external strtab is often huge and best not aggressively
147*12989c96Schristos    pulled in.)  */
1488dffb485Schristos int
1494b169a6bSchristos ctf_str_create_atoms (ctf_dict_t *fp)
1508dffb485Schristos {
151*12989c96Schristos   size_t i;
152*12989c96Schristos 
1538dffb485Schristos   fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
154*12989c96Schristos 					  NULL, ctf_str_free_atom);
1554b169a6bSchristos   if (!fp->ctf_str_atoms)
1568dffb485Schristos     return -ENOMEM;
1578dffb485Schristos 
1588dffb485Schristos   if (!fp->ctf_prov_strtab)
1598dffb485Schristos     fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer,
1608dffb485Schristos 					      ctf_hash_eq_integer,
1618dffb485Schristos 					      NULL, NULL);
1628dffb485Schristos   if (!fp->ctf_prov_strtab)
1638dffb485Schristos     goto oom_prov_strtab;
1648dffb485Schristos 
165*12989c96Schristos   fp->ctf_str_movable_refs = ctf_dynhash_create (ctf_hash_integer,
166*12989c96Schristos 						 ctf_hash_eq_integer,
167*12989c96Schristos 						 NULL, NULL);
168*12989c96Schristos   if (!fp->ctf_str_movable_refs)
169*12989c96Schristos     goto oom_movable_refs;
1704b169a6bSchristos 
1718dffb485Schristos   errno = 0;
1728dffb485Schristos   ctf_str_add (fp, "");
1738dffb485Schristos   if (errno == ENOMEM)
1748dffb485Schristos     goto oom_str_add;
1758dffb485Schristos 
176*12989c96Schristos   /* Pull in all the strings in the strtab as new atoms.  The provisional
177*12989c96Schristos      strtab must be empty at this point, so there is no need to populate
178*12989c96Schristos      atoms from it as well.  Types in this subset are frozen and readonly,
179*12989c96Schristos      so the refs list and movable refs list need not be populated.  */
180*12989c96Schristos 
181*12989c96Schristos   for (i = 0; i < fp->ctf_str[CTF_STRTAB_0].cts_len;
182*12989c96Schristos        i += strlen (&fp->ctf_str[CTF_STRTAB_0].cts_strs[i]) + 1)
183*12989c96Schristos     {
184*12989c96Schristos       ctf_str_atom_t *atom;
185*12989c96Schristos 
186*12989c96Schristos       if (fp->ctf_str[CTF_STRTAB_0].cts_strs[i] == 0)
187*12989c96Schristos 	continue;
188*12989c96Schristos 
189*12989c96Schristos       atom = ctf_str_add_ref_internal (fp, &fp->ctf_str[CTF_STRTAB_0].cts_strs[i],
190*12989c96Schristos 				       0, 0);
191*12989c96Schristos 
192*12989c96Schristos       if (!atom)
193*12989c96Schristos 	goto oom_str_add;
194*12989c96Schristos 
195*12989c96Schristos       atom->csa_offset = i;
196*12989c96Schristos     }
197*12989c96Schristos 
1988dffb485Schristos   return 0;
1998dffb485Schristos 
2008dffb485Schristos  oom_str_add:
201*12989c96Schristos   ctf_dynhash_destroy (fp->ctf_str_movable_refs);
202*12989c96Schristos   fp->ctf_str_movable_refs = NULL;
203*12989c96Schristos  oom_movable_refs:
2048dffb485Schristos   ctf_dynhash_destroy (fp->ctf_prov_strtab);
2058dffb485Schristos   fp->ctf_prov_strtab = NULL;
2068dffb485Schristos  oom_prov_strtab:
2078dffb485Schristos   ctf_dynhash_destroy (fp->ctf_str_atoms);
2088dffb485Schristos   fp->ctf_str_atoms = NULL;
2098dffb485Schristos   return -ENOMEM;
2108dffb485Schristos }
2118dffb485Schristos 
212*12989c96Schristos /* Destroy the atoms table and associated refs.  */
2138dffb485Schristos void
2144b169a6bSchristos ctf_str_free_atoms (ctf_dict_t *fp)
2158dffb485Schristos {
2168dffb485Schristos   ctf_dynhash_destroy (fp->ctf_prov_strtab);
2178dffb485Schristos   ctf_dynhash_destroy (fp->ctf_str_atoms);
218*12989c96Schristos   ctf_dynhash_destroy (fp->ctf_str_movable_refs);
219*12989c96Schristos   if (fp->ctf_dynstrtab)
220*12989c96Schristos     {
221*12989c96Schristos       free (fp->ctf_dynstrtab->cts_strs);
222*12989c96Schristos       free (fp->ctf_dynstrtab);
223*12989c96Schristos     }
2248dffb485Schristos }
2258dffb485Schristos 
/* Flag bits for the FLAGS argument of ctf_str_add_ref_internal.  */

/* Record the passed-in ref against the atom (or, for an atom that is not
   provisional, write the atom's offset through the ref right away).  */
#define CTF_STR_ADD_REF (1 << 0)
/* Put a newly-created atom into the provisional strtab.  */
#define CTF_STR_PROVISIONAL (1 << 1)
/* Allocate the ref as a movable ref, registered in ctf_str_movable_refs.  */
#define CTF_STR_MOVABLE (1 << 2)
2294b169a6bSchristos 
230*12989c96Schristos /* Allocate a ref and bind it into a ref list.  */
231*12989c96Schristos 
232*12989c96Schristos static ctf_str_atom_ref_t *
233*12989c96Schristos aref_create (ctf_dict_t *fp, ctf_str_atom_t *atom, uint32_t *ref, int flags)
234*12989c96Schristos {
235*12989c96Schristos   ctf_str_atom_ref_t *aref;
236*12989c96Schristos   size_t s = sizeof (struct ctf_str_atom_ref);
237*12989c96Schristos 
238*12989c96Schristos   if (flags & CTF_STR_MOVABLE)
239*12989c96Schristos     s = sizeof (struct ctf_str_atom_ref_movable);
240*12989c96Schristos 
241*12989c96Schristos   aref = malloc (s);
242*12989c96Schristos 
243*12989c96Schristos   if (!aref)
244*12989c96Schristos     return NULL;
245*12989c96Schristos 
246*12989c96Schristos   aref->caf_ref = ref;
247*12989c96Schristos 
248*12989c96Schristos   /* Movable refs get a backpointer to them in ctf_str_movable_refs, and a
249*12989c96Schristos      pointer to ctf_str_movable_refs itself in the ref, for use when freeing
250*12989c96Schristos      refs: they can be moved later in batches via a call to
251*12989c96Schristos      ctf_str_move_refs.  */
252*12989c96Schristos 
253*12989c96Schristos   if (flags & CTF_STR_MOVABLE)
254*12989c96Schristos     {
255*12989c96Schristos       ctf_str_atom_ref_movable_t *movref = (ctf_str_atom_ref_movable_t *) aref;
256*12989c96Schristos 
257*12989c96Schristos       movref->caf_movable_refs = fp->ctf_str_movable_refs;
258*12989c96Schristos 
259*12989c96Schristos       if (ctf_dynhash_insert (fp->ctf_str_movable_refs, ref, aref) < 0)
260*12989c96Schristos 	{
261*12989c96Schristos 	  free (aref);
262*12989c96Schristos 	  return NULL;
263*12989c96Schristos 	}
264*12989c96Schristos     }
265*12989c96Schristos 
266*12989c96Schristos   ctf_list_append (&atom->csa_refs, aref);
267*12989c96Schristos 
268*12989c96Schristos   return aref;
269*12989c96Schristos }
270*12989c96Schristos 
271*12989c96Schristos /* Add a string to the atoms table, copying the passed-in string if
272*12989c96Schristos    necessary.  Return the atom added. Return NULL only when out of memory
273*12989c96Schristos    (and do not touch the passed-in string in that case).
274*12989c96Schristos 
275*12989c96Schristos    Possibly add a provisional entry for this string to the provisional
276*12989c96Schristos    strtab.  If the string is in the provisional strtab, update its ref list
277*12989c96Schristos    with the passed-in ref, causing the ref to be updated when the strtab is
278*12989c96Schristos    written out.  */
279*12989c96Schristos 
2808dffb485Schristos static ctf_str_atom_t *
2814b169a6bSchristos ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
2824b169a6bSchristos 			  int flags, uint32_t *ref)
2838dffb485Schristos {
2848dffb485Schristos   char *newstr = NULL;
2858dffb485Schristos   ctf_str_atom_t *atom = NULL;
286*12989c96Schristos   int added = 0;
2878dffb485Schristos 
2888dffb485Schristos   atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
2898dffb485Schristos 
290*12989c96Schristos   /* Existing atoms get refs added only if they are provisional:
291*12989c96Schristos      non-provisional strings already have a fixed strtab offset, and just
292*12989c96Schristos      get their ref updated immediately, since its value cannot change.  */
2938dffb485Schristos 
2948dffb485Schristos   if (atom)
2958dffb485Schristos     {
296*12989c96Schristos       if (!ctf_dynhash_lookup (fp->ctf_prov_strtab, (void *) (uintptr_t)
297*12989c96Schristos 			       atom->csa_offset))
298*12989c96Schristos 	{
2994b169a6bSchristos 	  if (flags & CTF_STR_ADD_REF)
3008dffb485Schristos 	    {
301*12989c96Schristos 	      if (atom->csa_external_offset)
302*12989c96Schristos 		*ref = atom->csa_external_offset;
303*12989c96Schristos 	      else
304*12989c96Schristos 		*ref = atom->csa_offset;
3058dffb485Schristos 	    }
3068dffb485Schristos 	  return atom;
3078dffb485Schristos 	}
3088dffb485Schristos 
309*12989c96Schristos       if (flags & CTF_STR_ADD_REF)
310*12989c96Schristos 	{
311*12989c96Schristos 	  if (!aref_create (fp, atom, ref, flags))
312*12989c96Schristos 	    {
313*12989c96Schristos 	      ctf_set_errno (fp, ENOMEM);
314*12989c96Schristos 	      return NULL;
315*12989c96Schristos 	    }
316*12989c96Schristos 	}
317*12989c96Schristos 
318*12989c96Schristos       return atom;
319*12989c96Schristos     }
320*12989c96Schristos 
321*12989c96Schristos   /* New atom.  */
322*12989c96Schristos 
3238dffb485Schristos   if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
3248dffb485Schristos     goto oom;
3258dffb485Schristos   memset (atom, 0, sizeof (struct ctf_str_atom));
3268dffb485Schristos 
327*12989c96Schristos   /* Don't allocate new strings if this string is within an mmapped
328*12989c96Schristos      strtab.  */
329*12989c96Schristos 
330*12989c96Schristos   if ((unsigned char *) str < (unsigned char *) fp->ctf_data_mmapped
331*12989c96Schristos       || (unsigned char *) str > (unsigned char *) fp->ctf_data_mmapped + fp->ctf_data_mmapped_len)
332*12989c96Schristos     {
3338dffb485Schristos       if ((newstr = strdup (str)) == NULL)
3348dffb485Schristos 	goto oom;
335*12989c96Schristos       atom->csa_flags |= CTF_STR_ATOM_FREEABLE;
3368dffb485Schristos       atom->csa_str = newstr;
337*12989c96Schristos     }
338*12989c96Schristos   else
339*12989c96Schristos     atom->csa_str = (char *) str;
340*12989c96Schristos 
341*12989c96Schristos   if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0)
342*12989c96Schristos     goto oom;
343*12989c96Schristos   added = 1;
344*12989c96Schristos 
3458dffb485Schristos   atom->csa_snapshot_id = fp->ctf_snapshots;
3468dffb485Schristos 
347*12989c96Schristos   /* New atoms marked provisional go into the provisional strtab, and get a
348*12989c96Schristos      ref added.  */
349*12989c96Schristos 
350*12989c96Schristos   if (flags & CTF_STR_PROVISIONAL)
3518dffb485Schristos     {
3528dffb485Schristos       atom->csa_offset = fp->ctf_str_prov_offset;
3538dffb485Schristos 
3548dffb485Schristos       if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t)
3558dffb485Schristos 			      atom->csa_offset, (void *) atom->csa_str) < 0)
3568dffb485Schristos 	goto oom;
3578dffb485Schristos 
3588dffb485Schristos       fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
3598dffb485Schristos 
360*12989c96Schristos       if (flags & CTF_STR_ADD_REF)
3618dffb485Schristos       {
362*12989c96Schristos 	if (!aref_create (fp, atom, ref, flags))
3634b169a6bSchristos 	  goto oom;
3644b169a6bSchristos       }
3658dffb485Schristos     }
366*12989c96Schristos 
3678dffb485Schristos   return atom;
3688dffb485Schristos 
3698dffb485Schristos  oom:
370*12989c96Schristos   if (added)
371*12989c96Schristos     ctf_dynhash_remove (fp->ctf_str_atoms, atom->csa_str);
3728dffb485Schristos   free (atom);
3738dffb485Schristos   free (newstr);
3744b169a6bSchristos   ctf_set_errno (fp, ENOMEM);
3758dffb485Schristos   return NULL;
3768dffb485Schristos }
3778dffb485Schristos 
3788dffb485Schristos /* Add a string to the atoms table, without augmenting the ref list for this
3798dffb485Schristos    string: return a 'provisional offset' which can be used to return this string
3808dffb485Schristos    until ctf_str_write_strtab is called, or 0 on failure.  (Everywhere the
3818dffb485Schristos    provisional offset is assigned to should be added as a ref using
3828dffb485Schristos    ctf_str_add_ref() as well.) */
3838dffb485Schristos uint32_t
3844b169a6bSchristos ctf_str_add (ctf_dict_t *fp, const char *str)
3858dffb485Schristos {
3868dffb485Schristos   ctf_str_atom_t *atom;
3878dffb485Schristos 
3884b169a6bSchristos   if (!str)
3894b169a6bSchristos     str = "";
3904b169a6bSchristos 
391*12989c96Schristos   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PROVISIONAL, 0);
3928dffb485Schristos   if (!atom)
3938dffb485Schristos     return 0;
3948dffb485Schristos 
3958dffb485Schristos   return atom->csa_offset;
3968dffb485Schristos }
3978dffb485Schristos 
3988dffb485Schristos /* Like ctf_str_add(), but additionally augment the atom's refs list with the
3998dffb485Schristos    passed-in ref, whether or not the string is already present.  There is no
4008dffb485Schristos    attempt to deduplicate the refs list (but duplicates are harmless).  */
4018dffb485Schristos uint32_t
4024b169a6bSchristos ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
4038dffb485Schristos {
4048dffb485Schristos   ctf_str_atom_t *atom;
4058dffb485Schristos 
4064b169a6bSchristos   if (!str)
4074b169a6bSchristos     str = "";
4084b169a6bSchristos 
4094b169a6bSchristos   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
410*12989c96Schristos 				   | CTF_STR_PROVISIONAL, ref);
4118dffb485Schristos   if (!atom)
4128dffb485Schristos     return 0;
4138dffb485Schristos 
4148dffb485Schristos   return atom->csa_offset;
4158dffb485Schristos }
4168dffb485Schristos 
417*12989c96Schristos /* Like ctf_str_add_ref(), but note that the ref may be moved later on.  */
4184b169a6bSchristos uint32_t
419*12989c96Schristos ctf_str_add_movable_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
4204b169a6bSchristos {
4214b169a6bSchristos   ctf_str_atom_t *atom;
4224b169a6bSchristos 
4234b169a6bSchristos   if (!str)
4244b169a6bSchristos     str = "";
4254b169a6bSchristos 
426*12989c96Schristos   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
427*12989c96Schristos 				   | CTF_STR_PROVISIONAL
428*12989c96Schristos 				   | CTF_STR_MOVABLE, ref);
4294b169a6bSchristos   if (!atom)
4304b169a6bSchristos     return 0;
4314b169a6bSchristos 
4324b169a6bSchristos   return atom->csa_offset;
4334b169a6bSchristos }
4344b169a6bSchristos 
4358dffb485Schristos /* Add an external strtab reference at OFFSET.  Returns zero if the addition
4368dffb485Schristos    failed, nonzero otherwise.  */
4378dffb485Schristos int
4384b169a6bSchristos ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
4398dffb485Schristos {
4408dffb485Schristos   ctf_str_atom_t *atom;
4418dffb485Schristos 
4424b169a6bSchristos   if (!str)
4434b169a6bSchristos     str = "";
4444b169a6bSchristos 
4454b169a6bSchristos   atom = ctf_str_add_ref_internal (fp, str, 0, 0);
4468dffb485Schristos   if (!atom)
4478dffb485Schristos     return 0;
4488dffb485Schristos 
4498dffb485Schristos   atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
4504b169a6bSchristos 
4514b169a6bSchristos   if (!fp->ctf_syn_ext_strtab)
4524b169a6bSchristos     fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer,
4534b169a6bSchristos 						 ctf_hash_eq_integer,
4544b169a6bSchristos 						 NULL, NULL);
4554b169a6bSchristos   if (!fp->ctf_syn_ext_strtab)
4564b169a6bSchristos     {
4574b169a6bSchristos       ctf_set_errno (fp, ENOMEM);
4584b169a6bSchristos       return 0;
4594b169a6bSchristos     }
4604b169a6bSchristos 
4614b169a6bSchristos   if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab,
4624b169a6bSchristos 			  (void *) (uintptr_t)
4634b169a6bSchristos 			  atom->csa_external_offset,
4644b169a6bSchristos 			  (void *) atom->csa_str) < 0)
4654b169a6bSchristos     {
4664b169a6bSchristos       /* No need to bother freeing the syn_ext_strtab: it will get freed at
4674b169a6bSchristos 	 ctf_str_write_strtab time if unreferenced.  */
4684b169a6bSchristos       ctf_set_errno (fp, ENOMEM);
4694b169a6bSchristos       return 0;
4704b169a6bSchristos     }
4714b169a6bSchristos 
4728dffb485Schristos   return 1;
4738dffb485Schristos }
4748dffb485Schristos 
475*12989c96Schristos /* Note that refs have moved from (SRC, LEN) to DEST.  We use the movable
476*12989c96Schristos    refs backpointer for this, because it is done an amortized-constant
477*12989c96Schristos    number of times during structure member and enumerand addition, and if we
478*12989c96Schristos    did a linear search this would turn such addition into an O(n^2)
479*12989c96Schristos    operation.  Even this is not linear, but it's better than that.  */
480*12989c96Schristos int
481*12989c96Schristos ctf_str_move_refs (ctf_dict_t *fp, void *src, size_t len, void *dest)
482*12989c96Schristos {
483*12989c96Schristos   uintptr_t p;
484*12989c96Schristos 
485*12989c96Schristos   if (src == dest)
486*12989c96Schristos     return 0;
487*12989c96Schristos 
488*12989c96Schristos   for (p = (uintptr_t) src; p - (uintptr_t) src < len; p++)
489*12989c96Schristos     {
490*12989c96Schristos       ctf_str_atom_ref_t *ref;
491*12989c96Schristos 
492*12989c96Schristos       if ((ref = ctf_dynhash_lookup (fp->ctf_str_movable_refs,
493*12989c96Schristos 				     (ctf_str_atom_ref_t *) p)) != NULL)
494*12989c96Schristos 	{
495*12989c96Schristos 	  int out_of_memory;
496*12989c96Schristos 
497*12989c96Schristos 	  ref->caf_ref = (uint32_t *) (((uintptr_t) ref->caf_ref +
498*12989c96Schristos 					(uintptr_t) dest - (uintptr_t) src));
499*12989c96Schristos 	  ctf_dynhash_remove (fp->ctf_str_movable_refs,
500*12989c96Schristos 			      (ctf_str_atom_ref_t *) p);
501*12989c96Schristos 	  out_of_memory = ctf_dynhash_insert (fp->ctf_str_movable_refs,
502*12989c96Schristos 					      ref->caf_ref, ref);
503*12989c96Schristos 	  assert (out_of_memory == 0);
504*12989c96Schristos 	}
505*12989c96Schristos     }
506*12989c96Schristos 
507*12989c96Schristos   return 0;
508*12989c96Schristos }
509*12989c96Schristos 
5108dffb485Schristos /* Remove a single ref.  */
5118dffb485Schristos void
5124b169a6bSchristos ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
5138dffb485Schristos {
5148dffb485Schristos   ctf_str_atom_ref_t *aref, *anext;
5158dffb485Schristos   ctf_str_atom_t *atom = NULL;
5168dffb485Schristos 
5178dffb485Schristos   atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
5188dffb485Schristos   if (!atom)
5198dffb485Schristos     return;
5208dffb485Schristos 
5218dffb485Schristos   for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext)
5228dffb485Schristos     {
5238dffb485Schristos       anext = ctf_list_next (aref);
5248dffb485Schristos       if (aref->caf_ref == ref)
5258dffb485Schristos 	{
5268dffb485Schristos 	  ctf_list_delete (&atom->csa_refs, aref);
5278dffb485Schristos 	  free (aref);
5288dffb485Schristos 	}
5298dffb485Schristos     }
5308dffb485Schristos }
5318dffb485Schristos 
5328dffb485Schristos /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
5334b169a6bSchristos    snapshot ID.  External atoms are never removed, because they came from the
5344b169a6bSchristos    linker string table and are still present even if you roll back type
5354b169a6bSchristos    additions.  */
5368dffb485Schristos static int
5378dffb485Schristos ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg)
5388dffb485Schristos {
5398dffb485Schristos   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
5408dffb485Schristos   ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg;
5418dffb485Schristos 
5424b169a6bSchristos   return (atom->csa_snapshot_id > id->snapshot_id)
5434b169a6bSchristos     && (atom->csa_external_offset == 0);
5448dffb485Schristos }
5458dffb485Schristos 
5464b169a6bSchristos /* Roll back, deleting all (internal) atoms created after a particular ID.  */
5478dffb485Schristos void
5484b169a6bSchristos ctf_str_rollback (ctf_dict_t *fp, ctf_snapshot_id_t id)
5498dffb485Schristos {
5508dffb485Schristos   ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
5518dffb485Schristos }
5528dffb485Schristos 
5538dffb485Schristos /* An adaptor around ctf_purge_atom_refs.  */
5548dffb485Schristos static void
5558dffb485Schristos ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
5568dffb485Schristos 			     void *arg _libctf_unused_)
5578dffb485Schristos {
5588dffb485Schristos   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
5598dffb485Schristos   ctf_str_purge_atom_refs (atom);
5608dffb485Schristos }
5618dffb485Schristos 
5628dffb485Schristos /* Remove all the recorded refs from the atoms table.  */
5638dffb485Schristos void
5644b169a6bSchristos ctf_str_purge_refs (ctf_dict_t *fp)
5658dffb485Schristos {
5668dffb485Schristos   ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
5678dffb485Schristos }
5688dffb485Schristos 
5698dffb485Schristos /* Update a list of refs to the specified value. */
5708dffb485Schristos static void
5718dffb485Schristos ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
5728dffb485Schristos {
5738dffb485Schristos   ctf_str_atom_ref_t *ref;
5748dffb485Schristos 
5758dffb485Schristos   for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
5768dffb485Schristos        ref = ctf_list_next (ref))
5778dffb485Schristos     *(ref->caf_ref) = value;
5788dffb485Schristos }
5798dffb485Schristos 
5808dffb485Schristos /* Sort the strtab.  */
5818dffb485Schristos static int
5828dffb485Schristos ctf_str_sort_strtab (const void *a, const void *b)
5838dffb485Schristos {
5848dffb485Schristos   ctf_str_atom_t **one = (ctf_str_atom_t **) a;
5858dffb485Schristos   ctf_str_atom_t **two = (ctf_str_atom_t **) b;
5868dffb485Schristos 
5878dffb485Schristos   return (strcmp ((*one)->csa_str, (*two)->csa_str));
5888dffb485Schristos }
5898dffb485Schristos 
5908dffb485Schristos /* Write out and return a strtab containing all strings with recorded refs,
591*12989c96Schristos    adjusting the refs to refer to the corresponding string.  The returned
592*12989c96Schristos    strtab is already assigned to strtab 0 in this dict, is owned by this
593*12989c96Schristos    dict, and may be NULL on error.  Also populate the synthetic strtab with
594*12989c96Schristos    mappings from external strtab offsets to names, so we can look them up
595*12989c96Schristos    with ctf_strptr().  Only external strtab offsets with references are
596*12989c96Schristos    added.
597*12989c96Schristos 
598*12989c96Schristos    As a side effect, replaces the strtab of the current dict with the newly-
599*12989c96Schristos    generated strtab.  This is an exception to the general rule that
600*12989c96Schristos    serialization does not change the dict passed in, because the alternative
601*12989c96Schristos    is to copy the entire atoms table on every reserialization just to avoid
602*12989c96Schristos    modifying the original, which is excessively costly for minimal gain.
603*12989c96Schristos 
604*12989c96Schristos    We use the lazy man's approach and double memory costs by always storing
605*12989c96Schristos    atoms as individually allocated entities whenever they come from anywhere
606*12989c96Schristos    but a freshly-opened, mmapped dict, even though after serialization there
607*12989c96Schristos    is another copy in the strtab; this ensures that ctf_strptr()-returned
608*12989c96Schristos    pointers to them remain valid for the lifetime of the dict.
609*12989c96Schristos 
610*12989c96Schristos    This is all rendered more complex because if a dict is ctf_open()ed it
611*12989c96Schristos    will have a bunch of strings in its strtab already, and their strtab
612*12989c96Schristos    offsets can never change (without piles of complexity to rescan the
613*12989c96Schristos    entire dict just to get all the offsets to all of them into the atoms
614*12989c96Schristos    table).  Entries below the existing strtab limit are just copied into the
615*12989c96Schristos    new dict: entries above it are new, and are sorted first, then
616*12989c96Schristos    appended to it.  The sorting is purely a compression-efficiency
617*12989c96Schristos    improvement, and we get nearly as good an improvement from sorting big
618*12989c96Schristos    chunks like this as we would from sorting the whole thing.  */
619*12989c96Schristos 
620*12989c96Schristos const ctf_strs_writable_t *
6214b169a6bSchristos ctf_str_write_strtab (ctf_dict_t *fp)
6228dffb485Schristos {
623*12989c96Schristos   ctf_strs_writable_t *strtab;
624*12989c96Schristos   size_t strtab_count = 0;
6258dffb485Schristos   uint32_t cur_stroff = 0;
6268dffb485Schristos   ctf_str_atom_t **sorttab;
627*12989c96Schristos   ctf_next_t *it = NULL;
6288dffb485Schristos   size_t i;
629*12989c96Schristos   void *v;
630*12989c96Schristos   int err;
631*12989c96Schristos   int new_strtab = 0;
6328dffb485Schristos   int any_external = 0;
6338dffb485Schristos 
634*12989c96Schristos   strtab = calloc (1, sizeof (ctf_strs_writable_t));
635*12989c96Schristos   if (!strtab)
636*12989c96Schristos     return NULL;
6378dffb485Schristos 
638*12989c96Schristos   /* The strtab contains the existing string table at its start: figure out
639*12989c96Schristos      how many new strings we need to add.  We only need to add new strings
640*12989c96Schristos      that have no external offset, that have refs, and that are found in the
641*12989c96Schristos      provisional strtab.  If the existing strtab is empty we also need to
642*12989c96Schristos      add the null string at its start.  */
643*12989c96Schristos 
644*12989c96Schristos   strtab->cts_len = fp->ctf_str[CTF_STRTAB_0].cts_len;
645*12989c96Schristos 
646*12989c96Schristos   if (strtab->cts_len == 0)
6478dffb485Schristos     {
648*12989c96Schristos       new_strtab = 1;
649*12989c96Schristos       strtab->cts_len++; 			/* For the \0.  */
6508dffb485Schristos     }
6518dffb485Schristos 
652*12989c96Schristos   /* Count new entries in the strtab: i.e. entries in the provisional
653*12989c96Schristos      strtab.  Ignore any entry for \0, entries which ended up in the
654*12989c96Schristos      external strtab, and unreferenced entries.  */
6558dffb485Schristos 
656*12989c96Schristos   while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
657*12989c96Schristos     {
658*12989c96Schristos       const char *str = (const char *) v;
659*12989c96Schristos       ctf_str_atom_t *atom;
6608dffb485Schristos 
661*12989c96Schristos       atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
662*12989c96Schristos       if (!ctf_assert (fp, atom))
663*12989c96Schristos 	goto err_strtab;
664*12989c96Schristos 
665*12989c96Schristos       if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
666*12989c96Schristos 	  atom->csa_external_offset)
667*12989c96Schristos 	continue;
668*12989c96Schristos 
669*12989c96Schristos       strtab->cts_len += strlen (atom->csa_str) + 1;
670*12989c96Schristos       strtab_count++;
671*12989c96Schristos     }
672*12989c96Schristos   if (err != ECTF_NEXT_END)
673*12989c96Schristos     {
674*12989c96Schristos       ctf_dprintf ("ctf_str_write_strtab: error counting strtab entries: %s\n",
675*12989c96Schristos 		   ctf_errmsg (err));
676*12989c96Schristos       goto err_strtab;
677*12989c96Schristos     }
678*12989c96Schristos 
679*12989c96Schristos   ctf_dprintf ("%lu bytes of strings in strtab: %lu pre-existing.\n",
680*12989c96Schristos 	       (unsigned long) strtab->cts_len,
681*12989c96Schristos 	       (unsigned long) fp->ctf_str[CTF_STRTAB_0].cts_len);
682*12989c96Schristos 
683*12989c96Schristos   /* Sort the new part of the strtab.  */
684*12989c96Schristos 
685*12989c96Schristos   sorttab = calloc (strtab_count, sizeof (ctf_str_atom_t *));
6868dffb485Schristos   if (!sorttab)
687*12989c96Schristos     {
688*12989c96Schristos       ctf_set_errno (fp, ENOMEM);
689*12989c96Schristos       goto err_strtab;
690*12989c96Schristos     }
6918dffb485Schristos 
692*12989c96Schristos   i = 0;
693*12989c96Schristos   while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
694*12989c96Schristos     {
695*12989c96Schristos       ctf_str_atom_t *atom;
6968dffb485Schristos 
697*12989c96Schristos       atom = ctf_dynhash_lookup (fp->ctf_str_atoms, v);
698*12989c96Schristos       if (!ctf_assert (fp, atom))
699*12989c96Schristos 	goto err_sorttab;
700*12989c96Schristos 
701*12989c96Schristos       if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
702*12989c96Schristos 	  atom->csa_external_offset)
703*12989c96Schristos 	continue;
704*12989c96Schristos 
705*12989c96Schristos       sorttab[i++] = atom;
706*12989c96Schristos     }
707*12989c96Schristos 
708*12989c96Schristos   qsort (sorttab, strtab_count, sizeof (ctf_str_atom_t *),
7098dffb485Schristos 	 ctf_str_sort_strtab);
7108dffb485Schristos 
711*12989c96Schristos   if ((strtab->cts_strs = malloc (strtab->cts_len)) == NULL)
712*12989c96Schristos     goto err_sorttab;
7138dffb485Schristos 
714*12989c96Schristos   cur_stroff = fp->ctf_str[CTF_STRTAB_0].cts_len;
7158dffb485Schristos 
716*12989c96Schristos   if (new_strtab)
717*12989c96Schristos     {
718*12989c96Schristos       strtab->cts_strs[0] = 0;
719*12989c96Schristos       cur_stroff++;
7208dffb485Schristos     }
7218dffb485Schristos   else
722*12989c96Schristos     memcpy (strtab->cts_strs, fp->ctf_str[CTF_STRTAB_0].cts_strs,
723*12989c96Schristos 	    fp->ctf_str[CTF_STRTAB_0].cts_len);
7248dffb485Schristos 
725*12989c96Schristos   /* Work over the sorttab, add its strings to the strtab, and remember
726*12989c96Schristos      where they are in the csa_offset for the appropriate atom.  No ref
727*12989c96Schristos      updating is done at this point, because refs might well relate to
728*12989c96Schristos      already-existing strings, or external strings, which do not need adding
729*12989c96Schristos      to the strtab and may not be in the sorttab.  */
730*12989c96Schristos 
731*12989c96Schristos   for (i = 0; i < strtab_count; i++)
732*12989c96Schristos     {
7338dffb485Schristos       sorttab[i]->csa_offset = cur_stroff;
734*12989c96Schristos       strcpy (&strtab->cts_strs[cur_stroff], sorttab[i]->csa_str);
7358dffb485Schristos       cur_stroff += strlen (sorttab[i]->csa_str) + 1;
7368dffb485Schristos     }
7378dffb485Schristos   free (sorttab);
738*12989c96Schristos   sorttab = NULL;
739*12989c96Schristos 
740*12989c96Schristos   /* Update all refs, then purge them as no longer necessary: also update
741*12989c96Schristos      the strtab appropriately.  */
742*12989c96Schristos 
743*12989c96Schristos   while ((err = ctf_dynhash_next (fp->ctf_str_atoms, &it, NULL, &v)) == 0)
744*12989c96Schristos     {
745*12989c96Schristos       ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
746*12989c96Schristos       uint32_t offset;
747*12989c96Schristos 
748*12989c96Schristos       if (ctf_list_empty_p (&atom->csa_refs))
749*12989c96Schristos 	continue;
750*12989c96Schristos 
751*12989c96Schristos       if (atom->csa_external_offset)
752*12989c96Schristos 	{
753*12989c96Schristos 	  any_external = 1;
754*12989c96Schristos 	  offset = atom->csa_external_offset;
755*12989c96Schristos 	}
756*12989c96Schristos       else
757*12989c96Schristos 	offset = atom->csa_offset;
758*12989c96Schristos       ctf_str_update_refs (atom, offset);
759*12989c96Schristos     }
760*12989c96Schristos   if (err != ECTF_NEXT_END)
761*12989c96Schristos     {
762*12989c96Schristos       ctf_dprintf ("ctf_str_write_strtab: error iterating over atoms while updating refs: %s\n",
763*12989c96Schristos 		   ctf_errmsg (err));
764*12989c96Schristos       goto err_strtab;
765*12989c96Schristos     }
766*12989c96Schristos   ctf_str_purge_refs (fp);
7678dffb485Schristos 
7688dffb485Schristos   if (!any_external)
7698dffb485Schristos     {
7708dffb485Schristos       ctf_dynhash_destroy (fp->ctf_syn_ext_strtab);
7718dffb485Schristos       fp->ctf_syn_ext_strtab = NULL;
7728dffb485Schristos     }
7738dffb485Schristos 
774*12989c96Schristos   /* Replace the old strtab with the new one in this dict.  */
775*12989c96Schristos 
776*12989c96Schristos   if (fp->ctf_dynstrtab)
777*12989c96Schristos     {
778*12989c96Schristos       free (fp->ctf_dynstrtab->cts_strs);
779*12989c96Schristos       free (fp->ctf_dynstrtab);
780*12989c96Schristos     }
781*12989c96Schristos 
782*12989c96Schristos   fp->ctf_dynstrtab = strtab;
783*12989c96Schristos   fp->ctf_str[CTF_STRTAB_0].cts_strs = strtab->cts_strs;
784*12989c96Schristos   fp->ctf_str[CTF_STRTAB_0].cts_len = strtab->cts_len;
785*12989c96Schristos 
7868dffb485Schristos   /* All the provisional strtab entries are now real strtab entries, and
7878dffb485Schristos      ctf_strptr() will find them there.  The provisional offset now starts right
7888dffb485Schristos      beyond the new end of the strtab.  */
7898dffb485Schristos 
7908dffb485Schristos   ctf_dynhash_empty (fp->ctf_prov_strtab);
791*12989c96Schristos   fp->ctf_str_prov_offset = strtab->cts_len + 1;
7928dffb485Schristos   return strtab;
7938dffb485Schristos 
794*12989c96Schristos  err_sorttab:
7958dffb485Schristos   free (sorttab);
796*12989c96Schristos  err_strtab:
797*12989c96Schristos   free (strtab);
798*12989c96Schristos   return NULL;
7998dffb485Schristos }
800