18dffb485Schristos /* CTF string table management. 2*12989c96Schristos Copyright (C) 2019-2024 Free Software Foundation, Inc. 38dffb485Schristos 48dffb485Schristos This file is part of libctf. 58dffb485Schristos 68dffb485Schristos libctf is free software; you can redistribute it and/or modify it under 78dffb485Schristos the terms of the GNU General Public License as published by the Free 88dffb485Schristos Software Foundation; either version 3, or (at your option) any later 98dffb485Schristos version. 108dffb485Schristos 118dffb485Schristos This program is distributed in the hope that it will be useful, but 128dffb485Schristos WITHOUT ANY WARRANTY; without even the implied warranty of 138dffb485Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 148dffb485Schristos See the GNU General Public License for more details. 158dffb485Schristos 168dffb485Schristos You should have received a copy of the GNU General Public License 178dffb485Schristos along with this program; see the file COPYING. If not see 188dffb485Schristos <http://www.gnu.org/licenses/>. */ 198dffb485Schristos 20*12989c96Schristos #include <assert.h> 218dffb485Schristos #include <ctf-impl.h> 228dffb485Schristos #include <string.h> 238dffb485Schristos 24*12989c96Schristos static ctf_str_atom_t * 25*12989c96Schristos ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str, 26*12989c96Schristos int flags, uint32_t *ref); 27*12989c96Schristos 28*12989c96Schristos /* Convert an encoded CTF string name into a pointer to a C string, possibly 29*12989c96Schristos using an explicit internal provisional strtab rather than the fp-based 30*12989c96Schristos one. */ 318dffb485Schristos const char * 324b169a6bSchristos ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab) 338dffb485Schristos { 348dffb485Schristos ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)]; 358dffb485Schristos 368dffb485Schristos if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL)) 378dffb485Schristos ctsp = strtab; 388dffb485Schristos 39*12989c96Schristos /* If this name is in the external strtab, and there is a synthetic 40*12989c96Schristos strtab, use it in preference. (This is used to add the set of strings 41*12989c96Schristos -- symbol names, etc -- the linker knows about before the strtab is 42*12989c96Schristos written out.) */ 438dffb485Schristos 448dffb485Schristos if (CTF_NAME_STID (name) == CTF_STRTAB_1 458dffb485Schristos && fp->ctf_syn_ext_strtab != NULL) 468dffb485Schristos return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab, 478dffb485Schristos (void *) (uintptr_t) name); 488dffb485Schristos 49*12989c96Schristos /* If the name is in the internal strtab, and the name offset is beyond 50*12989c96Schristos the end of the ctsp->cts_len but below the ctf_str_prov_offset, this is 51*12989c96Schristos a provisional string added by ctf_str_add*() but not yet built into a 52*12989c96Schristos real strtab: get the value out of the ctf_prov_strtab. */ 538dffb485Schristos 548dffb485Schristos if (CTF_NAME_STID (name) == CTF_STRTAB_0 558dffb485Schristos && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset) 568dffb485Schristos return ctf_dynhash_lookup (fp->ctf_prov_strtab, 578dffb485Schristos (void *) (uintptr_t) name); 588dffb485Schristos 598dffb485Schristos if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len) 608dffb485Schristos return (ctsp->cts_strs + CTF_NAME_OFFSET (name)); 618dffb485Schristos 628dffb485Schristos /* String table not loaded or corrupt offset. */ 638dffb485Schristos return NULL; 648dffb485Schristos } 658dffb485Schristos 668dffb485Schristos /* Convert an encoded CTF string name into a pointer to a C string by looking 678dffb485Schristos up the appropriate string table buffer and then adding the offset. */ 688dffb485Schristos const char * 694b169a6bSchristos ctf_strraw (ctf_dict_t *fp, uint32_t name) 708dffb485Schristos { 718dffb485Schristos return ctf_strraw_explicit (fp, name, NULL); 728dffb485Schristos } 738dffb485Schristos 748dffb485Schristos /* Return a guaranteed-non-NULL pointer to the string with the given CTF 758dffb485Schristos name. */ 768dffb485Schristos const char * 774b169a6bSchristos ctf_strptr (ctf_dict_t *fp, uint32_t name) 788dffb485Schristos { 798dffb485Schristos const char *s = ctf_strraw (fp, name); 808dffb485Schristos return (s != NULL ? s : "(?)"); 818dffb485Schristos } 828dffb485Schristos 83*12989c96Schristos /* As above, but return info on what is wrong in more detail. 84*12989c96Schristos (Used for type lookups.) */ 85*12989c96Schristos 86*12989c96Schristos const char * 87*12989c96Schristos ctf_strptr_validate (ctf_dict_t *fp, uint32_t name) 88*12989c96Schristos { 89*12989c96Schristos const char *str = ctf_strraw (fp, name); 90*12989c96Schristos 91*12989c96Schristos if (str == NULL) 92*12989c96Schristos { 93*12989c96Schristos if (CTF_NAME_STID (name) == CTF_STRTAB_1 94*12989c96Schristos && fp->ctf_syn_ext_strtab == NULL 95*12989c96Schristos && fp->ctf_str[CTF_NAME_STID (name)].cts_strs == NULL) 96*12989c96Schristos { 97*12989c96Schristos ctf_set_errno (fp, ECTF_STRTAB); 98*12989c96Schristos return NULL; 99*12989c96Schristos } 100*12989c96Schristos 101*12989c96Schristos ctf_set_errno (fp, ECTF_BADNAME); 102*12989c96Schristos return NULL; 103*12989c96Schristos } 104*12989c96Schristos return str; 105*12989c96Schristos } 106*12989c96Schristos 1078dffb485Schristos /* Remove all refs to a given atom. */ 1088dffb485Schristos static void 1098dffb485Schristos ctf_str_purge_atom_refs (ctf_str_atom_t *atom) 1108dffb485Schristos { 1118dffb485Schristos ctf_str_atom_ref_t *ref, *next; 1128dffb485Schristos 1138dffb485Schristos for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next) 1148dffb485Schristos { 1158dffb485Schristos next = ctf_list_next (ref); 1168dffb485Schristos ctf_list_delete (&atom->csa_refs, ref); 117*12989c96Schristos if (atom->csa_flags & CTF_STR_ATOM_MOVABLE) 118*12989c96Schristos { 119*12989c96Schristos ctf_str_atom_ref_movable_t *movref; 120*12989c96Schristos movref = (ctf_str_atom_ref_movable_t *) ref; 121*12989c96Schristos ctf_dynhash_remove (movref->caf_movable_refs, ref); 122*12989c96Schristos } 123*12989c96Schristos 1248dffb485Schristos free (ref); 1258dffb485Schristos } 1268dffb485Schristos } 1278dffb485Schristos 128*12989c96Schristos /* Free an atom. */ 1298dffb485Schristos static void 1308dffb485Schristos ctf_str_free_atom (void *a) 1318dffb485Schristos { 1328dffb485Schristos ctf_str_atom_t *atom = a; 1338dffb485Schristos 1348dffb485Schristos ctf_str_purge_atom_refs (atom); 135*12989c96Schristos 136*12989c96Schristos if (atom->csa_flags & CTF_STR_ATOM_FREEABLE) 137*12989c96Schristos free (atom->csa_str); 138*12989c96Schristos 1398dffb485Schristos free (atom); 1408dffb485Schristos } 1418dffb485Schristos 1428dffb485Schristos /* Create the atoms table. There is always at least one atom in it, the null 143*12989c96Schristos string: but also pull in atoms from the internal strtab. (We rely on 144*12989c96Schristos calls to ctf_str_add_external to populate external strtab entries, since 145*12989c96Schristos these are often not quite the same as what appears in any external 146*12989c96Schristos strtab, and the external strtab is often huge and best not aggressively 147*12989c96Schristos pulled in.) */ 1488dffb485Schristos int 1494b169a6bSchristos ctf_str_create_atoms (ctf_dict_t *fp) 1508dffb485Schristos { 151*12989c96Schristos size_t i; 152*12989c96Schristos 1538dffb485Schristos fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, 154*12989c96Schristos NULL, ctf_str_free_atom); 1554b169a6bSchristos if (!fp->ctf_str_atoms) 1568dffb485Schristos return -ENOMEM; 1578dffb485Schristos 1588dffb485Schristos if (!fp->ctf_prov_strtab) 1598dffb485Schristos fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer, 1608dffb485Schristos ctf_hash_eq_integer, 1618dffb485Schristos NULL, NULL); 1628dffb485Schristos if (!fp->ctf_prov_strtab) 1638dffb485Schristos goto oom_prov_strtab; 1648dffb485Schristos 165*12989c96Schristos fp->ctf_str_movable_refs = ctf_dynhash_create (ctf_hash_integer, 166*12989c96Schristos ctf_hash_eq_integer, 167*12989c96Schristos NULL, NULL); 168*12989c96Schristos if (!fp->ctf_str_movable_refs) 169*12989c96Schristos goto oom_movable_refs; 1704b169a6bSchristos 1718dffb485Schristos errno = 0; 1728dffb485Schristos ctf_str_add (fp, ""); 1738dffb485Schristos if (errno == ENOMEM) 1748dffb485Schristos goto oom_str_add; 1758dffb485Schristos 176*12989c96Schristos /* Pull in all the strings in the strtab as new atoms. The provisional 177*12989c96Schristos strtab must be empty at this point, so there is no need to populate 178*12989c96Schristos atoms from it as well. Types in this subset are frozen and readonly, 179*12989c96Schristos so the refs list and movable refs list need not be populated. */ 180*12989c96Schristos 181*12989c96Schristos for (i = 0; i < fp->ctf_str[CTF_STRTAB_0].cts_len; 182*12989c96Schristos i += strlen (&fp->ctf_str[CTF_STRTAB_0].cts_strs[i]) + 1) 183*12989c96Schristos { 184*12989c96Schristos ctf_str_atom_t *atom; 185*12989c96Schristos 186*12989c96Schristos if (fp->ctf_str[CTF_STRTAB_0].cts_strs[i] == 0) 187*12989c96Schristos continue; 188*12989c96Schristos 189*12989c96Schristos atom = ctf_str_add_ref_internal (fp, &fp->ctf_str[CTF_STRTAB_0].cts_strs[i], 190*12989c96Schristos 0, 0); 191*12989c96Schristos 192*12989c96Schristos if (!atom) 193*12989c96Schristos goto oom_str_add; 194*12989c96Schristos 195*12989c96Schristos atom->csa_offset = i; 196*12989c96Schristos } 197*12989c96Schristos 1988dffb485Schristos return 0; 1998dffb485Schristos 2008dffb485Schristos oom_str_add: 201*12989c96Schristos ctf_dynhash_destroy (fp->ctf_str_movable_refs); 202*12989c96Schristos fp->ctf_str_movable_refs = NULL; 203*12989c96Schristos oom_movable_refs: 2048dffb485Schristos ctf_dynhash_destroy (fp->ctf_prov_strtab); 2058dffb485Schristos fp->ctf_prov_strtab = NULL; 2068dffb485Schristos oom_prov_strtab: 2078dffb485Schristos ctf_dynhash_destroy (fp->ctf_str_atoms); 2088dffb485Schristos fp->ctf_str_atoms = NULL; 2098dffb485Schristos return -ENOMEM; 2108dffb485Schristos } 2118dffb485Schristos 212*12989c96Schristos /* Destroy the atoms table and associated refs. */ 2138dffb485Schristos void 2144b169a6bSchristos ctf_str_free_atoms (ctf_dict_t *fp) 2158dffb485Schristos { 2168dffb485Schristos ctf_dynhash_destroy (fp->ctf_prov_strtab); 2178dffb485Schristos ctf_dynhash_destroy (fp->ctf_str_atoms); 218*12989c96Schristos ctf_dynhash_destroy (fp->ctf_str_movable_refs); 219*12989c96Schristos if (fp->ctf_dynstrtab) 220*12989c96Schristos { 221*12989c96Schristos free (fp->ctf_dynstrtab->cts_strs); 222*12989c96Schristos free (fp->ctf_dynstrtab); 223*12989c96Schristos } 2248dffb485Schristos } 2258dffb485Schristos 2264b169a6bSchristos #define CTF_STR_ADD_REF 0x1 227*12989c96Schristos #define CTF_STR_PROVISIONAL 0x2 228*12989c96Schristos #define CTF_STR_MOVABLE 0x4 2294b169a6bSchristos 230*12989c96Schristos /* Allocate a ref and bind it into a ref list. */ 231*12989c96Schristos 232*12989c96Schristos static ctf_str_atom_ref_t * 233*12989c96Schristos aref_create (ctf_dict_t *fp, ctf_str_atom_t *atom, uint32_t *ref, int flags) 234*12989c96Schristos { 235*12989c96Schristos ctf_str_atom_ref_t *aref; 236*12989c96Schristos size_t s = sizeof (struct ctf_str_atom_ref); 237*12989c96Schristos 238*12989c96Schristos if (flags & CTF_STR_MOVABLE) 239*12989c96Schristos s = sizeof (struct ctf_str_atom_ref_movable); 240*12989c96Schristos 241*12989c96Schristos aref = malloc (s); 242*12989c96Schristos 243*12989c96Schristos if (!aref) 244*12989c96Schristos return NULL; 245*12989c96Schristos 246*12989c96Schristos aref->caf_ref = ref; 247*12989c96Schristos 248*12989c96Schristos /* Movable refs get a backpointer to them in ctf_str_movable_refs, and a 249*12989c96Schristos pointer to ctf_str_movable_refs itself in the ref, for use when freeing 250*12989c96Schristos refs: they can be moved later in batches via a call to 251*12989c96Schristos ctf_str_move_refs. */ 252*12989c96Schristos 253*12989c96Schristos if (flags & CTF_STR_MOVABLE) 254*12989c96Schristos { 255*12989c96Schristos ctf_str_atom_ref_movable_t *movref = (ctf_str_atom_ref_movable_t *) aref; 256*12989c96Schristos 257*12989c96Schristos movref->caf_movable_refs = fp->ctf_str_movable_refs; 258*12989c96Schristos 259*12989c96Schristos if (ctf_dynhash_insert (fp->ctf_str_movable_refs, ref, aref) < 0) 260*12989c96Schristos { 261*12989c96Schristos free (aref); 262*12989c96Schristos return NULL; 263*12989c96Schristos } 264*12989c96Schristos } 265*12989c96Schristos 266*12989c96Schristos ctf_list_append (&atom->csa_refs, aref); 267*12989c96Schristos 268*12989c96Schristos return aref; 269*12989c96Schristos } 270*12989c96Schristos 271*12989c96Schristos /* Add a string to the atoms table, copying the passed-in string if 272*12989c96Schristos necessary. Return the atom added. Return NULL only when out of memory 273*12989c96Schristos (and do not touch the passed-in string in that case). 274*12989c96Schristos 275*12989c96Schristos Possibly add a provisional entry for this string to the provisional 276*12989c96Schristos strtab. If the string is in the provisional strtab, update its ref list 277*12989c96Schristos with the passed-in ref, causing the ref to be updated when the strtab is 278*12989c96Schristos written out. */ 279*12989c96Schristos 2808dffb485Schristos static ctf_str_atom_t * 2814b169a6bSchristos ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str, 2824b169a6bSchristos int flags, uint32_t *ref) 2838dffb485Schristos { 2848dffb485Schristos char *newstr = NULL; 2858dffb485Schristos ctf_str_atom_t *atom = NULL; 286*12989c96Schristos int added = 0; 2878dffb485Schristos 2888dffb485Schristos atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str); 2898dffb485Schristos 290*12989c96Schristos /* Existing atoms get refs added only if they are provisional: 291*12989c96Schristos non-provisional strings already have a fixed strtab offset, and just 292*12989c96Schristos get their ref updated immediately, since its value cannot change. */ 2938dffb485Schristos 2948dffb485Schristos if (atom) 2958dffb485Schristos { 296*12989c96Schristos if (!ctf_dynhash_lookup (fp->ctf_prov_strtab, (void *) (uintptr_t) 297*12989c96Schristos atom->csa_offset)) 298*12989c96Schristos { 2994b169a6bSchristos if (flags & CTF_STR_ADD_REF) 3008dffb485Schristos { 301*12989c96Schristos if (atom->csa_external_offset) 302*12989c96Schristos *ref = atom->csa_external_offset; 303*12989c96Schristos else 304*12989c96Schristos *ref = atom->csa_offset; 3058dffb485Schristos } 3068dffb485Schristos return atom; 3078dffb485Schristos } 3088dffb485Schristos 309*12989c96Schristos if (flags & CTF_STR_ADD_REF) 310*12989c96Schristos { 311*12989c96Schristos if (!aref_create (fp, atom, ref, flags)) 312*12989c96Schristos { 313*12989c96Schristos ctf_set_errno (fp, ENOMEM); 314*12989c96Schristos return NULL; 315*12989c96Schristos } 316*12989c96Schristos } 317*12989c96Schristos 318*12989c96Schristos return atom; 319*12989c96Schristos } 320*12989c96Schristos 321*12989c96Schristos /* New atom. */ 322*12989c96Schristos 3238dffb485Schristos if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL) 3248dffb485Schristos goto oom; 3258dffb485Schristos memset (atom, 0, sizeof (struct ctf_str_atom)); 3268dffb485Schristos 327*12989c96Schristos /* Don't allocate new strings if this string is within an mmapped 328*12989c96Schristos strtab. */ 329*12989c96Schristos 330*12989c96Schristos if ((unsigned char *) str < (unsigned char *) fp->ctf_data_mmapped 331*12989c96Schristos || (unsigned char *) str > (unsigned char *) fp->ctf_data_mmapped + fp->ctf_data_mmapped_len) 332*12989c96Schristos { 3338dffb485Schristos if ((newstr = strdup (str)) == NULL) 3348dffb485Schristos goto oom; 335*12989c96Schristos atom->csa_flags |= CTF_STR_ATOM_FREEABLE; 3368dffb485Schristos atom->csa_str = newstr; 337*12989c96Schristos } 338*12989c96Schristos else 339*12989c96Schristos atom->csa_str = (char *) str; 340*12989c96Schristos 341*12989c96Schristos if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0) 342*12989c96Schristos goto oom; 343*12989c96Schristos added = 1; 344*12989c96Schristos 3458dffb485Schristos atom->csa_snapshot_id = fp->ctf_snapshots; 3468dffb485Schristos 347*12989c96Schristos /* New atoms marked provisional go into the provisional strtab, and get a 348*12989c96Schristos ref added. */ 349*12989c96Schristos 350*12989c96Schristos if (flags & CTF_STR_PROVISIONAL) 3518dffb485Schristos { 3528dffb485Schristos atom->csa_offset = fp->ctf_str_prov_offset; 3538dffb485Schristos 3548dffb485Schristos if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t) 3558dffb485Schristos atom->csa_offset, (void *) atom->csa_str) < 0) 3568dffb485Schristos goto oom; 3578dffb485Schristos 3588dffb485Schristos fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1; 3598dffb485Schristos 360*12989c96Schristos if (flags & CTF_STR_ADD_REF) 3618dffb485Schristos { 362*12989c96Schristos if (!aref_create (fp, atom, ref, flags)) 3634b169a6bSchristos goto oom; 3644b169a6bSchristos } 3658dffb485Schristos } 366*12989c96Schristos 3678dffb485Schristos return atom; 3688dffb485Schristos 3698dffb485Schristos oom: 370*12989c96Schristos if (added) 371*12989c96Schristos ctf_dynhash_remove (fp->ctf_str_atoms, atom->csa_str); 3728dffb485Schristos free (atom); 3738dffb485Schristos free (newstr); 3744b169a6bSchristos ctf_set_errno (fp, ENOMEM); 3758dffb485Schristos return NULL; 3768dffb485Schristos } 3778dffb485Schristos 3788dffb485Schristos /* Add a string to the atoms table, without augmenting the ref list for this 3798dffb485Schristos string: return a 'provisional offset' which can be used to return this string 3808dffb485Schristos until ctf_str_write_strtab is called, or 0 on failure. (Everywhere the 3818dffb485Schristos provisional offset is assigned to should be added as a ref using 3828dffb485Schristos ctf_str_add_ref() as well.) */ 3838dffb485Schristos uint32_t 3844b169a6bSchristos ctf_str_add (ctf_dict_t *fp, const char *str) 3858dffb485Schristos { 3868dffb485Schristos ctf_str_atom_t *atom; 3878dffb485Schristos 3884b169a6bSchristos if (!str) 3894b169a6bSchristos str = ""; 3904b169a6bSchristos 391*12989c96Schristos atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PROVISIONAL, 0); 3928dffb485Schristos if (!atom) 3938dffb485Schristos return 0; 3948dffb485Schristos 3958dffb485Schristos return atom->csa_offset; 3968dffb485Schristos } 3978dffb485Schristos 3988dffb485Schristos /* Like ctf_str_add(), but additionally augment the atom's refs list with the 3998dffb485Schristos passed-in ref, whether or not the string is already present. There is no 4008dffb485Schristos attempt to deduplicate the refs list (but duplicates are harmless). */ 4018dffb485Schristos uint32_t 4024b169a6bSchristos ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref) 4038dffb485Schristos { 4048dffb485Schristos ctf_str_atom_t *atom; 4058dffb485Schristos 4064b169a6bSchristos if (!str) 4074b169a6bSchristos str = ""; 4084b169a6bSchristos 4094b169a6bSchristos atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF 410*12989c96Schristos | CTF_STR_PROVISIONAL, ref); 4118dffb485Schristos if (!atom) 4128dffb485Schristos return 0; 4138dffb485Schristos 4148dffb485Schristos return atom->csa_offset; 4158dffb485Schristos } 4168dffb485Schristos 417*12989c96Schristos /* Like ctf_str_add_ref(), but note that the ref may be moved later on. */ 4184b169a6bSchristos uint32_t 419*12989c96Schristos ctf_str_add_movable_ref (ctf_dict_t *fp, const char *str, uint32_t *ref) 4204b169a6bSchristos { 4214b169a6bSchristos ctf_str_atom_t *atom; 4224b169a6bSchristos 4234b169a6bSchristos if (!str) 4244b169a6bSchristos str = ""; 4254b169a6bSchristos 426*12989c96Schristos atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF 427*12989c96Schristos | CTF_STR_PROVISIONAL 428*12989c96Schristos | CTF_STR_MOVABLE, ref); 4294b169a6bSchristos if (!atom) 4304b169a6bSchristos return 0; 4314b169a6bSchristos 4324b169a6bSchristos return atom->csa_offset; 4334b169a6bSchristos } 4344b169a6bSchristos 4358dffb485Schristos /* Add an external strtab reference at OFFSET. Returns zero if the addition 4368dffb485Schristos failed, nonzero otherwise. */ 4378dffb485Schristos int 4384b169a6bSchristos ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset) 4398dffb485Schristos { 4408dffb485Schristos ctf_str_atom_t *atom; 4418dffb485Schristos 4424b169a6bSchristos if (!str) 4434b169a6bSchristos str = ""; 4444b169a6bSchristos 4454b169a6bSchristos atom = ctf_str_add_ref_internal (fp, str, 0, 0); 4468dffb485Schristos if (!atom) 4478dffb485Schristos return 0; 4488dffb485Schristos 4498dffb485Schristos atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1); 4504b169a6bSchristos 4514b169a6bSchristos if (!fp->ctf_syn_ext_strtab) 4524b169a6bSchristos fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer, 4534b169a6bSchristos ctf_hash_eq_integer, 4544b169a6bSchristos NULL, NULL); 4554b169a6bSchristos if (!fp->ctf_syn_ext_strtab) 4564b169a6bSchristos { 4574b169a6bSchristos ctf_set_errno (fp, ENOMEM); 4584b169a6bSchristos return 0; 4594b169a6bSchristos } 4604b169a6bSchristos 4614b169a6bSchristos if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab, 4624b169a6bSchristos (void *) (uintptr_t) 4634b169a6bSchristos atom->csa_external_offset, 4644b169a6bSchristos (void *) atom->csa_str) < 0) 4654b169a6bSchristos { 4664b169a6bSchristos /* No need to bother freeing the syn_ext_strtab: it will get freed at 4674b169a6bSchristos ctf_str_write_strtab time if unreferenced. */ 4684b169a6bSchristos ctf_set_errno (fp, ENOMEM); 4694b169a6bSchristos return 0; 4704b169a6bSchristos } 4714b169a6bSchristos 4728dffb485Schristos return 1; 4738dffb485Schristos } 4748dffb485Schristos 475*12989c96Schristos /* Note that refs have moved from (SRC, LEN) to DEST. We use the movable 476*12989c96Schristos refs backpointer for this, because it is done an amortized-constant 477*12989c96Schristos number of times during structure member and enumerand addition, and if we 478*12989c96Schristos did a linear search this would turn such addition into an O(n^2) 479*12989c96Schristos operation. Even this is not linear, but it's better than that. */ 480*12989c96Schristos int 481*12989c96Schristos ctf_str_move_refs (ctf_dict_t *fp, void *src, size_t len, void *dest) 482*12989c96Schristos { 483*12989c96Schristos uintptr_t p; 484*12989c96Schristos 485*12989c96Schristos if (src == dest) 486*12989c96Schristos return 0; 487*12989c96Schristos 488*12989c96Schristos for (p = (uintptr_t) src; p - (uintptr_t) src < len; p++) 489*12989c96Schristos { 490*12989c96Schristos ctf_str_atom_ref_t *ref; 491*12989c96Schristos 492*12989c96Schristos if ((ref = ctf_dynhash_lookup (fp->ctf_str_movable_refs, 493*12989c96Schristos (ctf_str_atom_ref_t *) p)) != NULL) 494*12989c96Schristos { 495*12989c96Schristos int out_of_memory; 496*12989c96Schristos 497*12989c96Schristos ref->caf_ref = (uint32_t *) (((uintptr_t) ref->caf_ref + 498*12989c96Schristos (uintptr_t) dest - (uintptr_t) src)); 499*12989c96Schristos ctf_dynhash_remove (fp->ctf_str_movable_refs, 500*12989c96Schristos (ctf_str_atom_ref_t *) p); 501*12989c96Schristos out_of_memory = ctf_dynhash_insert (fp->ctf_str_movable_refs, 502*12989c96Schristos ref->caf_ref, ref); 503*12989c96Schristos assert (out_of_memory == 0); 504*12989c96Schristos } 505*12989c96Schristos } 506*12989c96Schristos 507*12989c96Schristos return 0; 508*12989c96Schristos } 509*12989c96Schristos 5108dffb485Schristos /* Remove a single ref. */ 5118dffb485Schristos void 5124b169a6bSchristos ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref) 5138dffb485Schristos { 5148dffb485Schristos ctf_str_atom_ref_t *aref, *anext; 5158dffb485Schristos ctf_str_atom_t *atom = NULL; 5168dffb485Schristos 5178dffb485Schristos atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str); 5188dffb485Schristos if (!atom) 5198dffb485Schristos return; 5208dffb485Schristos 5218dffb485Schristos for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext) 5228dffb485Schristos { 5238dffb485Schristos anext = ctf_list_next (aref); 5248dffb485Schristos if (aref->caf_ref == ref) 5258dffb485Schristos { 5268dffb485Schristos ctf_list_delete (&atom->csa_refs, aref); 5278dffb485Schristos free (aref); 5288dffb485Schristos } 5298dffb485Schristos } 5308dffb485Schristos } 5318dffb485Schristos 5328dffb485Schristos /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given 5334b169a6bSchristos snapshot ID. External atoms are never removed, because they came from the 5344b169a6bSchristos linker string table and are still present even if you roll back type 5354b169a6bSchristos additions. */ 5368dffb485Schristos static int 5378dffb485Schristos ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg) 5388dffb485Schristos { 5398dffb485Schristos ctf_str_atom_t *atom = (ctf_str_atom_t *) value; 5408dffb485Schristos ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg; 5418dffb485Schristos 5424b169a6bSchristos return (atom->csa_snapshot_id > id->snapshot_id) 5434b169a6bSchristos && (atom->csa_external_offset == 0); 5448dffb485Schristos } 5458dffb485Schristos 5464b169a6bSchristos /* Roll back, deleting all (internal) atoms created after a particular ID. */ 5478dffb485Schristos void 5484b169a6bSchristos ctf_str_rollback (ctf_dict_t *fp, ctf_snapshot_id_t id) 5498dffb485Schristos { 5508dffb485Schristos ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id); 5518dffb485Schristos } 5528dffb485Schristos 5538dffb485Schristos /* An adaptor around ctf_purge_atom_refs. */ 5548dffb485Schristos static void 5558dffb485Schristos ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value, 5568dffb485Schristos void *arg _libctf_unused_) 5578dffb485Schristos { 5588dffb485Schristos ctf_str_atom_t *atom = (ctf_str_atom_t *) value; 5598dffb485Schristos ctf_str_purge_atom_refs (atom); 5608dffb485Schristos } 5618dffb485Schristos 5628dffb485Schristos /* Remove all the recorded refs from the atoms table. */ 5638dffb485Schristos void 5644b169a6bSchristos ctf_str_purge_refs (ctf_dict_t *fp) 5658dffb485Schristos { 5668dffb485Schristos ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL); 5678dffb485Schristos } 5688dffb485Schristos 5698dffb485Schristos /* Update a list of refs to the specified value. */ 5708dffb485Schristos static void 5718dffb485Schristos ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value) 5728dffb485Schristos { 5738dffb485Schristos ctf_str_atom_ref_t *ref; 5748dffb485Schristos 5758dffb485Schristos for (ref = ctf_list_next (&refs->csa_refs); ref != NULL; 5768dffb485Schristos ref = ctf_list_next (ref)) 5778dffb485Schristos *(ref->caf_ref) = value; 5788dffb485Schristos } 5798dffb485Schristos 5808dffb485Schristos /* Sort the strtab. */ 5818dffb485Schristos static int 5828dffb485Schristos ctf_str_sort_strtab (const void *a, const void *b) 5838dffb485Schristos { 5848dffb485Schristos ctf_str_atom_t **one = (ctf_str_atom_t **) a; 5858dffb485Schristos ctf_str_atom_t **two = (ctf_str_atom_t **) b; 5868dffb485Schristos 5878dffb485Schristos return (strcmp ((*one)->csa_str, (*two)->csa_str)); 5888dffb485Schristos } 5898dffb485Schristos 5908dffb485Schristos /* Write out and return a strtab containing all strings with recorded refs, 591*12989c96Schristos adjusting the refs to refer to the corresponding string. The returned 592*12989c96Schristos strtab is already assigned to strtab 0 in this dict, is owned by this 593*12989c96Schristos dict, and may be NULL on error. Also populate the synthetic strtab with 594*12989c96Schristos mappings from external strtab offsets to names, so we can look them up 595*12989c96Schristos with ctf_strptr(). Only external strtab offsets with references are 596*12989c96Schristos added. 597*12989c96Schristos 598*12989c96Schristos As a side effect, replaces the strtab of the current dict with the newly- 599*12989c96Schristos generated strtab. This is an exception to the general rule that 600*12989c96Schristos serialization does not change the dict passed in, because the alternative 601*12989c96Schristos is to copy the entire atoms table on every reserialization just to avoid 602*12989c96Schristos modifying the original, which is excessively costly for minimal gain. 603*12989c96Schristos 604*12989c96Schristos We use the lazy man's approach and double memory costs by always storing 605*12989c96Schristos atoms as individually allocated entities whenever they come from anywhere 606*12989c96Schristos but a freshly-opened, mmapped dict, even though after serialization there 607*12989c96Schristos is another copy in the strtab; this ensures that ctf_strptr()-returned 608*12989c96Schristos pointers to them remain valid for the lifetime of the dict. 609*12989c96Schristos 610*12989c96Schristos This is all rendered more complex because if a dict is ctf_open()ed it 611*12989c96Schristos will have a bunch of strings in its strtab already, and their strtab 612*12989c96Schristos offsets can never change (without piles of complexity to rescan the 613*12989c96Schristos entire dict just to get all the offsets to all of them into the atoms 614*12989c96Schristos table). Entries below the existing strtab limit are just copied into the 615*12989c96Schristos new dict: entries above it are new, and are are sorted first, then 616*12989c96Schristos appended to it. The sorting is purely a compression-efficiency 617*12989c96Schristos improvement, and we get nearly as good an improvement from sorting big 618*12989c96Schristos chunks like this as we would from sorting the whole thing. */ 619*12989c96Schristos 620*12989c96Schristos const ctf_strs_writable_t * 6214b169a6bSchristos ctf_str_write_strtab (ctf_dict_t *fp) 6228dffb485Schristos { 623*12989c96Schristos ctf_strs_writable_t *strtab; 624*12989c96Schristos size_t strtab_count = 0; 6258dffb485Schristos uint32_t cur_stroff = 0; 6268dffb485Schristos ctf_str_atom_t **sorttab; 627*12989c96Schristos ctf_next_t *it = NULL; 6288dffb485Schristos size_t i; 629*12989c96Schristos void *v; 630*12989c96Schristos int err; 631*12989c96Schristos int new_strtab = 0; 6328dffb485Schristos int any_external = 0; 6338dffb485Schristos 634*12989c96Schristos strtab = calloc (1, sizeof (ctf_strs_writable_t)); 635*12989c96Schristos if (!strtab) 636*12989c96Schristos return NULL; 6378dffb485Schristos 638*12989c96Schristos /* The strtab contains the existing string table at its start: figure out 639*12989c96Schristos how many new strings we need to add. We only need to add new strings 640*12989c96Schristos that have no external offset, that have refs, and that are found in the 641*12989c96Schristos provisional strtab. If the existing strtab is empty we also need to 642*12989c96Schristos add the null string at its start. */ 643*12989c96Schristos 644*12989c96Schristos strtab->cts_len = fp->ctf_str[CTF_STRTAB_0].cts_len; 645*12989c96Schristos 646*12989c96Schristos if (strtab->cts_len == 0) 6478dffb485Schristos { 648*12989c96Schristos new_strtab = 1; 649*12989c96Schristos strtab->cts_len++; /* For the \0. */ 6508dffb485Schristos } 6518dffb485Schristos 652*12989c96Schristos /* Count new entries in the strtab: i.e. entries in the provisional 653*12989c96Schristos strtab. Ignore any entry for \0, entries which ended up in the 654*12989c96Schristos external strtab, and unreferenced entries. */ 6558dffb485Schristos 656*12989c96Schristos while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0) 657*12989c96Schristos { 658*12989c96Schristos const char *str = (const char *) v; 659*12989c96Schristos ctf_str_atom_t *atom; 6608dffb485Schristos 661*12989c96Schristos atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str); 662*12989c96Schristos if (!ctf_assert (fp, atom)) 663*12989c96Schristos goto err_strtab; 664*12989c96Schristos 665*12989c96Schristos if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) || 666*12989c96Schristos atom->csa_external_offset) 667*12989c96Schristos continue; 668*12989c96Schristos 669*12989c96Schristos strtab->cts_len += strlen (atom->csa_str) + 1; 670*12989c96Schristos strtab_count++; 671*12989c96Schristos } 672*12989c96Schristos if (err != ECTF_NEXT_END) 673*12989c96Schristos { 674*12989c96Schristos ctf_dprintf ("ctf_str_write_strtab: error counting strtab entries: %s\n", 675*12989c96Schristos ctf_errmsg (err)); 676*12989c96Schristos goto err_strtab; 677*12989c96Schristos } 678*12989c96Schristos 679*12989c96Schristos ctf_dprintf ("%lu bytes of strings in strtab: %lu pre-existing.\n", 680*12989c96Schristos (unsigned long) strtab->cts_len, 681*12989c96Schristos (unsigned long) fp->ctf_str[CTF_STRTAB_0].cts_len); 682*12989c96Schristos 683*12989c96Schristos /* Sort the new part of the strtab. */ 684*12989c96Schristos 685*12989c96Schristos sorttab = calloc (strtab_count, sizeof (ctf_str_atom_t *)); 6868dffb485Schristos if (!sorttab) 687*12989c96Schristos { 688*12989c96Schristos ctf_set_errno (fp, ENOMEM); 689*12989c96Schristos goto err_strtab; 690*12989c96Schristos } 6918dffb485Schristos 692*12989c96Schristos i = 0; 693*12989c96Schristos while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0) 694*12989c96Schristos { 695*12989c96Schristos ctf_str_atom_t *atom; 6968dffb485Schristos 697*12989c96Schristos atom = ctf_dynhash_lookup (fp->ctf_str_atoms, v); 698*12989c96Schristos if (!ctf_assert (fp, atom)) 699*12989c96Schristos goto err_sorttab; 700*12989c96Schristos 701*12989c96Schristos if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) || 702*12989c96Schristos atom->csa_external_offset) 703*12989c96Schristos continue; 704*12989c96Schristos 705*12989c96Schristos sorttab[i++] = atom; 706*12989c96Schristos } 707*12989c96Schristos 708*12989c96Schristos qsort (sorttab, strtab_count, sizeof (ctf_str_atom_t *), 7098dffb485Schristos ctf_str_sort_strtab); 7108dffb485Schristos 711*12989c96Schristos if ((strtab->cts_strs = malloc (strtab->cts_len)) == NULL) 712*12989c96Schristos goto err_sorttab; 7138dffb485Schristos 714*12989c96Schristos cur_stroff = fp->ctf_str[CTF_STRTAB_0].cts_len; 7158dffb485Schristos 716*12989c96Schristos if (new_strtab) 717*12989c96Schristos { 718*12989c96Schristos strtab->cts_strs[0] = 0; 719*12989c96Schristos cur_stroff++; 7208dffb485Schristos } 7218dffb485Schristos else 722*12989c96Schristos memcpy (strtab->cts_strs, fp->ctf_str[CTF_STRTAB_0].cts_strs, 723*12989c96Schristos fp->ctf_str[CTF_STRTAB_0].cts_len); 7248dffb485Schristos 725*12989c96Schristos /* Work over the sorttab, add its strings to the strtab, and remember 726*12989c96Schristos where they are in the csa_offset for the appropriate atom. No ref 727*12989c96Schristos updating is done at this point, because refs might well relate to 728*12989c96Schristos already-existing strings, or external strings, which do not need adding 729*12989c96Schristos to the strtab and may not be in the sorttab. */ 730*12989c96Schristos 731*12989c96Schristos for (i = 0; i < strtab_count; i++) 732*12989c96Schristos { 7338dffb485Schristos sorttab[i]->csa_offset = cur_stroff; 734*12989c96Schristos strcpy (&strtab->cts_strs[cur_stroff], sorttab[i]->csa_str); 7358dffb485Schristos cur_stroff += strlen (sorttab[i]->csa_str) + 1; 7368dffb485Schristos } 7378dffb485Schristos free (sorttab); 738*12989c96Schristos sorttab = NULL; 739*12989c96Schristos 740*12989c96Schristos /* Update all refs, then purge them as no longer necessary: also update 741*12989c96Schristos the strtab appropriately. */ 742*12989c96Schristos 743*12989c96Schristos while ((err = ctf_dynhash_next (fp->ctf_str_atoms, &it, NULL, &v)) == 0) 744*12989c96Schristos { 745*12989c96Schristos ctf_str_atom_t *atom = (ctf_str_atom_t *) v; 746*12989c96Schristos uint32_t offset; 747*12989c96Schristos 748*12989c96Schristos if (ctf_list_empty_p (&atom->csa_refs)) 749*12989c96Schristos continue; 750*12989c96Schristos 751*12989c96Schristos if (atom->csa_external_offset) 752*12989c96Schristos { 753*12989c96Schristos any_external = 1; 754*12989c96Schristos offset = atom->csa_external_offset; 755*12989c96Schristos } 756*12989c96Schristos else 757*12989c96Schristos offset = atom->csa_offset; 758*12989c96Schristos ctf_str_update_refs (atom, offset); 759*12989c96Schristos } 760*12989c96Schristos if (err != ECTF_NEXT_END) 761*12989c96Schristos { 762*12989c96Schristos ctf_dprintf ("ctf_str_write_strtab: error iterating over atoms while updating refs: %s\n", 763*12989c96Schristos ctf_errmsg (err)); 764*12989c96Schristos goto err_strtab; 765*12989c96Schristos } 766*12989c96Schristos ctf_str_purge_refs (fp); 7678dffb485Schristos 7688dffb485Schristos if (!any_external) 7698dffb485Schristos { 7708dffb485Schristos ctf_dynhash_destroy (fp->ctf_syn_ext_strtab); 7718dffb485Schristos fp->ctf_syn_ext_strtab = NULL; 7728dffb485Schristos } 7738dffb485Schristos 774*12989c96Schristos /* Replace the old strtab with the new one in this dict. */ 775*12989c96Schristos 776*12989c96Schristos if (fp->ctf_dynstrtab) 777*12989c96Schristos { 778*12989c96Schristos free (fp->ctf_dynstrtab->cts_strs); 779*12989c96Schristos free (fp->ctf_dynstrtab); 780*12989c96Schristos } 781*12989c96Schristos 782*12989c96Schristos fp->ctf_dynstrtab = strtab; 783*12989c96Schristos fp->ctf_str[CTF_STRTAB_0].cts_strs = strtab->cts_strs; 784*12989c96Schristos fp->ctf_str[CTF_STRTAB_0].cts_len = strtab->cts_len; 785*12989c96Schristos 7868dffb485Schristos /* All the provisional strtab entries are now real strtab entries, and 7878dffb485Schristos ctf_strptr() will find them there. The provisional offset now starts right 7888dffb485Schristos beyond the new end of the strtab. */ 7898dffb485Schristos 7908dffb485Schristos ctf_dynhash_empty (fp->ctf_prov_strtab); 791*12989c96Schristos fp->ctf_str_prov_offset = strtab->cts_len + 1; 7928dffb485Schristos return strtab; 7938dffb485Schristos 794*12989c96Schristos err_sorttab: 7958dffb485Schristos free (sorttab); 796*12989c96Schristos err_strtab: 797*12989c96Schristos free (strtab); 798*12989c96Schristos return NULL; 7998dffb485Schristos } 800