1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate #include <string_table.h> 30*0Sstevel@tonic-gate #include <stdlib.h> 31*0Sstevel@tonic-gate #include <strings.h> 32*0Sstevel@tonic-gate #include <sgs.h> 33*0Sstevel@tonic-gate #include <stdio.h> 34*0Sstevel@tonic-gate 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate 37*0Sstevel@tonic-gate /* 38*0Sstevel@tonic-gate * This file provides the interfaces to build a Str_tbl suitable 39*0Sstevel@tonic-gate * for use by either the sgsmsg system or a standard ELF 40*0Sstevel@tonic-gate * SHT_STRTAB. 41*0Sstevel@tonic-gate * 42*0Sstevel@tonic-gate * There are two modes which can be used when constructing a 43*0Sstevel@tonic-gate * string table: 44*0Sstevel@tonic-gate * 45*0Sstevel@tonic-gate * st_new(0) 46*0Sstevel@tonic-gate * standard string table - no compression. This is the 47*0Sstevel@tonic-gate * traditional method and fast 48*0Sstevel@tonic-gate * 49*0Sstevel@tonic-gate * st_new(FLG_STNEW_COMPRESS) 50*0Sstevel@tonic-gate * build a compressed string table which both 51*0Sstevel@tonic-gate * eliminates duplicate strings and permits 52*0Sstevel@tonic-gate * strings with common suffixes (atexit vs. exit) to 53*0Sstevel@tonic-gate * overlap in the table. This provides space 54*0Sstevel@tonic-gate * savings for many string tables. 55*0Sstevel@tonic-gate * 56*0Sstevel@tonic-gate * These string tables are now built with a common interface in a 57*0Sstevel@tonic-gate * two-pass manner, the first pass it to find all of the strings 58*0Sstevel@tonic-gate * required for the string-table and to calculate the size that 59*0Sstevel@tonic-gate * will be required for the final string table. 60*0Sstevel@tonic-gate * 61*0Sstevel@tonic-gate * The second pass allocates the string table and populates the 62*0Sstevel@tonic-gate * strings into the table and returns the offsets the strings 63*0Sstevel@tonic-gate * have been assigned. 64*0Sstevel@tonic-gate * 65*0Sstevel@tonic-gate * The calling sequence to build and populate a string table is: 66*0Sstevel@tonic-gate * 67*0Sstevel@tonic-gate * st_new(); // initialize strtab 68*0Sstevel@tonic-gate * 69*0Sstevel@tonic-gate * st_insert(st1); // first pass of strings ... 70*0Sstevel@tonic-gate * // calculates size required for 71*0Sstevel@tonic-gate * // string table 72*0Sstevel@tonic-gate * 73*0Sstevel@tonic-gate * st_delstring(st?); // remove string previously 74*0Sstevel@tonic-gate * // inserted 75*0Sstevel@tonic-gate * 76*0Sstevel@tonic-gate * st_insert(stN); 77*0Sstevel@tonic-gate * 78*0Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes 79*0Sstevel@tonic-gate * // size of table. 80*0Sstevel@tonic-gate * 81*0Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination 82*0Sstevel@tonic-gate * // for the string table 83*0Sstevel@tonic-gate * 84*0Sstevel@tonic-gate * st_setstring(st1); // populate the string table 85*0Sstevel@tonic-gate * ... // offsets are based off of second 86*0Sstevel@tonic-gate * // pass through the string table 87*0Sstevel@tonic-gate * st_setstring(stN); 88*0Sstevel@tonic-gate * 89*0Sstevel@tonic-gate * st_destroy(); // tear down string table 90*0Sstevel@tonic-gate * // structures. 91*0Sstevel@tonic-gate * 92*0Sstevel@tonic-gate * String Suffix Compression Algorithm: 93*0Sstevel@tonic-gate * 94*0Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String 95*0Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm 96*0Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique 97*0Sstevel@tonic-gate * strings inserted into the string table. The hash function for 98*0Sstevel@tonic-gate * this table is a standard string hash except that the hash starts 99*0Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards 100*0Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the 101*0Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values 102*0Sstevel@tonic-gate * for all of the possible suffix strings for that string. 103*0Sstevel@tonic-gate * 104*0Sstevel@tonic-gate * As we compute the hash - at each character see if the current 105*0Sstevel@tonic-gate * suffix string for that hash is already present in the table. If 106*0Sstevel@tonic-gate * it is, and the string is a master string. Then change that 107*0Sstevel@tonic-gate * string to a suffix string of the new string being inserted. 108*0Sstevel@tonic-gate * 109*0Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check 110*0Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference 111*0Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a 112*0Sstevel@tonic-gate * new hash table entry for a master string. 113*0Sstevel@tonic-gate * 114*0Sstevel@tonic-gate * The above method will find all suffixes of a given string given 115*0Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is 116*0Sstevel@tonic-gate * why this is a two phase method, we first collect all of the 117*0Sstevel@tonic-gate * strings and store them based off of their length in a nice AVL tree. 118*0Sstevel@tonic-gate * Once all of the strings have been submitted we then start the 119*0Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and 120*0Sstevel@tonic-gate * inserting the strings from shortest to longest as described 121*0Sstevel@tonic-gate * above. 122*0Sstevel@tonic-gate * 123*0Sstevel@tonic-gate */ 124*0Sstevel@tonic-gate 125*0Sstevel@tonic-gate /* LINTLIBRARY */ 126*0Sstevel@tonic-gate 127*0Sstevel@tonic-gate 128*0Sstevel@tonic-gate int 129*0Sstevel@tonic-gate strlen_compare(const void *elem1, const void *elem2) 130*0Sstevel@tonic-gate { 131*0Sstevel@tonic-gate uint_t l1, l2; 132*0Sstevel@tonic-gate l1 = ((Stringelem *)elem1)->se_stlen; 133*0Sstevel@tonic-gate l2 = ((Stringelem *)elem2)->se_stlen; 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate if (l1 == l2) 136*0Sstevel@tonic-gate return (0); 137*0Sstevel@tonic-gate if (l2 < l1) 138*0Sstevel@tonic-gate return (1); 139*0Sstevel@tonic-gate 140*0Sstevel@tonic-gate return (-1); 141*0Sstevel@tonic-gate } 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate /* 144*0Sstevel@tonic-gate * Return a initialized Str_tbl - returns NULL on failure. 145*0Sstevel@tonic-gate * 146*0Sstevel@tonic-gate * stflags: 147*0Sstevel@tonic-gate * 148*0Sstevel@tonic-gate * FLG_STNEW_COMPRESS - build a compressed string table 149*0Sstevel@tonic-gate * 150*0Sstevel@tonic-gate */ 151*0Sstevel@tonic-gate Str_tbl * 152*0Sstevel@tonic-gate st_new(uint_t stflags) 153*0Sstevel@tonic-gate { 154*0Sstevel@tonic-gate Str_tbl *stp; 155*0Sstevel@tonic-gate 156*0Sstevel@tonic-gate if ((stp = calloc(sizeof (Str_tbl), 1)) == 0) 157*0Sstevel@tonic-gate return (0); 158*0Sstevel@tonic-gate 159*0Sstevel@tonic-gate /* 160*0Sstevel@tonic-gate * Start with a leading '\0' - it's tradition. 161*0Sstevel@tonic-gate */ 162*0Sstevel@tonic-gate stp->st_stringsize = stp->st_fullstringsize = stp->st_nextoff = 1; 163*0Sstevel@tonic-gate 164*0Sstevel@tonic-gate /* 165*0Sstevel@tonic-gate * Do we compress this string table 166*0Sstevel@tonic-gate */ 167*0Sstevel@tonic-gate if ((stflags & FLG_STNEW_COMPRESS) == 0) 168*0Sstevel@tonic-gate return (stp); 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COMPRESS; 171*0Sstevel@tonic-gate if ((stp->st_strtree = calloc(sizeof (avl_tree_t), 1)) == 0) { 172*0Sstevel@tonic-gate return (0); 173*0Sstevel@tonic-gate } 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate avl_create(stp->st_strtree, &strlen_compare, sizeof (Stringelem), 176*0Sstevel@tonic-gate SGSOFFSETOF(Stringelem, se_avlnode)); 177*0Sstevel@tonic-gate 178*0Sstevel@tonic-gate return (stp); 179*0Sstevel@tonic-gate } 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate /* 182*0Sstevel@tonic-gate * Tear down a String_Table structure. 183*0Sstevel@tonic-gate */ 184*0Sstevel@tonic-gate void 185*0Sstevel@tonic-gate st_destroy(Str_tbl *stp) 186*0Sstevel@tonic-gate { 187*0Sstevel@tonic-gate Str_hash *sthash, *psthash; 188*0Sstevel@tonic-gate Str_master *mstr, *pmstr; 189*0Sstevel@tonic-gate uint_t i; 190*0Sstevel@tonic-gate 191*0Sstevel@tonic-gate /* 192*0Sstevel@tonic-gate * cleanup the master strings 193*0Sstevel@tonic-gate */ 194*0Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr; 195*0Sstevel@tonic-gate mstr = mstr->sm_next) { 196*0Sstevel@tonic-gate if (pmstr) 197*0Sstevel@tonic-gate free(pmstr); 198*0Sstevel@tonic-gate pmstr = mstr; 199*0Sstevel@tonic-gate } 200*0Sstevel@tonic-gate if (pmstr) 201*0Sstevel@tonic-gate free(pmstr); 202*0Sstevel@tonic-gate 203*0Sstevel@tonic-gate if (stp->st_hashbcks) { 204*0Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) { 205*0Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0; 206*0Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 207*0Sstevel@tonic-gate if (psthash) 208*0Sstevel@tonic-gate free(psthash); 209*0Sstevel@tonic-gate psthash = sthash; 210*0Sstevel@tonic-gate } 211*0Sstevel@tonic-gate if (psthash) 212*0Sstevel@tonic-gate free(psthash); 213*0Sstevel@tonic-gate } 214*0Sstevel@tonic-gate free(stp->st_hashbcks); 215*0Sstevel@tonic-gate } 216*0Sstevel@tonic-gate free(stp); 217*0Sstevel@tonic-gate } 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gate 220*0Sstevel@tonic-gate 221*0Sstevel@tonic-gate 222*0Sstevel@tonic-gate /* 223*0Sstevel@tonic-gate * Remove a previously inserted string from the Str_tbl 224*0Sstevel@tonic-gate */ 225*0Sstevel@tonic-gate int 226*0Sstevel@tonic-gate st_delstring(Str_tbl *stp, const char *str) 227*0Sstevel@tonic-gate { 228*0Sstevel@tonic-gate uint_t stlen; 229*0Sstevel@tonic-gate Stringelem qstelem; 230*0Sstevel@tonic-gate Stringelem *stelem; 231*0Sstevel@tonic-gate Stringlist *stlist, *pstlist; 232*0Sstevel@tonic-gate 233*0Sstevel@tonic-gate /* 234*0Sstevel@tonic-gate * String table can't have been cooked 235*0Sstevel@tonic-gate */ 236*0Sstevel@tonic-gate assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 237*0Sstevel@tonic-gate 238*0Sstevel@tonic-gate stlen = (uint_t)strlen(str); 239*0Sstevel@tonic-gate stp->st_fullstringsize -= stlen + 1; 240*0Sstevel@tonic-gate 241*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 242*0Sstevel@tonic-gate return (0); 243*0Sstevel@tonic-gate 244*0Sstevel@tonic-gate qstelem.se_stlen = stlen; 245*0Sstevel@tonic-gate if ((stelem = avl_find(stp->st_strtree, &qstelem, 0)) == NULL) { 246*0Sstevel@tonic-gate /* 247*0Sstevel@tonic-gate * no strings of this length recorded, let alone 248*0Sstevel@tonic-gate * this specific string - someone goofed. 249*0Sstevel@tonic-gate */ 250*0Sstevel@tonic-gate return (-1); 251*0Sstevel@tonic-gate } 252*0Sstevel@tonic-gate 253*0Sstevel@tonic-gate pstlist = 0; 254*0Sstevel@tonic-gate for (stlist = stelem->se_strlist; stlist; stlist = stlist->sl_next) { 255*0Sstevel@tonic-gate if (strcmp(str, stlist->sl_string) == 0) 256*0Sstevel@tonic-gate break; 257*0Sstevel@tonic-gate pstlist = stlist; 258*0Sstevel@tonic-gate } 259*0Sstevel@tonic-gate 260*0Sstevel@tonic-gate if (stlist == 0) { 261*0Sstevel@tonic-gate /* 262*0Sstevel@tonic-gate * string was not found 263*0Sstevel@tonic-gate */ 264*0Sstevel@tonic-gate return (-1); 265*0Sstevel@tonic-gate } 266*0Sstevel@tonic-gate 267*0Sstevel@tonic-gate if (pstlist == 0) { 268*0Sstevel@tonic-gate /* 269*0Sstevel@tonic-gate * String is first on list. 270*0Sstevel@tonic-gate */ 271*0Sstevel@tonic-gate stelem->se_strlist = stlist->sl_next; 272*0Sstevel@tonic-gate } else { 273*0Sstevel@tonic-gate /* 274*0Sstevel@tonic-gate * remove string from list. 275*0Sstevel@tonic-gate */ 276*0Sstevel@tonic-gate pstlist->sl_next = stlist->sl_next; 277*0Sstevel@tonic-gate } 278*0Sstevel@tonic-gate 279*0Sstevel@tonic-gate free(stlist); 280*0Sstevel@tonic-gate return (0); 281*0Sstevel@tonic-gate } 282*0Sstevel@tonic-gate 283*0Sstevel@tonic-gate 284*0Sstevel@tonic-gate /* 285*0Sstevel@tonic-gate * Insert a new string into the Str_tbl 286*0Sstevel@tonic-gate */ 287*0Sstevel@tonic-gate int 288*0Sstevel@tonic-gate st_insert(Str_tbl *stp, const char *str) 289*0Sstevel@tonic-gate { 290*0Sstevel@tonic-gate uint_t stlen; 291*0Sstevel@tonic-gate Stringelem qstelem; 292*0Sstevel@tonic-gate Stringelem *stelem; 293*0Sstevel@tonic-gate Stringlist *strlist; 294*0Sstevel@tonic-gate avl_index_t where; 295*0Sstevel@tonic-gate 296*0Sstevel@tonic-gate /* 297*0Sstevel@tonic-gate * String table can't have been cooked 298*0Sstevel@tonic-gate */ 299*0Sstevel@tonic-gate assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 300*0Sstevel@tonic-gate stlen = (uint_t)strlen(str); 301*0Sstevel@tonic-gate /* 302*0Sstevel@tonic-gate * Null strings always point to the head of the string 303*0Sstevel@tonic-gate * table - no reason to keep searching. 304*0Sstevel@tonic-gate */ 305*0Sstevel@tonic-gate if (stlen == 0) 306*0Sstevel@tonic-gate return (0); 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate stp->st_fullstringsize += stlen + 1; 309*0Sstevel@tonic-gate stp->st_stringcnt++; 310*0Sstevel@tonic-gate 311*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 312*0Sstevel@tonic-gate return (0); 313*0Sstevel@tonic-gate 314*0Sstevel@tonic-gate qstelem.se_stlen = strlen(str); 315*0Sstevel@tonic-gate if ((stelem = avl_find(stp->st_strtree, &qstelem, 316*0Sstevel@tonic-gate &where)) == NULL) { 317*0Sstevel@tonic-gate if ((stelem = calloc(sizeof (Stringelem), 1)) == 0) 318*0Sstevel@tonic-gate return (-1); 319*0Sstevel@tonic-gate stelem->se_stlen = qstelem.se_stlen; 320*0Sstevel@tonic-gate avl_insert(stp->st_strtree, stelem, where); 321*0Sstevel@tonic-gate } 322*0Sstevel@tonic-gate if ((strlist = malloc(sizeof (Stringlist))) == 0) 323*0Sstevel@tonic-gate return (-1); 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate strlist->sl_string = str; 326*0Sstevel@tonic-gate strlist->sl_next = stelem->se_strlist; 327*0Sstevel@tonic-gate stelem->se_strlist = strlist; 328*0Sstevel@tonic-gate 329*0Sstevel@tonic-gate return (0); 330*0Sstevel@tonic-gate } 331*0Sstevel@tonic-gate 332*0Sstevel@tonic-gate 333*0Sstevel@tonic-gate /* 334*0Sstevel@tonic-gate * For a given string - copy it into the buffer associated with 335*0Sstevel@tonic-gate * the string table - and return the offset it has been assigned. 336*0Sstevel@tonic-gate * 337*0Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in 338*0Sstevel@tonic-gate * the Str_tbl. 339*0Sstevel@tonic-gate */ 340*0Sstevel@tonic-gate int 341*0Sstevel@tonic-gate st_setstring(Str_tbl *stp, const char *str, uint_t *stoff) 342*0Sstevel@tonic-gate { 343*0Sstevel@tonic-gate uint_t stlen; 344*0Sstevel@tonic-gate uint_t hashval; 345*0Sstevel@tonic-gate Str_hash *sthash; 346*0Sstevel@tonic-gate Str_master *mstr; 347*0Sstevel@tonic-gate int i; 348*0Sstevel@tonic-gate 349*0Sstevel@tonic-gate /* 350*0Sstevel@tonic-gate * String table *must* have been previously cooked 351*0Sstevel@tonic-gate */ 352*0Sstevel@tonic-gate assert(stp->st_strbuf); 353*0Sstevel@tonic-gate 354*0Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 355*0Sstevel@tonic-gate stlen = (uint_t)strlen(str); 356*0Sstevel@tonic-gate /* 357*0Sstevel@tonic-gate * Null string always points to head of string table 358*0Sstevel@tonic-gate */ 359*0Sstevel@tonic-gate if (stlen == 0) { 360*0Sstevel@tonic-gate *stoff = 0; 361*0Sstevel@tonic-gate return (0); 362*0Sstevel@tonic-gate } 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 365*0Sstevel@tonic-gate uint_t _stoff; 366*0Sstevel@tonic-gate 367*0Sstevel@tonic-gate stlen++; /* count for trailing '\0' */ 368*0Sstevel@tonic-gate _stoff = stp->st_nextoff; 369*0Sstevel@tonic-gate /* 370*0Sstevel@tonic-gate * Have we overflowed our assigned buffer? 371*0Sstevel@tonic-gate */ 372*0Sstevel@tonic-gate if ((_stoff + stlen) > stp->st_fullstringsize) 373*0Sstevel@tonic-gate return (-1); 374*0Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen); 375*0Sstevel@tonic-gate *stoff = _stoff; 376*0Sstevel@tonic-gate stp->st_nextoff += stlen; 377*0Sstevel@tonic-gate return (0); 378*0Sstevel@tonic-gate } 379*0Sstevel@tonic-gate 380*0Sstevel@tonic-gate /* 381*0Sstevel@tonic-gate * Calculate reverse hash for string 382*0Sstevel@tonic-gate */ 383*0Sstevel@tonic-gate hashval = HASHSEED; 384*0Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 385*0Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 386*0Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 387*0Sstevel@tonic-gate } 388*0Sstevel@tonic-gate 389*0Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash; 390*0Sstevel@tonic-gate sthash = sthash->hi_next) { 391*0Sstevel@tonic-gate if (sthash->hi_hashval == hashval) { 392*0Sstevel@tonic-gate const char *hstr; 393*0Sstevel@tonic-gate 394*0Sstevel@tonic-gate hstr = &sthash->hi_mstr->sm_str[ 395*0Sstevel@tonic-gate sthash->hi_mstr->sm_stlen - 396*0Sstevel@tonic-gate sthash->hi_stlen]; 397*0Sstevel@tonic-gate if (strcmp(str, hstr) == 0) { 398*0Sstevel@tonic-gate break; 399*0Sstevel@tonic-gate } 400*0Sstevel@tonic-gate } 401*0Sstevel@tonic-gate } 402*0Sstevel@tonic-gate 403*0Sstevel@tonic-gate /* 404*0Sstevel@tonic-gate * Did we find the string? 405*0Sstevel@tonic-gate */ 406*0Sstevel@tonic-gate if (sthash == 0) 407*0Sstevel@tonic-gate return (-1); 408*0Sstevel@tonic-gate 409*0Sstevel@tonic-gate /* 410*0Sstevel@tonic-gate * Has this string been copied into the string table? 411*0Sstevel@tonic-gate */ 412*0Sstevel@tonic-gate mstr = sthash->hi_mstr; 413*0Sstevel@tonic-gate if (mstr->sm_stoff == 0) { 414*0Sstevel@tonic-gate uint_t mstlen = mstr->sm_stlen + 1; 415*0Sstevel@tonic-gate mstr->sm_stoff = stp->st_nextoff; 416*0Sstevel@tonic-gate /* 417*0Sstevel@tonic-gate * Have we overflowed our assigned buffer? 418*0Sstevel@tonic-gate */ 419*0Sstevel@tonic-gate if ((mstr->sm_stoff + mstlen) > stp->st_fullstringsize) 420*0Sstevel@tonic-gate return (-1); 421*0Sstevel@tonic-gate memcpy(stp->st_strbuf + mstr->sm_stoff, mstr->sm_str, 422*0Sstevel@tonic-gate mstlen); 423*0Sstevel@tonic-gate stp->st_nextoff += mstlen; 424*0Sstevel@tonic-gate } 425*0Sstevel@tonic-gate /* 426*0Sstevel@tonic-gate * Calculate offset of (sub)string 427*0Sstevel@tonic-gate */ 428*0Sstevel@tonic-gate *stoff = mstr->sm_stoff + mstr->sm_stlen - sthash->hi_stlen; 429*0Sstevel@tonic-gate 430*0Sstevel@tonic-gate return (0); 431*0Sstevel@tonic-gate } 432*0Sstevel@tonic-gate 433*0Sstevel@tonic-gate 434*0Sstevel@tonic-gate static int 435*0Sstevel@tonic-gate st_hash_insert(Str_tbl *stp, const char *str, uint_t stlen) 436*0Sstevel@tonic-gate { 437*0Sstevel@tonic-gate int i; 438*0Sstevel@tonic-gate uint_t hashval = HASHSEED; 439*0Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt; 440*0Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks; 441*0Sstevel@tonic-gate Str_hash *sthash; 442*0Sstevel@tonic-gate Str_master *mstr = 0; 443*0Sstevel@tonic-gate 444*0Sstevel@tonic-gate /* 445*0Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But 446*0Sstevel@tonic-gate * instead of hashing from the start of the string to the 447*0Sstevel@tonic-gate * end, we do it in reverse. 448*0Sstevel@tonic-gate * 449*0Sstevel@tonic-gate * This way - we are essentially building all of the 450*0Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if 451*0Sstevel@tonic-gate * any suffixes already exist in the tree as we generate 452*0Sstevel@tonic-gate * the hash. 453*0Sstevel@tonic-gate */ 454*0Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 455*0Sstevel@tonic-gate 456*0Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 457*0Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 458*0Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt]; 459*0Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 460*0Sstevel@tonic-gate 461*0Sstevel@tonic-gate if (sthash->hi_hashval == hashval) { 462*0Sstevel@tonic-gate const char *hstr; 463*0Sstevel@tonic-gate Str_master *_mstr; 464*0Sstevel@tonic-gate 465*0Sstevel@tonic-gate _mstr = sthash->hi_mstr; 466*0Sstevel@tonic-gate hstr = &_mstr->sm_str[_mstr->sm_stlen - 467*0Sstevel@tonic-gate sthash->hi_stlen]; 468*0Sstevel@tonic-gate if (strcmp(&str[i], hstr) == 0) { 469*0Sstevel@tonic-gate if (i == 0) { 470*0Sstevel@tonic-gate /* 471*0Sstevel@tonic-gate * Entry already in table, 472*0Sstevel@tonic-gate * increment refcnt and get 473*0Sstevel@tonic-gate * out. 474*0Sstevel@tonic-gate */ 475*0Sstevel@tonic-gate sthash->hi_refcnt++; 476*0Sstevel@tonic-gate return (0); 477*0Sstevel@tonic-gate } else { 478*0Sstevel@tonic-gate /* 479*0Sstevel@tonic-gate * If this 'suffix' is 480*0Sstevel@tonic-gate * presently a 'master' string, 481*0Sstevel@tonic-gate * then take over it's record. 482*0Sstevel@tonic-gate */ 483*0Sstevel@tonic-gate if (sthash->hi_stlen == 484*0Sstevel@tonic-gate _mstr->sm_stlen) { 485*0Sstevel@tonic-gate /* 486*0Sstevel@tonic-gate * we should only do 487*0Sstevel@tonic-gate * this once. 488*0Sstevel@tonic-gate */ 489*0Sstevel@tonic-gate assert(mstr == 0); 490*0Sstevel@tonic-gate mstr = _mstr; 491*0Sstevel@tonic-gate } 492*0Sstevel@tonic-gate } 493*0Sstevel@tonic-gate } 494*0Sstevel@tonic-gate } 495*0Sstevel@tonic-gate } 496*0Sstevel@tonic-gate } 497*0Sstevel@tonic-gate 498*0Sstevel@tonic-gate 499*0Sstevel@tonic-gate /* 500*0Sstevel@tonic-gate * Do we need a new master string, or can we take over 501*0Sstevel@tonic-gate * one we already found in the table? 502*0Sstevel@tonic-gate */ 503*0Sstevel@tonic-gate if (mstr == 0) { 504*0Sstevel@tonic-gate /* 505*0Sstevel@tonic-gate * allocate a new master string 506*0Sstevel@tonic-gate */ 507*0Sstevel@tonic-gate if ((mstr = calloc(sizeof (Str_hash), 1)) == 0) 508*0Sstevel@tonic-gate return (-1); 509*0Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist; 510*0Sstevel@tonic-gate stp->st_mstrlist = mstr; 511*0Sstevel@tonic-gate stp->st_stringsize += stlen + 1; 512*0Sstevel@tonic-gate } else { 513*0Sstevel@tonic-gate /* 514*0Sstevel@tonic-gate * We are taking over a existing master string, 515*0Sstevel@tonic-gate * the stringsize only increments by the 516*0Sstevel@tonic-gate * difference between the currnet string and the 517*0Sstevel@tonic-gate * previous master. 518*0Sstevel@tonic-gate */ 519*0Sstevel@tonic-gate assert(stlen > mstr->sm_stlen); 520*0Sstevel@tonic-gate stp->st_stringsize += stlen - mstr->sm_stlen; 521*0Sstevel@tonic-gate } 522*0Sstevel@tonic-gate 523*0Sstevel@tonic-gate if ((sthash = calloc(sizeof (Str_hash), 1)) == 0) 524*0Sstevel@tonic-gate return (-1); 525*0Sstevel@tonic-gate 526*0Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval; 527*0Sstevel@tonic-gate mstr->sm_stlen = sthash->hi_stlen = stlen; 528*0Sstevel@tonic-gate mstr->sm_str = str; 529*0Sstevel@tonic-gate sthash->hi_refcnt = 1; 530*0Sstevel@tonic-gate sthash->hi_mstr = mstr; 531*0Sstevel@tonic-gate 532*0Sstevel@tonic-gate /* 533*0Sstevel@tonic-gate * Insert string element into head of hash list 534*0Sstevel@tonic-gate */ 535*0Sstevel@tonic-gate hashval = hashval % bckcnt; 536*0Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval]; 537*0Sstevel@tonic-gate hashbcks[hashval] = sthash; 538*0Sstevel@tonic-gate return (0); 539*0Sstevel@tonic-gate } 540*0Sstevel@tonic-gate 541*0Sstevel@tonic-gate /* 542*0Sstevel@tonic-gate * Return amount of space required for the string table. 543*0Sstevel@tonic-gate */ 544*0Sstevel@tonic-gate uint_t 545*0Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp) 546*0Sstevel@tonic-gate { 547*0Sstevel@tonic-gate assert(stp->st_fullstringsize > 0); 548*0Sstevel@tonic-gate 549*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 550*0Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 551*0Sstevel@tonic-gate return (stp->st_fullstringsize); 552*0Sstevel@tonic-gate } 553*0Sstevel@tonic-gate 554*0Sstevel@tonic-gate 555*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) { 556*0Sstevel@tonic-gate Stringelem *stelem; 557*0Sstevel@tonic-gate void *cookie; 558*0Sstevel@tonic-gate 559*0Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 560*0Sstevel@tonic-gate /* 561*0Sstevel@tonic-gate * allocate a hash table about the size of # of 562*0Sstevel@tonic-gate * strings input. 563*0Sstevel@tonic-gate */ 564*0Sstevel@tonic-gate stp->st_hbckcnt = findprime(stp->st_stringcnt); 565*0Sstevel@tonic-gate if ((stp->st_hashbcks = 566*0Sstevel@tonic-gate calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL) 567*0Sstevel@tonic-gate return (0); 568*0Sstevel@tonic-gate 569*0Sstevel@tonic-gate /* 570*0Sstevel@tonic-gate * We now walk all of the strings in the list, 571*0Sstevel@tonic-gate * from shortest to longest, and insert them into 572*0Sstevel@tonic-gate * the hashtable. 573*0Sstevel@tonic-gate */ 574*0Sstevel@tonic-gate if ((stelem = avl_first(stp->st_strtree)) == NULL) { 575*0Sstevel@tonic-gate /* 576*0Sstevel@tonic-gate * Is it possible we have a empty string table, 577*0Sstevel@tonic-gate * if so - the table still conains '\0' 578*0Sstevel@tonic-gate * so still return the size. 579*0Sstevel@tonic-gate */ 580*0Sstevel@tonic-gate if (avl_numnodes(stp->st_strtree) == 0) { 581*0Sstevel@tonic-gate assert(stp->st_stringsize == 1); 582*0Sstevel@tonic-gate return (stp->st_stringsize); 583*0Sstevel@tonic-gate } 584*0Sstevel@tonic-gate return (0); 585*0Sstevel@tonic-gate } 586*0Sstevel@tonic-gate while (stelem) { 587*0Sstevel@tonic-gate Stringlist *strlist, *pstrlist; 588*0Sstevel@tonic-gate 589*0Sstevel@tonic-gate /* 590*0Sstevel@tonic-gate * Walk the string lists and insert them 591*0Sstevel@tonic-gate * into the hash list. Once a string is 592*0Sstevel@tonic-gate * inserted we no longer need it's entry, 593*0Sstevel@tonic-gate * so free it 594*0Sstevel@tonic-gate */ 595*0Sstevel@tonic-gate for (strlist = stelem->se_strlist, pstrlist = 0; 596*0Sstevel@tonic-gate strlist; strlist = strlist->sl_next) { 597*0Sstevel@tonic-gate if (st_hash_insert(stp, strlist->sl_string, 598*0Sstevel@tonic-gate stelem->se_stlen) == -1) 599*0Sstevel@tonic-gate return (0); 600*0Sstevel@tonic-gate if (pstrlist) 601*0Sstevel@tonic-gate free(pstrlist); 602*0Sstevel@tonic-gate } 603*0Sstevel@tonic-gate free(pstrlist); 604*0Sstevel@tonic-gate stelem->se_strlist = 0; 605*0Sstevel@tonic-gate stelem = AVL_NEXT(stp->st_strtree, stelem); 606*0Sstevel@tonic-gate } 607*0Sstevel@tonic-gate 608*0Sstevel@tonic-gate /* 609*0Sstevel@tonic-gate * Now that all of the strings have been freed, 610*0Sstevel@tonic-gate * go ahead and quickly re-walk the AVL tree and 611*0Sstevel@tonic-gate * free all of the AVL nodes. 612*0Sstevel@tonic-gate * 613*0Sstevel@tonic-gate * avl_destroy_nodes() beats avl_remove() because 614*0Sstevel@tonic-gate * avl_remove will 'ballance' the tree as nodes 615*0Sstevel@tonic-gate * are deleted - we just want to tear the whole 616*0Sstevel@tonic-gate * thing down now. 617*0Sstevel@tonic-gate */ 618*0Sstevel@tonic-gate cookie = NULL; 619*0Sstevel@tonic-gate while ((stelem = avl_destroy_nodes(stp->st_strtree, 620*0Sstevel@tonic-gate &cookie)) != NULL) 621*0Sstevel@tonic-gate free(stelem); 622*0Sstevel@tonic-gate avl_destroy(stp->st_strtree); 623*0Sstevel@tonic-gate free(stp->st_strtree); 624*0Sstevel@tonic-gate stp->st_strtree = 0; 625*0Sstevel@tonic-gate } 626*0Sstevel@tonic-gate 627*0Sstevel@tonic-gate assert(stp->st_stringsize > 0); 628*0Sstevel@tonic-gate assert(stp->st_fullstringsize >= stp->st_stringsize); 629*0Sstevel@tonic-gate 630*0Sstevel@tonic-gate return (stp->st_stringsize); 631*0Sstevel@tonic-gate } 632*0Sstevel@tonic-gate 633*0Sstevel@tonic-gate /* 634*0Sstevel@tonic-gate * Associate a buffer with the string table. 635*0Sstevel@tonic-gate */ 636*0Sstevel@tonic-gate const char * 637*0Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp) 638*0Sstevel@tonic-gate { 639*0Sstevel@tonic-gate return (stp->st_strbuf); 640*0Sstevel@tonic-gate } 641*0Sstevel@tonic-gate 642*0Sstevel@tonic-gate int 643*0Sstevel@tonic-gate st_setstrbuf(Str_tbl *stp, char *stbuf, uint_t bufsize) 644*0Sstevel@tonic-gate { 645*0Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 646*0Sstevel@tonic-gate 647*0Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 648*0Sstevel@tonic-gate if (bufsize < stp->st_fullstringsize) 649*0Sstevel@tonic-gate return (-1); 650*0Sstevel@tonic-gate } else { 651*0Sstevel@tonic-gate if (bufsize < stp->st_stringsize) 652*0Sstevel@tonic-gate return (-1); 653*0Sstevel@tonic-gate } 654*0Sstevel@tonic-gate 655*0Sstevel@tonic-gate stp->st_strbuf = stbuf; 656*0Sstevel@tonic-gate #ifdef DEBUG 657*0Sstevel@tonic-gate /* 658*0Sstevel@tonic-gate * for debug builds - start with a stringtable filled in 659*0Sstevel@tonic-gate * with '0xff'. This makes it very easy to find wholes 660*0Sstevel@tonic-gate * which we failed to fill in - in the strtab. 661*0Sstevel@tonic-gate */ 662*0Sstevel@tonic-gate memset(stbuf, 0xff, bufsize); 663*0Sstevel@tonic-gate stbuf[0] = '\0'; 664*0Sstevel@tonic-gate #else 665*0Sstevel@tonic-gate memset(stbuf, 0x0, bufsize); 666*0Sstevel@tonic-gate #endif 667*0Sstevel@tonic-gate return (0); 668*0Sstevel@tonic-gate } 669