10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51618Srie * Common Development and Distribution License (the "License"). 61618Srie * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211618Srie 220Sstevel@tonic-gate /* 23*5549Srie * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 29*5549Srie #include <_string_table.h> 300Sstevel@tonic-gate #include <strings.h> 310Sstevel@tonic-gate #include <sgs.h> 320Sstevel@tonic-gate #include <stdio.h> 330Sstevel@tonic-gate 340Sstevel@tonic-gate /* 35*5549Srie * This file provides the interfaces to build a Str_tbl suitable for use by 36*5549Srie * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB) 37*5549Srie * as created by ld(1). 380Sstevel@tonic-gate * 39*5549Srie * There are two modes which can be used when constructing a string table: 400Sstevel@tonic-gate * 410Sstevel@tonic-gate * st_new(0) 420Sstevel@tonic-gate * standard string table - no compression. This is the 43*5549Srie * traditional, fast method. 440Sstevel@tonic-gate * 45*5549Srie * st_new(FLG_STTAB_COMPRESS) 46*5549Srie * builds a compressed string table which both eliminates 47*5549Srie * duplicate strings, and permits strings with common suffixes 48*5549Srie * (atexit vs. exit) to overlap in the table. This provides space 49*5549Srie * savings for many string tables. Although more work than the 50*5549Srie * traditional method, the algorithms used are designed to scale 51*5549Srie * and keep any overhead at a minimum. 520Sstevel@tonic-gate * 53*5549Srie * These string tables are built with a common interface in a two-pass manner. 54*5549Srie * The first pass finds all of the strings required for the string-table and 55*5549Srie * calculates the size required for the final string table. 56*5549Srie * 57*5549Srie * The second pass allocates the string table, populates the strings into the 58*5549Srie * table and returns the offsets the strings have been assigned. 590Sstevel@tonic-gate * 600Sstevel@tonic-gate * The calling sequence to build and populate a string table is: 610Sstevel@tonic-gate * 620Sstevel@tonic-gate * st_new(); // initialize strtab 630Sstevel@tonic-gate * 640Sstevel@tonic-gate * st_insert(st1); // first pass of strings ... 650Sstevel@tonic-gate * // calculates size required for 660Sstevel@tonic-gate * // string table 670Sstevel@tonic-gate * 680Sstevel@tonic-gate * st_delstring(st?); // remove string previously 690Sstevel@tonic-gate * // inserted 700Sstevel@tonic-gate * st_insert(stN); 710Sstevel@tonic-gate * 720Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes 730Sstevel@tonic-gate * // size of table. 740Sstevel@tonic-gate * 750Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination 760Sstevel@tonic-gate * // for the string table 770Sstevel@tonic-gate * 780Sstevel@tonic-gate * st_setstring(st1); // populate the string table 790Sstevel@tonic-gate * ... // offsets are based off of second 800Sstevel@tonic-gate * // pass through the string table 810Sstevel@tonic-gate * st_setstring(stN); 820Sstevel@tonic-gate * 830Sstevel@tonic-gate * st_destroy(); // tear down string table 840Sstevel@tonic-gate * // structures. 850Sstevel@tonic-gate * 860Sstevel@tonic-gate * String Suffix Compression Algorithm: 870Sstevel@tonic-gate * 880Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String 890Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm 900Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique 910Sstevel@tonic-gate * strings inserted into the string table. The hash function for 920Sstevel@tonic-gate * this table is a standard string hash except that the hash starts 930Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards 940Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the 950Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values 960Sstevel@tonic-gate * for all of the possible suffix strings for that string. 970Sstevel@tonic-gate * 980Sstevel@tonic-gate * As we compute the hash - at each character see if the current 990Sstevel@tonic-gate * suffix string for that hash is already present in the table. If 1000Sstevel@tonic-gate * it is, and the string is a master string. Then change that 1010Sstevel@tonic-gate * string to a suffix string of the new string being inserted. 1020Sstevel@tonic-gate * 1030Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check 1040Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference 1050Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a 1060Sstevel@tonic-gate * new hash table entry for a master string. 1070Sstevel@tonic-gate * 1080Sstevel@tonic-gate * The above method will find all suffixes of a given string given 1090Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is 1100Sstevel@tonic-gate * why this is a two phase method, we first collect all of the 111*5549Srie * strings and store them based off of their length in an AVL tree. 1120Sstevel@tonic-gate * Once all of the strings have been submitted we then start the 1130Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and 1140Sstevel@tonic-gate * inserting the strings from shortest to longest as described 1150Sstevel@tonic-gate * above. 1160Sstevel@tonic-gate */ 1170Sstevel@tonic-gate 1180Sstevel@tonic-gate /* LINTLIBRARY */ 1190Sstevel@tonic-gate 120*5549Srie static int 121*5549Srie avl_len_compare(const void *n1, const void *n2) 1220Sstevel@tonic-gate { 123*5549Srie uint_t len1, len2; 124*5549Srie 125*5549Srie len1 = ((LenNode *)n1)->ln_strlen; 126*5549Srie len2 = ((LenNode *)n2)->ln_strlen; 1270Sstevel@tonic-gate 128*5549Srie if (len1 == len2) 1290Sstevel@tonic-gate return (0); 130*5549Srie if (len2 < len1) 1310Sstevel@tonic-gate return (1); 1320Sstevel@tonic-gate return (-1); 1330Sstevel@tonic-gate } 1340Sstevel@tonic-gate 135*5549Srie static int 136*5549Srie avl_str_compare(const void *n1, const void *n2) 137*5549Srie { 138*5549Srie const char *str1, *str2; 139*5549Srie int rc; 140*5549Srie 141*5549Srie str1 = ((StrNode *)n1)->sn_str; 142*5549Srie str2 = ((StrNode *)n2)->sn_str; 143*5549Srie 144*5549Srie rc = strcmp(str1, str2); 145*5549Srie if (rc > 0) 146*5549Srie return (1); 147*5549Srie if (rc < 0) 148*5549Srie return (-1); 149*5549Srie return (0); 150*5549Srie } 151*5549Srie 1520Sstevel@tonic-gate /* 153*5549Srie * Return an initialized Str_tbl - returns NULL on failure. 1540Sstevel@tonic-gate * 155*5549Srie * flags: 156*5549Srie * FLG_STTAB_COMPRESS - build a compressed string table 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate Str_tbl * 159*5549Srie st_new(uint_t flags) 1600Sstevel@tonic-gate { 1610Sstevel@tonic-gate Str_tbl *stp; 1620Sstevel@tonic-gate 163*5549Srie if ((stp = calloc(sizeof (Str_tbl), 1)) == NULL) 164*5549Srie return (NULL); 1650Sstevel@tonic-gate 1660Sstevel@tonic-gate /* 1670Sstevel@tonic-gate * Start with a leading '\0' - it's tradition. 1680Sstevel@tonic-gate */ 169*5549Srie stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1; 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate /* 172*5549Srie * Do we compress this string table? 1730Sstevel@tonic-gate */ 174*5549Srie stp->st_flags = flags; 175*5549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 1760Sstevel@tonic-gate return (stp); 1770Sstevel@tonic-gate 178*5549Srie if ((stp->st_lentree = calloc(sizeof (avl_tree_t), 1)) == NULL) 179*5549Srie return (NULL); 180*5549Srie 181*5549Srie avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode), 182*5549Srie SGSOFFSETOF(LenNode, ln_avlnode)); 183*5549Srie 184*5549Srie return (stp); 185*5549Srie } 186*5549Srie 187*5549Srie /* 188*5549Srie * Insert a new string into the Str_tbl. There are two AVL trees used. 189*5549Srie * 190*5549Srie * . The first LenNode AVL tree maintains a tree of nodes based on string 191*5549Srie * sizes. 192*5549Srie * . Each LenNode maintains a StrNode AVL tree for each string. Large 193*5549Srie * applications have been known to contribute thousands of strings of 194*5549Srie * the same size. Should strings need to be removed (-z ignore), then 195*5549Srie * the string AVL tree makes this removal efficient and scalable. 196*5549Srie */ 197*5549Srie int 198*5549Srie st_insert(Str_tbl *stp, const char *str) 199*5549Srie { 200*5549Srie uint_t len; 201*5549Srie StrNode *snp, sn = { 0 }; 202*5549Srie LenNode *lnp, ln = { 0 }; 203*5549Srie avl_index_t where; 204*5549Srie 205*5549Srie /* 206*5549Srie * String table can't have been cooked 207*5549Srie */ 208*5549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 209*5549Srie 210*5549Srie /* 211*5549Srie * Null strings always point to the head of the string 212*5549Srie * table - no reason to keep searching. 213*5549Srie */ 214*5549Srie if ((len = (uint_t)strlen(str)) == 0) 2150Sstevel@tonic-gate return (0); 216*5549Srie 217*5549Srie stp->st_fullstrsize += len + 1; 218*5549Srie stp->st_strcnt++; 219*5549Srie 220*5549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 221*5549Srie return (0); 222*5549Srie 223*5549Srie /* 224*5549Srie * From the controlling string table, determine which LenNode AVL node 225*5549Srie * provides for this string length. If the node doesn't exist, insert 226*5549Srie * a new node to represent this string length. 227*5549Srie */ 228*5549Srie ln.ln_strlen = len; 229*5549Srie if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) { 230*5549Srie if ((lnp = calloc(sizeof (LenNode), 1)) == NULL) 231*5549Srie return (-1); 232*5549Srie lnp->ln_strlen = len; 233*5549Srie avl_insert(stp->st_lentree, lnp, where); 234*5549Srie 235*5549Srie if ((lnp->ln_strtree = calloc(sizeof (avl_tree_t), 1)) == NULL) 236*5549Srie return (0); 237*5549Srie 238*5549Srie avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode), 239*5549Srie SGSOFFSETOF(StrNode, sn_avlnode)); 2400Sstevel@tonic-gate } 2410Sstevel@tonic-gate 242*5549Srie /* 243*5549Srie * From the string length AVL node determine whether a StrNode AVL node 244*5549Srie * provides this string. If the node doesn't exist, insert a new node 245*5549Srie * to represent this string. 246*5549Srie */ 247*5549Srie sn.sn_str = str; 248*5549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) { 249*5549Srie if ((snp = calloc(sizeof (StrNode), 1)) == NULL) 250*5549Srie return (-1); 251*5549Srie snp->sn_str = str; 252*5549Srie avl_insert(lnp->ln_strtree, snp, where); 253*5549Srie } 254*5549Srie snp->sn_refcnt++; 255*5549Srie 256*5549Srie return (0); 257*5549Srie } 258*5549Srie 259*5549Srie /* 260*5549Srie * Remove a previously inserted string from the Str_tbl. 261*5549Srie */ 262*5549Srie int 263*5549Srie st_delstring(Str_tbl *stp, const char *str) 264*5549Srie { 265*5549Srie uint_t len; 266*5549Srie LenNode *lnp, ln = { 0 }; 267*5549Srie StrNode *snp, sn = { 0 }; 2680Sstevel@tonic-gate 269*5549Srie /* 270*5549Srie * String table can't have been cooked 271*5549Srie */ 272*5549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 273*5549Srie 274*5549Srie len = (uint_t)strlen(str); 275*5549Srie stp->st_fullstrsize -= len + 1; 276*5549Srie 277*5549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 278*5549Srie return (0); 279*5549Srie 280*5549Srie /* 281*5549Srie * Determine which LenNode AVL node provides for this string length. 282*5549Srie */ 283*5549Srie ln.ln_strlen = len; 284*5549Srie if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) { 285*5549Srie sn.sn_str = str; 286*5549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) { 287*5549Srie /* 288*5549Srie * Reduce the reference count, and if zero remove the 289*5549Srie * node. 290*5549Srie */ 291*5549Srie if (--snp->sn_refcnt == 0) 292*5549Srie avl_remove(lnp->ln_strtree, snp); 293*5549Srie return (0); 294*5549Srie } 295*5549Srie } 296*5549Srie 297*5549Srie /* 298*5549Srie * No strings of this length, or no string itself - someone goofed. 299*5549Srie */ 300*5549Srie return (-1); 3010Sstevel@tonic-gate } 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate /* 3040Sstevel@tonic-gate * Tear down a String_Table structure. 3050Sstevel@tonic-gate */ 3060Sstevel@tonic-gate void 3070Sstevel@tonic-gate st_destroy(Str_tbl *stp) 3080Sstevel@tonic-gate { 3090Sstevel@tonic-gate Str_hash *sthash, *psthash; 3100Sstevel@tonic-gate Str_master *mstr, *pmstr; 3110Sstevel@tonic-gate uint_t i; 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate /* 3140Sstevel@tonic-gate * cleanup the master strings 3150Sstevel@tonic-gate */ 3160Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr; 3170Sstevel@tonic-gate mstr = mstr->sm_next) { 3180Sstevel@tonic-gate if (pmstr) 3190Sstevel@tonic-gate free(pmstr); 3200Sstevel@tonic-gate pmstr = mstr; 3210Sstevel@tonic-gate } 3220Sstevel@tonic-gate if (pmstr) 3230Sstevel@tonic-gate free(pmstr); 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate if (stp->st_hashbcks) { 3260Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) { 3270Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0; 3280Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 3290Sstevel@tonic-gate if (psthash) 3300Sstevel@tonic-gate free(psthash); 3310Sstevel@tonic-gate psthash = sthash; 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate if (psthash) 3340Sstevel@tonic-gate free(psthash); 3350Sstevel@tonic-gate } 3360Sstevel@tonic-gate free(stp->st_hashbcks); 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate free(stp); 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate /* 3430Sstevel@tonic-gate * For a given string - copy it into the buffer associated with 3440Sstevel@tonic-gate * the string table - and return the offset it has been assigned. 3450Sstevel@tonic-gate * 3460Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in 3470Sstevel@tonic-gate * the Str_tbl. 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate int 3500Sstevel@tonic-gate st_setstring(Str_tbl *stp, const char *str, uint_t *stoff) 3510Sstevel@tonic-gate { 3520Sstevel@tonic-gate uint_t stlen; 3530Sstevel@tonic-gate uint_t hashval; 3540Sstevel@tonic-gate Str_hash *sthash; 3550Sstevel@tonic-gate Str_master *mstr; 3560Sstevel@tonic-gate int i; 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate /* 3590Sstevel@tonic-gate * String table *must* have been previously cooked 3600Sstevel@tonic-gate */ 3610Sstevel@tonic-gate assert(stp->st_strbuf); 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 3640Sstevel@tonic-gate stlen = (uint_t)strlen(str); 3650Sstevel@tonic-gate /* 3660Sstevel@tonic-gate * Null string always points to head of string table 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate if (stlen == 0) { 3690Sstevel@tonic-gate *stoff = 0; 3700Sstevel@tonic-gate return (0); 3710Sstevel@tonic-gate } 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 3740Sstevel@tonic-gate uint_t _stoff; 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate stlen++; /* count for trailing '\0' */ 3770Sstevel@tonic-gate _stoff = stp->st_nextoff; 3780Sstevel@tonic-gate /* 3790Sstevel@tonic-gate * Have we overflowed our assigned buffer? 3800Sstevel@tonic-gate */ 381*5549Srie if ((_stoff + stlen) > stp->st_fullstrsize) 3820Sstevel@tonic-gate return (-1); 3830Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen); 3840Sstevel@tonic-gate *stoff = _stoff; 3850Sstevel@tonic-gate stp->st_nextoff += stlen; 3860Sstevel@tonic-gate return (0); 3870Sstevel@tonic-gate } 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate /* 390*5549Srie * Calculate reverse hash for string. 3910Sstevel@tonic-gate */ 3920Sstevel@tonic-gate hashval = HASHSEED; 3930Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 3940Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 395*5549Srie str[i]; /* h = ((h * 33) + c) */ 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash; 3990Sstevel@tonic-gate sthash = sthash->hi_next) { 400*5549Srie const char *hstr; 401*5549Srie 402*5549Srie if (sthash->hi_hashval != hashval) 403*5549Srie continue; 4040Sstevel@tonic-gate 405*5549Srie hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen - 406*5549Srie sthash->hi_strlen]; 407*5549Srie if (strcmp(str, hstr) == 0) 408*5549Srie break; 4090Sstevel@tonic-gate } 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate /* 4120Sstevel@tonic-gate * Did we find the string? 4130Sstevel@tonic-gate */ 4140Sstevel@tonic-gate if (sthash == 0) 4150Sstevel@tonic-gate return (-1); 4160Sstevel@tonic-gate 4170Sstevel@tonic-gate /* 4180Sstevel@tonic-gate * Has this string been copied into the string table? 4190Sstevel@tonic-gate */ 4200Sstevel@tonic-gate mstr = sthash->hi_mstr; 421*5549Srie if (mstr->sm_stroff == 0) { 422*5549Srie uint_t mstrlen = mstr->sm_strlen + 1; 423*5549Srie 424*5549Srie mstr->sm_stroff = stp->st_nextoff; 425*5549Srie 4260Sstevel@tonic-gate /* 4270Sstevel@tonic-gate * Have we overflowed our assigned buffer? 4280Sstevel@tonic-gate */ 429*5549Srie if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize) 4300Sstevel@tonic-gate return (-1); 431*5549Srie 432*5549Srie (void) memcpy(stp->st_strbuf + mstr->sm_stroff, 433*5549Srie mstr->sm_str, mstrlen); 434*5549Srie stp->st_nextoff += mstrlen; 4350Sstevel@tonic-gate } 436*5549Srie 4370Sstevel@tonic-gate /* 438*5549Srie * Calculate offset of (sub)string. 4390Sstevel@tonic-gate */ 440*5549Srie *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen; 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate return (0); 4430Sstevel@tonic-gate } 4440Sstevel@tonic-gate 4450Sstevel@tonic-gate 4460Sstevel@tonic-gate static int 447*5549Srie st_hash_insert(Str_tbl *stp, const char *str, uint_t len) 4480Sstevel@tonic-gate { 4490Sstevel@tonic-gate int i; 4500Sstevel@tonic-gate uint_t hashval = HASHSEED; 4510Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt; 4520Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks; 4530Sstevel@tonic-gate Str_hash *sthash; 4540Sstevel@tonic-gate Str_master *mstr = 0; 4550Sstevel@tonic-gate 4560Sstevel@tonic-gate /* 4570Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But 4580Sstevel@tonic-gate * instead of hashing from the start of the string to the 4590Sstevel@tonic-gate * end, we do it in reverse. 4600Sstevel@tonic-gate * 4610Sstevel@tonic-gate * This way - we are essentially building all of the 4620Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if 4630Sstevel@tonic-gate * any suffixes already exist in the tree as we generate 4640Sstevel@tonic-gate * the hash. 4650Sstevel@tonic-gate */ 466*5549Srie for (i = len; i >= 0; i--) { 467*5549Srie hashval = ((hashval << 5) + hashval) + 468*5549Srie str[i]; /* h = ((h * 33) + c) */ 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt]; 4710Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 472*5549Srie const char *hstr; 473*5549Srie Str_master *_mstr; 4740Sstevel@tonic-gate 475*5549Srie if (sthash->hi_hashval != hashval) 476*5549Srie continue; 477*5549Srie 478*5549Srie _mstr = sthash->hi_mstr; 479*5549Srie hstr = &_mstr->sm_str[_mstr->sm_strlen - 480*5549Srie sthash->hi_strlen]; 481*5549Srie 482*5549Srie if (strcmp(&str[i], hstr)) 483*5549Srie continue; 4840Sstevel@tonic-gate 485*5549Srie if (i == 0) { 486*5549Srie /* 487*5549Srie * Entry already in table, increment refcnt and 488*5549Srie * get out. 489*5549Srie */ 490*5549Srie sthash->hi_refcnt++; 491*5549Srie return (0); 492*5549Srie } else { 493*5549Srie /* 494*5549Srie * If this 'suffix' is presently a 'master 495*5549Srie * string, then take over it's record. 496*5549Srie */ 497*5549Srie if (sthash->hi_strlen == _mstr->sm_strlen) { 498*5549Srie /* 499*5549Srie * we should only do this once. 500*5549Srie */ 501*5549Srie assert(mstr == 0); 502*5549Srie mstr = _mstr; 5030Sstevel@tonic-gate } 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate } 5060Sstevel@tonic-gate } 5070Sstevel@tonic-gate 5080Sstevel@tonic-gate /* 5090Sstevel@tonic-gate * Do we need a new master string, or can we take over 5100Sstevel@tonic-gate * one we already found in the table? 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate if (mstr == 0) { 5130Sstevel@tonic-gate /* 5140Sstevel@tonic-gate * allocate a new master string 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate if ((mstr = calloc(sizeof (Str_hash), 1)) == 0) 5170Sstevel@tonic-gate return (-1); 5180Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist; 5190Sstevel@tonic-gate stp->st_mstrlist = mstr; 520*5549Srie stp->st_strsize += len + 1; 5210Sstevel@tonic-gate } else { 5220Sstevel@tonic-gate /* 523*5549Srie * We are taking over a existing master string, the string size 524*5549Srie * only increments by the difference between the current string 525*5549Srie * and the previous master. 5260Sstevel@tonic-gate */ 527*5549Srie assert(len > mstr->sm_strlen); 528*5549Srie stp->st_strsize += len - mstr->sm_strlen; 5290Sstevel@tonic-gate } 5300Sstevel@tonic-gate 5310Sstevel@tonic-gate if ((sthash = calloc(sizeof (Str_hash), 1)) == 0) 5320Sstevel@tonic-gate return (-1); 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval; 535*5549Srie mstr->sm_strlen = sthash->hi_strlen = len; 5360Sstevel@tonic-gate mstr->sm_str = str; 5370Sstevel@tonic-gate sthash->hi_refcnt = 1; 5380Sstevel@tonic-gate sthash->hi_mstr = mstr; 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate /* 5410Sstevel@tonic-gate * Insert string element into head of hash list 5420Sstevel@tonic-gate */ 5430Sstevel@tonic-gate hashval = hashval % bckcnt; 5440Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval]; 5450Sstevel@tonic-gate hashbcks[hashval] = sthash; 5460Sstevel@tonic-gate return (0); 5470Sstevel@tonic-gate } 5480Sstevel@tonic-gate 5490Sstevel@tonic-gate /* 5500Sstevel@tonic-gate * Return amount of space required for the string table. 5510Sstevel@tonic-gate */ 5520Sstevel@tonic-gate uint_t 5530Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp) 5540Sstevel@tonic-gate { 555*5549Srie assert(stp->st_fullstrsize > 0); 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 5580Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 559*5549Srie return (stp->st_fullstrsize); 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) { 563*5549Srie LenNode *lnp; 5640Sstevel@tonic-gate void *cookie; 5650Sstevel@tonic-gate 5660Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5670Sstevel@tonic-gate /* 5680Sstevel@tonic-gate * allocate a hash table about the size of # of 5690Sstevel@tonic-gate * strings input. 5700Sstevel@tonic-gate */ 571*5549Srie stp->st_hbckcnt = findprime(stp->st_strcnt); 5720Sstevel@tonic-gate if ((stp->st_hashbcks = 5730Sstevel@tonic-gate calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL) 5740Sstevel@tonic-gate return (0); 5750Sstevel@tonic-gate 5760Sstevel@tonic-gate /* 577*5549Srie * We now walk all of the strings in the list, from shortest to 578*5549Srie * longest, and insert them into the hashtable. 5790Sstevel@tonic-gate */ 580*5549Srie if ((lnp = avl_first(stp->st_lentree)) == NULL) { 5810Sstevel@tonic-gate /* 582*5549Srie * Is it possible we have an empty string table, if so, 583*5549Srie * the table still contains '\0', so return the size. 5840Sstevel@tonic-gate */ 585*5549Srie if (avl_numnodes(stp->st_lentree) == 0) { 586*5549Srie assert(stp->st_strsize == 1); 587*5549Srie return (stp->st_strsize); 5880Sstevel@tonic-gate } 5890Sstevel@tonic-gate return (0); 5900Sstevel@tonic-gate } 591*5549Srie 592*5549Srie while (lnp) { 593*5549Srie StrNode *snp; 594*5549Srie 595*5549Srie /* 596*5549Srie * Walk the string lists and insert them into the hash 597*5549Srie * list. Once a string is inserted we no longer need 598*5549Srie * it's entry, so the string can be freed. 599*5549Srie */ 600*5549Srie for (snp = avl_first(lnp->ln_strtree); snp; 601*5549Srie snp = AVL_NEXT(lnp->ln_strtree, snp)) { 602*5549Srie if (st_hash_insert(stp, snp->sn_str, 603*5549Srie lnp->ln_strlen) == -1) 604*5549Srie return (0); 605*5549Srie } 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate /* 608*5549Srie * Now that the strings have been copied, walk the 609*5549Srie * StrNode tree and free all the AVL nodes. Note, 610*5549Srie * avl_destroy_nodes() beats avl_remove() as the 611*5549Srie * latter balances the nodes as they are removed. 612*5549Srie * We just want to tear the whole thing down fast. 6130Sstevel@tonic-gate */ 614*5549Srie cookie = NULL; 615*5549Srie while ((snp = avl_destroy_nodes(lnp->ln_strtree, 616*5549Srie &cookie)) != NULL) 617*5549Srie free(snp); 618*5549Srie avl_destroy(lnp->ln_strtree); 619*5549Srie free(lnp->ln_strtree); 620*5549Srie lnp->ln_strtree = NULL; 621*5549Srie 622*5549Srie /* 623*5549Srie * Move on to the next LenNode. 624*5549Srie */ 625*5549Srie lnp = AVL_NEXT(stp->st_lentree, lnp); 6260Sstevel@tonic-gate } 6270Sstevel@tonic-gate 6280Sstevel@tonic-gate /* 629*5549Srie * Now that all of the strings have been freed, walk the 630*5549Srie * LenNode tree and free all of the AVL nodes. Note, 631*5549Srie * avl_destroy_nodes() beats avl_remove() as the latter 632*5549Srie * balances the nodes as they are removed. We just want to 633*5549Srie * tear the whole thing down fast. 6340Sstevel@tonic-gate */ 6350Sstevel@tonic-gate cookie = NULL; 636*5549Srie while ((lnp = avl_destroy_nodes(stp->st_lentree, 6370Sstevel@tonic-gate &cookie)) != NULL) 638*5549Srie free(lnp); 639*5549Srie avl_destroy(stp->st_lentree); 640*5549Srie free(stp->st_lentree); 641*5549Srie stp->st_lentree = 0; 6420Sstevel@tonic-gate } 6430Sstevel@tonic-gate 644*5549Srie assert(stp->st_strsize > 0); 645*5549Srie assert(stp->st_fullstrsize >= stp->st_strsize); 6460Sstevel@tonic-gate 647*5549Srie return (stp->st_strsize); 6480Sstevel@tonic-gate } 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate /* 651*5549Srie * Associate a buffer with a string table. 6520Sstevel@tonic-gate */ 6530Sstevel@tonic-gate const char * 6540Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp) 6550Sstevel@tonic-gate { 6560Sstevel@tonic-gate return (stp->st_strbuf); 6570Sstevel@tonic-gate } 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate int 6600Sstevel@tonic-gate st_setstrbuf(Str_tbl *stp, char *stbuf, uint_t bufsize) 6610Sstevel@tonic-gate { 6620Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 665*5549Srie if (bufsize < stp->st_fullstrsize) 6660Sstevel@tonic-gate return (-1); 6670Sstevel@tonic-gate } else { 668*5549Srie if (bufsize < stp->st_strsize) 6690Sstevel@tonic-gate return (-1); 6700Sstevel@tonic-gate } 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate stp->st_strbuf = stbuf; 6730Sstevel@tonic-gate #ifdef DEBUG 6740Sstevel@tonic-gate /* 6750Sstevel@tonic-gate * for debug builds - start with a stringtable filled in 6760Sstevel@tonic-gate * with '0xff'. This makes it very easy to find wholes 6770Sstevel@tonic-gate * which we failed to fill in - in the strtab. 6780Sstevel@tonic-gate */ 6790Sstevel@tonic-gate memset(stbuf, 0xff, bufsize); 6800Sstevel@tonic-gate stbuf[0] = '\0'; 6810Sstevel@tonic-gate #else 6820Sstevel@tonic-gate memset(stbuf, 0x0, bufsize); 6830Sstevel@tonic-gate #endif 6840Sstevel@tonic-gate return (0); 6850Sstevel@tonic-gate } 686