10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51618Srie * Common Development and Distribution License (the "License").
61618Srie * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
211618Srie
220Sstevel@tonic-gate /*
23*8608SAli.Bahrami@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
240Sstevel@tonic-gate * Use is subject to license terms.
250Sstevel@tonic-gate */
260Sstevel@tonic-gate
275549Srie #include <_string_table.h>
280Sstevel@tonic-gate #include <strings.h>
290Sstevel@tonic-gate #include <sgs.h>
300Sstevel@tonic-gate #include <stdio.h>
310Sstevel@tonic-gate
320Sstevel@tonic-gate /*
335549Srie * This file provides the interfaces to build a Str_tbl suitable for use by
345549Srie * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB)
355549Srie * as created by ld(1).
360Sstevel@tonic-gate *
375549Srie * There are two modes which can be used when constructing a string table:
380Sstevel@tonic-gate *
390Sstevel@tonic-gate * st_new(0)
400Sstevel@tonic-gate * standard string table - no compression. This is the
415549Srie * traditional, fast method.
420Sstevel@tonic-gate *
435549Srie * st_new(FLG_STTAB_COMPRESS)
445549Srie * builds a compressed string table which both eliminates
455549Srie * duplicate strings, and permits strings with common suffixes
465549Srie * (atexit vs. exit) to overlap in the table. This provides space
475549Srie * savings for many string tables. Although more work than the
485549Srie * traditional method, the algorithms used are designed to scale
495549Srie * and keep any overhead at a minimum.
500Sstevel@tonic-gate *
515549Srie * These string tables are built with a common interface in a two-pass manner.
525549Srie * The first pass finds all of the strings required for the string-table and
535549Srie * calculates the size required for the final string table.
545549Srie *
555549Srie * The second pass allocates the string table, populates the strings into the
565549Srie * table and returns the offsets the strings have been assigned.
570Sstevel@tonic-gate *
580Sstevel@tonic-gate * The calling sequence to build and populate a string table is:
590Sstevel@tonic-gate *
600Sstevel@tonic-gate * st_new(); // initialize strtab
610Sstevel@tonic-gate *
620Sstevel@tonic-gate * st_insert(st1); // first pass of strings ...
630Sstevel@tonic-gate * // calculates size required for
640Sstevel@tonic-gate * // string table
650Sstevel@tonic-gate *
660Sstevel@tonic-gate * st_delstring(st?); // remove string previously
670Sstevel@tonic-gate * // inserted
680Sstevel@tonic-gate * st_insert(stN);
690Sstevel@tonic-gate *
700Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes
710Sstevel@tonic-gate * // size of table.
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination
740Sstevel@tonic-gate * // for the string table
750Sstevel@tonic-gate *
760Sstevel@tonic-gate * st_setstring(st1); // populate the string table
770Sstevel@tonic-gate * ... // offsets are based off of second
780Sstevel@tonic-gate * // pass through the string table
790Sstevel@tonic-gate * st_setstring(stN);
800Sstevel@tonic-gate *
810Sstevel@tonic-gate * st_destroy(); // tear down string table
820Sstevel@tonic-gate * // structures.
830Sstevel@tonic-gate *
840Sstevel@tonic-gate * String Suffix Compression Algorithm:
850Sstevel@tonic-gate *
860Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String
870Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm
880Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique
890Sstevel@tonic-gate * strings inserted into the string table. The hash function for
900Sstevel@tonic-gate * this table is a standard string hash except that the hash starts
910Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards
920Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the
930Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values
940Sstevel@tonic-gate * for all of the possible suffix strings for that string.
950Sstevel@tonic-gate *
960Sstevel@tonic-gate * As we compute the hash - at each character see if the current
970Sstevel@tonic-gate * suffix string for that hash is already present in the table. If
980Sstevel@tonic-gate * it is, and the string is a master string. Then change that
990Sstevel@tonic-gate * string to a suffix string of the new string being inserted.
1000Sstevel@tonic-gate *
1010Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check
1020Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference
1030Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a
1040Sstevel@tonic-gate * new hash table entry for a master string.
1050Sstevel@tonic-gate *
1060Sstevel@tonic-gate * The above method will find all suffixes of a given string given
1070Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is
1080Sstevel@tonic-gate * why this is a two phase method, we first collect all of the
1095549Srie * strings and store them based off of their length in an AVL tree.
1100Sstevel@tonic-gate * Once all of the strings have been submitted we then start the
1110Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and
1120Sstevel@tonic-gate * inserting the strings from shortest to longest as described
1130Sstevel@tonic-gate * above.
1140Sstevel@tonic-gate */
1150Sstevel@tonic-gate
1160Sstevel@tonic-gate /* LINTLIBRARY */
1170Sstevel@tonic-gate
1185549Srie static int
avl_len_compare(const void * n1,const void * n2)1195549Srie avl_len_compare(const void *n1, const void *n2)
1200Sstevel@tonic-gate {
1215892Sab196087 size_t len1, len2;
1225549Srie
1235549Srie len1 = ((LenNode *)n1)->ln_strlen;
1245549Srie len2 = ((LenNode *)n2)->ln_strlen;
1250Sstevel@tonic-gate
1265549Srie if (len1 == len2)
1270Sstevel@tonic-gate return (0);
1285549Srie if (len2 < len1)
1290Sstevel@tonic-gate return (1);
1300Sstevel@tonic-gate return (-1);
1310Sstevel@tonic-gate }
1320Sstevel@tonic-gate
1335549Srie static int
avl_str_compare(const void * n1,const void * n2)1345549Srie avl_str_compare(const void *n1, const void *n2)
1355549Srie {
1365549Srie const char *str1, *str2;
1375549Srie int rc;
1385549Srie
1395549Srie str1 = ((StrNode *)n1)->sn_str;
1405549Srie str2 = ((StrNode *)n2)->sn_str;
1415549Srie
1425549Srie rc = strcmp(str1, str2);
1435549Srie if (rc > 0)
1445549Srie return (1);
1455549Srie if (rc < 0)
1465549Srie return (-1);
1475549Srie return (0);
1485549Srie }
1495549Srie
1500Sstevel@tonic-gate /*
1515549Srie * Return an initialized Str_tbl - returns NULL on failure.
1520Sstevel@tonic-gate *
1535549Srie * flags:
1545549Srie * FLG_STTAB_COMPRESS - build a compressed string table
1550Sstevel@tonic-gate */
1560Sstevel@tonic-gate Str_tbl *
st_new(uint_t flags)1575549Srie st_new(uint_t flags)
1580Sstevel@tonic-gate {
1590Sstevel@tonic-gate Str_tbl *stp;
1600Sstevel@tonic-gate
161*8608SAli.Bahrami@Sun.COM if ((stp = calloc(sizeof (*stp), 1)) == NULL)
1625549Srie return (NULL);
1630Sstevel@tonic-gate
1640Sstevel@tonic-gate /*
1650Sstevel@tonic-gate * Start with a leading '\0' - it's tradition.
1660Sstevel@tonic-gate */
1675549Srie stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1;
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate /*
1705549Srie * Do we compress this string table?
1710Sstevel@tonic-gate */
1725549Srie stp->st_flags = flags;
1735549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
1740Sstevel@tonic-gate return (stp);
1750Sstevel@tonic-gate
176*8608SAli.Bahrami@Sun.COM if ((stp->st_lentree = calloc(sizeof (*stp->st_lentree), 1)) == NULL)
1775549Srie return (NULL);
1785549Srie
1795549Srie avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode),
1805549Srie SGSOFFSETOF(LenNode, ln_avlnode));
1815549Srie
1825549Srie return (stp);
1835549Srie }
1845549Srie
1855549Srie /*
1865549Srie * Insert a new string into the Str_tbl. There are two AVL trees used.
1875549Srie *
188*8608SAli.Bahrami@Sun.COM * - The first LenNode AVL tree maintains a tree of nodes based on string
1895549Srie * sizes.
190*8608SAli.Bahrami@Sun.COM * - Each LenNode maintains a StrNode AVL tree for each string. Large
1915549Srie * applications have been known to contribute thousands of strings of
1925549Srie * the same size. Should strings need to be removed (-z ignore), then
1935549Srie * the string AVL tree makes this removal efficient and scalable.
1945549Srie */
1955549Srie int
st_insert(Str_tbl * stp,const char * str)1965549Srie st_insert(Str_tbl *stp, const char *str)
1975549Srie {
1985892Sab196087 size_t len;
1995549Srie StrNode *snp, sn = { 0 };
2005549Srie LenNode *lnp, ln = { 0 };
2015549Srie avl_index_t where;
2025549Srie
2035549Srie /*
2045549Srie * String table can't have been cooked
2055549Srie */
2065549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
2075549Srie
2085549Srie /*
2095549Srie * Null strings always point to the head of the string
2105549Srie * table - no reason to keep searching.
2115549Srie */
2125892Sab196087 if ((len = strlen(str)) == 0)
2130Sstevel@tonic-gate return (0);
2145549Srie
2155549Srie stp->st_fullstrsize += len + 1;
2165549Srie stp->st_strcnt++;
2175549Srie
2185549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
2195549Srie return (0);
2205549Srie
2215549Srie /*
2225549Srie * From the controlling string table, determine which LenNode AVL node
2235549Srie * provides for this string length. If the node doesn't exist, insert
2245549Srie * a new node to represent this string length.
2255549Srie */
2265549Srie ln.ln_strlen = len;
2275549Srie if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
228*8608SAli.Bahrami@Sun.COM if ((lnp = calloc(sizeof (*lnp), 1)) == NULL)
2295549Srie return (-1);
2305549Srie lnp->ln_strlen = len;
2315549Srie avl_insert(stp->st_lentree, lnp, where);
2325549Srie
233*8608SAli.Bahrami@Sun.COM if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) ==
234*8608SAli.Bahrami@Sun.COM NULL)
2355549Srie return (0);
2365549Srie
2375549Srie avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
2385549Srie SGSOFFSETOF(StrNode, sn_avlnode));
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate
2415549Srie /*
2425549Srie * From the string length AVL node determine whether a StrNode AVL node
2435549Srie * provides this string. If the node doesn't exist, insert a new node
2445549Srie * to represent this string.
2455549Srie */
2465549Srie sn.sn_str = str;
2475549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) {
248*8608SAli.Bahrami@Sun.COM if ((snp = calloc(sizeof (*snp), 1)) == NULL)
2495549Srie return (-1);
2505549Srie snp->sn_str = str;
2515549Srie avl_insert(lnp->ln_strtree, snp, where);
2525549Srie }
2535549Srie snp->sn_refcnt++;
2545549Srie
2555549Srie return (0);
2565549Srie }
2575549Srie
2585549Srie /*
2595549Srie * Remove a previously inserted string from the Str_tbl.
2605549Srie */
2615549Srie int
st_delstring(Str_tbl * stp,const char * str)2625549Srie st_delstring(Str_tbl *stp, const char *str)
2635549Srie {
2645892Sab196087 size_t len;
2655549Srie LenNode *lnp, ln = { 0 };
2665549Srie StrNode *snp, sn = { 0 };
2670Sstevel@tonic-gate
2685549Srie /*
2695549Srie * String table can't have been cooked
2705549Srie */
2715549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
2725549Srie
2735892Sab196087 len = strlen(str);
2745549Srie stp->st_fullstrsize -= len + 1;
2755549Srie
2765549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
2775549Srie return (0);
2785549Srie
2795549Srie /*
2805549Srie * Determine which LenNode AVL node provides for this string length.
2815549Srie */
2825549Srie ln.ln_strlen = len;
2835549Srie if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) {
2845549Srie sn.sn_str = str;
2855549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) {
2865549Srie /*
2875549Srie * Reduce the reference count, and if zero remove the
2885549Srie * node.
2895549Srie */
2905549Srie if (--snp->sn_refcnt == 0)
2915549Srie avl_remove(lnp->ln_strtree, snp);
2925549Srie return (0);
2935549Srie }
2945549Srie }
2955549Srie
2965549Srie /*
2975549Srie * No strings of this length, or no string itself - someone goofed.
2985549Srie */
2995549Srie return (-1);
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate
3020Sstevel@tonic-gate /*
3030Sstevel@tonic-gate * Tear down a String_Table structure.
3040Sstevel@tonic-gate */
3050Sstevel@tonic-gate void
st_destroy(Str_tbl * stp)3060Sstevel@tonic-gate st_destroy(Str_tbl *stp)
3070Sstevel@tonic-gate {
3080Sstevel@tonic-gate Str_hash *sthash, *psthash;
3090Sstevel@tonic-gate Str_master *mstr, *pmstr;
3100Sstevel@tonic-gate uint_t i;
3110Sstevel@tonic-gate
3120Sstevel@tonic-gate /*
3130Sstevel@tonic-gate * cleanup the master strings
3140Sstevel@tonic-gate */
3150Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr;
3160Sstevel@tonic-gate mstr = mstr->sm_next) {
3170Sstevel@tonic-gate if (pmstr)
3180Sstevel@tonic-gate free(pmstr);
3190Sstevel@tonic-gate pmstr = mstr;
3200Sstevel@tonic-gate }
3210Sstevel@tonic-gate if (pmstr)
3220Sstevel@tonic-gate free(pmstr);
3230Sstevel@tonic-gate
3240Sstevel@tonic-gate if (stp->st_hashbcks) {
3250Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) {
3260Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0;
3270Sstevel@tonic-gate sthash; sthash = sthash->hi_next) {
3280Sstevel@tonic-gate if (psthash)
3290Sstevel@tonic-gate free(psthash);
3300Sstevel@tonic-gate psthash = sthash;
3310Sstevel@tonic-gate }
3320Sstevel@tonic-gate if (psthash)
3330Sstevel@tonic-gate free(psthash);
3340Sstevel@tonic-gate }
3350Sstevel@tonic-gate free(stp->st_hashbcks);
3360Sstevel@tonic-gate }
3370Sstevel@tonic-gate free(stp);
3380Sstevel@tonic-gate }
3390Sstevel@tonic-gate
3400Sstevel@tonic-gate
3410Sstevel@tonic-gate /*
3420Sstevel@tonic-gate * For a given string - copy it into the buffer associated with
3430Sstevel@tonic-gate * the string table - and return the offset it has been assigned.
3440Sstevel@tonic-gate *
3450Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in
3460Sstevel@tonic-gate * the Str_tbl.
3470Sstevel@tonic-gate */
3480Sstevel@tonic-gate int
st_setstring(Str_tbl * stp,const char * str,size_t * stoff)3495892Sab196087 st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
3500Sstevel@tonic-gate {
3515892Sab196087 size_t stlen;
3520Sstevel@tonic-gate uint_t hashval;
3530Sstevel@tonic-gate Str_hash *sthash;
3540Sstevel@tonic-gate Str_master *mstr;
3550Sstevel@tonic-gate int i;
3560Sstevel@tonic-gate
3570Sstevel@tonic-gate /*
3580Sstevel@tonic-gate * String table *must* have been previously cooked
3590Sstevel@tonic-gate */
3600Sstevel@tonic-gate assert(stp->st_strbuf);
3610Sstevel@tonic-gate
3620Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED);
3635892Sab196087 stlen = strlen(str);
3640Sstevel@tonic-gate /*
3650Sstevel@tonic-gate * Null string always points to head of string table
3660Sstevel@tonic-gate */
3670Sstevel@tonic-gate if (stlen == 0) {
3680Sstevel@tonic-gate *stoff = 0;
3690Sstevel@tonic-gate return (0);
3700Sstevel@tonic-gate }
3710Sstevel@tonic-gate
3720Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
3735892Sab196087 size_t _stoff;
3740Sstevel@tonic-gate
3750Sstevel@tonic-gate stlen++; /* count for trailing '\0' */
3760Sstevel@tonic-gate _stoff = stp->st_nextoff;
3770Sstevel@tonic-gate /*
3780Sstevel@tonic-gate * Have we overflowed our assigned buffer?
3790Sstevel@tonic-gate */
3805549Srie if ((_stoff + stlen) > stp->st_fullstrsize)
3810Sstevel@tonic-gate return (-1);
3820Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen);
3830Sstevel@tonic-gate *stoff = _stoff;
3840Sstevel@tonic-gate stp->st_nextoff += stlen;
3850Sstevel@tonic-gate return (0);
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate
3880Sstevel@tonic-gate /*
3895549Srie * Calculate reverse hash for string.
3900Sstevel@tonic-gate */
3910Sstevel@tonic-gate hashval = HASHSEED;
3920Sstevel@tonic-gate for (i = stlen; i >= 0; i--) {
3930Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) +
3945549Srie str[i]; /* h = ((h * 33) + c) */
3950Sstevel@tonic-gate }
3960Sstevel@tonic-gate
3970Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
3980Sstevel@tonic-gate sthash = sthash->hi_next) {
3995549Srie const char *hstr;
4005549Srie
4015549Srie if (sthash->hi_hashval != hashval)
4025549Srie continue;
4030Sstevel@tonic-gate
4045549Srie hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
4055549Srie sthash->hi_strlen];
4065549Srie if (strcmp(str, hstr) == 0)
4075549Srie break;
4080Sstevel@tonic-gate }
4090Sstevel@tonic-gate
4100Sstevel@tonic-gate /*
4110Sstevel@tonic-gate * Did we find the string?
4120Sstevel@tonic-gate */
4130Sstevel@tonic-gate if (sthash == 0)
4140Sstevel@tonic-gate return (-1);
4150Sstevel@tonic-gate
4160Sstevel@tonic-gate /*
4170Sstevel@tonic-gate * Has this string been copied into the string table?
4180Sstevel@tonic-gate */
4190Sstevel@tonic-gate mstr = sthash->hi_mstr;
4205549Srie if (mstr->sm_stroff == 0) {
4215892Sab196087 size_t mstrlen = mstr->sm_strlen + 1;
4225549Srie
4235549Srie mstr->sm_stroff = stp->st_nextoff;
4245549Srie
4250Sstevel@tonic-gate /*
4260Sstevel@tonic-gate * Have we overflowed our assigned buffer?
4270Sstevel@tonic-gate */
4285549Srie if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize)
4290Sstevel@tonic-gate return (-1);
4305549Srie
4315549Srie (void) memcpy(stp->st_strbuf + mstr->sm_stroff,
4325549Srie mstr->sm_str, mstrlen);
4335549Srie stp->st_nextoff += mstrlen;
4340Sstevel@tonic-gate }
4355549Srie
4360Sstevel@tonic-gate /*
4375549Srie * Calculate offset of (sub)string.
4380Sstevel@tonic-gate */
4395549Srie *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
4400Sstevel@tonic-gate
4410Sstevel@tonic-gate return (0);
4420Sstevel@tonic-gate }
4430Sstevel@tonic-gate
4440Sstevel@tonic-gate
4450Sstevel@tonic-gate static int
st_hash_insert(Str_tbl * stp,const char * str,size_t len)4465892Sab196087 st_hash_insert(Str_tbl *stp, const char *str, size_t len)
4470Sstevel@tonic-gate {
4480Sstevel@tonic-gate int i;
4490Sstevel@tonic-gate uint_t hashval = HASHSEED;
4500Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt;
4510Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks;
4520Sstevel@tonic-gate Str_hash *sthash;
4530Sstevel@tonic-gate Str_master *mstr = 0;
4540Sstevel@tonic-gate
4550Sstevel@tonic-gate /*
4560Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But
4570Sstevel@tonic-gate * instead of hashing from the start of the string to the
4580Sstevel@tonic-gate * end, we do it in reverse.
4590Sstevel@tonic-gate *
4600Sstevel@tonic-gate * This way - we are essentially building all of the
4610Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if
4620Sstevel@tonic-gate * any suffixes already exist in the tree as we generate
4630Sstevel@tonic-gate * the hash.
4640Sstevel@tonic-gate */
4655549Srie for (i = len; i >= 0; i--) {
4665549Srie hashval = ((hashval << 5) + hashval) +
4675549Srie str[i]; /* h = ((h * 33) + c) */
4680Sstevel@tonic-gate
4690Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt];
4700Sstevel@tonic-gate sthash; sthash = sthash->hi_next) {
4715549Srie const char *hstr;
4725549Srie Str_master *_mstr;
4730Sstevel@tonic-gate
4745549Srie if (sthash->hi_hashval != hashval)
4755549Srie continue;
4765549Srie
4775549Srie _mstr = sthash->hi_mstr;
4785549Srie hstr = &_mstr->sm_str[_mstr->sm_strlen -
4795549Srie sthash->hi_strlen];
4805549Srie
4815549Srie if (strcmp(&str[i], hstr))
4825549Srie continue;
4830Sstevel@tonic-gate
4845549Srie if (i == 0) {
4855549Srie /*
4865549Srie * Entry already in table, increment refcnt and
4875549Srie * get out.
4885549Srie */
4895549Srie sthash->hi_refcnt++;
4905549Srie return (0);
4915549Srie } else {
4925549Srie /*
4935549Srie * If this 'suffix' is presently a 'master
4945549Srie * string, then take over it's record.
4955549Srie */
4965549Srie if (sthash->hi_strlen == _mstr->sm_strlen) {
4975549Srie /*
4985549Srie * we should only do this once.
4995549Srie */
5005549Srie assert(mstr == 0);
5015549Srie mstr = _mstr;
5020Sstevel@tonic-gate }
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate }
5050Sstevel@tonic-gate }
5060Sstevel@tonic-gate
5070Sstevel@tonic-gate /*
5080Sstevel@tonic-gate * Do we need a new master string, or can we take over
5090Sstevel@tonic-gate * one we already found in the table?
5100Sstevel@tonic-gate */
5110Sstevel@tonic-gate if (mstr == 0) {
5120Sstevel@tonic-gate /*
5130Sstevel@tonic-gate * allocate a new master string
5140Sstevel@tonic-gate */
515*8608SAli.Bahrami@Sun.COM if ((mstr = calloc(sizeof (*mstr), 1)) == 0)
5160Sstevel@tonic-gate return (-1);
5170Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist;
5180Sstevel@tonic-gate stp->st_mstrlist = mstr;
5195549Srie stp->st_strsize += len + 1;
5200Sstevel@tonic-gate } else {
5210Sstevel@tonic-gate /*
5225549Srie * We are taking over a existing master string, the string size
5235549Srie * only increments by the difference between the current string
5245549Srie * and the previous master.
5250Sstevel@tonic-gate */
5265549Srie assert(len > mstr->sm_strlen);
5275549Srie stp->st_strsize += len - mstr->sm_strlen;
5280Sstevel@tonic-gate }
5290Sstevel@tonic-gate
530*8608SAli.Bahrami@Sun.COM if ((sthash = calloc(sizeof (*sthash), 1)) == 0)
5310Sstevel@tonic-gate return (-1);
5320Sstevel@tonic-gate
5330Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval;
5345549Srie mstr->sm_strlen = sthash->hi_strlen = len;
5350Sstevel@tonic-gate mstr->sm_str = str;
5360Sstevel@tonic-gate sthash->hi_refcnt = 1;
5370Sstevel@tonic-gate sthash->hi_mstr = mstr;
5380Sstevel@tonic-gate
5390Sstevel@tonic-gate /*
5400Sstevel@tonic-gate * Insert string element into head of hash list
5410Sstevel@tonic-gate */
5420Sstevel@tonic-gate hashval = hashval % bckcnt;
5430Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval];
5440Sstevel@tonic-gate hashbcks[hashval] = sthash;
5450Sstevel@tonic-gate return (0);
5460Sstevel@tonic-gate }
5470Sstevel@tonic-gate
5480Sstevel@tonic-gate /*
5490Sstevel@tonic-gate * Return amount of space required for the string table.
5500Sstevel@tonic-gate */
5515892Sab196087 size_t
st_getstrtab_sz(Str_tbl * stp)5520Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp)
5530Sstevel@tonic-gate {
5545549Srie assert(stp->st_fullstrsize > 0);
5550Sstevel@tonic-gate
5560Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
5570Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED;
5585549Srie return (stp->st_fullstrsize);
5590Sstevel@tonic-gate }
5600Sstevel@tonic-gate
5610Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) {
5625549Srie LenNode *lnp;
5630Sstevel@tonic-gate void *cookie;
5640Sstevel@tonic-gate
5650Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED;
5660Sstevel@tonic-gate /*
5670Sstevel@tonic-gate * allocate a hash table about the size of # of
5680Sstevel@tonic-gate * strings input.
5690Sstevel@tonic-gate */
5705549Srie stp->st_hbckcnt = findprime(stp->st_strcnt);
571*8608SAli.Bahrami@Sun.COM if ((stp->st_hashbcks = calloc(sizeof (*stp->st_hashbcks),
572*8608SAli.Bahrami@Sun.COM stp->st_hbckcnt)) == NULL)
5730Sstevel@tonic-gate return (0);
5740Sstevel@tonic-gate
5750Sstevel@tonic-gate /*
5765549Srie * We now walk all of the strings in the list, from shortest to
5775549Srie * longest, and insert them into the hashtable.
5780Sstevel@tonic-gate */
5795549Srie if ((lnp = avl_first(stp->st_lentree)) == NULL) {
5800Sstevel@tonic-gate /*
5815549Srie * Is it possible we have an empty string table, if so,
5825549Srie * the table still contains '\0', so return the size.
5830Sstevel@tonic-gate */
5845549Srie if (avl_numnodes(stp->st_lentree) == 0) {
5855549Srie assert(stp->st_strsize == 1);
5865549Srie return (stp->st_strsize);
5870Sstevel@tonic-gate }
5880Sstevel@tonic-gate return (0);
5890Sstevel@tonic-gate }
5905549Srie
5915549Srie while (lnp) {
5925549Srie StrNode *snp;
5935549Srie
5945549Srie /*
5955549Srie * Walk the string lists and insert them into the hash
5965549Srie * list. Once a string is inserted we no longer need
5975549Srie * it's entry, so the string can be freed.
5985549Srie */
5995549Srie for (snp = avl_first(lnp->ln_strtree); snp;
6005549Srie snp = AVL_NEXT(lnp->ln_strtree, snp)) {
6015549Srie if (st_hash_insert(stp, snp->sn_str,
6025549Srie lnp->ln_strlen) == -1)
6035549Srie return (0);
6045549Srie }
6050Sstevel@tonic-gate
6060Sstevel@tonic-gate /*
6075549Srie * Now that the strings have been copied, walk the
6085549Srie * StrNode tree and free all the AVL nodes. Note,
6095549Srie * avl_destroy_nodes() beats avl_remove() as the
6105549Srie * latter balances the nodes as they are removed.
6115549Srie * We just want to tear the whole thing down fast.
6120Sstevel@tonic-gate */
6135549Srie cookie = NULL;
6145549Srie while ((snp = avl_destroy_nodes(lnp->ln_strtree,
6155549Srie &cookie)) != NULL)
6165549Srie free(snp);
6175549Srie avl_destroy(lnp->ln_strtree);
6185549Srie free(lnp->ln_strtree);
6195549Srie lnp->ln_strtree = NULL;
6205549Srie
6215549Srie /*
6225549Srie * Move on to the next LenNode.
6235549Srie */
6245549Srie lnp = AVL_NEXT(stp->st_lentree, lnp);
6250Sstevel@tonic-gate }
6260Sstevel@tonic-gate
6270Sstevel@tonic-gate /*
6285549Srie * Now that all of the strings have been freed, walk the
6295549Srie * LenNode tree and free all of the AVL nodes. Note,
6305549Srie * avl_destroy_nodes() beats avl_remove() as the latter
6315549Srie * balances the nodes as they are removed. We just want to
6325549Srie * tear the whole thing down fast.
6330Sstevel@tonic-gate */
6340Sstevel@tonic-gate cookie = NULL;
6355549Srie while ((lnp = avl_destroy_nodes(stp->st_lentree,
6360Sstevel@tonic-gate &cookie)) != NULL)
6375549Srie free(lnp);
6385549Srie avl_destroy(stp->st_lentree);
6395549Srie free(stp->st_lentree);
6405549Srie stp->st_lentree = 0;
6410Sstevel@tonic-gate }
6420Sstevel@tonic-gate
6435549Srie assert(stp->st_strsize > 0);
6445549Srie assert(stp->st_fullstrsize >= stp->st_strsize);
6450Sstevel@tonic-gate
6465549Srie return (stp->st_strsize);
6470Sstevel@tonic-gate }
6480Sstevel@tonic-gate
6490Sstevel@tonic-gate /*
6505549Srie * Associate a buffer with a string table.
6510Sstevel@tonic-gate */
6520Sstevel@tonic-gate const char *
st_getstrbuf(Str_tbl * stp)6530Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp)
6540Sstevel@tonic-gate {
6550Sstevel@tonic-gate return (stp->st_strbuf);
6560Sstevel@tonic-gate }
6570Sstevel@tonic-gate
6580Sstevel@tonic-gate int
st_setstrbuf(Str_tbl * stp,char * stbuf,size_t bufsize)6595892Sab196087 st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
6600Sstevel@tonic-gate {
6610Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED);
6620Sstevel@tonic-gate
6630Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
6645549Srie if (bufsize < stp->st_fullstrsize)
6650Sstevel@tonic-gate return (-1);
6660Sstevel@tonic-gate } else {
6675549Srie if (bufsize < stp->st_strsize)
6680Sstevel@tonic-gate return (-1);
6690Sstevel@tonic-gate }
6700Sstevel@tonic-gate
6710Sstevel@tonic-gate stp->st_strbuf = stbuf;
6720Sstevel@tonic-gate #ifdef DEBUG
6730Sstevel@tonic-gate /*
6740Sstevel@tonic-gate * for debug builds - start with a stringtable filled in
675*8608SAli.Bahrami@Sun.COM * with '0xff'. This makes it very easy to spot unfilled
676*8608SAli.Bahrami@Sun.COM * holes in the strtab.
6770Sstevel@tonic-gate */
6780Sstevel@tonic-gate memset(stbuf, 0xff, bufsize);
6790Sstevel@tonic-gate stbuf[0] = '\0';
6800Sstevel@tonic-gate #else
6810Sstevel@tonic-gate memset(stbuf, 0x0, bufsize);
6820Sstevel@tonic-gate #endif
6830Sstevel@tonic-gate return (0);
6840Sstevel@tonic-gate }
685