10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51618Srie * Common Development and Distribution License (the "License"). 61618Srie * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211618Srie 220Sstevel@tonic-gate /* 23*5892Sab196087 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 295549Srie #include <_string_table.h> 300Sstevel@tonic-gate #include <strings.h> 310Sstevel@tonic-gate #include <sgs.h> 320Sstevel@tonic-gate #include <stdio.h> 330Sstevel@tonic-gate 340Sstevel@tonic-gate /* 355549Srie * This file provides the interfaces to build a Str_tbl suitable for use by 365549Srie * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB) 375549Srie * as created by ld(1). 380Sstevel@tonic-gate * 395549Srie * There are two modes which can be used when constructing a string table: 400Sstevel@tonic-gate * 410Sstevel@tonic-gate * st_new(0) 420Sstevel@tonic-gate * standard string table - no compression. This is the 435549Srie * traditional, fast method. 440Sstevel@tonic-gate * 455549Srie * st_new(FLG_STTAB_COMPRESS) 465549Srie * builds a compressed string table which both eliminates 475549Srie * duplicate strings, and permits strings with common suffixes 485549Srie * (atexit vs. exit) to overlap in the table. This provides space 495549Srie * savings for many string tables. Although more work than the 505549Srie * traditional method, the algorithms used are designed to scale 515549Srie * and keep any overhead at a minimum. 520Sstevel@tonic-gate * 535549Srie * These string tables are built with a common interface in a two-pass manner. 545549Srie * The first pass finds all of the strings required for the string-table and 555549Srie * calculates the size required for the final string table. 565549Srie * 575549Srie * The second pass allocates the string table, populates the strings into the 585549Srie * table and returns the offsets the strings have been assigned. 590Sstevel@tonic-gate * 600Sstevel@tonic-gate * The calling sequence to build and populate a string table is: 610Sstevel@tonic-gate * 620Sstevel@tonic-gate * st_new(); // initialize strtab 630Sstevel@tonic-gate * 640Sstevel@tonic-gate * st_insert(st1); // first pass of strings ... 650Sstevel@tonic-gate * // calculates size required for 660Sstevel@tonic-gate * // string table 670Sstevel@tonic-gate * 680Sstevel@tonic-gate * st_delstring(st?); // remove string previously 690Sstevel@tonic-gate * // inserted 700Sstevel@tonic-gate * st_insert(stN); 710Sstevel@tonic-gate * 720Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes 730Sstevel@tonic-gate * // size of table. 740Sstevel@tonic-gate * 750Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination 760Sstevel@tonic-gate * // for the string table 770Sstevel@tonic-gate * 780Sstevel@tonic-gate * st_setstring(st1); // populate the string table 790Sstevel@tonic-gate * ... // offsets are based off of second 800Sstevel@tonic-gate * // pass through the string table 810Sstevel@tonic-gate * st_setstring(stN); 820Sstevel@tonic-gate * 830Sstevel@tonic-gate * st_destroy(); // tear down string table 840Sstevel@tonic-gate * // structures. 850Sstevel@tonic-gate * 860Sstevel@tonic-gate * String Suffix Compression Algorithm: 870Sstevel@tonic-gate * 880Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String 890Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm 900Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique 910Sstevel@tonic-gate * strings inserted into the string table. The hash function for 920Sstevel@tonic-gate * this table is a standard string hash except that the hash starts 930Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards 940Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the 950Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values 960Sstevel@tonic-gate * for all of the possible suffix strings for that string. 970Sstevel@tonic-gate * 980Sstevel@tonic-gate * As we compute the hash - at each character see if the current 990Sstevel@tonic-gate * suffix string for that hash is already present in the table. If 1000Sstevel@tonic-gate * it is, and the string is a master string. Then change that 1010Sstevel@tonic-gate * string to a suffix string of the new string being inserted. 1020Sstevel@tonic-gate * 1030Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check 1040Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference 1050Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a 1060Sstevel@tonic-gate * new hash table entry for a master string. 1070Sstevel@tonic-gate * 1080Sstevel@tonic-gate * The above method will find all suffixes of a given string given 1090Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is 1100Sstevel@tonic-gate * why this is a two phase method, we first collect all of the 1115549Srie * strings and store them based off of their length in an AVL tree. 1120Sstevel@tonic-gate * Once all of the strings have been submitted we then start the 1130Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and 1140Sstevel@tonic-gate * inserting the strings from shortest to longest as described 1150Sstevel@tonic-gate * above. 1160Sstevel@tonic-gate */ 1170Sstevel@tonic-gate 1180Sstevel@tonic-gate /* LINTLIBRARY */ 1190Sstevel@tonic-gate 1205549Srie static int 1215549Srie avl_len_compare(const void *n1, const void *n2) 1220Sstevel@tonic-gate { 123*5892Sab196087 size_t len1, len2; 1245549Srie 1255549Srie len1 = ((LenNode *)n1)->ln_strlen; 1265549Srie len2 = ((LenNode *)n2)->ln_strlen; 1270Sstevel@tonic-gate 1285549Srie if (len1 == len2) 1290Sstevel@tonic-gate return (0); 1305549Srie if (len2 < len1) 1310Sstevel@tonic-gate return (1); 1320Sstevel@tonic-gate return (-1); 1330Sstevel@tonic-gate } 1340Sstevel@tonic-gate 1355549Srie static int 1365549Srie avl_str_compare(const void *n1, const void *n2) 1375549Srie { 1385549Srie const char *str1, *str2; 1395549Srie int rc; 1405549Srie 1415549Srie str1 = ((StrNode *)n1)->sn_str; 1425549Srie str2 = ((StrNode *)n2)->sn_str; 1435549Srie 1445549Srie rc = strcmp(str1, str2); 1455549Srie if (rc > 0) 1465549Srie return (1); 1475549Srie if (rc < 0) 1485549Srie return (-1); 1495549Srie return (0); 1505549Srie } 1515549Srie 1520Sstevel@tonic-gate /* 1535549Srie * Return an initialized Str_tbl - returns NULL on failure. 1540Sstevel@tonic-gate * 1555549Srie * flags: 1565549Srie * FLG_STTAB_COMPRESS - build a compressed string table 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate Str_tbl * 1595549Srie st_new(uint_t flags) 1600Sstevel@tonic-gate { 1610Sstevel@tonic-gate Str_tbl *stp; 1620Sstevel@tonic-gate 1635549Srie if ((stp = calloc(sizeof (Str_tbl), 1)) == NULL) 1645549Srie return (NULL); 1650Sstevel@tonic-gate 1660Sstevel@tonic-gate /* 1670Sstevel@tonic-gate * Start with a leading '\0' - it's tradition. 1680Sstevel@tonic-gate */ 1695549Srie stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1; 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate /* 1725549Srie * Do we compress this string table? 1730Sstevel@tonic-gate */ 1745549Srie stp->st_flags = flags; 1755549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 1760Sstevel@tonic-gate return (stp); 1770Sstevel@tonic-gate 1785549Srie if ((stp->st_lentree = calloc(sizeof (avl_tree_t), 1)) == NULL) 1795549Srie return (NULL); 1805549Srie 1815549Srie avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode), 1825549Srie SGSOFFSETOF(LenNode, ln_avlnode)); 1835549Srie 1845549Srie return (stp); 1855549Srie } 1865549Srie 1875549Srie /* 1885549Srie * Insert a new string into the Str_tbl. There are two AVL trees used. 1895549Srie * 1905549Srie * . The first LenNode AVL tree maintains a tree of nodes based on string 1915549Srie * sizes. 1925549Srie * . Each LenNode maintains a StrNode AVL tree for each string. Large 1935549Srie * applications have been known to contribute thousands of strings of 1945549Srie * the same size. Should strings need to be removed (-z ignore), then 1955549Srie * the string AVL tree makes this removal efficient and scalable. 1965549Srie */ 1975549Srie int 1985549Srie st_insert(Str_tbl *stp, const char *str) 1995549Srie { 200*5892Sab196087 size_t len; 2015549Srie StrNode *snp, sn = { 0 }; 2025549Srie LenNode *lnp, ln = { 0 }; 2035549Srie avl_index_t where; 2045549Srie 2055549Srie /* 2065549Srie * String table can't have been cooked 2075549Srie */ 2085549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 2095549Srie 2105549Srie /* 2115549Srie * Null strings always point to the head of the string 2125549Srie * table - no reason to keep searching. 2135549Srie */ 214*5892Sab196087 if ((len = strlen(str)) == 0) 2150Sstevel@tonic-gate return (0); 2165549Srie 2175549Srie stp->st_fullstrsize += len + 1; 2185549Srie stp->st_strcnt++; 2195549Srie 2205549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 2215549Srie return (0); 2225549Srie 2235549Srie /* 2245549Srie * From the controlling string table, determine which LenNode AVL node 2255549Srie * provides for this string length. If the node doesn't exist, insert 2265549Srie * a new node to represent this string length. 2275549Srie */ 2285549Srie ln.ln_strlen = len; 2295549Srie if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) { 2305549Srie if ((lnp = calloc(sizeof (LenNode), 1)) == NULL) 2315549Srie return (-1); 2325549Srie lnp->ln_strlen = len; 2335549Srie avl_insert(stp->st_lentree, lnp, where); 2345549Srie 2355549Srie if ((lnp->ln_strtree = calloc(sizeof (avl_tree_t), 1)) == NULL) 2365549Srie return (0); 2375549Srie 2385549Srie avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode), 2395549Srie SGSOFFSETOF(StrNode, sn_avlnode)); 2400Sstevel@tonic-gate } 2410Sstevel@tonic-gate 2425549Srie /* 2435549Srie * From the string length AVL node determine whether a StrNode AVL node 2445549Srie * provides this string. If the node doesn't exist, insert a new node 2455549Srie * to represent this string. 2465549Srie */ 2475549Srie sn.sn_str = str; 2485549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) { 2495549Srie if ((snp = calloc(sizeof (StrNode), 1)) == NULL) 2505549Srie return (-1); 2515549Srie snp->sn_str = str; 2525549Srie avl_insert(lnp->ln_strtree, snp, where); 2535549Srie } 2545549Srie snp->sn_refcnt++; 2555549Srie 2565549Srie return (0); 2575549Srie } 2585549Srie 2595549Srie /* 2605549Srie * Remove a previously inserted string from the Str_tbl. 2615549Srie */ 2625549Srie int 2635549Srie st_delstring(Str_tbl *stp, const char *str) 2645549Srie { 265*5892Sab196087 size_t len; 2665549Srie LenNode *lnp, ln = { 0 }; 2675549Srie StrNode *snp, sn = { 0 }; 2680Sstevel@tonic-gate 2695549Srie /* 2705549Srie * String table can't have been cooked 2715549Srie */ 2725549Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 2735549Srie 274*5892Sab196087 len = strlen(str); 2755549Srie stp->st_fullstrsize -= len + 1; 2765549Srie 2775549Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 2785549Srie return (0); 2795549Srie 2805549Srie /* 2815549Srie * Determine which LenNode AVL node provides for this string length. 2825549Srie */ 2835549Srie ln.ln_strlen = len; 2845549Srie if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) { 2855549Srie sn.sn_str = str; 2865549Srie if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) { 2875549Srie /* 2885549Srie * Reduce the reference count, and if zero remove the 2895549Srie * node. 2905549Srie */ 2915549Srie if (--snp->sn_refcnt == 0) 2925549Srie avl_remove(lnp->ln_strtree, snp); 2935549Srie return (0); 2945549Srie } 2955549Srie } 2965549Srie 2975549Srie /* 2985549Srie * No strings of this length, or no string itself - someone goofed. 2995549Srie */ 3005549Srie return (-1); 3010Sstevel@tonic-gate } 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate /* 3040Sstevel@tonic-gate * Tear down a String_Table structure. 3050Sstevel@tonic-gate */ 3060Sstevel@tonic-gate void 3070Sstevel@tonic-gate st_destroy(Str_tbl *stp) 3080Sstevel@tonic-gate { 3090Sstevel@tonic-gate Str_hash *sthash, *psthash; 3100Sstevel@tonic-gate Str_master *mstr, *pmstr; 3110Sstevel@tonic-gate uint_t i; 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate /* 3140Sstevel@tonic-gate * cleanup the master strings 3150Sstevel@tonic-gate */ 3160Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr; 3170Sstevel@tonic-gate mstr = mstr->sm_next) { 3180Sstevel@tonic-gate if (pmstr) 3190Sstevel@tonic-gate free(pmstr); 3200Sstevel@tonic-gate pmstr = mstr; 3210Sstevel@tonic-gate } 3220Sstevel@tonic-gate if (pmstr) 3230Sstevel@tonic-gate free(pmstr); 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate if (stp->st_hashbcks) { 3260Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) { 3270Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0; 3280Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 3290Sstevel@tonic-gate if (psthash) 3300Sstevel@tonic-gate free(psthash); 3310Sstevel@tonic-gate psthash = sthash; 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate if (psthash) 3340Sstevel@tonic-gate free(psthash); 3350Sstevel@tonic-gate } 3360Sstevel@tonic-gate free(stp->st_hashbcks); 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate free(stp); 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate /* 3430Sstevel@tonic-gate * For a given string - copy it into the buffer associated with 3440Sstevel@tonic-gate * the string table - and return the offset it has been assigned. 3450Sstevel@tonic-gate * 3460Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in 3470Sstevel@tonic-gate * the Str_tbl. 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate int 350*5892Sab196087 st_setstring(Str_tbl *stp, const char *str, size_t *stoff) 3510Sstevel@tonic-gate { 352*5892Sab196087 size_t stlen; 3530Sstevel@tonic-gate uint_t hashval; 3540Sstevel@tonic-gate Str_hash *sthash; 3550Sstevel@tonic-gate Str_master *mstr; 3560Sstevel@tonic-gate int i; 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate /* 3590Sstevel@tonic-gate * String table *must* have been previously cooked 3600Sstevel@tonic-gate */ 3610Sstevel@tonic-gate assert(stp->st_strbuf); 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 364*5892Sab196087 stlen = strlen(str); 3650Sstevel@tonic-gate /* 3660Sstevel@tonic-gate * Null string always points to head of string table 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate if (stlen == 0) { 3690Sstevel@tonic-gate *stoff = 0; 3700Sstevel@tonic-gate return (0); 3710Sstevel@tonic-gate } 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 374*5892Sab196087 size_t _stoff; 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate stlen++; /* count for trailing '\0' */ 3770Sstevel@tonic-gate _stoff = stp->st_nextoff; 3780Sstevel@tonic-gate /* 3790Sstevel@tonic-gate * Have we overflowed our assigned buffer? 3800Sstevel@tonic-gate */ 3815549Srie if ((_stoff + stlen) > stp->st_fullstrsize) 3820Sstevel@tonic-gate return (-1); 3830Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen); 3840Sstevel@tonic-gate *stoff = _stoff; 3850Sstevel@tonic-gate stp->st_nextoff += stlen; 3860Sstevel@tonic-gate return (0); 3870Sstevel@tonic-gate } 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate /* 3905549Srie * Calculate reverse hash for string. 3910Sstevel@tonic-gate */ 3920Sstevel@tonic-gate hashval = HASHSEED; 3930Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 3940Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 3955549Srie str[i]; /* h = ((h * 33) + c) */ 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash; 3990Sstevel@tonic-gate sthash = sthash->hi_next) { 4005549Srie const char *hstr; 4015549Srie 4025549Srie if (sthash->hi_hashval != hashval) 4035549Srie continue; 4040Sstevel@tonic-gate 4055549Srie hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen - 4065549Srie sthash->hi_strlen]; 4075549Srie if (strcmp(str, hstr) == 0) 4085549Srie break; 4090Sstevel@tonic-gate } 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate /* 4120Sstevel@tonic-gate * Did we find the string? 4130Sstevel@tonic-gate */ 4140Sstevel@tonic-gate if (sthash == 0) 4150Sstevel@tonic-gate return (-1); 4160Sstevel@tonic-gate 4170Sstevel@tonic-gate /* 4180Sstevel@tonic-gate * Has this string been copied into the string table? 4190Sstevel@tonic-gate */ 4200Sstevel@tonic-gate mstr = sthash->hi_mstr; 4215549Srie if (mstr->sm_stroff == 0) { 422*5892Sab196087 size_t mstrlen = mstr->sm_strlen + 1; 4235549Srie 4245549Srie mstr->sm_stroff = stp->st_nextoff; 4255549Srie 4260Sstevel@tonic-gate /* 4270Sstevel@tonic-gate * Have we overflowed our assigned buffer? 4280Sstevel@tonic-gate */ 4295549Srie if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize) 4300Sstevel@tonic-gate return (-1); 4315549Srie 4325549Srie (void) memcpy(stp->st_strbuf + mstr->sm_stroff, 4335549Srie mstr->sm_str, mstrlen); 4345549Srie stp->st_nextoff += mstrlen; 4350Sstevel@tonic-gate } 4365549Srie 4370Sstevel@tonic-gate /* 4385549Srie * Calculate offset of (sub)string. 4390Sstevel@tonic-gate */ 4405549Srie *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen; 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate return (0); 4430Sstevel@tonic-gate } 4440Sstevel@tonic-gate 4450Sstevel@tonic-gate 4460Sstevel@tonic-gate static int 447*5892Sab196087 st_hash_insert(Str_tbl *stp, const char *str, size_t len) 4480Sstevel@tonic-gate { 4490Sstevel@tonic-gate int i; 4500Sstevel@tonic-gate uint_t hashval = HASHSEED; 4510Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt; 4520Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks; 4530Sstevel@tonic-gate Str_hash *sthash; 4540Sstevel@tonic-gate Str_master *mstr = 0; 4550Sstevel@tonic-gate 4560Sstevel@tonic-gate /* 4570Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But 4580Sstevel@tonic-gate * instead of hashing from the start of the string to the 4590Sstevel@tonic-gate * end, we do it in reverse. 4600Sstevel@tonic-gate * 4610Sstevel@tonic-gate * This way - we are essentially building all of the 4620Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if 4630Sstevel@tonic-gate * any suffixes already exist in the tree as we generate 4640Sstevel@tonic-gate * the hash. 4650Sstevel@tonic-gate */ 4665549Srie for (i = len; i >= 0; i--) { 4675549Srie hashval = ((hashval << 5) + hashval) + 4685549Srie str[i]; /* h = ((h * 33) + c) */ 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt]; 4710Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 4725549Srie const char *hstr; 4735549Srie Str_master *_mstr; 4740Sstevel@tonic-gate 4755549Srie if (sthash->hi_hashval != hashval) 4765549Srie continue; 4775549Srie 4785549Srie _mstr = sthash->hi_mstr; 4795549Srie hstr = &_mstr->sm_str[_mstr->sm_strlen - 4805549Srie sthash->hi_strlen]; 4815549Srie 4825549Srie if (strcmp(&str[i], hstr)) 4835549Srie continue; 4840Sstevel@tonic-gate 4855549Srie if (i == 0) { 4865549Srie /* 4875549Srie * Entry already in table, increment refcnt and 4885549Srie * get out. 4895549Srie */ 4905549Srie sthash->hi_refcnt++; 4915549Srie return (0); 4925549Srie } else { 4935549Srie /* 4945549Srie * If this 'suffix' is presently a 'master 4955549Srie * string, then take over it's record. 4965549Srie */ 4975549Srie if (sthash->hi_strlen == _mstr->sm_strlen) { 4985549Srie /* 4995549Srie * we should only do this once. 5005549Srie */ 5015549Srie assert(mstr == 0); 5025549Srie mstr = _mstr; 5030Sstevel@tonic-gate } 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate } 5060Sstevel@tonic-gate } 5070Sstevel@tonic-gate 5080Sstevel@tonic-gate /* 5090Sstevel@tonic-gate * Do we need a new master string, or can we take over 5100Sstevel@tonic-gate * one we already found in the table? 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate if (mstr == 0) { 5130Sstevel@tonic-gate /* 5140Sstevel@tonic-gate * allocate a new master string 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate if ((mstr = calloc(sizeof (Str_hash), 1)) == 0) 5170Sstevel@tonic-gate return (-1); 5180Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist; 5190Sstevel@tonic-gate stp->st_mstrlist = mstr; 5205549Srie stp->st_strsize += len + 1; 5210Sstevel@tonic-gate } else { 5220Sstevel@tonic-gate /* 5235549Srie * We are taking over a existing master string, the string size 5245549Srie * only increments by the difference between the current string 5255549Srie * and the previous master. 5260Sstevel@tonic-gate */ 5275549Srie assert(len > mstr->sm_strlen); 5285549Srie stp->st_strsize += len - mstr->sm_strlen; 5290Sstevel@tonic-gate } 5300Sstevel@tonic-gate 5310Sstevel@tonic-gate if ((sthash = calloc(sizeof (Str_hash), 1)) == 0) 5320Sstevel@tonic-gate return (-1); 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval; 5355549Srie mstr->sm_strlen = sthash->hi_strlen = len; 5360Sstevel@tonic-gate mstr->sm_str = str; 5370Sstevel@tonic-gate sthash->hi_refcnt = 1; 5380Sstevel@tonic-gate sthash->hi_mstr = mstr; 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate /* 5410Sstevel@tonic-gate * Insert string element into head of hash list 5420Sstevel@tonic-gate */ 5430Sstevel@tonic-gate hashval = hashval % bckcnt; 5440Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval]; 5450Sstevel@tonic-gate hashbcks[hashval] = sthash; 5460Sstevel@tonic-gate return (0); 5470Sstevel@tonic-gate } 5480Sstevel@tonic-gate 5490Sstevel@tonic-gate /* 5500Sstevel@tonic-gate * Return amount of space required for the string table. 5510Sstevel@tonic-gate */ 552*5892Sab196087 size_t 5530Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp) 5540Sstevel@tonic-gate { 5555549Srie assert(stp->st_fullstrsize > 0); 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 5580Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5595549Srie return (stp->st_fullstrsize); 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) { 5635549Srie LenNode *lnp; 5640Sstevel@tonic-gate void *cookie; 5650Sstevel@tonic-gate 5660Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5670Sstevel@tonic-gate /* 5680Sstevel@tonic-gate * allocate a hash table about the size of # of 5690Sstevel@tonic-gate * strings input. 5700Sstevel@tonic-gate */ 5715549Srie stp->st_hbckcnt = findprime(stp->st_strcnt); 5720Sstevel@tonic-gate if ((stp->st_hashbcks = 5730Sstevel@tonic-gate calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL) 5740Sstevel@tonic-gate return (0); 5750Sstevel@tonic-gate 5760Sstevel@tonic-gate /* 5775549Srie * We now walk all of the strings in the list, from shortest to 5785549Srie * longest, and insert them into the hashtable. 5790Sstevel@tonic-gate */ 5805549Srie if ((lnp = avl_first(stp->st_lentree)) == NULL) { 5810Sstevel@tonic-gate /* 5825549Srie * Is it possible we have an empty string table, if so, 5835549Srie * the table still contains '\0', so return the size. 5840Sstevel@tonic-gate */ 5855549Srie if (avl_numnodes(stp->st_lentree) == 0) { 5865549Srie assert(stp->st_strsize == 1); 5875549Srie return (stp->st_strsize); 5880Sstevel@tonic-gate } 5890Sstevel@tonic-gate return (0); 5900Sstevel@tonic-gate } 5915549Srie 5925549Srie while (lnp) { 5935549Srie StrNode *snp; 5945549Srie 5955549Srie /* 5965549Srie * Walk the string lists and insert them into the hash 5975549Srie * list. Once a string is inserted we no longer need 5985549Srie * it's entry, so the string can be freed. 5995549Srie */ 6005549Srie for (snp = avl_first(lnp->ln_strtree); snp; 6015549Srie snp = AVL_NEXT(lnp->ln_strtree, snp)) { 6025549Srie if (st_hash_insert(stp, snp->sn_str, 6035549Srie lnp->ln_strlen) == -1) 6045549Srie return (0); 6055549Srie } 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate /* 6085549Srie * Now that the strings have been copied, walk the 6095549Srie * StrNode tree and free all the AVL nodes. Note, 6105549Srie * avl_destroy_nodes() beats avl_remove() as the 6115549Srie * latter balances the nodes as they are removed. 6125549Srie * We just want to tear the whole thing down fast. 6130Sstevel@tonic-gate */ 6145549Srie cookie = NULL; 6155549Srie while ((snp = avl_destroy_nodes(lnp->ln_strtree, 6165549Srie &cookie)) != NULL) 6175549Srie free(snp); 6185549Srie avl_destroy(lnp->ln_strtree); 6195549Srie free(lnp->ln_strtree); 6205549Srie lnp->ln_strtree = NULL; 6215549Srie 6225549Srie /* 6235549Srie * Move on to the next LenNode. 6245549Srie */ 6255549Srie lnp = AVL_NEXT(stp->st_lentree, lnp); 6260Sstevel@tonic-gate } 6270Sstevel@tonic-gate 6280Sstevel@tonic-gate /* 6295549Srie * Now that all of the strings have been freed, walk the 6305549Srie * LenNode tree and free all of the AVL nodes. Note, 6315549Srie * avl_destroy_nodes() beats avl_remove() as the latter 6325549Srie * balances the nodes as they are removed. We just want to 6335549Srie * tear the whole thing down fast. 6340Sstevel@tonic-gate */ 6350Sstevel@tonic-gate cookie = NULL; 6365549Srie while ((lnp = avl_destroy_nodes(stp->st_lentree, 6370Sstevel@tonic-gate &cookie)) != NULL) 6385549Srie free(lnp); 6395549Srie avl_destroy(stp->st_lentree); 6405549Srie free(stp->st_lentree); 6415549Srie stp->st_lentree = 0; 6420Sstevel@tonic-gate } 6430Sstevel@tonic-gate 6445549Srie assert(stp->st_strsize > 0); 6455549Srie assert(stp->st_fullstrsize >= stp->st_strsize); 6460Sstevel@tonic-gate 6475549Srie return (stp->st_strsize); 6480Sstevel@tonic-gate } 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate /* 6515549Srie * Associate a buffer with a string table. 6520Sstevel@tonic-gate */ 6530Sstevel@tonic-gate const char * 6540Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp) 6550Sstevel@tonic-gate { 6560Sstevel@tonic-gate return (stp->st_strbuf); 6570Sstevel@tonic-gate } 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate int 660*5892Sab196087 st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize) 6610Sstevel@tonic-gate { 6620Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 6655549Srie if (bufsize < stp->st_fullstrsize) 6660Sstevel@tonic-gate return (-1); 6670Sstevel@tonic-gate } else { 6685549Srie if (bufsize < stp->st_strsize) 6690Sstevel@tonic-gate return (-1); 6700Sstevel@tonic-gate } 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate stp->st_strbuf = stbuf; 6730Sstevel@tonic-gate #ifdef DEBUG 6740Sstevel@tonic-gate /* 6750Sstevel@tonic-gate * for debug builds - start with a stringtable filled in 6760Sstevel@tonic-gate * with '0xff'. This makes it very easy to find wholes 6770Sstevel@tonic-gate * which we failed to fill in - in the strtab. 6780Sstevel@tonic-gate */ 6790Sstevel@tonic-gate memset(stbuf, 0xff, bufsize); 6800Sstevel@tonic-gate stbuf[0] = '\0'; 6810Sstevel@tonic-gate #else 6820Sstevel@tonic-gate memset(stbuf, 0x0, bufsize); 6830Sstevel@tonic-gate #endif 6840Sstevel@tonic-gate return (0); 6850Sstevel@tonic-gate } 686