10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*1618Srie * Common Development and Distribution License (the "License"). 6*1618Srie * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 21*1618Srie 220Sstevel@tonic-gate /* 23*1618Srie * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <string_table.h> 300Sstevel@tonic-gate #include <strings.h> 310Sstevel@tonic-gate #include <sgs.h> 320Sstevel@tonic-gate #include <stdio.h> 330Sstevel@tonic-gate 340Sstevel@tonic-gate 350Sstevel@tonic-gate 360Sstevel@tonic-gate /* 370Sstevel@tonic-gate * This file provides the interfaces to build a Str_tbl suitable 380Sstevel@tonic-gate * for use by either the sgsmsg system or a standard ELF 390Sstevel@tonic-gate * SHT_STRTAB. 400Sstevel@tonic-gate * 410Sstevel@tonic-gate * There are two modes which can be used when constructing a 420Sstevel@tonic-gate * string table: 430Sstevel@tonic-gate * 440Sstevel@tonic-gate * st_new(0) 450Sstevel@tonic-gate * standard string table - no compression. This is the 460Sstevel@tonic-gate * traditional method and fast 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * st_new(FLG_STNEW_COMPRESS) 490Sstevel@tonic-gate * build a compressed string table which both 500Sstevel@tonic-gate * eliminates duplicate strings and permits 510Sstevel@tonic-gate * strings with common suffixes (atexit vs. exit) to 520Sstevel@tonic-gate * overlap in the table. This provides space 530Sstevel@tonic-gate * savings for many string tables. 540Sstevel@tonic-gate * 550Sstevel@tonic-gate * These string tables are now built with a common interface in a 560Sstevel@tonic-gate * two-pass manner, the first pass it to find all of the strings 570Sstevel@tonic-gate * required for the string-table and to calculate the size that 580Sstevel@tonic-gate * will be required for the final string table. 590Sstevel@tonic-gate * 600Sstevel@tonic-gate * The second pass allocates the string table and populates the 610Sstevel@tonic-gate * strings into the table and returns the offsets the strings 620Sstevel@tonic-gate * have been assigned. 630Sstevel@tonic-gate * 640Sstevel@tonic-gate * The calling sequence to build and populate a string table is: 650Sstevel@tonic-gate * 660Sstevel@tonic-gate * st_new(); // initialize strtab 670Sstevel@tonic-gate * 680Sstevel@tonic-gate * st_insert(st1); // first pass of strings ... 690Sstevel@tonic-gate * // calculates size required for 700Sstevel@tonic-gate * // string table 710Sstevel@tonic-gate * 720Sstevel@tonic-gate * st_delstring(st?); // remove string previously 730Sstevel@tonic-gate * // inserted 740Sstevel@tonic-gate * 750Sstevel@tonic-gate * st_insert(stN); 760Sstevel@tonic-gate * 770Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes 780Sstevel@tonic-gate * // size of table. 790Sstevel@tonic-gate * 800Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination 810Sstevel@tonic-gate * // for the string table 820Sstevel@tonic-gate * 830Sstevel@tonic-gate * st_setstring(st1); // populate the string table 840Sstevel@tonic-gate * ... // offsets are based off of second 850Sstevel@tonic-gate * // pass through the string table 860Sstevel@tonic-gate * st_setstring(stN); 870Sstevel@tonic-gate * 880Sstevel@tonic-gate * st_destroy(); // tear down string table 890Sstevel@tonic-gate * // structures. 900Sstevel@tonic-gate * 910Sstevel@tonic-gate * String Suffix Compression Algorithm: 920Sstevel@tonic-gate * 930Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String 940Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm 950Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique 960Sstevel@tonic-gate * strings inserted into the string table. The hash function for 970Sstevel@tonic-gate * this table is a standard string hash except that the hash starts 980Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards 990Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the 1000Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values 1010Sstevel@tonic-gate * for all of the possible suffix strings for that string. 1020Sstevel@tonic-gate * 1030Sstevel@tonic-gate * As we compute the hash - at each character see if the current 1040Sstevel@tonic-gate * suffix string for that hash is already present in the table. If 1050Sstevel@tonic-gate * it is, and the string is a master string. Then change that 1060Sstevel@tonic-gate * string to a suffix string of the new string being inserted. 1070Sstevel@tonic-gate * 1080Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check 1090Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference 1100Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a 1110Sstevel@tonic-gate * new hash table entry for a master string. 1120Sstevel@tonic-gate * 1130Sstevel@tonic-gate * The above method will find all suffixes of a given string given 1140Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is 1150Sstevel@tonic-gate * why this is a two phase method, we first collect all of the 1160Sstevel@tonic-gate * strings and store them based off of their length in a nice AVL tree. 1170Sstevel@tonic-gate * Once all of the strings have been submitted we then start the 1180Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and 1190Sstevel@tonic-gate * inserting the strings from shortest to longest as described 1200Sstevel@tonic-gate * above. 1210Sstevel@tonic-gate * 1220Sstevel@tonic-gate */ 1230Sstevel@tonic-gate 1240Sstevel@tonic-gate /* LINTLIBRARY */ 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate 1270Sstevel@tonic-gate int 1280Sstevel@tonic-gate strlen_compare(const void *elem1, const void *elem2) 1290Sstevel@tonic-gate { 1300Sstevel@tonic-gate uint_t l1, l2; 1310Sstevel@tonic-gate l1 = ((Stringelem *)elem1)->se_stlen; 1320Sstevel@tonic-gate l2 = ((Stringelem *)elem2)->se_stlen; 1330Sstevel@tonic-gate 1340Sstevel@tonic-gate if (l1 == l2) 1350Sstevel@tonic-gate return (0); 1360Sstevel@tonic-gate if (l2 < l1) 1370Sstevel@tonic-gate return (1); 1380Sstevel@tonic-gate 1390Sstevel@tonic-gate return (-1); 1400Sstevel@tonic-gate } 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate /* 1430Sstevel@tonic-gate * Return a initialized Str_tbl - returns NULL on failure. 1440Sstevel@tonic-gate * 1450Sstevel@tonic-gate * stflags: 1460Sstevel@tonic-gate * 1470Sstevel@tonic-gate * FLG_STNEW_COMPRESS - build a compressed string table 1480Sstevel@tonic-gate * 1490Sstevel@tonic-gate */ 1500Sstevel@tonic-gate Str_tbl * 1510Sstevel@tonic-gate st_new(uint_t stflags) 1520Sstevel@tonic-gate { 1530Sstevel@tonic-gate Str_tbl *stp; 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate if ((stp = calloc(sizeof (Str_tbl), 1)) == 0) 1560Sstevel@tonic-gate return (0); 1570Sstevel@tonic-gate 1580Sstevel@tonic-gate /* 1590Sstevel@tonic-gate * Start with a leading '\0' - it's tradition. 1600Sstevel@tonic-gate */ 1610Sstevel@tonic-gate stp->st_stringsize = stp->st_fullstringsize = stp->st_nextoff = 1; 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate /* 1640Sstevel@tonic-gate * Do we compress this string table 1650Sstevel@tonic-gate */ 1660Sstevel@tonic-gate if ((stflags & FLG_STNEW_COMPRESS) == 0) 1670Sstevel@tonic-gate return (stp); 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COMPRESS; 1700Sstevel@tonic-gate if ((stp->st_strtree = calloc(sizeof (avl_tree_t), 1)) == 0) { 1710Sstevel@tonic-gate return (0); 1720Sstevel@tonic-gate } 1730Sstevel@tonic-gate 1740Sstevel@tonic-gate avl_create(stp->st_strtree, &strlen_compare, sizeof (Stringelem), 1750Sstevel@tonic-gate SGSOFFSETOF(Stringelem, se_avlnode)); 1760Sstevel@tonic-gate 1770Sstevel@tonic-gate return (stp); 1780Sstevel@tonic-gate } 1790Sstevel@tonic-gate 1800Sstevel@tonic-gate /* 1810Sstevel@tonic-gate * Tear down a String_Table structure. 1820Sstevel@tonic-gate */ 1830Sstevel@tonic-gate void 1840Sstevel@tonic-gate st_destroy(Str_tbl *stp) 1850Sstevel@tonic-gate { 1860Sstevel@tonic-gate Str_hash *sthash, *psthash; 1870Sstevel@tonic-gate Str_master *mstr, *pmstr; 1880Sstevel@tonic-gate uint_t i; 1890Sstevel@tonic-gate 1900Sstevel@tonic-gate /* 1910Sstevel@tonic-gate * cleanup the master strings 1920Sstevel@tonic-gate */ 1930Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr; 1940Sstevel@tonic-gate mstr = mstr->sm_next) { 1950Sstevel@tonic-gate if (pmstr) 1960Sstevel@tonic-gate free(pmstr); 1970Sstevel@tonic-gate pmstr = mstr; 1980Sstevel@tonic-gate } 1990Sstevel@tonic-gate if (pmstr) 2000Sstevel@tonic-gate free(pmstr); 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate if (stp->st_hashbcks) { 2030Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) { 2040Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0; 2050Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 2060Sstevel@tonic-gate if (psthash) 2070Sstevel@tonic-gate free(psthash); 2080Sstevel@tonic-gate psthash = sthash; 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate if (psthash) 2110Sstevel@tonic-gate free(psthash); 2120Sstevel@tonic-gate } 2130Sstevel@tonic-gate free(stp->st_hashbcks); 2140Sstevel@tonic-gate } 2150Sstevel@tonic-gate free(stp); 2160Sstevel@tonic-gate } 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate /* 2220Sstevel@tonic-gate * Remove a previously inserted string from the Str_tbl 2230Sstevel@tonic-gate */ 2240Sstevel@tonic-gate int 2250Sstevel@tonic-gate st_delstring(Str_tbl *stp, const char *str) 2260Sstevel@tonic-gate { 2270Sstevel@tonic-gate uint_t stlen; 2280Sstevel@tonic-gate Stringelem qstelem; 2290Sstevel@tonic-gate Stringelem *stelem; 2300Sstevel@tonic-gate Stringlist *stlist, *pstlist; 2310Sstevel@tonic-gate 2320Sstevel@tonic-gate /* 2330Sstevel@tonic-gate * String table can't have been cooked 2340Sstevel@tonic-gate */ 2350Sstevel@tonic-gate assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate stlen = (uint_t)strlen(str); 2380Sstevel@tonic-gate stp->st_fullstringsize -= stlen + 1; 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 2410Sstevel@tonic-gate return (0); 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate qstelem.se_stlen = stlen; 2440Sstevel@tonic-gate if ((stelem = avl_find(stp->st_strtree, &qstelem, 0)) == NULL) { 2450Sstevel@tonic-gate /* 2460Sstevel@tonic-gate * no strings of this length recorded, let alone 2470Sstevel@tonic-gate * this specific string - someone goofed. 2480Sstevel@tonic-gate */ 2490Sstevel@tonic-gate return (-1); 2500Sstevel@tonic-gate } 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate pstlist = 0; 2530Sstevel@tonic-gate for (stlist = stelem->se_strlist; stlist; stlist = stlist->sl_next) { 2540Sstevel@tonic-gate if (strcmp(str, stlist->sl_string) == 0) 2550Sstevel@tonic-gate break; 2560Sstevel@tonic-gate pstlist = stlist; 2570Sstevel@tonic-gate } 2580Sstevel@tonic-gate 2590Sstevel@tonic-gate if (stlist == 0) { 2600Sstevel@tonic-gate /* 2610Sstevel@tonic-gate * string was not found 2620Sstevel@tonic-gate */ 2630Sstevel@tonic-gate return (-1); 2640Sstevel@tonic-gate } 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate if (pstlist == 0) { 2670Sstevel@tonic-gate /* 2680Sstevel@tonic-gate * String is first on list. 2690Sstevel@tonic-gate */ 2700Sstevel@tonic-gate stelem->se_strlist = stlist->sl_next; 2710Sstevel@tonic-gate } else { 2720Sstevel@tonic-gate /* 2730Sstevel@tonic-gate * remove string from list. 2740Sstevel@tonic-gate */ 2750Sstevel@tonic-gate pstlist->sl_next = stlist->sl_next; 2760Sstevel@tonic-gate } 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate free(stlist); 2790Sstevel@tonic-gate return (0); 2800Sstevel@tonic-gate } 2810Sstevel@tonic-gate 2820Sstevel@tonic-gate 2830Sstevel@tonic-gate /* 2840Sstevel@tonic-gate * Insert a new string into the Str_tbl 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate int 2870Sstevel@tonic-gate st_insert(Str_tbl *stp, const char *str) 2880Sstevel@tonic-gate { 2890Sstevel@tonic-gate uint_t stlen; 2900Sstevel@tonic-gate Stringelem qstelem; 2910Sstevel@tonic-gate Stringelem *stelem; 2920Sstevel@tonic-gate Stringlist *strlist; 2930Sstevel@tonic-gate avl_index_t where; 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate /* 2960Sstevel@tonic-gate * String table can't have been cooked 2970Sstevel@tonic-gate */ 2980Sstevel@tonic-gate assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 2990Sstevel@tonic-gate stlen = (uint_t)strlen(str); 3000Sstevel@tonic-gate /* 3010Sstevel@tonic-gate * Null strings always point to the head of the string 3020Sstevel@tonic-gate * table - no reason to keep searching. 3030Sstevel@tonic-gate */ 3040Sstevel@tonic-gate if (stlen == 0) 3050Sstevel@tonic-gate return (0); 3060Sstevel@tonic-gate 3070Sstevel@tonic-gate stp->st_fullstringsize += stlen + 1; 3080Sstevel@tonic-gate stp->st_stringcnt++; 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 3110Sstevel@tonic-gate return (0); 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate qstelem.se_stlen = strlen(str); 3140Sstevel@tonic-gate if ((stelem = avl_find(stp->st_strtree, &qstelem, 3150Sstevel@tonic-gate &where)) == NULL) { 3160Sstevel@tonic-gate if ((stelem = calloc(sizeof (Stringelem), 1)) == 0) 3170Sstevel@tonic-gate return (-1); 3180Sstevel@tonic-gate stelem->se_stlen = qstelem.se_stlen; 3190Sstevel@tonic-gate avl_insert(stp->st_strtree, stelem, where); 3200Sstevel@tonic-gate } 3210Sstevel@tonic-gate if ((strlist = malloc(sizeof (Stringlist))) == 0) 3220Sstevel@tonic-gate return (-1); 3230Sstevel@tonic-gate 3240Sstevel@tonic-gate strlist->sl_string = str; 3250Sstevel@tonic-gate strlist->sl_next = stelem->se_strlist; 3260Sstevel@tonic-gate stelem->se_strlist = strlist; 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate return (0); 3290Sstevel@tonic-gate } 3300Sstevel@tonic-gate 3310Sstevel@tonic-gate 3320Sstevel@tonic-gate /* 3330Sstevel@tonic-gate * For a given string - copy it into the buffer associated with 3340Sstevel@tonic-gate * the string table - and return the offset it has been assigned. 3350Sstevel@tonic-gate * 3360Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in 3370Sstevel@tonic-gate * the Str_tbl. 3380Sstevel@tonic-gate */ 3390Sstevel@tonic-gate int 3400Sstevel@tonic-gate st_setstring(Str_tbl *stp, const char *str, uint_t *stoff) 3410Sstevel@tonic-gate { 3420Sstevel@tonic-gate uint_t stlen; 3430Sstevel@tonic-gate uint_t hashval; 3440Sstevel@tonic-gate Str_hash *sthash; 3450Sstevel@tonic-gate Str_master *mstr; 3460Sstevel@tonic-gate int i; 3470Sstevel@tonic-gate 3480Sstevel@tonic-gate /* 3490Sstevel@tonic-gate * String table *must* have been previously cooked 3500Sstevel@tonic-gate */ 3510Sstevel@tonic-gate assert(stp->st_strbuf); 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 3540Sstevel@tonic-gate stlen = (uint_t)strlen(str); 3550Sstevel@tonic-gate /* 3560Sstevel@tonic-gate * Null string always points to head of string table 3570Sstevel@tonic-gate */ 3580Sstevel@tonic-gate if (stlen == 0) { 3590Sstevel@tonic-gate *stoff = 0; 3600Sstevel@tonic-gate return (0); 3610Sstevel@tonic-gate } 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 3640Sstevel@tonic-gate uint_t _stoff; 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate stlen++; /* count for trailing '\0' */ 3670Sstevel@tonic-gate _stoff = stp->st_nextoff; 3680Sstevel@tonic-gate /* 3690Sstevel@tonic-gate * Have we overflowed our assigned buffer? 3700Sstevel@tonic-gate */ 3710Sstevel@tonic-gate if ((_stoff + stlen) > stp->st_fullstringsize) 3720Sstevel@tonic-gate return (-1); 3730Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen); 3740Sstevel@tonic-gate *stoff = _stoff; 3750Sstevel@tonic-gate stp->st_nextoff += stlen; 3760Sstevel@tonic-gate return (0); 3770Sstevel@tonic-gate } 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate /* 3800Sstevel@tonic-gate * Calculate reverse hash for string 3810Sstevel@tonic-gate */ 3820Sstevel@tonic-gate hashval = HASHSEED; 3830Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 3840Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 3850Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 3860Sstevel@tonic-gate } 3870Sstevel@tonic-gate 3880Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash; 3890Sstevel@tonic-gate sthash = sthash->hi_next) { 3900Sstevel@tonic-gate if (sthash->hi_hashval == hashval) { 3910Sstevel@tonic-gate const char *hstr; 3920Sstevel@tonic-gate 3930Sstevel@tonic-gate hstr = &sthash->hi_mstr->sm_str[ 3940Sstevel@tonic-gate sthash->hi_mstr->sm_stlen - 3950Sstevel@tonic-gate sthash->hi_stlen]; 3960Sstevel@tonic-gate if (strcmp(str, hstr) == 0) { 3970Sstevel@tonic-gate break; 3980Sstevel@tonic-gate } 3990Sstevel@tonic-gate } 4000Sstevel@tonic-gate } 4010Sstevel@tonic-gate 4020Sstevel@tonic-gate /* 4030Sstevel@tonic-gate * Did we find the string? 4040Sstevel@tonic-gate */ 4050Sstevel@tonic-gate if (sthash == 0) 4060Sstevel@tonic-gate return (-1); 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate /* 4090Sstevel@tonic-gate * Has this string been copied into the string table? 4100Sstevel@tonic-gate */ 4110Sstevel@tonic-gate mstr = sthash->hi_mstr; 4120Sstevel@tonic-gate if (mstr->sm_stoff == 0) { 4130Sstevel@tonic-gate uint_t mstlen = mstr->sm_stlen + 1; 4140Sstevel@tonic-gate mstr->sm_stoff = stp->st_nextoff; 4150Sstevel@tonic-gate /* 4160Sstevel@tonic-gate * Have we overflowed our assigned buffer? 4170Sstevel@tonic-gate */ 4180Sstevel@tonic-gate if ((mstr->sm_stoff + mstlen) > stp->st_fullstringsize) 4190Sstevel@tonic-gate return (-1); 4200Sstevel@tonic-gate memcpy(stp->st_strbuf + mstr->sm_stoff, mstr->sm_str, 4210Sstevel@tonic-gate mstlen); 4220Sstevel@tonic-gate stp->st_nextoff += mstlen; 4230Sstevel@tonic-gate } 4240Sstevel@tonic-gate /* 4250Sstevel@tonic-gate * Calculate offset of (sub)string 4260Sstevel@tonic-gate */ 4270Sstevel@tonic-gate *stoff = mstr->sm_stoff + mstr->sm_stlen - sthash->hi_stlen; 4280Sstevel@tonic-gate 4290Sstevel@tonic-gate return (0); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate static int 4340Sstevel@tonic-gate st_hash_insert(Str_tbl *stp, const char *str, uint_t stlen) 4350Sstevel@tonic-gate { 4360Sstevel@tonic-gate int i; 4370Sstevel@tonic-gate uint_t hashval = HASHSEED; 4380Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt; 4390Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks; 4400Sstevel@tonic-gate Str_hash *sthash; 4410Sstevel@tonic-gate Str_master *mstr = 0; 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate /* 4440Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But 4450Sstevel@tonic-gate * instead of hashing from the start of the string to the 4460Sstevel@tonic-gate * end, we do it in reverse. 4470Sstevel@tonic-gate * 4480Sstevel@tonic-gate * This way - we are essentially building all of the 4490Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if 4500Sstevel@tonic-gate * any suffixes already exist in the tree as we generate 4510Sstevel@tonic-gate * the hash. 4520Sstevel@tonic-gate */ 4530Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 4560Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 4570Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt]; 4580Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate if (sthash->hi_hashval == hashval) { 4610Sstevel@tonic-gate const char *hstr; 4620Sstevel@tonic-gate Str_master *_mstr; 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate _mstr = sthash->hi_mstr; 4650Sstevel@tonic-gate hstr = &_mstr->sm_str[_mstr->sm_stlen - 4660Sstevel@tonic-gate sthash->hi_stlen]; 4670Sstevel@tonic-gate if (strcmp(&str[i], hstr) == 0) { 4680Sstevel@tonic-gate if (i == 0) { 4690Sstevel@tonic-gate /* 4700Sstevel@tonic-gate * Entry already in table, 4710Sstevel@tonic-gate * increment refcnt and get 4720Sstevel@tonic-gate * out. 4730Sstevel@tonic-gate */ 4740Sstevel@tonic-gate sthash->hi_refcnt++; 4750Sstevel@tonic-gate return (0); 4760Sstevel@tonic-gate } else { 4770Sstevel@tonic-gate /* 4780Sstevel@tonic-gate * If this 'suffix' is 4790Sstevel@tonic-gate * presently a 'master' string, 4800Sstevel@tonic-gate * then take over it's record. 4810Sstevel@tonic-gate */ 4820Sstevel@tonic-gate if (sthash->hi_stlen == 4830Sstevel@tonic-gate _mstr->sm_stlen) { 4840Sstevel@tonic-gate /* 4850Sstevel@tonic-gate * we should only do 4860Sstevel@tonic-gate * this once. 4870Sstevel@tonic-gate */ 4880Sstevel@tonic-gate assert(mstr == 0); 4890Sstevel@tonic-gate mstr = _mstr; 4900Sstevel@tonic-gate } 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate } 4930Sstevel@tonic-gate } 4940Sstevel@tonic-gate } 4950Sstevel@tonic-gate } 4960Sstevel@tonic-gate 4970Sstevel@tonic-gate 4980Sstevel@tonic-gate /* 4990Sstevel@tonic-gate * Do we need a new master string, or can we take over 5000Sstevel@tonic-gate * one we already found in the table? 5010Sstevel@tonic-gate */ 5020Sstevel@tonic-gate if (mstr == 0) { 5030Sstevel@tonic-gate /* 5040Sstevel@tonic-gate * allocate a new master string 5050Sstevel@tonic-gate */ 5060Sstevel@tonic-gate if ((mstr = calloc(sizeof (Str_hash), 1)) == 0) 5070Sstevel@tonic-gate return (-1); 5080Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist; 5090Sstevel@tonic-gate stp->st_mstrlist = mstr; 5100Sstevel@tonic-gate stp->st_stringsize += stlen + 1; 5110Sstevel@tonic-gate } else { 5120Sstevel@tonic-gate /* 5130Sstevel@tonic-gate * We are taking over a existing master string, 5140Sstevel@tonic-gate * the stringsize only increments by the 5150Sstevel@tonic-gate * difference between the currnet string and the 5160Sstevel@tonic-gate * previous master. 5170Sstevel@tonic-gate */ 5180Sstevel@tonic-gate assert(stlen > mstr->sm_stlen); 5190Sstevel@tonic-gate stp->st_stringsize += stlen - mstr->sm_stlen; 5200Sstevel@tonic-gate } 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate if ((sthash = calloc(sizeof (Str_hash), 1)) == 0) 5230Sstevel@tonic-gate return (-1); 5240Sstevel@tonic-gate 5250Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval; 5260Sstevel@tonic-gate mstr->sm_stlen = sthash->hi_stlen = stlen; 5270Sstevel@tonic-gate mstr->sm_str = str; 5280Sstevel@tonic-gate sthash->hi_refcnt = 1; 5290Sstevel@tonic-gate sthash->hi_mstr = mstr; 5300Sstevel@tonic-gate 5310Sstevel@tonic-gate /* 5320Sstevel@tonic-gate * Insert string element into head of hash list 5330Sstevel@tonic-gate */ 5340Sstevel@tonic-gate hashval = hashval % bckcnt; 5350Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval]; 5360Sstevel@tonic-gate hashbcks[hashval] = sthash; 5370Sstevel@tonic-gate return (0); 5380Sstevel@tonic-gate } 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate /* 5410Sstevel@tonic-gate * Return amount of space required for the string table. 5420Sstevel@tonic-gate */ 5430Sstevel@tonic-gate uint_t 5440Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp) 5450Sstevel@tonic-gate { 5460Sstevel@tonic-gate assert(stp->st_fullstringsize > 0); 5470Sstevel@tonic-gate 5480Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 5490Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5500Sstevel@tonic-gate return (stp->st_fullstringsize); 5510Sstevel@tonic-gate } 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate 5540Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) { 5550Sstevel@tonic-gate Stringelem *stelem; 5560Sstevel@tonic-gate void *cookie; 5570Sstevel@tonic-gate 5580Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5590Sstevel@tonic-gate /* 5600Sstevel@tonic-gate * allocate a hash table about the size of # of 5610Sstevel@tonic-gate * strings input. 5620Sstevel@tonic-gate */ 5630Sstevel@tonic-gate stp->st_hbckcnt = findprime(stp->st_stringcnt); 5640Sstevel@tonic-gate if ((stp->st_hashbcks = 5650Sstevel@tonic-gate calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL) 5660Sstevel@tonic-gate return (0); 5670Sstevel@tonic-gate 5680Sstevel@tonic-gate /* 5690Sstevel@tonic-gate * We now walk all of the strings in the list, 5700Sstevel@tonic-gate * from shortest to longest, and insert them into 5710Sstevel@tonic-gate * the hashtable. 5720Sstevel@tonic-gate */ 5730Sstevel@tonic-gate if ((stelem = avl_first(stp->st_strtree)) == NULL) { 5740Sstevel@tonic-gate /* 5750Sstevel@tonic-gate * Is it possible we have a empty string table, 5760Sstevel@tonic-gate * if so - the table still conains '\0' 5770Sstevel@tonic-gate * so still return the size. 5780Sstevel@tonic-gate */ 5790Sstevel@tonic-gate if (avl_numnodes(stp->st_strtree) == 0) { 5800Sstevel@tonic-gate assert(stp->st_stringsize == 1); 5810Sstevel@tonic-gate return (stp->st_stringsize); 5820Sstevel@tonic-gate } 5830Sstevel@tonic-gate return (0); 5840Sstevel@tonic-gate } 5850Sstevel@tonic-gate while (stelem) { 5860Sstevel@tonic-gate Stringlist *strlist, *pstrlist; 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate /* 5890Sstevel@tonic-gate * Walk the string lists and insert them 5900Sstevel@tonic-gate * into the hash list. Once a string is 5910Sstevel@tonic-gate * inserted we no longer need it's entry, 5920Sstevel@tonic-gate * so free it 5930Sstevel@tonic-gate */ 5940Sstevel@tonic-gate for (strlist = stelem->se_strlist, pstrlist = 0; 5950Sstevel@tonic-gate strlist; strlist = strlist->sl_next) { 5960Sstevel@tonic-gate if (st_hash_insert(stp, strlist->sl_string, 5970Sstevel@tonic-gate stelem->se_stlen) == -1) 5980Sstevel@tonic-gate return (0); 5990Sstevel@tonic-gate if (pstrlist) 6000Sstevel@tonic-gate free(pstrlist); 6010Sstevel@tonic-gate } 6020Sstevel@tonic-gate free(pstrlist); 6030Sstevel@tonic-gate stelem->se_strlist = 0; 6040Sstevel@tonic-gate stelem = AVL_NEXT(stp->st_strtree, stelem); 6050Sstevel@tonic-gate } 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate /* 6080Sstevel@tonic-gate * Now that all of the strings have been freed, 6090Sstevel@tonic-gate * go ahead and quickly re-walk the AVL tree and 6100Sstevel@tonic-gate * free all of the AVL nodes. 6110Sstevel@tonic-gate * 6120Sstevel@tonic-gate * avl_destroy_nodes() beats avl_remove() because 6130Sstevel@tonic-gate * avl_remove will 'ballance' the tree as nodes 6140Sstevel@tonic-gate * are deleted - we just want to tear the whole 6150Sstevel@tonic-gate * thing down now. 6160Sstevel@tonic-gate */ 6170Sstevel@tonic-gate cookie = NULL; 6180Sstevel@tonic-gate while ((stelem = avl_destroy_nodes(stp->st_strtree, 6190Sstevel@tonic-gate &cookie)) != NULL) 6200Sstevel@tonic-gate free(stelem); 6210Sstevel@tonic-gate avl_destroy(stp->st_strtree); 6220Sstevel@tonic-gate free(stp->st_strtree); 6230Sstevel@tonic-gate stp->st_strtree = 0; 6240Sstevel@tonic-gate } 6250Sstevel@tonic-gate 6260Sstevel@tonic-gate assert(stp->st_stringsize > 0); 6270Sstevel@tonic-gate assert(stp->st_fullstringsize >= stp->st_stringsize); 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate return (stp->st_stringsize); 6300Sstevel@tonic-gate } 6310Sstevel@tonic-gate 6320Sstevel@tonic-gate /* 6330Sstevel@tonic-gate * Associate a buffer with the string table. 6340Sstevel@tonic-gate */ 6350Sstevel@tonic-gate const char * 6360Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp) 6370Sstevel@tonic-gate { 6380Sstevel@tonic-gate return (stp->st_strbuf); 6390Sstevel@tonic-gate } 6400Sstevel@tonic-gate 6410Sstevel@tonic-gate int 6420Sstevel@tonic-gate st_setstrbuf(Str_tbl *stp, char *stbuf, uint_t bufsize) 6430Sstevel@tonic-gate { 6440Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 6450Sstevel@tonic-gate 6460Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 6470Sstevel@tonic-gate if (bufsize < stp->st_fullstringsize) 6480Sstevel@tonic-gate return (-1); 6490Sstevel@tonic-gate } else { 6500Sstevel@tonic-gate if (bufsize < stp->st_stringsize) 6510Sstevel@tonic-gate return (-1); 6520Sstevel@tonic-gate } 6530Sstevel@tonic-gate 6540Sstevel@tonic-gate stp->st_strbuf = stbuf; 6550Sstevel@tonic-gate #ifdef DEBUG 6560Sstevel@tonic-gate /* 6570Sstevel@tonic-gate * for debug builds - start with a stringtable filled in 6580Sstevel@tonic-gate * with '0xff'. This makes it very easy to find wholes 6590Sstevel@tonic-gate * which we failed to fill in - in the strtab. 6600Sstevel@tonic-gate */ 6610Sstevel@tonic-gate memset(stbuf, 0xff, bufsize); 6620Sstevel@tonic-gate stbuf[0] = '\0'; 6630Sstevel@tonic-gate #else 6640Sstevel@tonic-gate memset(stbuf, 0x0, bufsize); 6650Sstevel@tonic-gate #endif 6660Sstevel@tonic-gate return (0); 6670Sstevel@tonic-gate } 668