xref: /onnv-gate/usr/src/cmd/sgs/tools/common/string_table.c (revision 1618:8c9a4f31d225)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1618Srie  * Common Development and Distribution License (the "License").
6*1618Srie  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*1618Srie 
220Sstevel@tonic-gate /*
23*1618Srie  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <string_table.h>
300Sstevel@tonic-gate #include <strings.h>
310Sstevel@tonic-gate #include <sgs.h>
320Sstevel@tonic-gate #include <stdio.h>
330Sstevel@tonic-gate 
340Sstevel@tonic-gate 
350Sstevel@tonic-gate 
360Sstevel@tonic-gate /*
370Sstevel@tonic-gate  * This file provides the interfaces to build a Str_tbl suitable
380Sstevel@tonic-gate  * for use by either the sgsmsg system or a standard ELF
390Sstevel@tonic-gate  * SHT_STRTAB.
400Sstevel@tonic-gate  *
410Sstevel@tonic-gate  * There are two modes which can be used when constructing a
420Sstevel@tonic-gate  * string table:
430Sstevel@tonic-gate  *
440Sstevel@tonic-gate  *	st_new(0)
450Sstevel@tonic-gate  *		standard string table - no compression.  This is the
460Sstevel@tonic-gate  *		traditional method and fast
470Sstevel@tonic-gate  *
480Sstevel@tonic-gate  *	st_new(FLG_STNEW_COMPRESS)
490Sstevel@tonic-gate  *		build a compressed string table which both
500Sstevel@tonic-gate  *		eliminates duplicate strings and permits
510Sstevel@tonic-gate  *		strings with common suffixes (atexit vs. exit) to
520Sstevel@tonic-gate  *		overlap in the table.  This provides space
530Sstevel@tonic-gate  *		savings for many string tables.
540Sstevel@tonic-gate  *
550Sstevel@tonic-gate  * These string tables are now built with a common interface in a
560Sstevel@tonic-gate  * two-pass manner, the first pass it to find all of the strings
570Sstevel@tonic-gate  * required for the string-table and to calculate the size that
580Sstevel@tonic-gate  * will be required for the final string table.
590Sstevel@tonic-gate  *
600Sstevel@tonic-gate  * The second pass allocates the string table and populates the
610Sstevel@tonic-gate  * strings into the table and returns the offsets the strings
620Sstevel@tonic-gate  * have been assigned.
630Sstevel@tonic-gate  *
640Sstevel@tonic-gate  * The calling sequence to build and populate a string table is:
650Sstevel@tonic-gate  *
660Sstevel@tonic-gate  *		st_new();		// initialize strtab
670Sstevel@tonic-gate  *
680Sstevel@tonic-gate  *		st_insert(st1);		// first pass of strings ...
690Sstevel@tonic-gate  *					// calculates size required for
700Sstevel@tonic-gate  *					// string table
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  *		st_delstring(st?);	// remove string previously
730Sstevel@tonic-gate  *					// inserted
740Sstevel@tonic-gate  *
750Sstevel@tonic-gate  *		st_insert(stN);
760Sstevel@tonic-gate  *
770Sstevel@tonic-gate  *		st_getstrtab_sz();	// freezes strtab and computes
780Sstevel@tonic-gate  *					// size of table.
790Sstevel@tonic-gate  *
800Sstevel@tonic-gate  *		st_setstrbuf();		// associates a final destination
810Sstevel@tonic-gate  *					// for the string table
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  *		st_setstring(st1);	// populate the string table
840Sstevel@tonic-gate  *		...			// offsets are based off of second
850Sstevel@tonic-gate  *					// pass	through the string table
860Sstevel@tonic-gate  *		st_setstring(stN);
870Sstevel@tonic-gate  *
880Sstevel@tonic-gate  *		st_destroy();		// tear down string table
890Sstevel@tonic-gate  *					// structures.
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  * String Suffix Compression Algorithm:
920Sstevel@tonic-gate  *
930Sstevel@tonic-gate  *   Here's a quick high level overview of the Suffix String
940Sstevel@tonic-gate  *   compression algorithm used.  First - the heart of the algorithm
950Sstevel@tonic-gate  *   is a Hash table list which represents a dictionary of all unique
960Sstevel@tonic-gate  *   strings inserted into the string table.  The hash function for
970Sstevel@tonic-gate  *   this table is a standard string hash except that the hash starts
980Sstevel@tonic-gate  *   at the last character in the string (&str[n - 1]) and works towards
990Sstevel@tonic-gate  *   the first character in the function (&str[0]).  As we compute the
1000Sstevel@tonic-gate  *   HASH value for a given string, we also compute the hash values
1010Sstevel@tonic-gate  *   for all of the possible suffix strings for that string.
1020Sstevel@tonic-gate  *
1030Sstevel@tonic-gate  *   As we compute the hash - at each character see if the current
1040Sstevel@tonic-gate  *   suffix string for that hash is already present in the table.  If
1050Sstevel@tonic-gate  *   it is, and the string is a master string.  Then change that
1060Sstevel@tonic-gate  *   string to a suffix string of the new string being inserted.
1070Sstevel@tonic-gate  *
1080Sstevel@tonic-gate  *   When the final hash value is found (hash for str[0...n]), check
1090Sstevel@tonic-gate  *   to see if it is in the hash table - if so increment the reference
1100Sstevel@tonic-gate  *   count for the string.  If it is not yet in the table, insert a
1110Sstevel@tonic-gate  *   new hash table entry for a master string.
1120Sstevel@tonic-gate  *
1130Sstevel@tonic-gate  *   The above method will find all suffixes of a given string given
1140Sstevel@tonic-gate  *   that the strings are inserted from shortest to longest.  That is
1150Sstevel@tonic-gate  *   why this is a two phase method, we first collect all of the
1160Sstevel@tonic-gate  *   strings and store them based off of their length in a nice AVL tree.
1170Sstevel@tonic-gate  *   Once all of the strings have been submitted we then start the
1180Sstevel@tonic-gate  *   hash table build by traversing the AVL tree in order and
1190Sstevel@tonic-gate  *   inserting the strings from shortest to longest as described
1200Sstevel@tonic-gate  *   above.
1210Sstevel@tonic-gate  *
1220Sstevel@tonic-gate  */
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate /* LINTLIBRARY */
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate int
1280Sstevel@tonic-gate strlen_compare(const void *elem1, const void *elem2)
1290Sstevel@tonic-gate {
1300Sstevel@tonic-gate 	uint_t	l1, l2;
1310Sstevel@tonic-gate 	l1 = ((Stringelem *)elem1)->se_stlen;
1320Sstevel@tonic-gate 	l2 = ((Stringelem *)elem2)->se_stlen;
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate 	if (l1 == l2)
1350Sstevel@tonic-gate 		return (0);
1360Sstevel@tonic-gate 	if (l2 < l1)
1370Sstevel@tonic-gate 		return (1);
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	return (-1);
1400Sstevel@tonic-gate }
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate /*
1430Sstevel@tonic-gate  * Return a initialized Str_tbl - returns NULL on failure.
1440Sstevel@tonic-gate  *
1450Sstevel@tonic-gate  * stflags:
1460Sstevel@tonic-gate  *
1470Sstevel@tonic-gate  *	FLG_STNEW_COMPRESS - build a compressed string table
1480Sstevel@tonic-gate  *
1490Sstevel@tonic-gate  */
1500Sstevel@tonic-gate Str_tbl *
1510Sstevel@tonic-gate st_new(uint_t stflags)
1520Sstevel@tonic-gate {
1530Sstevel@tonic-gate 	Str_tbl	*stp;
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate 	if ((stp = calloc(sizeof (Str_tbl), 1)) == 0)
1560Sstevel@tonic-gate 		return (0);
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 	/*
1590Sstevel@tonic-gate 	 * Start with a leading '\0' - it's tradition.
1600Sstevel@tonic-gate 	 */
1610Sstevel@tonic-gate 	stp->st_stringsize = stp->st_fullstringsize = stp->st_nextoff = 1;
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	/*
1640Sstevel@tonic-gate 	 * Do we compress this string table
1650Sstevel@tonic-gate 	 */
1660Sstevel@tonic-gate 	if ((stflags & FLG_STNEW_COMPRESS) == 0)
1670Sstevel@tonic-gate 		return (stp);
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 	stp->st_flags |= FLG_STTAB_COMPRESS;
1700Sstevel@tonic-gate 	if ((stp->st_strtree = calloc(sizeof (avl_tree_t), 1)) == 0) {
1710Sstevel@tonic-gate 		return (0);
1720Sstevel@tonic-gate 	}
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate 	avl_create(stp->st_strtree, &strlen_compare, sizeof (Stringelem),
1750Sstevel@tonic-gate 		SGSOFFSETOF(Stringelem, se_avlnode));
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	return (stp);
1780Sstevel@tonic-gate }
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate /*
1810Sstevel@tonic-gate  * Tear down a String_Table structure.
1820Sstevel@tonic-gate  */
1830Sstevel@tonic-gate void
1840Sstevel@tonic-gate st_destroy(Str_tbl *stp)
1850Sstevel@tonic-gate {
1860Sstevel@tonic-gate 	Str_hash	*sthash, *psthash;
1870Sstevel@tonic-gate 	Str_master	*mstr, *pmstr;
1880Sstevel@tonic-gate 	uint_t		i;
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	/*
1910Sstevel@tonic-gate 	 * cleanup the master strings
1920Sstevel@tonic-gate 	 */
1930Sstevel@tonic-gate 	for (mstr = stp->st_mstrlist, pmstr = 0; mstr;
1940Sstevel@tonic-gate 	    mstr = mstr->sm_next) {
1950Sstevel@tonic-gate 		if (pmstr)
1960Sstevel@tonic-gate 			free(pmstr);
1970Sstevel@tonic-gate 		pmstr = mstr;
1980Sstevel@tonic-gate 	}
1990Sstevel@tonic-gate 	if (pmstr)
2000Sstevel@tonic-gate 		free(pmstr);
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	if (stp->st_hashbcks) {
2030Sstevel@tonic-gate 		for (i = 0; i < stp->st_hbckcnt; i++) {
2040Sstevel@tonic-gate 			for (sthash = stp->st_hashbcks[i], psthash = 0;
2050Sstevel@tonic-gate 			    sthash; sthash = sthash->hi_next) {
2060Sstevel@tonic-gate 				if (psthash)
2070Sstevel@tonic-gate 					free(psthash);
2080Sstevel@tonic-gate 				psthash = sthash;
2090Sstevel@tonic-gate 			}
2100Sstevel@tonic-gate 			if (psthash)
2110Sstevel@tonic-gate 				free(psthash);
2120Sstevel@tonic-gate 		}
2130Sstevel@tonic-gate 		free(stp->st_hashbcks);
2140Sstevel@tonic-gate 	}
2150Sstevel@tonic-gate 	free(stp);
2160Sstevel@tonic-gate }
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate  * Remove a previously inserted string from the Str_tbl
2230Sstevel@tonic-gate  */
2240Sstevel@tonic-gate int
2250Sstevel@tonic-gate st_delstring(Str_tbl *stp, const char *str)
2260Sstevel@tonic-gate {
2270Sstevel@tonic-gate 	uint_t		stlen;
2280Sstevel@tonic-gate 	Stringelem	qstelem;
2290Sstevel@tonic-gate 	Stringelem	*stelem;
2300Sstevel@tonic-gate 	Stringlist	*stlist, *pstlist;
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	/*
2330Sstevel@tonic-gate 	 * String table can't have been cooked
2340Sstevel@tonic-gate 	 */
2350Sstevel@tonic-gate 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
2380Sstevel@tonic-gate 	stp->st_fullstringsize -= stlen + 1;
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
2410Sstevel@tonic-gate 		return (0);
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	qstelem.se_stlen = stlen;
2440Sstevel@tonic-gate 	if ((stelem = avl_find(stp->st_strtree, &qstelem, 0)) == NULL) {
2450Sstevel@tonic-gate 		/*
2460Sstevel@tonic-gate 		 * no strings of this length recorded, let alone
2470Sstevel@tonic-gate 		 * this specific string - someone goofed.
2480Sstevel@tonic-gate 		 */
2490Sstevel@tonic-gate 		return (-1);
2500Sstevel@tonic-gate 	}
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate 	pstlist = 0;
2530Sstevel@tonic-gate 	for (stlist = stelem->se_strlist; stlist; stlist = stlist->sl_next) {
2540Sstevel@tonic-gate 		if (strcmp(str, stlist->sl_string) == 0)
2550Sstevel@tonic-gate 			break;
2560Sstevel@tonic-gate 		pstlist = stlist;
2570Sstevel@tonic-gate 	}
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate 	if (stlist == 0) {
2600Sstevel@tonic-gate 		/*
2610Sstevel@tonic-gate 		 * string was not found
2620Sstevel@tonic-gate 		 */
2630Sstevel@tonic-gate 		return (-1);
2640Sstevel@tonic-gate 	}
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate 	if (pstlist == 0) {
2670Sstevel@tonic-gate 		/*
2680Sstevel@tonic-gate 		 * String is first on list.
2690Sstevel@tonic-gate 		 */
2700Sstevel@tonic-gate 		stelem->se_strlist = stlist->sl_next;
2710Sstevel@tonic-gate 	} else {
2720Sstevel@tonic-gate 		/*
2730Sstevel@tonic-gate 		 * remove string from list.
2740Sstevel@tonic-gate 		 */
2750Sstevel@tonic-gate 		pstlist->sl_next = stlist->sl_next;
2760Sstevel@tonic-gate 	}
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	free(stlist);
2790Sstevel@tonic-gate 	return (0);
2800Sstevel@tonic-gate }
2810Sstevel@tonic-gate 
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate /*
2840Sstevel@tonic-gate  * Insert a new string into the Str_tbl
2850Sstevel@tonic-gate  */
2860Sstevel@tonic-gate int
2870Sstevel@tonic-gate st_insert(Str_tbl *stp, const char *str)
2880Sstevel@tonic-gate {
2890Sstevel@tonic-gate 	uint_t	stlen;
2900Sstevel@tonic-gate 	Stringelem	qstelem;
2910Sstevel@tonic-gate 	Stringelem	*stelem;
2920Sstevel@tonic-gate 	Stringlist	*strlist;
2930Sstevel@tonic-gate 	avl_index_t	where;
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate 	/*
2960Sstevel@tonic-gate 	 * String table can't have been cooked
2970Sstevel@tonic-gate 	 */
2980Sstevel@tonic-gate 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
2990Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
3000Sstevel@tonic-gate 	/*
3010Sstevel@tonic-gate 	 * Null strings always point to the head of the string
3020Sstevel@tonic-gate 	 * table - no reason to keep searching.
3030Sstevel@tonic-gate 	 */
3040Sstevel@tonic-gate 	if (stlen == 0)
3050Sstevel@tonic-gate 		return (0);
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 	stp->st_fullstringsize += stlen + 1;
3080Sstevel@tonic-gate 	stp->st_stringcnt++;
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
3110Sstevel@tonic-gate 		return (0);
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	qstelem.se_stlen = strlen(str);
3140Sstevel@tonic-gate 	if ((stelem = avl_find(stp->st_strtree, &qstelem,
3150Sstevel@tonic-gate 	    &where)) == NULL) {
3160Sstevel@tonic-gate 		if ((stelem = calloc(sizeof (Stringelem), 1)) == 0)
3170Sstevel@tonic-gate 			return (-1);
3180Sstevel@tonic-gate 		stelem->se_stlen = qstelem.se_stlen;
3190Sstevel@tonic-gate 		avl_insert(stp->st_strtree, stelem, where);
3200Sstevel@tonic-gate 	}
3210Sstevel@tonic-gate 	if ((strlist = malloc(sizeof (Stringlist))) == 0)
3220Sstevel@tonic-gate 		return (-1);
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 	strlist->sl_string = str;
3250Sstevel@tonic-gate 	strlist->sl_next = stelem->se_strlist;
3260Sstevel@tonic-gate 	stelem->se_strlist = strlist;
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	return (0);
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate /*
3330Sstevel@tonic-gate  * For a given string - copy it into the buffer associated with
3340Sstevel@tonic-gate  * the string table - and return the offset it has been assigned.
3350Sstevel@tonic-gate  *
3360Sstevel@tonic-gate  * If a value of '-1' is returned - the string was not found in
3370Sstevel@tonic-gate  * the Str_tbl.
3380Sstevel@tonic-gate  */
3390Sstevel@tonic-gate int
3400Sstevel@tonic-gate st_setstring(Str_tbl *stp, const char *str, uint_t *stoff)
3410Sstevel@tonic-gate {
3420Sstevel@tonic-gate 	uint_t		stlen;
3430Sstevel@tonic-gate 	uint_t		hashval;
3440Sstevel@tonic-gate 	Str_hash	*sthash;
3450Sstevel@tonic-gate 	Str_master	*mstr;
3460Sstevel@tonic-gate 	int		i;
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate 	/*
3490Sstevel@tonic-gate 	 * String table *must* have been previously cooked
3500Sstevel@tonic-gate 	 */
3510Sstevel@tonic-gate 	assert(stp->st_strbuf);
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
3540Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
3550Sstevel@tonic-gate 	/*
3560Sstevel@tonic-gate 	 * Null string always points to head of string table
3570Sstevel@tonic-gate 	 */
3580Sstevel@tonic-gate 	if (stlen == 0) {
3590Sstevel@tonic-gate 		*stoff = 0;
3600Sstevel@tonic-gate 		return (0);
3610Sstevel@tonic-gate 	}
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
3640Sstevel@tonic-gate 		uint_t		_stoff;
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate 		stlen++;	/* count for trailing '\0' */
3670Sstevel@tonic-gate 		_stoff = stp->st_nextoff;
3680Sstevel@tonic-gate 		/*
3690Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
3700Sstevel@tonic-gate 		 */
3710Sstevel@tonic-gate 		if ((_stoff + stlen) > stp->st_fullstringsize)
3720Sstevel@tonic-gate 			return (-1);
3730Sstevel@tonic-gate 		memcpy(stp->st_strbuf + _stoff, str, stlen);
3740Sstevel@tonic-gate 		*stoff = _stoff;
3750Sstevel@tonic-gate 		stp->st_nextoff += stlen;
3760Sstevel@tonic-gate 		return (0);
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	/*
3800Sstevel@tonic-gate 	 * Calculate reverse hash for string
3810Sstevel@tonic-gate 	 */
3820Sstevel@tonic-gate 	hashval = HASHSEED;
3830Sstevel@tonic-gate 	for (i = stlen; i >= 0; i--) {
3840Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
3850Sstevel@tonic-gate 			str[i];			/* h = ((h * 33) + c) */
3860Sstevel@tonic-gate 	}
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
3890Sstevel@tonic-gate 	    sthash = sthash->hi_next) {
3900Sstevel@tonic-gate 		if (sthash->hi_hashval == hashval) {
3910Sstevel@tonic-gate 			const char	*hstr;
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate 			hstr = &sthash->hi_mstr->sm_str[
3940Sstevel@tonic-gate 			    sthash->hi_mstr->sm_stlen -
3950Sstevel@tonic-gate 			    sthash->hi_stlen];
3960Sstevel@tonic-gate 			if (strcmp(str, hstr) == 0) {
3970Sstevel@tonic-gate 				break;
3980Sstevel@tonic-gate 			}
3990Sstevel@tonic-gate 		}
4000Sstevel@tonic-gate 	}
4010Sstevel@tonic-gate 
4020Sstevel@tonic-gate 	/*
4030Sstevel@tonic-gate 	 * Did we find the string?
4040Sstevel@tonic-gate 	 */
4050Sstevel@tonic-gate 	if (sthash == 0)
4060Sstevel@tonic-gate 		return (-1);
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	/*
4090Sstevel@tonic-gate 	 * Has this string been copied into the string table?
4100Sstevel@tonic-gate 	 */
4110Sstevel@tonic-gate 	mstr = sthash->hi_mstr;
4120Sstevel@tonic-gate 	if (mstr->sm_stoff == 0) {
4130Sstevel@tonic-gate 		uint_t	mstlen = mstr->sm_stlen + 1;
4140Sstevel@tonic-gate 		mstr->sm_stoff = stp->st_nextoff;
4150Sstevel@tonic-gate 		/*
4160Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
4170Sstevel@tonic-gate 		 */
4180Sstevel@tonic-gate 		if ((mstr->sm_stoff + mstlen) > stp->st_fullstringsize)
4190Sstevel@tonic-gate 			return (-1);
4200Sstevel@tonic-gate 		memcpy(stp->st_strbuf + mstr->sm_stoff, mstr->sm_str,
4210Sstevel@tonic-gate 			mstlen);
4220Sstevel@tonic-gate 		stp->st_nextoff += mstlen;
4230Sstevel@tonic-gate 	}
4240Sstevel@tonic-gate 	/*
4250Sstevel@tonic-gate 	 * Calculate offset of (sub)string
4260Sstevel@tonic-gate 	 */
4270Sstevel@tonic-gate 	*stoff = mstr->sm_stoff + mstr->sm_stlen - sthash->hi_stlen;
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 	return (0);
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate static int
4340Sstevel@tonic-gate st_hash_insert(Str_tbl *stp, const char *str, uint_t stlen)
4350Sstevel@tonic-gate {
4360Sstevel@tonic-gate 	int		i;
4370Sstevel@tonic-gate 	uint_t		hashval = HASHSEED;
4380Sstevel@tonic-gate 	uint_t		bckcnt = stp->st_hbckcnt;
4390Sstevel@tonic-gate 	Str_hash	**hashbcks = stp->st_hashbcks;
4400Sstevel@tonic-gate 	Str_hash	*sthash;
4410Sstevel@tonic-gate 	Str_master	*mstr = 0;
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate 	/*
4440Sstevel@tonic-gate 	 * We use a classic 'Bernstein k=33' hash function.  But
4450Sstevel@tonic-gate 	 * instead of hashing from the start of the string to the
4460Sstevel@tonic-gate 	 * end, we do it in reverse.
4470Sstevel@tonic-gate 	 *
4480Sstevel@tonic-gate 	 * This way - we are essentially building all of the
4490Sstevel@tonic-gate 	 * suffix hashvalues as we go.  We can check to see if
4500Sstevel@tonic-gate 	 * any suffixes already exist in the tree as we generate
4510Sstevel@tonic-gate 	 * the hash.
4520Sstevel@tonic-gate 	 */
4530Sstevel@tonic-gate 	for (i = stlen; i >= 0; i--) {
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
4560Sstevel@tonic-gate 			str[i];			/* h = ((h * 33) + c) */
4570Sstevel@tonic-gate 		for (sthash = hashbcks[hashval % bckcnt];
4580Sstevel@tonic-gate 		    sthash; sthash = sthash->hi_next) {
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 			if (sthash->hi_hashval == hashval) {
4610Sstevel@tonic-gate 				const char	*hstr;
4620Sstevel@tonic-gate 				Str_master	*_mstr;
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 				_mstr = sthash->hi_mstr;
4650Sstevel@tonic-gate 				hstr = &_mstr->sm_str[_mstr->sm_stlen -
4660Sstevel@tonic-gate 				    sthash->hi_stlen];
4670Sstevel@tonic-gate 				if (strcmp(&str[i], hstr) == 0) {
4680Sstevel@tonic-gate 					if (i == 0) {
4690Sstevel@tonic-gate 						/*
4700Sstevel@tonic-gate 						 * Entry already in table,
4710Sstevel@tonic-gate 						 * increment refcnt and get
4720Sstevel@tonic-gate 						 * out.
4730Sstevel@tonic-gate 						 */
4740Sstevel@tonic-gate 						sthash->hi_refcnt++;
4750Sstevel@tonic-gate 						return (0);
4760Sstevel@tonic-gate 					} else {
4770Sstevel@tonic-gate 						/*
4780Sstevel@tonic-gate 						 * If this 'suffix' is
4790Sstevel@tonic-gate 						 * presently a 'master' string,
4800Sstevel@tonic-gate 						 * then take over it's record.
4810Sstevel@tonic-gate 						 */
4820Sstevel@tonic-gate 						if (sthash->hi_stlen ==
4830Sstevel@tonic-gate 						    _mstr->sm_stlen) {
4840Sstevel@tonic-gate 							/*
4850Sstevel@tonic-gate 							 * we should only do
4860Sstevel@tonic-gate 							 * this once.
4870Sstevel@tonic-gate 							 */
4880Sstevel@tonic-gate 							assert(mstr == 0);
4890Sstevel@tonic-gate 							mstr = _mstr;
4900Sstevel@tonic-gate 						}
4910Sstevel@tonic-gate 					}
4920Sstevel@tonic-gate 				}
4930Sstevel@tonic-gate 			}
4940Sstevel@tonic-gate 		}
4950Sstevel@tonic-gate 	}
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 
4980Sstevel@tonic-gate 	/*
4990Sstevel@tonic-gate 	 * Do we need a new master string, or can we take over
5000Sstevel@tonic-gate 	 * one we already found in the table?
5010Sstevel@tonic-gate 	 */
5020Sstevel@tonic-gate 	if (mstr == 0) {
5030Sstevel@tonic-gate 		/*
5040Sstevel@tonic-gate 		 * allocate a new master string
5050Sstevel@tonic-gate 		 */
5060Sstevel@tonic-gate 		if ((mstr = calloc(sizeof (Str_hash), 1)) == 0)
5070Sstevel@tonic-gate 			return (-1);
5080Sstevel@tonic-gate 		mstr->sm_next = stp->st_mstrlist;
5090Sstevel@tonic-gate 		stp->st_mstrlist = mstr;
5100Sstevel@tonic-gate 		stp->st_stringsize += stlen + 1;
5110Sstevel@tonic-gate 	} else {
5120Sstevel@tonic-gate 		/*
5130Sstevel@tonic-gate 		 * We are taking over a existing master string,
5140Sstevel@tonic-gate 		 * the stringsize only increments by the
5150Sstevel@tonic-gate 		 * difference between the currnet string and the
5160Sstevel@tonic-gate 		 * previous master.
5170Sstevel@tonic-gate 		 */
5180Sstevel@tonic-gate 		assert(stlen > mstr->sm_stlen);
5190Sstevel@tonic-gate 		stp->st_stringsize += stlen - mstr->sm_stlen;
5200Sstevel@tonic-gate 	}
5210Sstevel@tonic-gate 
5220Sstevel@tonic-gate 	if ((sthash = calloc(sizeof (Str_hash), 1)) == 0)
5230Sstevel@tonic-gate 		return (-1);
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate 	mstr->sm_hashval = sthash->hi_hashval = hashval;
5260Sstevel@tonic-gate 	mstr->sm_stlen = sthash->hi_stlen = stlen;
5270Sstevel@tonic-gate 	mstr->sm_str = str;
5280Sstevel@tonic-gate 	sthash->hi_refcnt = 1;
5290Sstevel@tonic-gate 	sthash->hi_mstr = mstr;
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 	/*
5320Sstevel@tonic-gate 	 * Insert string element into head of hash list
5330Sstevel@tonic-gate 	 */
5340Sstevel@tonic-gate 	hashval = hashval % bckcnt;
5350Sstevel@tonic-gate 	sthash->hi_next = hashbcks[hashval];
5360Sstevel@tonic-gate 	hashbcks[hashval] = sthash;
5370Sstevel@tonic-gate 	return (0);
5380Sstevel@tonic-gate }
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate /*
5410Sstevel@tonic-gate  * Return amount of space required for the string table.
5420Sstevel@tonic-gate  */
5430Sstevel@tonic-gate uint_t
5440Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp)
5450Sstevel@tonic-gate {
5460Sstevel@tonic-gate 	assert(stp->st_fullstringsize > 0);
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
5490Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
5500Sstevel@tonic-gate 		return (stp->st_fullstringsize);
5510Sstevel@tonic-gate 	}
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 
5540Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COOKED) == 0) {
5550Sstevel@tonic-gate 		Stringelem	*stelem;
5560Sstevel@tonic-gate 		void		*cookie;
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
5590Sstevel@tonic-gate 		/*
5600Sstevel@tonic-gate 		 * allocate a hash table about the size of # of
5610Sstevel@tonic-gate 		 * strings input.
5620Sstevel@tonic-gate 		 */
5630Sstevel@tonic-gate 		stp->st_hbckcnt = findprime(stp->st_stringcnt);
5640Sstevel@tonic-gate 		if ((stp->st_hashbcks =
5650Sstevel@tonic-gate 		    calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL)
5660Sstevel@tonic-gate 			return (0);
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 		/*
5690Sstevel@tonic-gate 		 * We now walk all of the strings in the list,
5700Sstevel@tonic-gate 		 * from shortest to longest, and insert them into
5710Sstevel@tonic-gate 		 * the hashtable.
5720Sstevel@tonic-gate 		 */
5730Sstevel@tonic-gate 		if ((stelem = avl_first(stp->st_strtree)) == NULL) {
5740Sstevel@tonic-gate 			/*
5750Sstevel@tonic-gate 			 * Is it possible we have a empty string table,
5760Sstevel@tonic-gate 			 * if so - the table still conains '\0'
5770Sstevel@tonic-gate 			 * so still return the size.
5780Sstevel@tonic-gate 			 */
5790Sstevel@tonic-gate 			if (avl_numnodes(stp->st_strtree) == 0) {
5800Sstevel@tonic-gate 				assert(stp->st_stringsize == 1);
5810Sstevel@tonic-gate 				return (stp->st_stringsize);
5820Sstevel@tonic-gate 			}
5830Sstevel@tonic-gate 			return (0);
5840Sstevel@tonic-gate 		}
5850Sstevel@tonic-gate 		while (stelem) {
5860Sstevel@tonic-gate 			Stringlist	*strlist, *pstrlist;
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate 			/*
5890Sstevel@tonic-gate 			 * Walk the string lists and insert them
5900Sstevel@tonic-gate 			 * into the hash list.  Once a string is
5910Sstevel@tonic-gate 			 * inserted we no longer need it's entry,
5920Sstevel@tonic-gate 			 * so free it
5930Sstevel@tonic-gate 			 */
5940Sstevel@tonic-gate 			for (strlist = stelem->se_strlist, pstrlist = 0;
5950Sstevel@tonic-gate 			    strlist; strlist = strlist->sl_next) {
5960Sstevel@tonic-gate 				if (st_hash_insert(stp, strlist->sl_string,
5970Sstevel@tonic-gate 				    stelem->se_stlen) == -1)
5980Sstevel@tonic-gate 					return (0);
5990Sstevel@tonic-gate 				if (pstrlist)
6000Sstevel@tonic-gate 					free(pstrlist);
6010Sstevel@tonic-gate 			}
6020Sstevel@tonic-gate 			free(pstrlist);
6030Sstevel@tonic-gate 			stelem->se_strlist = 0;
6040Sstevel@tonic-gate 			stelem = AVL_NEXT(stp->st_strtree, stelem);
6050Sstevel@tonic-gate 		}
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate 		/*
6080Sstevel@tonic-gate 		 * Now that all of the strings have been freed,
6090Sstevel@tonic-gate 		 * go ahead and quickly re-walk the AVL tree and
6100Sstevel@tonic-gate 		 * free all of the AVL nodes.
6110Sstevel@tonic-gate 		 *
6120Sstevel@tonic-gate 		 * avl_destroy_nodes() beats avl_remove() because
6130Sstevel@tonic-gate 		 * avl_remove will 'ballance' the tree as nodes
6140Sstevel@tonic-gate 		 * are deleted - we just want to tear the whole
6150Sstevel@tonic-gate 		 * thing down now.
6160Sstevel@tonic-gate 		 */
6170Sstevel@tonic-gate 		cookie = NULL;
6180Sstevel@tonic-gate 		while ((stelem = avl_destroy_nodes(stp->st_strtree,
6190Sstevel@tonic-gate 		    &cookie)) != NULL)
6200Sstevel@tonic-gate 			free(stelem);
6210Sstevel@tonic-gate 		avl_destroy(stp->st_strtree);
6220Sstevel@tonic-gate 		free(stp->st_strtree);
6230Sstevel@tonic-gate 		stp->st_strtree = 0;
6240Sstevel@tonic-gate 	}
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	assert(stp->st_stringsize > 0);
6270Sstevel@tonic-gate 	assert(stp->st_fullstringsize >= stp->st_stringsize);
6280Sstevel@tonic-gate 
6290Sstevel@tonic-gate 	return (stp->st_stringsize);
6300Sstevel@tonic-gate }
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate /*
6330Sstevel@tonic-gate  * Associate a buffer with the string table.
6340Sstevel@tonic-gate  */
6350Sstevel@tonic-gate const char *
6360Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp)
6370Sstevel@tonic-gate {
6380Sstevel@tonic-gate 	return (stp->st_strbuf);
6390Sstevel@tonic-gate }
6400Sstevel@tonic-gate 
6410Sstevel@tonic-gate int
6420Sstevel@tonic-gate st_setstrbuf(Str_tbl *stp, char *stbuf, uint_t bufsize)
6430Sstevel@tonic-gate {
6440Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
6470Sstevel@tonic-gate 		if (bufsize < stp->st_fullstringsize)
6480Sstevel@tonic-gate 			return (-1);
6490Sstevel@tonic-gate 	} else {
6500Sstevel@tonic-gate 		if (bufsize < stp->st_stringsize)
6510Sstevel@tonic-gate 			return (-1);
6520Sstevel@tonic-gate 	}
6530Sstevel@tonic-gate 
6540Sstevel@tonic-gate 	stp->st_strbuf = stbuf;
6550Sstevel@tonic-gate #ifdef	DEBUG
6560Sstevel@tonic-gate 	/*
6570Sstevel@tonic-gate 	 * for debug builds - start with a stringtable filled in
6580Sstevel@tonic-gate 	 * with '0xff'.  This makes it very easy to find wholes
6590Sstevel@tonic-gate 	 * which we failed to fill in - in the strtab.
6600Sstevel@tonic-gate 	 */
6610Sstevel@tonic-gate 	memset(stbuf, 0xff, bufsize);
6620Sstevel@tonic-gate 	stbuf[0] = '\0';
6630Sstevel@tonic-gate #else
6640Sstevel@tonic-gate 	memset(stbuf, 0x0, bufsize);
6650Sstevel@tonic-gate #endif
6660Sstevel@tonic-gate 	return (0);
6670Sstevel@tonic-gate }
668