xref: /onnv-gate/usr/src/cmd/sgs/tools/common/string_table.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <string_table.h>
30*0Sstevel@tonic-gate #include <stdlib.h>
31*0Sstevel@tonic-gate #include <strings.h>
32*0Sstevel@tonic-gate #include <sgs.h>
33*0Sstevel@tonic-gate #include <stdio.h>
34*0Sstevel@tonic-gate 
35*0Sstevel@tonic-gate 
36*0Sstevel@tonic-gate 
37*0Sstevel@tonic-gate /*
38*0Sstevel@tonic-gate  * This file provides the interfaces to build a Str_tbl suitable
39*0Sstevel@tonic-gate  * for use by either the sgsmsg system or a standard ELF
40*0Sstevel@tonic-gate  * SHT_STRTAB.
41*0Sstevel@tonic-gate  *
42*0Sstevel@tonic-gate  * There are two modes which can be used when constructing a
43*0Sstevel@tonic-gate  * string table:
44*0Sstevel@tonic-gate  *
45*0Sstevel@tonic-gate  *	st_new(0)
46*0Sstevel@tonic-gate  *		standard string table - no compression.  This is the
47*0Sstevel@tonic-gate  *		traditional method and fast
48*0Sstevel@tonic-gate  *
49*0Sstevel@tonic-gate  *	st_new(FLG_STNEW_COMPRESS)
50*0Sstevel@tonic-gate  *		build a compressed string table which both
51*0Sstevel@tonic-gate  *		eliminates duplicate strings and permits
52*0Sstevel@tonic-gate  *		strings with common suffixes (atexit vs. exit) to
53*0Sstevel@tonic-gate  *		overlap in the table.  This provides space
54*0Sstevel@tonic-gate  *		savings for many string tables.
55*0Sstevel@tonic-gate  *
56*0Sstevel@tonic-gate  * These string tables are now built with a common interface in a
57*0Sstevel@tonic-gate  * two-pass manner, the first pass it to find all of the strings
58*0Sstevel@tonic-gate  * required for the string-table and to calculate the size that
59*0Sstevel@tonic-gate  * will be required for the final string table.
60*0Sstevel@tonic-gate  *
61*0Sstevel@tonic-gate  * The second pass allocates the string table and populates the
62*0Sstevel@tonic-gate  * strings into the table and returns the offsets the strings
63*0Sstevel@tonic-gate  * have been assigned.
64*0Sstevel@tonic-gate  *
65*0Sstevel@tonic-gate  * The calling sequence to build and populate a string table is:
66*0Sstevel@tonic-gate  *
67*0Sstevel@tonic-gate  *		st_new();		// initialize strtab
68*0Sstevel@tonic-gate  *
69*0Sstevel@tonic-gate  *		st_insert(st1);		// first pass of strings ...
70*0Sstevel@tonic-gate  *					// calculates size required for
71*0Sstevel@tonic-gate  *					// string table
72*0Sstevel@tonic-gate  *
73*0Sstevel@tonic-gate  *		st_delstring(st?);	// remove string previously
74*0Sstevel@tonic-gate  *					// inserted
75*0Sstevel@tonic-gate  *
76*0Sstevel@tonic-gate  *		st_insert(stN);
77*0Sstevel@tonic-gate  *
78*0Sstevel@tonic-gate  *		st_getstrtab_sz();	// freezes strtab and computes
79*0Sstevel@tonic-gate  *					// size of table.
80*0Sstevel@tonic-gate  *
81*0Sstevel@tonic-gate  *		st_setstrbuf();		// associates a final destination
82*0Sstevel@tonic-gate  *					// for the string table
83*0Sstevel@tonic-gate  *
84*0Sstevel@tonic-gate  *		st_setstring(st1);	// populate the string table
85*0Sstevel@tonic-gate  *		...			// offsets are based off of second
86*0Sstevel@tonic-gate  *					// pass	through the string table
87*0Sstevel@tonic-gate  *		st_setstring(stN);
88*0Sstevel@tonic-gate  *
89*0Sstevel@tonic-gate  *		st_destroy();		// tear down string table
90*0Sstevel@tonic-gate  *					// structures.
91*0Sstevel@tonic-gate  *
92*0Sstevel@tonic-gate  * String Suffix Compression Algorithm:
93*0Sstevel@tonic-gate  *
94*0Sstevel@tonic-gate  *   Here's a quick high level overview of the Suffix String
95*0Sstevel@tonic-gate  *   compression algorithm used.  First - the heart of the algorithm
96*0Sstevel@tonic-gate  *   is a Hash table list which represents a dictionary of all unique
97*0Sstevel@tonic-gate  *   strings inserted into the string table.  The hash function for
98*0Sstevel@tonic-gate  *   this table is a standard string hash except that the hash starts
99*0Sstevel@tonic-gate  *   at the last character in the string (&str[n - 1]) and works towards
100*0Sstevel@tonic-gate  *   the first character in the function (&str[0]).  As we compute the
101*0Sstevel@tonic-gate  *   HASH value for a given string, we also compute the hash values
102*0Sstevel@tonic-gate  *   for all of the possible suffix strings for that string.
103*0Sstevel@tonic-gate  *
104*0Sstevel@tonic-gate  *   As we compute the hash - at each character see if the current
105*0Sstevel@tonic-gate  *   suffix string for that hash is already present in the table.  If
106*0Sstevel@tonic-gate  *   it is, and the string is a master string.  Then change that
107*0Sstevel@tonic-gate  *   string to a suffix string of the new string being inserted.
108*0Sstevel@tonic-gate  *
109*0Sstevel@tonic-gate  *   When the final hash value is found (hash for str[0...n]), check
110*0Sstevel@tonic-gate  *   to see if it is in the hash table - if so increment the reference
111*0Sstevel@tonic-gate  *   count for the string.  If it is not yet in the table, insert a
112*0Sstevel@tonic-gate  *   new hash table entry for a master string.
113*0Sstevel@tonic-gate  *
114*0Sstevel@tonic-gate  *   The above method will find all suffixes of a given string given
115*0Sstevel@tonic-gate  *   that the strings are inserted from shortest to longest.  That is
116*0Sstevel@tonic-gate  *   why this is a two phase method, we first collect all of the
117*0Sstevel@tonic-gate  *   strings and store them based off of their length in a nice AVL tree.
118*0Sstevel@tonic-gate  *   Once all of the strings have been submitted we then start the
119*0Sstevel@tonic-gate  *   hash table build by traversing the AVL tree in order and
120*0Sstevel@tonic-gate  *   inserting the strings from shortest to longest as described
121*0Sstevel@tonic-gate  *   above.
122*0Sstevel@tonic-gate  *
123*0Sstevel@tonic-gate  */
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate /* LINTLIBRARY */
126*0Sstevel@tonic-gate 
127*0Sstevel@tonic-gate 
128*0Sstevel@tonic-gate int
129*0Sstevel@tonic-gate strlen_compare(const void *elem1, const void *elem2)
130*0Sstevel@tonic-gate {
131*0Sstevel@tonic-gate 	uint_t	l1, l2;
132*0Sstevel@tonic-gate 	l1 = ((Stringelem *)elem1)->se_stlen;
133*0Sstevel@tonic-gate 	l2 = ((Stringelem *)elem2)->se_stlen;
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate 	if (l1 == l2)
136*0Sstevel@tonic-gate 		return (0);
137*0Sstevel@tonic-gate 	if (l2 < l1)
138*0Sstevel@tonic-gate 		return (1);
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate 	return (-1);
141*0Sstevel@tonic-gate }
142*0Sstevel@tonic-gate 
143*0Sstevel@tonic-gate /*
144*0Sstevel@tonic-gate  * Return a initialized Str_tbl - returns NULL on failure.
145*0Sstevel@tonic-gate  *
146*0Sstevel@tonic-gate  * stflags:
147*0Sstevel@tonic-gate  *
148*0Sstevel@tonic-gate  *	FLG_STNEW_COMPRESS - build a compressed string table
149*0Sstevel@tonic-gate  *
150*0Sstevel@tonic-gate  */
151*0Sstevel@tonic-gate Str_tbl *
152*0Sstevel@tonic-gate st_new(uint_t stflags)
153*0Sstevel@tonic-gate {
154*0Sstevel@tonic-gate 	Str_tbl	*stp;
155*0Sstevel@tonic-gate 
156*0Sstevel@tonic-gate 	if ((stp = calloc(sizeof (Str_tbl), 1)) == 0)
157*0Sstevel@tonic-gate 		return (0);
158*0Sstevel@tonic-gate 
159*0Sstevel@tonic-gate 	/*
160*0Sstevel@tonic-gate 	 * Start with a leading '\0' - it's tradition.
161*0Sstevel@tonic-gate 	 */
162*0Sstevel@tonic-gate 	stp->st_stringsize = stp->st_fullstringsize = stp->st_nextoff = 1;
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate 	/*
165*0Sstevel@tonic-gate 	 * Do we compress this string table
166*0Sstevel@tonic-gate 	 */
167*0Sstevel@tonic-gate 	if ((stflags & FLG_STNEW_COMPRESS) == 0)
168*0Sstevel@tonic-gate 		return (stp);
169*0Sstevel@tonic-gate 
170*0Sstevel@tonic-gate 	stp->st_flags |= FLG_STTAB_COMPRESS;
171*0Sstevel@tonic-gate 	if ((stp->st_strtree = calloc(sizeof (avl_tree_t), 1)) == 0) {
172*0Sstevel@tonic-gate 		return (0);
173*0Sstevel@tonic-gate 	}
174*0Sstevel@tonic-gate 
175*0Sstevel@tonic-gate 	avl_create(stp->st_strtree, &strlen_compare, sizeof (Stringelem),
176*0Sstevel@tonic-gate 		SGSOFFSETOF(Stringelem, se_avlnode));
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 	return (stp);
179*0Sstevel@tonic-gate }
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate /*
182*0Sstevel@tonic-gate  * Tear down a String_Table structure.
183*0Sstevel@tonic-gate  */
184*0Sstevel@tonic-gate void
185*0Sstevel@tonic-gate st_destroy(Str_tbl *stp)
186*0Sstevel@tonic-gate {
187*0Sstevel@tonic-gate 	Str_hash	*sthash, *psthash;
188*0Sstevel@tonic-gate 	Str_master	*mstr, *pmstr;
189*0Sstevel@tonic-gate 	uint_t		i;
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate 	/*
192*0Sstevel@tonic-gate 	 * cleanup the master strings
193*0Sstevel@tonic-gate 	 */
194*0Sstevel@tonic-gate 	for (mstr = stp->st_mstrlist, pmstr = 0; mstr;
195*0Sstevel@tonic-gate 	    mstr = mstr->sm_next) {
196*0Sstevel@tonic-gate 		if (pmstr)
197*0Sstevel@tonic-gate 			free(pmstr);
198*0Sstevel@tonic-gate 		pmstr = mstr;
199*0Sstevel@tonic-gate 	}
200*0Sstevel@tonic-gate 	if (pmstr)
201*0Sstevel@tonic-gate 		free(pmstr);
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate 	if (stp->st_hashbcks) {
204*0Sstevel@tonic-gate 		for (i = 0; i < stp->st_hbckcnt; i++) {
205*0Sstevel@tonic-gate 			for (sthash = stp->st_hashbcks[i], psthash = 0;
206*0Sstevel@tonic-gate 			    sthash; sthash = sthash->hi_next) {
207*0Sstevel@tonic-gate 				if (psthash)
208*0Sstevel@tonic-gate 					free(psthash);
209*0Sstevel@tonic-gate 				psthash = sthash;
210*0Sstevel@tonic-gate 			}
211*0Sstevel@tonic-gate 			if (psthash)
212*0Sstevel@tonic-gate 				free(psthash);
213*0Sstevel@tonic-gate 		}
214*0Sstevel@tonic-gate 		free(stp->st_hashbcks);
215*0Sstevel@tonic-gate 	}
216*0Sstevel@tonic-gate 	free(stp);
217*0Sstevel@tonic-gate }
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate 
220*0Sstevel@tonic-gate 
221*0Sstevel@tonic-gate 
222*0Sstevel@tonic-gate /*
223*0Sstevel@tonic-gate  * Remove a previously inserted string from the Str_tbl
224*0Sstevel@tonic-gate  */
225*0Sstevel@tonic-gate int
226*0Sstevel@tonic-gate st_delstring(Str_tbl *stp, const char *str)
227*0Sstevel@tonic-gate {
228*0Sstevel@tonic-gate 	uint_t		stlen;
229*0Sstevel@tonic-gate 	Stringelem	qstelem;
230*0Sstevel@tonic-gate 	Stringelem	*stelem;
231*0Sstevel@tonic-gate 	Stringlist	*stlist, *pstlist;
232*0Sstevel@tonic-gate 
233*0Sstevel@tonic-gate 	/*
234*0Sstevel@tonic-gate 	 * String table can't have been cooked
235*0Sstevel@tonic-gate 	 */
236*0Sstevel@tonic-gate 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
237*0Sstevel@tonic-gate 
238*0Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
239*0Sstevel@tonic-gate 	stp->st_fullstringsize -= stlen + 1;
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
242*0Sstevel@tonic-gate 		return (0);
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate 	qstelem.se_stlen = stlen;
245*0Sstevel@tonic-gate 	if ((stelem = avl_find(stp->st_strtree, &qstelem, 0)) == NULL) {
246*0Sstevel@tonic-gate 		/*
247*0Sstevel@tonic-gate 		 * no strings of this length recorded, let alone
248*0Sstevel@tonic-gate 		 * this specific string - someone goofed.
249*0Sstevel@tonic-gate 		 */
250*0Sstevel@tonic-gate 		return (-1);
251*0Sstevel@tonic-gate 	}
252*0Sstevel@tonic-gate 
253*0Sstevel@tonic-gate 	pstlist = 0;
254*0Sstevel@tonic-gate 	for (stlist = stelem->se_strlist; stlist; stlist = stlist->sl_next) {
255*0Sstevel@tonic-gate 		if (strcmp(str, stlist->sl_string) == 0)
256*0Sstevel@tonic-gate 			break;
257*0Sstevel@tonic-gate 		pstlist = stlist;
258*0Sstevel@tonic-gate 	}
259*0Sstevel@tonic-gate 
260*0Sstevel@tonic-gate 	if (stlist == 0) {
261*0Sstevel@tonic-gate 		/*
262*0Sstevel@tonic-gate 		 * string was not found
263*0Sstevel@tonic-gate 		 */
264*0Sstevel@tonic-gate 		return (-1);
265*0Sstevel@tonic-gate 	}
266*0Sstevel@tonic-gate 
267*0Sstevel@tonic-gate 	if (pstlist == 0) {
268*0Sstevel@tonic-gate 		/*
269*0Sstevel@tonic-gate 		 * String is first on list.
270*0Sstevel@tonic-gate 		 */
271*0Sstevel@tonic-gate 		stelem->se_strlist = stlist->sl_next;
272*0Sstevel@tonic-gate 	} else {
273*0Sstevel@tonic-gate 		/*
274*0Sstevel@tonic-gate 		 * remove string from list.
275*0Sstevel@tonic-gate 		 */
276*0Sstevel@tonic-gate 		pstlist->sl_next = stlist->sl_next;
277*0Sstevel@tonic-gate 	}
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate 	free(stlist);
280*0Sstevel@tonic-gate 	return (0);
281*0Sstevel@tonic-gate }
282*0Sstevel@tonic-gate 
283*0Sstevel@tonic-gate 
284*0Sstevel@tonic-gate /*
285*0Sstevel@tonic-gate  * Insert a new string into the Str_tbl
286*0Sstevel@tonic-gate  */
287*0Sstevel@tonic-gate int
288*0Sstevel@tonic-gate st_insert(Str_tbl *stp, const char *str)
289*0Sstevel@tonic-gate {
290*0Sstevel@tonic-gate 	uint_t	stlen;
291*0Sstevel@tonic-gate 	Stringelem	qstelem;
292*0Sstevel@tonic-gate 	Stringelem	*stelem;
293*0Sstevel@tonic-gate 	Stringlist	*strlist;
294*0Sstevel@tonic-gate 	avl_index_t	where;
295*0Sstevel@tonic-gate 
296*0Sstevel@tonic-gate 	/*
297*0Sstevel@tonic-gate 	 * String table can't have been cooked
298*0Sstevel@tonic-gate 	 */
299*0Sstevel@tonic-gate 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
300*0Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
301*0Sstevel@tonic-gate 	/*
302*0Sstevel@tonic-gate 	 * Null strings always point to the head of the string
303*0Sstevel@tonic-gate 	 * table - no reason to keep searching.
304*0Sstevel@tonic-gate 	 */
305*0Sstevel@tonic-gate 	if (stlen == 0)
306*0Sstevel@tonic-gate 		return (0);
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate 	stp->st_fullstringsize += stlen + 1;
309*0Sstevel@tonic-gate 	stp->st_stringcnt++;
310*0Sstevel@tonic-gate 
311*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
312*0Sstevel@tonic-gate 		return (0);
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate 	qstelem.se_stlen = strlen(str);
315*0Sstevel@tonic-gate 	if ((stelem = avl_find(stp->st_strtree, &qstelem,
316*0Sstevel@tonic-gate 	    &where)) == NULL) {
317*0Sstevel@tonic-gate 		if ((stelem = calloc(sizeof (Stringelem), 1)) == 0)
318*0Sstevel@tonic-gate 			return (-1);
319*0Sstevel@tonic-gate 		stelem->se_stlen = qstelem.se_stlen;
320*0Sstevel@tonic-gate 		avl_insert(stp->st_strtree, stelem, where);
321*0Sstevel@tonic-gate 	}
322*0Sstevel@tonic-gate 	if ((strlist = malloc(sizeof (Stringlist))) == 0)
323*0Sstevel@tonic-gate 		return (-1);
324*0Sstevel@tonic-gate 
325*0Sstevel@tonic-gate 	strlist->sl_string = str;
326*0Sstevel@tonic-gate 	strlist->sl_next = stelem->se_strlist;
327*0Sstevel@tonic-gate 	stelem->se_strlist = strlist;
328*0Sstevel@tonic-gate 
329*0Sstevel@tonic-gate 	return (0);
330*0Sstevel@tonic-gate }
331*0Sstevel@tonic-gate 
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate /*
334*0Sstevel@tonic-gate  * For a given string - copy it into the buffer associated with
335*0Sstevel@tonic-gate  * the string table - and return the offset it has been assigned.
336*0Sstevel@tonic-gate  *
337*0Sstevel@tonic-gate  * If a value of '-1' is returned - the string was not found in
338*0Sstevel@tonic-gate  * the Str_tbl.
339*0Sstevel@tonic-gate  */
340*0Sstevel@tonic-gate int
341*0Sstevel@tonic-gate st_setstring(Str_tbl *stp, const char *str, uint_t *stoff)
342*0Sstevel@tonic-gate {
343*0Sstevel@tonic-gate 	uint_t		stlen;
344*0Sstevel@tonic-gate 	uint_t		hashval;
345*0Sstevel@tonic-gate 	Str_hash	*sthash;
346*0Sstevel@tonic-gate 	Str_master	*mstr;
347*0Sstevel@tonic-gate 	int		i;
348*0Sstevel@tonic-gate 
349*0Sstevel@tonic-gate 	/*
350*0Sstevel@tonic-gate 	 * String table *must* have been previously cooked
351*0Sstevel@tonic-gate 	 */
352*0Sstevel@tonic-gate 	assert(stp->st_strbuf);
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
355*0Sstevel@tonic-gate 	stlen = (uint_t)strlen(str);
356*0Sstevel@tonic-gate 	/*
357*0Sstevel@tonic-gate 	 * Null string always points to head of string table
358*0Sstevel@tonic-gate 	 */
359*0Sstevel@tonic-gate 	if (stlen == 0) {
360*0Sstevel@tonic-gate 		*stoff = 0;
361*0Sstevel@tonic-gate 		return (0);
362*0Sstevel@tonic-gate 	}
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
365*0Sstevel@tonic-gate 		uint_t		_stoff;
366*0Sstevel@tonic-gate 
367*0Sstevel@tonic-gate 		stlen++;	/* count for trailing '\0' */
368*0Sstevel@tonic-gate 		_stoff = stp->st_nextoff;
369*0Sstevel@tonic-gate 		/*
370*0Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
371*0Sstevel@tonic-gate 		 */
372*0Sstevel@tonic-gate 		if ((_stoff + stlen) > stp->st_fullstringsize)
373*0Sstevel@tonic-gate 			return (-1);
374*0Sstevel@tonic-gate 		memcpy(stp->st_strbuf + _stoff, str, stlen);
375*0Sstevel@tonic-gate 		*stoff = _stoff;
376*0Sstevel@tonic-gate 		stp->st_nextoff += stlen;
377*0Sstevel@tonic-gate 		return (0);
378*0Sstevel@tonic-gate 	}
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate 	/*
381*0Sstevel@tonic-gate 	 * Calculate reverse hash for string
382*0Sstevel@tonic-gate 	 */
383*0Sstevel@tonic-gate 	hashval = HASHSEED;
384*0Sstevel@tonic-gate 	for (i = stlen; i >= 0; i--) {
385*0Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
386*0Sstevel@tonic-gate 			str[i];			/* h = ((h * 33) + c) */
387*0Sstevel@tonic-gate 	}
388*0Sstevel@tonic-gate 
389*0Sstevel@tonic-gate 	for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
390*0Sstevel@tonic-gate 	    sthash = sthash->hi_next) {
391*0Sstevel@tonic-gate 		if (sthash->hi_hashval == hashval) {
392*0Sstevel@tonic-gate 			const char	*hstr;
393*0Sstevel@tonic-gate 
394*0Sstevel@tonic-gate 			hstr = &sthash->hi_mstr->sm_str[
395*0Sstevel@tonic-gate 			    sthash->hi_mstr->sm_stlen -
396*0Sstevel@tonic-gate 			    sthash->hi_stlen];
397*0Sstevel@tonic-gate 			if (strcmp(str, hstr) == 0) {
398*0Sstevel@tonic-gate 				break;
399*0Sstevel@tonic-gate 			}
400*0Sstevel@tonic-gate 		}
401*0Sstevel@tonic-gate 	}
402*0Sstevel@tonic-gate 
403*0Sstevel@tonic-gate 	/*
404*0Sstevel@tonic-gate 	 * Did we find the string?
405*0Sstevel@tonic-gate 	 */
406*0Sstevel@tonic-gate 	if (sthash == 0)
407*0Sstevel@tonic-gate 		return (-1);
408*0Sstevel@tonic-gate 
409*0Sstevel@tonic-gate 	/*
410*0Sstevel@tonic-gate 	 * Has this string been copied into the string table?
411*0Sstevel@tonic-gate 	 */
412*0Sstevel@tonic-gate 	mstr = sthash->hi_mstr;
413*0Sstevel@tonic-gate 	if (mstr->sm_stoff == 0) {
414*0Sstevel@tonic-gate 		uint_t	mstlen = mstr->sm_stlen + 1;
415*0Sstevel@tonic-gate 		mstr->sm_stoff = stp->st_nextoff;
416*0Sstevel@tonic-gate 		/*
417*0Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
418*0Sstevel@tonic-gate 		 */
419*0Sstevel@tonic-gate 		if ((mstr->sm_stoff + mstlen) > stp->st_fullstringsize)
420*0Sstevel@tonic-gate 			return (-1);
421*0Sstevel@tonic-gate 		memcpy(stp->st_strbuf + mstr->sm_stoff, mstr->sm_str,
422*0Sstevel@tonic-gate 			mstlen);
423*0Sstevel@tonic-gate 		stp->st_nextoff += mstlen;
424*0Sstevel@tonic-gate 	}
425*0Sstevel@tonic-gate 	/*
426*0Sstevel@tonic-gate 	 * Calculate offset of (sub)string
427*0Sstevel@tonic-gate 	 */
428*0Sstevel@tonic-gate 	*stoff = mstr->sm_stoff + mstr->sm_stlen - sthash->hi_stlen;
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate 	return (0);
431*0Sstevel@tonic-gate }
432*0Sstevel@tonic-gate 
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate static int
435*0Sstevel@tonic-gate st_hash_insert(Str_tbl *stp, const char *str, uint_t stlen)
436*0Sstevel@tonic-gate {
437*0Sstevel@tonic-gate 	int		i;
438*0Sstevel@tonic-gate 	uint_t		hashval = HASHSEED;
439*0Sstevel@tonic-gate 	uint_t		bckcnt = stp->st_hbckcnt;
440*0Sstevel@tonic-gate 	Str_hash	**hashbcks = stp->st_hashbcks;
441*0Sstevel@tonic-gate 	Str_hash	*sthash;
442*0Sstevel@tonic-gate 	Str_master	*mstr = 0;
443*0Sstevel@tonic-gate 
444*0Sstevel@tonic-gate 	/*
445*0Sstevel@tonic-gate 	 * We use a classic 'Bernstein k=33' hash function.  But
446*0Sstevel@tonic-gate 	 * instead of hashing from the start of the string to the
447*0Sstevel@tonic-gate 	 * end, we do it in reverse.
448*0Sstevel@tonic-gate 	 *
449*0Sstevel@tonic-gate 	 * This way - we are essentially building all of the
450*0Sstevel@tonic-gate 	 * suffix hashvalues as we go.  We can check to see if
451*0Sstevel@tonic-gate 	 * any suffixes already exist in the tree as we generate
452*0Sstevel@tonic-gate 	 * the hash.
453*0Sstevel@tonic-gate 	 */
454*0Sstevel@tonic-gate 	for (i = stlen; i >= 0; i--) {
455*0Sstevel@tonic-gate 
456*0Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
457*0Sstevel@tonic-gate 			str[i];			/* h = ((h * 33) + c) */
458*0Sstevel@tonic-gate 		for (sthash = hashbcks[hashval % bckcnt];
459*0Sstevel@tonic-gate 		    sthash; sthash = sthash->hi_next) {
460*0Sstevel@tonic-gate 
461*0Sstevel@tonic-gate 			if (sthash->hi_hashval == hashval) {
462*0Sstevel@tonic-gate 				const char	*hstr;
463*0Sstevel@tonic-gate 				Str_master	*_mstr;
464*0Sstevel@tonic-gate 
465*0Sstevel@tonic-gate 				_mstr = sthash->hi_mstr;
466*0Sstevel@tonic-gate 				hstr = &_mstr->sm_str[_mstr->sm_stlen -
467*0Sstevel@tonic-gate 				    sthash->hi_stlen];
468*0Sstevel@tonic-gate 				if (strcmp(&str[i], hstr) == 0) {
469*0Sstevel@tonic-gate 					if (i == 0) {
470*0Sstevel@tonic-gate 						/*
471*0Sstevel@tonic-gate 						 * Entry already in table,
472*0Sstevel@tonic-gate 						 * increment refcnt and get
473*0Sstevel@tonic-gate 						 * out.
474*0Sstevel@tonic-gate 						 */
475*0Sstevel@tonic-gate 						sthash->hi_refcnt++;
476*0Sstevel@tonic-gate 						return (0);
477*0Sstevel@tonic-gate 					} else {
478*0Sstevel@tonic-gate 						/*
479*0Sstevel@tonic-gate 						 * If this 'suffix' is
480*0Sstevel@tonic-gate 						 * presently a 'master' string,
481*0Sstevel@tonic-gate 						 * then take over it's record.
482*0Sstevel@tonic-gate 						 */
483*0Sstevel@tonic-gate 						if (sthash->hi_stlen ==
484*0Sstevel@tonic-gate 						    _mstr->sm_stlen) {
485*0Sstevel@tonic-gate 							/*
486*0Sstevel@tonic-gate 							 * we should only do
487*0Sstevel@tonic-gate 							 * this once.
488*0Sstevel@tonic-gate 							 */
489*0Sstevel@tonic-gate 							assert(mstr == 0);
490*0Sstevel@tonic-gate 							mstr = _mstr;
491*0Sstevel@tonic-gate 						}
492*0Sstevel@tonic-gate 					}
493*0Sstevel@tonic-gate 				}
494*0Sstevel@tonic-gate 			}
495*0Sstevel@tonic-gate 		}
496*0Sstevel@tonic-gate 	}
497*0Sstevel@tonic-gate 
498*0Sstevel@tonic-gate 
499*0Sstevel@tonic-gate 	/*
500*0Sstevel@tonic-gate 	 * Do we need a new master string, or can we take over
501*0Sstevel@tonic-gate 	 * one we already found in the table?
502*0Sstevel@tonic-gate 	 */
503*0Sstevel@tonic-gate 	if (mstr == 0) {
504*0Sstevel@tonic-gate 		/*
505*0Sstevel@tonic-gate 		 * allocate a new master string
506*0Sstevel@tonic-gate 		 */
507*0Sstevel@tonic-gate 		if ((mstr = calloc(sizeof (Str_hash), 1)) == 0)
508*0Sstevel@tonic-gate 			return (-1);
509*0Sstevel@tonic-gate 		mstr->sm_next = stp->st_mstrlist;
510*0Sstevel@tonic-gate 		stp->st_mstrlist = mstr;
511*0Sstevel@tonic-gate 		stp->st_stringsize += stlen + 1;
512*0Sstevel@tonic-gate 	} else {
513*0Sstevel@tonic-gate 		/*
514*0Sstevel@tonic-gate 		 * We are taking over a existing master string,
515*0Sstevel@tonic-gate 		 * the stringsize only increments by the
516*0Sstevel@tonic-gate 		 * difference between the currnet string and the
517*0Sstevel@tonic-gate 		 * previous master.
518*0Sstevel@tonic-gate 		 */
519*0Sstevel@tonic-gate 		assert(stlen > mstr->sm_stlen);
520*0Sstevel@tonic-gate 		stp->st_stringsize += stlen - mstr->sm_stlen;
521*0Sstevel@tonic-gate 	}
522*0Sstevel@tonic-gate 
523*0Sstevel@tonic-gate 	if ((sthash = calloc(sizeof (Str_hash), 1)) == 0)
524*0Sstevel@tonic-gate 		return (-1);
525*0Sstevel@tonic-gate 
526*0Sstevel@tonic-gate 	mstr->sm_hashval = sthash->hi_hashval = hashval;
527*0Sstevel@tonic-gate 	mstr->sm_stlen = sthash->hi_stlen = stlen;
528*0Sstevel@tonic-gate 	mstr->sm_str = str;
529*0Sstevel@tonic-gate 	sthash->hi_refcnt = 1;
530*0Sstevel@tonic-gate 	sthash->hi_mstr = mstr;
531*0Sstevel@tonic-gate 
532*0Sstevel@tonic-gate 	/*
533*0Sstevel@tonic-gate 	 * Insert string element into head of hash list
534*0Sstevel@tonic-gate 	 */
535*0Sstevel@tonic-gate 	hashval = hashval % bckcnt;
536*0Sstevel@tonic-gate 	sthash->hi_next = hashbcks[hashval];
537*0Sstevel@tonic-gate 	hashbcks[hashval] = sthash;
538*0Sstevel@tonic-gate 	return (0);
539*0Sstevel@tonic-gate }
540*0Sstevel@tonic-gate 
541*0Sstevel@tonic-gate /*
542*0Sstevel@tonic-gate  * Return amount of space required for the string table.
543*0Sstevel@tonic-gate  */
544*0Sstevel@tonic-gate uint_t
545*0Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp)
546*0Sstevel@tonic-gate {
547*0Sstevel@tonic-gate 	assert(stp->st_fullstringsize > 0);
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
550*0Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
551*0Sstevel@tonic-gate 		return (stp->st_fullstringsize);
552*0Sstevel@tonic-gate 	}
553*0Sstevel@tonic-gate 
554*0Sstevel@tonic-gate 
555*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COOKED) == 0) {
556*0Sstevel@tonic-gate 		Stringelem	*stelem;
557*0Sstevel@tonic-gate 		void		*cookie;
558*0Sstevel@tonic-gate 
559*0Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
560*0Sstevel@tonic-gate 		/*
561*0Sstevel@tonic-gate 		 * allocate a hash table about the size of # of
562*0Sstevel@tonic-gate 		 * strings input.
563*0Sstevel@tonic-gate 		 */
564*0Sstevel@tonic-gate 		stp->st_hbckcnt = findprime(stp->st_stringcnt);
565*0Sstevel@tonic-gate 		if ((stp->st_hashbcks =
566*0Sstevel@tonic-gate 		    calloc(sizeof (Str_hash), stp->st_hbckcnt)) == NULL)
567*0Sstevel@tonic-gate 			return (0);
568*0Sstevel@tonic-gate 
569*0Sstevel@tonic-gate 		/*
570*0Sstevel@tonic-gate 		 * We now walk all of the strings in the list,
571*0Sstevel@tonic-gate 		 * from shortest to longest, and insert them into
572*0Sstevel@tonic-gate 		 * the hashtable.
573*0Sstevel@tonic-gate 		 */
574*0Sstevel@tonic-gate 		if ((stelem = avl_first(stp->st_strtree)) == NULL) {
575*0Sstevel@tonic-gate 			/*
576*0Sstevel@tonic-gate 			 * Is it possible we have a empty string table,
577*0Sstevel@tonic-gate 			 * if so - the table still conains '\0'
578*0Sstevel@tonic-gate 			 * so still return the size.
579*0Sstevel@tonic-gate 			 */
580*0Sstevel@tonic-gate 			if (avl_numnodes(stp->st_strtree) == 0) {
581*0Sstevel@tonic-gate 				assert(stp->st_stringsize == 1);
582*0Sstevel@tonic-gate 				return (stp->st_stringsize);
583*0Sstevel@tonic-gate 			}
584*0Sstevel@tonic-gate 			return (0);
585*0Sstevel@tonic-gate 		}
586*0Sstevel@tonic-gate 		while (stelem) {
587*0Sstevel@tonic-gate 			Stringlist	*strlist, *pstrlist;
588*0Sstevel@tonic-gate 
589*0Sstevel@tonic-gate 			/*
590*0Sstevel@tonic-gate 			 * Walk the string lists and insert them
591*0Sstevel@tonic-gate 			 * into the hash list.  Once a string is
592*0Sstevel@tonic-gate 			 * inserted we no longer need it's entry,
593*0Sstevel@tonic-gate 			 * so free it
594*0Sstevel@tonic-gate 			 */
595*0Sstevel@tonic-gate 			for (strlist = stelem->se_strlist, pstrlist = 0;
596*0Sstevel@tonic-gate 			    strlist; strlist = strlist->sl_next) {
597*0Sstevel@tonic-gate 				if (st_hash_insert(stp, strlist->sl_string,
598*0Sstevel@tonic-gate 				    stelem->se_stlen) == -1)
599*0Sstevel@tonic-gate 					return (0);
600*0Sstevel@tonic-gate 				if (pstrlist)
601*0Sstevel@tonic-gate 					free(pstrlist);
602*0Sstevel@tonic-gate 			}
603*0Sstevel@tonic-gate 			free(pstrlist);
604*0Sstevel@tonic-gate 			stelem->se_strlist = 0;
605*0Sstevel@tonic-gate 			stelem = AVL_NEXT(stp->st_strtree, stelem);
606*0Sstevel@tonic-gate 		}
607*0Sstevel@tonic-gate 
608*0Sstevel@tonic-gate 		/*
609*0Sstevel@tonic-gate 		 * Now that all of the strings have been freed,
610*0Sstevel@tonic-gate 		 * go ahead and quickly re-walk the AVL tree and
611*0Sstevel@tonic-gate 		 * free all of the AVL nodes.
612*0Sstevel@tonic-gate 		 *
613*0Sstevel@tonic-gate 		 * avl_destroy_nodes() beats avl_remove() because
614*0Sstevel@tonic-gate 		 * avl_remove will 'ballance' the tree as nodes
615*0Sstevel@tonic-gate 		 * are deleted - we just want to tear the whole
616*0Sstevel@tonic-gate 		 * thing down now.
617*0Sstevel@tonic-gate 		 */
618*0Sstevel@tonic-gate 		cookie = NULL;
619*0Sstevel@tonic-gate 		while ((stelem = avl_destroy_nodes(stp->st_strtree,
620*0Sstevel@tonic-gate 		    &cookie)) != NULL)
621*0Sstevel@tonic-gate 			free(stelem);
622*0Sstevel@tonic-gate 		avl_destroy(stp->st_strtree);
623*0Sstevel@tonic-gate 		free(stp->st_strtree);
624*0Sstevel@tonic-gate 		stp->st_strtree = 0;
625*0Sstevel@tonic-gate 	}
626*0Sstevel@tonic-gate 
627*0Sstevel@tonic-gate 	assert(stp->st_stringsize > 0);
628*0Sstevel@tonic-gate 	assert(stp->st_fullstringsize >= stp->st_stringsize);
629*0Sstevel@tonic-gate 
630*0Sstevel@tonic-gate 	return (stp->st_stringsize);
631*0Sstevel@tonic-gate }
632*0Sstevel@tonic-gate 
633*0Sstevel@tonic-gate /*
634*0Sstevel@tonic-gate  * Associate a buffer with the string table.
635*0Sstevel@tonic-gate  */
636*0Sstevel@tonic-gate const char *
637*0Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp)
638*0Sstevel@tonic-gate {
639*0Sstevel@tonic-gate 	return (stp->st_strbuf);
640*0Sstevel@tonic-gate }
641*0Sstevel@tonic-gate 
642*0Sstevel@tonic-gate int
643*0Sstevel@tonic-gate st_setstrbuf(Str_tbl *stp, char *stbuf, uint_t bufsize)
644*0Sstevel@tonic-gate {
645*0Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
646*0Sstevel@tonic-gate 
647*0Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
648*0Sstevel@tonic-gate 		if (bufsize < stp->st_fullstringsize)
649*0Sstevel@tonic-gate 			return (-1);
650*0Sstevel@tonic-gate 	} else {
651*0Sstevel@tonic-gate 		if (bufsize < stp->st_stringsize)
652*0Sstevel@tonic-gate 			return (-1);
653*0Sstevel@tonic-gate 	}
654*0Sstevel@tonic-gate 
655*0Sstevel@tonic-gate 	stp->st_strbuf = stbuf;
656*0Sstevel@tonic-gate #ifdef	DEBUG
657*0Sstevel@tonic-gate 	/*
658*0Sstevel@tonic-gate 	 * for debug builds - start with a stringtable filled in
659*0Sstevel@tonic-gate 	 * with '0xff'.  This makes it very easy to find wholes
660*0Sstevel@tonic-gate 	 * which we failed to fill in - in the strtab.
661*0Sstevel@tonic-gate 	 */
662*0Sstevel@tonic-gate 	memset(stbuf, 0xff, bufsize);
663*0Sstevel@tonic-gate 	stbuf[0] = '\0';
664*0Sstevel@tonic-gate #else
665*0Sstevel@tonic-gate 	memset(stbuf, 0x0, bufsize);
666*0Sstevel@tonic-gate #endif
667*0Sstevel@tonic-gate 	return (0);
668*0Sstevel@tonic-gate }
669