xref: /onnv-gate/usr/src/common/openssl/doc/crypto/lhash.pod (revision 2175:b0b2f052a486)
1*2175Sjp161948=pod
2*2175Sjp161948
3*2175Sjp161948=head1 NAME
4*2175Sjp161948
5*2175Sjp161948lh_new, lh_free, lh_insert, lh_delete, lh_retrieve, lh_doall, lh_doall_arg, lh_error - dynamic hash table
6*2175Sjp161948
7*2175Sjp161948=head1 SYNOPSIS
8*2175Sjp161948
9*2175Sjp161948 #include <openssl/lhash.h>
10*2175Sjp161948
11*2175Sjp161948 LHASH *lh_new(LHASH_HASH_FN_TYPE hash, LHASH_COMP_FN_TYPE compare);
12*2175Sjp161948 void lh_free(LHASH *table);
13*2175Sjp161948
14*2175Sjp161948 void *lh_insert(LHASH *table, void *data);
15*2175Sjp161948 void *lh_delete(LHASH *table, void *data);
16*2175Sjp161948 void *lh_retrieve(LHASH *table, void *data);
17*2175Sjp161948
18*2175Sjp161948 void lh_doall(LHASH *table, LHASH_DOALL_FN_TYPE func);
19*2175Sjp161948 void lh_doall_arg(LHASH *table, LHASH_DOALL_ARG_FN_TYPE func,
20*2175Sjp161948          void *arg);
21*2175Sjp161948
22*2175Sjp161948 int lh_error(LHASH *table);
23*2175Sjp161948
24*2175Sjp161948 typedef int (*LHASH_COMP_FN_TYPE)(const void *, const void *);
25*2175Sjp161948 typedef unsigned long (*LHASH_HASH_FN_TYPE)(const void *);
26*2175Sjp161948 typedef void (*LHASH_DOALL_FN_TYPE)(const void *);
27*2175Sjp161948 typedef void (*LHASH_DOALL_ARG_FN_TYPE)(const void *, const void *);
28*2175Sjp161948
29*2175Sjp161948=head1 DESCRIPTION
30*2175Sjp161948
31*2175Sjp161948This library implements dynamic hash tables. The hash table entries
32*2175Sjp161948can be arbitrary structures. Usually they consist of key and value
33*2175Sjp161948fields.
34*2175Sjp161948
35*2175Sjp161948lh_new() creates a new B<LHASH> structure to store arbitrary data
36*2175Sjp161948entries, and provides the 'hash' and 'compare' callbacks to be used in
37*2175Sjp161948organising the table's entries.  The B<hash> callback takes a pointer
38*2175Sjp161948to a table entry as its argument and returns an unsigned long hash
39*2175Sjp161948value for its key field.  The hash value is normally truncated to a
40*2175Sjp161948power of 2, so make sure that your hash function returns well mixed
41*2175Sjp161948low order bits.  The B<compare> callback takes two arguments (pointers
42*2175Sjp161948to two hash table entries), and returns 0 if their keys are equal,
43*2175Sjp161948non-zero otherwise.  If your hash table will contain items of some
44*2175Sjp161948particular type and the B<hash> and B<compare> callbacks hash/compare
45*2175Sjp161948these types, then the B<IMPLEMENT_LHASH_HASH_FN> and
46*2175Sjp161948B<IMPLEMENT_LHASH_COMP_FN> macros can be used to create callback
47*2175Sjp161948wrappers of the prototypes required by lh_new().  These provide
48*2175Sjp161948per-variable casts before calling the type-specific callbacks written
49*2175Sjp161948by the application author.  These macros, as well as those used for
50*2175Sjp161948the "doall" callbacks, are defined as:
51*2175Sjp161948
52*2175Sjp161948 #define DECLARE_LHASH_HASH_FN(f_name,o_type) \
53*2175Sjp161948         unsigned long f_name##_LHASH_HASH(const void *);
54*2175Sjp161948 #define IMPLEMENT_LHASH_HASH_FN(f_name,o_type) \
55*2175Sjp161948         unsigned long f_name##_LHASH_HASH(const void *arg) { \
56*2175Sjp161948                 o_type a = (o_type)arg; \
57*2175Sjp161948                 return f_name(a); }
58*2175Sjp161948 #define LHASH_HASH_FN(f_name) f_name##_LHASH_HASH
59*2175Sjp161948
60*2175Sjp161948 #define DECLARE_LHASH_COMP_FN(f_name,o_type) \
61*2175Sjp161948         int f_name##_LHASH_COMP(const void *, const void *);
62*2175Sjp161948 #define IMPLEMENT_LHASH_COMP_FN(f_name,o_type) \
63*2175Sjp161948         int f_name##_LHASH_COMP(const void *arg1, const void *arg2) { \
64*2175Sjp161948                 o_type a = (o_type)arg1; \
65*2175Sjp161948                 o_type b = (o_type)arg2; \
66*2175Sjp161948                 return f_name(a,b); }
67*2175Sjp161948 #define LHASH_COMP_FN(f_name) f_name##_LHASH_COMP
68*2175Sjp161948
69*2175Sjp161948 #define DECLARE_LHASH_DOALL_FN(f_name,o_type) \
70*2175Sjp161948         void f_name##_LHASH_DOALL(const void *);
71*2175Sjp161948 #define IMPLEMENT_LHASH_DOALL_FN(f_name,o_type) \
72*2175Sjp161948         void f_name##_LHASH_DOALL(const void *arg) { \
73*2175Sjp161948                 o_type a = (o_type)arg; \
74*2175Sjp161948                 f_name(a); }
75*2175Sjp161948 #define LHASH_DOALL_FN(f_name) f_name##_LHASH_DOALL
76*2175Sjp161948
77*2175Sjp161948 #define DECLARE_LHASH_DOALL_ARG_FN(f_name,o_type,a_type) \
78*2175Sjp161948         void f_name##_LHASH_DOALL_ARG(const void *, const void *);
79*2175Sjp161948 #define IMPLEMENT_LHASH_DOALL_ARG_FN(f_name,o_type,a_type) \
80*2175Sjp161948         void f_name##_LHASH_DOALL_ARG(const void *arg1, const void *arg2) { \
81*2175Sjp161948                 o_type a = (o_type)arg1; \
82*2175Sjp161948                 a_type b = (a_type)arg2; \
83*2175Sjp161948                 f_name(a,b); }
84*2175Sjp161948 #define LHASH_DOALL_ARG_FN(f_name) f_name##_LHASH_DOALL_ARG
85*2175Sjp161948
86*2175Sjp161948An example of a hash table storing (pointers to) structures of type 'STUFF'
87*2175Sjp161948could be defined as follows:
88*2175Sjp161948
89*2175Sjp161948 /* Calculates the hash value of 'tohash' (implemented elsewhere) */
90*2175Sjp161948 unsigned long STUFF_hash(const STUFF *tohash);
91*2175Sjp161948 /* Orders 'arg1' and 'arg2' (implemented elsewhere) */
92*2175Sjp161948 int STUFF_cmp(const STUFF *arg1, const STUFF *arg2);
93*2175Sjp161948 /* Create the type-safe wrapper functions for use in the LHASH internals */
94*2175Sjp161948 static IMPLEMENT_LHASH_HASH_FN(STUFF_hash, const STUFF *)
95*2175Sjp161948 static IMPLEMENT_LHASH_COMP_FN(STUFF_cmp, const STUFF *);
96*2175Sjp161948 /* ... */
97*2175Sjp161948 int main(int argc, char *argv[]) {
98*2175Sjp161948         /* Create the new hash table using the hash/compare wrappers */
99*2175Sjp161948         LHASH *hashtable = lh_new(LHASH_HASH_FN(STUFF_hash),
100*2175Sjp161948                                   LHASH_COMP_FN(STUFF_cmp));
101*2175Sjp161948	 /* ... */
102*2175Sjp161948 }
103*2175Sjp161948
104*2175Sjp161948lh_free() frees the B<LHASH> structure B<table>. Allocated hash table
105*2175Sjp161948entries will not be freed; consider using lh_doall() to deallocate any
106*2175Sjp161948remaining entries in the hash table (see below).
107*2175Sjp161948
108*2175Sjp161948lh_insert() inserts the structure pointed to by B<data> into B<table>.
109*2175Sjp161948If there already is an entry with the same key, the old value is
110*2175Sjp161948replaced. Note that lh_insert() stores pointers, the data are not
111*2175Sjp161948copied.
112*2175Sjp161948
113*2175Sjp161948lh_delete() deletes an entry from B<table>.
114*2175Sjp161948
115*2175Sjp161948lh_retrieve() looks up an entry in B<table>. Normally, B<data> is
116*2175Sjp161948a structure with the key field(s) set; the function will return a
117*2175Sjp161948pointer to a fully populated structure.
118*2175Sjp161948
119*2175Sjp161948lh_doall() will, for every entry in the hash table, call B<func> with
120*2175Sjp161948the data item as its parameter.  For lh_doall() and lh_doall_arg(),
121*2175Sjp161948function pointer casting should be avoided in the callbacks (see
122*2175Sjp161948B<NOTE>) - instead, either declare the callbacks to match the
123*2175Sjp161948prototype required by lh_doall()/lh_doall_arg() or use the declare/implement macros to
124*2175Sjp161948create type-safe wrappers that cast variables prior to calling your
125*2175Sjp161948type-specific callbacks.  An example of this is illustrated here where
126*2175Sjp161948the callback is used to clean up resources for items in the hash table
127*2175Sjp161948prior to the hashtable itself being deallocated:
128*2175Sjp161948
129*2175Sjp161948 /* Cleans up resources belonging to 'a' (this is implemented elsewhere) */
130*2175Sjp161948 void STUFF_cleanup(STUFF *a);
131*2175Sjp161948 /* Implement a prototype-compatible wrapper for "STUFF_cleanup" */
132*2175Sjp161948 IMPLEMENT_LHASH_DOALL_FN(STUFF_cleanup, STUFF *)
133*2175Sjp161948         /* ... then later in the code ... */
134*2175Sjp161948 /* So to run "STUFF_cleanup" against all items in a hash table ... */
135*2175Sjp161948 lh_doall(hashtable, LHASH_DOALL_FN(STUFF_cleanup));
136*2175Sjp161948 /* Then the hash table itself can be deallocated */
137*2175Sjp161948 lh_free(hashtable);
138*2175Sjp161948
139*2175Sjp161948When doing this, be careful if you delete entries from the hash table
140*2175Sjp161948in your callbacks: the table may decrease in size, moving the item
141*2175Sjp161948that you are currently on down lower in the hash table - this could
142*2175Sjp161948cause some entries to be skipped during the iteration.  The second
143*2175Sjp161948best solution to this problem is to set hash-E<gt>down_load=0 before
144*2175Sjp161948you start (which will stop the hash table ever decreasing in size).
145*2175Sjp161948The best solution is probably to avoid deleting items from the hash
146*2175Sjp161948table inside a "doall" callback!
147*2175Sjp161948
148*2175Sjp161948lh_doall_arg() is the same as lh_doall() except that B<func> will be
149*2175Sjp161948called with B<arg> as the second argument and B<func> should be of
150*2175Sjp161948type B<LHASH_DOALL_ARG_FN_TYPE> (a callback prototype that is passed
151*2175Sjp161948both the table entry and an extra argument).  As with lh_doall(), you
152*2175Sjp161948can instead choose to declare your callback with a prototype matching
153*2175Sjp161948the types you are dealing with and use the declare/implement macros to
154*2175Sjp161948create compatible wrappers that cast variables before calling your
155*2175Sjp161948type-specific callbacks.  An example of this is demonstrated here
156*2175Sjp161948(printing all hash table entries to a BIO that is provided by the
157*2175Sjp161948caller):
158*2175Sjp161948
159*2175Sjp161948 /* Prints item 'a' to 'output_bio' (this is implemented elsewhere) */
160*2175Sjp161948 void STUFF_print(const STUFF *a, BIO *output_bio);
161*2175Sjp161948 /* Implement a prototype-compatible wrapper for "STUFF_print" */
162*2175Sjp161948 static IMPLEMENT_LHASH_DOALL_ARG_FN(STUFF_print, const STUFF *, BIO *)
163*2175Sjp161948         /* ... then later in the code ... */
164*2175Sjp161948 /* Print out the entire hashtable to a particular BIO */
165*2175Sjp161948 lh_doall_arg(hashtable, LHASH_DOALL_ARG_FN(STUFF_print), logging_bio);
166*2175Sjp161948
167*2175Sjp161948lh_error() can be used to determine if an error occurred in the last
168*2175Sjp161948operation. lh_error() is a macro.
169*2175Sjp161948
170*2175Sjp161948=head1 RETURN VALUES
171*2175Sjp161948
172*2175Sjp161948lh_new() returns B<NULL> on error, otherwise a pointer to the new
173*2175Sjp161948B<LHASH> structure.
174*2175Sjp161948
175*2175Sjp161948When a hash table entry is replaced, lh_insert() returns the value
176*2175Sjp161948being replaced. B<NULL> is returned on normal operation and on error.
177*2175Sjp161948
178*2175Sjp161948lh_delete() returns the entry being deleted.  B<NULL> is returned if
179*2175Sjp161948there is no such value in the hash table.
180*2175Sjp161948
181*2175Sjp161948lh_retrieve() returns the hash table entry if it has been found,
182*2175Sjp161948B<NULL> otherwise.
183*2175Sjp161948
184*2175Sjp161948lh_error() returns 1 if an error occurred in the last operation, 0
185*2175Sjp161948otherwise.
186*2175Sjp161948
187*2175Sjp161948lh_free(), lh_doall() and lh_doall_arg() return no values.
188*2175Sjp161948
189*2175Sjp161948=head1 NOTE
190*2175Sjp161948
191*2175Sjp161948The various LHASH macros and callback types exist to make it possible
192*2175Sjp161948to write type-safe code without resorting to function-prototype
193*2175Sjp161948casting - an evil that makes application code much harder to
194*2175Sjp161948audit/verify and also opens the window of opportunity for stack
195*2175Sjp161948corruption and other hard-to-find bugs.  It also, apparently, violates
196*2175Sjp161948ANSI-C.
197*2175Sjp161948
198*2175Sjp161948The LHASH code regards table entries as constant data.  As such, it
199*2175Sjp161948internally represents lh_insert()'d items with a "const void *"
200*2175Sjp161948pointer type.  This is why callbacks such as those used by lh_doall()
201*2175Sjp161948and lh_doall_arg() declare their prototypes with "const", even for the
202*2175Sjp161948parameters that pass back the table items' data pointers - for
203*2175Sjp161948consistency, user-provided data is "const" at all times as far as the
204*2175Sjp161948LHASH code is concerned.  However, as callers are themselves providing
205*2175Sjp161948these pointers, they can choose whether they too should be treating
206*2175Sjp161948all such parameters as constant.
207*2175Sjp161948
208*2175Sjp161948As an example, a hash table may be maintained by code that, for
209*2175Sjp161948reasons of encapsulation, has only "const" access to the data being
210*2175Sjp161948indexed in the hash table (ie. it is returned as "const" from
211*2175Sjp161948elsewhere in their code) - in this case the LHASH prototypes are
212*2175Sjp161948appropriate as-is.  Conversely, if the caller is responsible for the
213*2175Sjp161948life-time of the data in question, then they may well wish to make
214*2175Sjp161948modifications to the table item passed back in the lh_doall() or
215*2175Sjp161948lh_doall_arg() callbacks (see the "STUFF_cleanup" example above).  If
216*2175Sjp161948so, the caller can either cast the "const" away (if they're providing
217*2175Sjp161948the raw callbacks themselves) or use the macros to declare/implement
218*2175Sjp161948the wrapper functions without "const" types.
219*2175Sjp161948
220*2175Sjp161948Callers that only have "const" access to data they're indexing in a
221*2175Sjp161948table, yet declare callbacks without constant types (or cast the
222*2175Sjp161948"const" away themselves), are therefore creating their own risks/bugs
223*2175Sjp161948without being encouraged to do so by the API.  On a related note,
224*2175Sjp161948those auditing code should pay special attention to any instances of
225*2175Sjp161948DECLARE/IMPLEMENT_LHASH_DOALL_[ARG_]FN macros that provide types
226*2175Sjp161948without any "const" qualifiers.
227*2175Sjp161948
228*2175Sjp161948=head1 BUGS
229*2175Sjp161948
230*2175Sjp161948lh_insert() returns B<NULL> both for success and error.
231*2175Sjp161948
232*2175Sjp161948=head1 INTERNALS
233*2175Sjp161948
234*2175Sjp161948The following description is based on the SSLeay documentation:
235*2175Sjp161948
236*2175Sjp161948The B<lhash> library implements a hash table described in the
237*2175Sjp161948I<Communications of the ACM> in 1991.  What makes this hash table
238*2175Sjp161948different is that as the table fills, the hash table is increased (or
239*2175Sjp161948decreased) in size via OPENSSL_realloc().  When a 'resize' is done, instead of
240*2175Sjp161948all hashes being redistributed over twice as many 'buckets', one
241*2175Sjp161948bucket is split.  So when an 'expand' is done, there is only a minimal
242*2175Sjp161948cost to redistribute some values.  Subsequent inserts will cause more
243*2175Sjp161948single 'bucket' redistributions but there will never be a sudden large
244*2175Sjp161948cost due to redistributing all the 'buckets'.
245*2175Sjp161948
246*2175Sjp161948The state for a particular hash table is kept in the B<LHASH> structure.
247*2175Sjp161948The decision to increase or decrease the hash table size is made
248*2175Sjp161948depending on the 'load' of the hash table.  The load is the number of
249*2175Sjp161948items in the hash table divided by the size of the hash table.  The
250*2175Sjp161948default values are as follows.  If (hash-E<gt>up_load E<lt> load) =E<gt>
251*2175Sjp161948expand.  If (hash-E<gt>down_load E<gt> load) =E<gt> contract.  The
252*2175Sjp161948B<up_load> has a default value of 2 and B<down_load> has a default value
253*2175Sjp161948of 1.  These numbers can be modified by the application by just
254*2175Sjp161948playing with the B<up_load> and B<down_load> variables.  The 'load' is
255*2175Sjp161948kept in a form which is multiplied by 256.  So
256*2175Sjp161948hash-E<gt>up_load=8*256; will cause a load of 8 to be set.
257*2175Sjp161948
258*2175Sjp161948If you are interested in performance the field to watch is
259*2175Sjp161948num_comp_calls.  The hash library keeps track of the 'hash' value for
260*2175Sjp161948each item so when a lookup is done, the 'hashes' are compared; if
261*2175Sjp161948there is a match, then a full compare is done, and
262*2175Sjp161948hash-E<gt>num_comp_calls is incremented.  If num_comp_calls is not equal
263*2175Sjp161948to num_delete plus num_retrieve it means that your hash function is
264*2175Sjp161948generating hashes that are the same for different values.  It is
265*2175Sjp161948probably worth changing your hash function if this is the case because
266*2175Sjp161948even if your hash table has 10 items in a 'bucket', it can be searched
267*2175Sjp161948with 10 B<unsigned long> compares and 10 linked list traverses.  This
268*2175Sjp161948will be much less expensive than 10 calls to your compare function.
269*2175Sjp161948
270*2175Sjp161948lh_strhash() is a demo string hashing function:
271*2175Sjp161948
272*2175Sjp161948 unsigned long lh_strhash(const char *c);
273*2175Sjp161948
274*2175Sjp161948Since the B<LHASH> routines would normally be passed structures, this
275*2175Sjp161948routine would not normally be passed to lh_new(), rather it would be
276*2175Sjp161948used in the function passed to lh_new().
277*2175Sjp161948
278*2175Sjp161948=head1 SEE ALSO
279*2175Sjp161948
280*2175Sjp161948L<lh_stats(3)|lh_stats(3)>
281*2175Sjp161948
282*2175Sjp161948=head1 HISTORY
283*2175Sjp161948
284*2175Sjp161948The B<lhash> library is available in all versions of SSLeay and OpenSSL.
285*2175Sjp161948lh_error() was added in SSLeay 0.9.1b.
286*2175Sjp161948
287*2175Sjp161948This manpage is derived from the SSLeay documentation.
288*2175Sjp161948
289*2175Sjp161948In OpenSSL 0.9.7, all lhash functions that were passed function pointers
290*2175Sjp161948were changed for better type safety, and the function types LHASH_COMP_FN_TYPE,
291*2175Sjp161948LHASH_HASH_FN_TYPE, LHASH_DOALL_FN_TYPE and LHASH_DOALL_ARG_FN_TYPE
292*2175Sjp161948became available.
293*2175Sjp161948
294*2175Sjp161948=cut
295