1*b1e83836Smrg /* Hash tables.
2*b1e83836Smrg Copyright (C) 2000-2022 Free Software Foundation, Inc.
3*b1e83836Smrg
4*b1e83836Smrg This program is free software; you can redistribute it and/or modify it
5*b1e83836Smrg under the terms of the GNU General Public License as published by the
6*b1e83836Smrg Free Software Foundation; either version 3, or (at your option) any
7*b1e83836Smrg later version.
8*b1e83836Smrg
9*b1e83836Smrg This program is distributed in the hope that it will be useful,
10*b1e83836Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
11*b1e83836Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*b1e83836Smrg GNU General Public License for more details.
13*b1e83836Smrg
14*b1e83836Smrg You should have received a copy of the GNU General Public License
15*b1e83836Smrg along with this program; see the file COPYING3. If not see
16*b1e83836Smrg <http://www.gnu.org/licenses/>.
17*b1e83836Smrg
18*b1e83836Smrg In other words, you are welcome to use, share and improve this program.
19*b1e83836Smrg You are forbidden to forbid anyone else to use, share and improve
20*b1e83836Smrg what you give them. Help stamp out software-hoarding! */
21*b1e83836Smrg
22*b1e83836Smrg #include "config.h"
23*b1e83836Smrg #include "system.h"
24*b1e83836Smrg #include "symtab.h"
25*b1e83836Smrg
26*b1e83836Smrg /* The code below is a specialization of Vladimir Makarov's expandable
27*b1e83836Smrg hash tables (see libiberty/hashtab.c). The abstraction penalty was
28*b1e83836Smrg too high to continue using the generic form. This code knows
29*b1e83836Smrg intrinsically how to calculate a hash value, and how to compare an
30*b1e83836Smrg existing entry with a potential new one. */
31*b1e83836Smrg
32*b1e83836Smrg static unsigned int calc_hash (const unsigned char *, size_t);
33*b1e83836Smrg static void ht_expand (cpp_hash_table *);
34*b1e83836Smrg static double approx_sqrt (double);
35*b1e83836Smrg
36*b1e83836Smrg /* A deleted entry. */
37*b1e83836Smrg #define DELETED ((hashnode) -1)
38*b1e83836Smrg
39*b1e83836Smrg /* Calculate the hash of the string STR of length LEN. */
40*b1e83836Smrg
41*b1e83836Smrg static unsigned int
calc_hash(const unsigned char * str,size_t len)42*b1e83836Smrg calc_hash (const unsigned char *str, size_t len)
43*b1e83836Smrg {
44*b1e83836Smrg size_t n = len;
45*b1e83836Smrg unsigned int r = 0;
46*b1e83836Smrg
47*b1e83836Smrg while (n--)
48*b1e83836Smrg r = HT_HASHSTEP (r, *str++);
49*b1e83836Smrg
50*b1e83836Smrg return HT_HASHFINISH (r, len);
51*b1e83836Smrg }
52*b1e83836Smrg
53*b1e83836Smrg /* Initialize an identifier hashtable. */
54*b1e83836Smrg
55*b1e83836Smrg cpp_hash_table *
ht_create(unsigned int order)56*b1e83836Smrg ht_create (unsigned int order)
57*b1e83836Smrg {
58*b1e83836Smrg unsigned int nslots = 1 << order;
59*b1e83836Smrg cpp_hash_table *table;
60*b1e83836Smrg
61*b1e83836Smrg table = XCNEW (cpp_hash_table);
62*b1e83836Smrg
63*b1e83836Smrg /* Strings need no alignment. */
64*b1e83836Smrg obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65*b1e83836Smrg
66*b1e83836Smrg obstack_alignment_mask (&table->stack) = 0;
67*b1e83836Smrg
68*b1e83836Smrg table->entries = XCNEWVEC (hashnode, nslots);
69*b1e83836Smrg table->entries_owned = true;
70*b1e83836Smrg table->nslots = nslots;
71*b1e83836Smrg return table;
72*b1e83836Smrg }
73*b1e83836Smrg
74*b1e83836Smrg /* Frees all memory associated with a hash table. */
75*b1e83836Smrg
76*b1e83836Smrg void
ht_destroy(cpp_hash_table * table)77*b1e83836Smrg ht_destroy (cpp_hash_table *table)
78*b1e83836Smrg {
79*b1e83836Smrg obstack_free (&table->stack, NULL);
80*b1e83836Smrg if (table->entries_owned)
81*b1e83836Smrg free (table->entries);
82*b1e83836Smrg free (table);
83*b1e83836Smrg }
84*b1e83836Smrg
85*b1e83836Smrg /* Returns the hash entry for the a STR of length LEN. If that string
86*b1e83836Smrg already exists in the table, returns the existing entry. If the
87*b1e83836Smrg identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88*b1e83836Smrg returns NULL. Otherwise insert and returns a new entry. A new
89*b1e83836Smrg string is allocated. */
90*b1e83836Smrg hashnode
ht_lookup(cpp_hash_table * table,const unsigned char * str,size_t len,enum ht_lookup_option insert)91*b1e83836Smrg ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92*b1e83836Smrg enum ht_lookup_option insert)
93*b1e83836Smrg {
94*b1e83836Smrg return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95*b1e83836Smrg insert);
96*b1e83836Smrg }
97*b1e83836Smrg
98*b1e83836Smrg hashnode
ht_lookup_with_hash(cpp_hash_table * table,const unsigned char * str,size_t len,unsigned int hash,enum ht_lookup_option insert)99*b1e83836Smrg ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100*b1e83836Smrg size_t len, unsigned int hash,
101*b1e83836Smrg enum ht_lookup_option insert)
102*b1e83836Smrg {
103*b1e83836Smrg unsigned int hash2;
104*b1e83836Smrg unsigned int index;
105*b1e83836Smrg unsigned int deleted_index = table->nslots;
106*b1e83836Smrg size_t sizemask;
107*b1e83836Smrg hashnode node;
108*b1e83836Smrg
109*b1e83836Smrg sizemask = table->nslots - 1;
110*b1e83836Smrg index = hash & sizemask;
111*b1e83836Smrg table->searches++;
112*b1e83836Smrg
113*b1e83836Smrg node = table->entries[index];
114*b1e83836Smrg
115*b1e83836Smrg if (node != NULL)
116*b1e83836Smrg {
117*b1e83836Smrg if (node == DELETED)
118*b1e83836Smrg deleted_index = index;
119*b1e83836Smrg else if (node->hash_value == hash
120*b1e83836Smrg && HT_LEN (node) == (unsigned int) len
121*b1e83836Smrg && !memcmp (HT_STR (node), str, len))
122*b1e83836Smrg return node;
123*b1e83836Smrg
124*b1e83836Smrg /* hash2 must be odd, so we're guaranteed to visit every possible
125*b1e83836Smrg location in the table during rehashing. */
126*b1e83836Smrg hash2 = ((hash * 17) & sizemask) | 1;
127*b1e83836Smrg
128*b1e83836Smrg for (;;)
129*b1e83836Smrg {
130*b1e83836Smrg table->collisions++;
131*b1e83836Smrg index = (index + hash2) & sizemask;
132*b1e83836Smrg node = table->entries[index];
133*b1e83836Smrg if (node == NULL)
134*b1e83836Smrg break;
135*b1e83836Smrg
136*b1e83836Smrg if (node == DELETED)
137*b1e83836Smrg {
138*b1e83836Smrg if (deleted_index != table->nslots)
139*b1e83836Smrg deleted_index = index;
140*b1e83836Smrg }
141*b1e83836Smrg else if (node->hash_value == hash
142*b1e83836Smrg && HT_LEN (node) == (unsigned int) len
143*b1e83836Smrg && !memcmp (HT_STR (node), str, len))
144*b1e83836Smrg return node;
145*b1e83836Smrg }
146*b1e83836Smrg }
147*b1e83836Smrg
148*b1e83836Smrg if (insert == HT_NO_INSERT)
149*b1e83836Smrg return NULL;
150*b1e83836Smrg
151*b1e83836Smrg /* We prefer to overwrite the first deleted slot we saw. */
152*b1e83836Smrg if (deleted_index != table->nslots)
153*b1e83836Smrg index = deleted_index;
154*b1e83836Smrg
155*b1e83836Smrg node = (*table->alloc_node) (table);
156*b1e83836Smrg table->entries[index] = node;
157*b1e83836Smrg
158*b1e83836Smrg HT_LEN (node) = (unsigned int) len;
159*b1e83836Smrg node->hash_value = hash;
160*b1e83836Smrg
161*b1e83836Smrg if (table->alloc_subobject)
162*b1e83836Smrg {
163*b1e83836Smrg char *chars = (char *) table->alloc_subobject (len + 1);
164*b1e83836Smrg memcpy (chars, str, len);
165*b1e83836Smrg chars[len] = '\0';
166*b1e83836Smrg HT_STR (node) = (const unsigned char *) chars;
167*b1e83836Smrg }
168*b1e83836Smrg else
169*b1e83836Smrg HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170*b1e83836Smrg str, len);
171*b1e83836Smrg
172*b1e83836Smrg if (++table->nelements * 4 >= table->nslots * 3)
173*b1e83836Smrg /* Must expand the string table. */
174*b1e83836Smrg ht_expand (table);
175*b1e83836Smrg
176*b1e83836Smrg return node;
177*b1e83836Smrg }
178*b1e83836Smrg
179*b1e83836Smrg /* Double the size of a hash table, re-hashing existing entries. */
180*b1e83836Smrg
181*b1e83836Smrg static void
ht_expand(cpp_hash_table * table)182*b1e83836Smrg ht_expand (cpp_hash_table *table)
183*b1e83836Smrg {
184*b1e83836Smrg hashnode *nentries, *p, *limit;
185*b1e83836Smrg unsigned int size, sizemask;
186*b1e83836Smrg
187*b1e83836Smrg size = table->nslots * 2;
188*b1e83836Smrg nentries = XCNEWVEC (hashnode, size);
189*b1e83836Smrg sizemask = size - 1;
190*b1e83836Smrg
191*b1e83836Smrg p = table->entries;
192*b1e83836Smrg limit = p + table->nslots;
193*b1e83836Smrg do
194*b1e83836Smrg if (*p && *p != DELETED)
195*b1e83836Smrg {
196*b1e83836Smrg unsigned int index, hash, hash2;
197*b1e83836Smrg
198*b1e83836Smrg hash = (*p)->hash_value;
199*b1e83836Smrg index = hash & sizemask;
200*b1e83836Smrg
201*b1e83836Smrg if (nentries[index])
202*b1e83836Smrg {
203*b1e83836Smrg hash2 = ((hash * 17) & sizemask) | 1;
204*b1e83836Smrg do
205*b1e83836Smrg {
206*b1e83836Smrg index = (index + hash2) & sizemask;
207*b1e83836Smrg }
208*b1e83836Smrg while (nentries[index]);
209*b1e83836Smrg }
210*b1e83836Smrg nentries[index] = *p;
211*b1e83836Smrg }
212*b1e83836Smrg while (++p < limit);
213*b1e83836Smrg
214*b1e83836Smrg if (table->entries_owned)
215*b1e83836Smrg free (table->entries);
216*b1e83836Smrg table->entries_owned = true;
217*b1e83836Smrg table->entries = nentries;
218*b1e83836Smrg table->nslots = size;
219*b1e83836Smrg }
220*b1e83836Smrg
221*b1e83836Smrg /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222*b1e83836Smrg the node, and V. */
223*b1e83836Smrg void
ht_forall(cpp_hash_table * table,ht_cb cb,const void * v)224*b1e83836Smrg ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225*b1e83836Smrg {
226*b1e83836Smrg hashnode *p, *limit;
227*b1e83836Smrg
228*b1e83836Smrg p = table->entries;
229*b1e83836Smrg limit = p + table->nslots;
230*b1e83836Smrg do
231*b1e83836Smrg if (*p && *p != DELETED)
232*b1e83836Smrg {
233*b1e83836Smrg if ((*cb) (table->pfile, *p, v) == 0)
234*b1e83836Smrg break;
235*b1e83836Smrg }
236*b1e83836Smrg while (++p < limit);
237*b1e83836Smrg }
238*b1e83836Smrg
239*b1e83836Smrg /* Like ht_forall, but a nonzero return from the callback means that
240*b1e83836Smrg the entry should be removed from the table. */
241*b1e83836Smrg void
ht_purge(cpp_hash_table * table,ht_cb cb,const void * v)242*b1e83836Smrg ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243*b1e83836Smrg {
244*b1e83836Smrg hashnode *p, *limit;
245*b1e83836Smrg
246*b1e83836Smrg p = table->entries;
247*b1e83836Smrg limit = p + table->nslots;
248*b1e83836Smrg do
249*b1e83836Smrg if (*p && *p != DELETED)
250*b1e83836Smrg {
251*b1e83836Smrg if ((*cb) (table->pfile, *p, v))
252*b1e83836Smrg *p = DELETED;
253*b1e83836Smrg }
254*b1e83836Smrg while (++p < limit);
255*b1e83836Smrg }
256*b1e83836Smrg
257*b1e83836Smrg /* Restore the hash table. */
258*b1e83836Smrg void
ht_load(cpp_hash_table * ht,hashnode * entries,unsigned int nslots,unsigned int nelements,bool own)259*b1e83836Smrg ht_load (cpp_hash_table *ht, hashnode *entries,
260*b1e83836Smrg unsigned int nslots, unsigned int nelements,
261*b1e83836Smrg bool own)
262*b1e83836Smrg {
263*b1e83836Smrg if (ht->entries_owned)
264*b1e83836Smrg free (ht->entries);
265*b1e83836Smrg ht->entries = entries;
266*b1e83836Smrg ht->nslots = nslots;
267*b1e83836Smrg ht->nelements = nelements;
268*b1e83836Smrg ht->entries_owned = own;
269*b1e83836Smrg }
270*b1e83836Smrg
271*b1e83836Smrg /* Dump allocation statistics to stderr. */
272*b1e83836Smrg
273*b1e83836Smrg void
ht_dump_statistics(cpp_hash_table * table)274*b1e83836Smrg ht_dump_statistics (cpp_hash_table *table)
275*b1e83836Smrg {
276*b1e83836Smrg size_t nelts, nids, overhead, headers;
277*b1e83836Smrg size_t total_bytes, longest, deleted = 0;
278*b1e83836Smrg double sum_of_squares, exp_len, exp_len2, exp2_len;
279*b1e83836Smrg hashnode *p, *limit;
280*b1e83836Smrg
281*b1e83836Smrg #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282*b1e83836Smrg ? (x) \
283*b1e83836Smrg : ((x) < 1024*1024*10 \
284*b1e83836Smrg ? (x) / 1024 \
285*b1e83836Smrg : (x) / (1024*1024))))
286*b1e83836Smrg #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287*b1e83836Smrg
288*b1e83836Smrg total_bytes = longest = sum_of_squares = nids = 0;
289*b1e83836Smrg p = table->entries;
290*b1e83836Smrg limit = p + table->nslots;
291*b1e83836Smrg do
292*b1e83836Smrg if (*p == DELETED)
293*b1e83836Smrg ++deleted;
294*b1e83836Smrg else if (*p)
295*b1e83836Smrg {
296*b1e83836Smrg size_t n = HT_LEN (*p);
297*b1e83836Smrg
298*b1e83836Smrg total_bytes += n;
299*b1e83836Smrg sum_of_squares += (double) n * n;
300*b1e83836Smrg if (n > longest)
301*b1e83836Smrg longest = n;
302*b1e83836Smrg nids++;
303*b1e83836Smrg }
304*b1e83836Smrg while (++p < limit);
305*b1e83836Smrg
306*b1e83836Smrg nelts = table->nelements;
307*b1e83836Smrg headers = table->nslots * sizeof (hashnode);
308*b1e83836Smrg
309*b1e83836Smrg fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:",
310*b1e83836Smrg (unsigned long) nelts);
311*b1e83836Smrg fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:",
312*b1e83836Smrg (unsigned long) nids, nids * 100.0 / nelts);
313*b1e83836Smrg fprintf (stderr, "%-32s%lu\n", "slots:",
314*b1e83836Smrg (unsigned long) table->nslots);
315*b1e83836Smrg fprintf (stderr, "%-32s%lu\n", "deleted:",
316*b1e83836Smrg (unsigned long) deleted);
317*b1e83836Smrg
318*b1e83836Smrg if (table->alloc_subobject)
319*b1e83836Smrg fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:",
320*b1e83836Smrg SCALE (total_bytes), LABEL (total_bytes));
321*b1e83836Smrg else
322*b1e83836Smrg {
323*b1e83836Smrg overhead = obstack_memory_used (&table->stack) - total_bytes;
324*b1e83836Smrg fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n",
325*b1e83836Smrg "obstack bytes:",
326*b1e83836Smrg SCALE (total_bytes), LABEL (total_bytes),
327*b1e83836Smrg SCALE (overhead), LABEL (overhead));
328*b1e83836Smrg }
329*b1e83836Smrg fprintf (stderr, "%-32s%lu%c\n", "table size:",
330*b1e83836Smrg SCALE (headers), LABEL (headers));
331*b1e83836Smrg
332*b1e83836Smrg exp_len = (double)total_bytes / (double)nelts;
333*b1e83836Smrg exp2_len = exp_len * exp_len;
334*b1e83836Smrg exp_len2 = (double) sum_of_squares / (double) nelts;
335*b1e83836Smrg
336*b1e83836Smrg fprintf (stderr, "%-32s%.4f\n", "coll/search:",
337*b1e83836Smrg (double) table->collisions / (double) table->searches);
338*b1e83836Smrg fprintf (stderr, "%-32s%.4f\n", "ins/search:",
339*b1e83836Smrg (double) nelts / (double) table->searches);
340*b1e83836Smrg fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n",
341*b1e83836Smrg "avg. entry:",
342*b1e83836Smrg exp_len, approx_sqrt (exp_len2 - exp2_len));
343*b1e83836Smrg fprintf (stderr, "%-32s%lu\n", "longest entry:",
344*b1e83836Smrg (unsigned long) longest);
345*b1e83836Smrg #undef SCALE
346*b1e83836Smrg #undef LABEL
347*b1e83836Smrg }
348*b1e83836Smrg
349*b1e83836Smrg /* Return the approximate positive square root of a number N. This is for
350*b1e83836Smrg statistical reports, not code generation. */
351*b1e83836Smrg static double
approx_sqrt(double x)352*b1e83836Smrg approx_sqrt (double x)
353*b1e83836Smrg {
354*b1e83836Smrg double s, d;
355*b1e83836Smrg
356*b1e83836Smrg if (x < 0)
357*b1e83836Smrg abort ();
358*b1e83836Smrg if (x == 0)
359*b1e83836Smrg return 0;
360*b1e83836Smrg
361*b1e83836Smrg s = x;
362*b1e83836Smrg do
363*b1e83836Smrg {
364*b1e83836Smrg d = (s * s - x) / (2 * s);
365*b1e83836Smrg s -= d;
366*b1e83836Smrg }
367*b1e83836Smrg while (d > .0001);
368*b1e83836Smrg return s;
369*b1e83836Smrg }
370