1*38fd1498Szrj /* Hash tables.
2*38fd1498Szrj Copyright (C) 2000-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj This program is free software; you can redistribute it and/or modify it
5*38fd1498Szrj under the terms of the GNU General Public License as published by the
6*38fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
7*38fd1498Szrj later version.
8*38fd1498Szrj
9*38fd1498Szrj This program is distributed in the hope that it will be useful,
10*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
11*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*38fd1498Szrj GNU General Public License for more details.
13*38fd1498Szrj
14*38fd1498Szrj You should have received a copy of the GNU General Public License
15*38fd1498Szrj along with this program; see the file COPYING3. If not see
16*38fd1498Szrj <http://www.gnu.org/licenses/>.
17*38fd1498Szrj
18*38fd1498Szrj In other words, you are welcome to use, share and improve this program.
19*38fd1498Szrj You are forbidden to forbid anyone else to use, share and improve
20*38fd1498Szrj what you give them. Help stamp out software-hoarding! */
21*38fd1498Szrj
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "symtab.h"
25*38fd1498Szrj
26*38fd1498Szrj /* The code below is a specialization of Vladimir Makarov's expandable
27*38fd1498Szrj hash tables (see libiberty/hashtab.c). The abstraction penalty was
28*38fd1498Szrj too high to continue using the generic form. This code knows
29*38fd1498Szrj intrinsically how to calculate a hash value, and how to compare an
30*38fd1498Szrj existing entry with a potential new one. */
31*38fd1498Szrj
32*38fd1498Szrj static unsigned int calc_hash (const unsigned char *, size_t);
33*38fd1498Szrj static void ht_expand (cpp_hash_table *);
34*38fd1498Szrj static double approx_sqrt (double);
35*38fd1498Szrj
36*38fd1498Szrj /* A deleted entry. */
37*38fd1498Szrj #define DELETED ((hashnode) -1)
38*38fd1498Szrj
39*38fd1498Szrj /* Calculate the hash of the string STR of length LEN. */
40*38fd1498Szrj
41*38fd1498Szrj static unsigned int
calc_hash(const unsigned char * str,size_t len)42*38fd1498Szrj calc_hash (const unsigned char *str, size_t len)
43*38fd1498Szrj {
44*38fd1498Szrj size_t n = len;
45*38fd1498Szrj unsigned int r = 0;
46*38fd1498Szrj
47*38fd1498Szrj while (n--)
48*38fd1498Szrj r = HT_HASHSTEP (r, *str++);
49*38fd1498Szrj
50*38fd1498Szrj return HT_HASHFINISH (r, len);
51*38fd1498Szrj }
52*38fd1498Szrj
53*38fd1498Szrj /* Initialize an identifier hashtable. */
54*38fd1498Szrj
55*38fd1498Szrj cpp_hash_table *
ht_create(unsigned int order)56*38fd1498Szrj ht_create (unsigned int order)
57*38fd1498Szrj {
58*38fd1498Szrj unsigned int nslots = 1 << order;
59*38fd1498Szrj cpp_hash_table *table;
60*38fd1498Szrj
61*38fd1498Szrj table = XCNEW (cpp_hash_table);
62*38fd1498Szrj
63*38fd1498Szrj /* Strings need no alignment. */
64*38fd1498Szrj obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65*38fd1498Szrj
66*38fd1498Szrj obstack_alignment_mask (&table->stack) = 0;
67*38fd1498Szrj
68*38fd1498Szrj table->entries = XCNEWVEC (hashnode, nslots);
69*38fd1498Szrj table->entries_owned = true;
70*38fd1498Szrj table->nslots = nslots;
71*38fd1498Szrj return table;
72*38fd1498Szrj }
73*38fd1498Szrj
74*38fd1498Szrj /* Frees all memory associated with a hash table. */
75*38fd1498Szrj
76*38fd1498Szrj void
ht_destroy(cpp_hash_table * table)77*38fd1498Szrj ht_destroy (cpp_hash_table *table)
78*38fd1498Szrj {
79*38fd1498Szrj obstack_free (&table->stack, NULL);
80*38fd1498Szrj if (table->entries_owned)
81*38fd1498Szrj free (table->entries);
82*38fd1498Szrj free (table);
83*38fd1498Szrj }
84*38fd1498Szrj
85*38fd1498Szrj /* Returns the hash entry for the a STR of length LEN. If that string
86*38fd1498Szrj already exists in the table, returns the existing entry. If the
87*38fd1498Szrj identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88*38fd1498Szrj returns NULL. Otherwise insert and returns a new entry. A new
89*38fd1498Szrj string is allocated. */
90*38fd1498Szrj hashnode
ht_lookup(cpp_hash_table * table,const unsigned char * str,size_t len,enum ht_lookup_option insert)91*38fd1498Szrj ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92*38fd1498Szrj enum ht_lookup_option insert)
93*38fd1498Szrj {
94*38fd1498Szrj return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95*38fd1498Szrj insert);
96*38fd1498Szrj }
97*38fd1498Szrj
98*38fd1498Szrj hashnode
ht_lookup_with_hash(cpp_hash_table * table,const unsigned char * str,size_t len,unsigned int hash,enum ht_lookup_option insert)99*38fd1498Szrj ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100*38fd1498Szrj size_t len, unsigned int hash,
101*38fd1498Szrj enum ht_lookup_option insert)
102*38fd1498Szrj {
103*38fd1498Szrj unsigned int hash2;
104*38fd1498Szrj unsigned int index;
105*38fd1498Szrj unsigned int deleted_index = table->nslots;
106*38fd1498Szrj size_t sizemask;
107*38fd1498Szrj hashnode node;
108*38fd1498Szrj
109*38fd1498Szrj sizemask = table->nslots - 1;
110*38fd1498Szrj index = hash & sizemask;
111*38fd1498Szrj table->searches++;
112*38fd1498Szrj
113*38fd1498Szrj node = table->entries[index];
114*38fd1498Szrj
115*38fd1498Szrj if (node != NULL)
116*38fd1498Szrj {
117*38fd1498Szrj if (node == DELETED)
118*38fd1498Szrj deleted_index = index;
119*38fd1498Szrj else if (node->hash_value == hash
120*38fd1498Szrj && HT_LEN (node) == (unsigned int) len
121*38fd1498Szrj && !memcmp (HT_STR (node), str, len))
122*38fd1498Szrj return node;
123*38fd1498Szrj
124*38fd1498Szrj /* hash2 must be odd, so we're guaranteed to visit every possible
125*38fd1498Szrj location in the table during rehashing. */
126*38fd1498Szrj hash2 = ((hash * 17) & sizemask) | 1;
127*38fd1498Szrj
128*38fd1498Szrj for (;;)
129*38fd1498Szrj {
130*38fd1498Szrj table->collisions++;
131*38fd1498Szrj index = (index + hash2) & sizemask;
132*38fd1498Szrj node = table->entries[index];
133*38fd1498Szrj if (node == NULL)
134*38fd1498Szrj break;
135*38fd1498Szrj
136*38fd1498Szrj if (node == DELETED)
137*38fd1498Szrj {
138*38fd1498Szrj if (deleted_index != table->nslots)
139*38fd1498Szrj deleted_index = index;
140*38fd1498Szrj }
141*38fd1498Szrj else if (node->hash_value == hash
142*38fd1498Szrj && HT_LEN (node) == (unsigned int) len
143*38fd1498Szrj && !memcmp (HT_STR (node), str, len))
144*38fd1498Szrj return node;
145*38fd1498Szrj }
146*38fd1498Szrj }
147*38fd1498Szrj
148*38fd1498Szrj if (insert == HT_NO_INSERT)
149*38fd1498Szrj return NULL;
150*38fd1498Szrj
151*38fd1498Szrj /* We prefer to overwrite the first deleted slot we saw. */
152*38fd1498Szrj if (deleted_index != table->nslots)
153*38fd1498Szrj index = deleted_index;
154*38fd1498Szrj
155*38fd1498Szrj node = (*table->alloc_node) (table);
156*38fd1498Szrj table->entries[index] = node;
157*38fd1498Szrj
158*38fd1498Szrj HT_LEN (node) = (unsigned int) len;
159*38fd1498Szrj node->hash_value = hash;
160*38fd1498Szrj
161*38fd1498Szrj if (table->alloc_subobject)
162*38fd1498Szrj {
163*38fd1498Szrj char *chars = (char *) table->alloc_subobject (len + 1);
164*38fd1498Szrj memcpy (chars, str, len);
165*38fd1498Szrj chars[len] = '\0';
166*38fd1498Szrj HT_STR (node) = (const unsigned char *) chars;
167*38fd1498Szrj }
168*38fd1498Szrj else
169*38fd1498Szrj HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170*38fd1498Szrj str, len);
171*38fd1498Szrj
172*38fd1498Szrj if (++table->nelements * 4 >= table->nslots * 3)
173*38fd1498Szrj /* Must expand the string table. */
174*38fd1498Szrj ht_expand (table);
175*38fd1498Szrj
176*38fd1498Szrj return node;
177*38fd1498Szrj }
178*38fd1498Szrj
179*38fd1498Szrj /* Double the size of a hash table, re-hashing existing entries. */
180*38fd1498Szrj
181*38fd1498Szrj static void
ht_expand(cpp_hash_table * table)182*38fd1498Szrj ht_expand (cpp_hash_table *table)
183*38fd1498Szrj {
184*38fd1498Szrj hashnode *nentries, *p, *limit;
185*38fd1498Szrj unsigned int size, sizemask;
186*38fd1498Szrj
187*38fd1498Szrj size = table->nslots * 2;
188*38fd1498Szrj nentries = XCNEWVEC (hashnode, size);
189*38fd1498Szrj sizemask = size - 1;
190*38fd1498Szrj
191*38fd1498Szrj p = table->entries;
192*38fd1498Szrj limit = p + table->nslots;
193*38fd1498Szrj do
194*38fd1498Szrj if (*p && *p != DELETED)
195*38fd1498Szrj {
196*38fd1498Szrj unsigned int index, hash, hash2;
197*38fd1498Szrj
198*38fd1498Szrj hash = (*p)->hash_value;
199*38fd1498Szrj index = hash & sizemask;
200*38fd1498Szrj
201*38fd1498Szrj if (nentries[index])
202*38fd1498Szrj {
203*38fd1498Szrj hash2 = ((hash * 17) & sizemask) | 1;
204*38fd1498Szrj do
205*38fd1498Szrj {
206*38fd1498Szrj index = (index + hash2) & sizemask;
207*38fd1498Szrj }
208*38fd1498Szrj while (nentries[index]);
209*38fd1498Szrj }
210*38fd1498Szrj nentries[index] = *p;
211*38fd1498Szrj }
212*38fd1498Szrj while (++p < limit);
213*38fd1498Szrj
214*38fd1498Szrj if (table->entries_owned)
215*38fd1498Szrj free (table->entries);
216*38fd1498Szrj table->entries_owned = true;
217*38fd1498Szrj table->entries = nentries;
218*38fd1498Szrj table->nslots = size;
219*38fd1498Szrj }
220*38fd1498Szrj
221*38fd1498Szrj /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222*38fd1498Szrj the node, and V. */
223*38fd1498Szrj void
ht_forall(cpp_hash_table * table,ht_cb cb,const void * v)224*38fd1498Szrj ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225*38fd1498Szrj {
226*38fd1498Szrj hashnode *p, *limit;
227*38fd1498Szrj
228*38fd1498Szrj p = table->entries;
229*38fd1498Szrj limit = p + table->nslots;
230*38fd1498Szrj do
231*38fd1498Szrj if (*p && *p != DELETED)
232*38fd1498Szrj {
233*38fd1498Szrj if ((*cb) (table->pfile, *p, v) == 0)
234*38fd1498Szrj break;
235*38fd1498Szrj }
236*38fd1498Szrj while (++p < limit);
237*38fd1498Szrj }
238*38fd1498Szrj
239*38fd1498Szrj /* Like ht_forall, but a nonzero return from the callback means that
240*38fd1498Szrj the entry should be removed from the table. */
241*38fd1498Szrj void
ht_purge(cpp_hash_table * table,ht_cb cb,const void * v)242*38fd1498Szrj ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243*38fd1498Szrj {
244*38fd1498Szrj hashnode *p, *limit;
245*38fd1498Szrj
246*38fd1498Szrj p = table->entries;
247*38fd1498Szrj limit = p + table->nslots;
248*38fd1498Szrj do
249*38fd1498Szrj if (*p && *p != DELETED)
250*38fd1498Szrj {
251*38fd1498Szrj if ((*cb) (table->pfile, *p, v))
252*38fd1498Szrj *p = DELETED;
253*38fd1498Szrj }
254*38fd1498Szrj while (++p < limit);
255*38fd1498Szrj }
256*38fd1498Szrj
257*38fd1498Szrj /* Restore the hash table. */
258*38fd1498Szrj void
ht_load(cpp_hash_table * ht,hashnode * entries,unsigned int nslots,unsigned int nelements,bool own)259*38fd1498Szrj ht_load (cpp_hash_table *ht, hashnode *entries,
260*38fd1498Szrj unsigned int nslots, unsigned int nelements,
261*38fd1498Szrj bool own)
262*38fd1498Szrj {
263*38fd1498Szrj if (ht->entries_owned)
264*38fd1498Szrj free (ht->entries);
265*38fd1498Szrj ht->entries = entries;
266*38fd1498Szrj ht->nslots = nslots;
267*38fd1498Szrj ht->nelements = nelements;
268*38fd1498Szrj ht->entries_owned = own;
269*38fd1498Szrj }
270*38fd1498Szrj
271*38fd1498Szrj /* Dump allocation statistics to stderr. */
272*38fd1498Szrj
273*38fd1498Szrj void
ht_dump_statistics(cpp_hash_table * table)274*38fd1498Szrj ht_dump_statistics (cpp_hash_table *table)
275*38fd1498Szrj {
276*38fd1498Szrj size_t nelts, nids, overhead, headers;
277*38fd1498Szrj size_t total_bytes, longest, deleted = 0;
278*38fd1498Szrj double sum_of_squares, exp_len, exp_len2, exp2_len;
279*38fd1498Szrj hashnode *p, *limit;
280*38fd1498Szrj
281*38fd1498Szrj #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282*38fd1498Szrj ? (x) \
283*38fd1498Szrj : ((x) < 1024*1024*10 \
284*38fd1498Szrj ? (x) / 1024 \
285*38fd1498Szrj : (x) / (1024*1024))))
286*38fd1498Szrj #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287*38fd1498Szrj
288*38fd1498Szrj total_bytes = longest = sum_of_squares = nids = 0;
289*38fd1498Szrj p = table->entries;
290*38fd1498Szrj limit = p + table->nslots;
291*38fd1498Szrj do
292*38fd1498Szrj if (*p == DELETED)
293*38fd1498Szrj ++deleted;
294*38fd1498Szrj else if (*p)
295*38fd1498Szrj {
296*38fd1498Szrj size_t n = HT_LEN (*p);
297*38fd1498Szrj
298*38fd1498Szrj total_bytes += n;
299*38fd1498Szrj sum_of_squares += (double) n * n;
300*38fd1498Szrj if (n > longest)
301*38fd1498Szrj longest = n;
302*38fd1498Szrj nids++;
303*38fd1498Szrj }
304*38fd1498Szrj while (++p < limit);
305*38fd1498Szrj
306*38fd1498Szrj nelts = table->nelements;
307*38fd1498Szrj overhead = obstack_memory_used (&table->stack) - total_bytes;
308*38fd1498Szrj headers = table->nslots * sizeof (hashnode);
309*38fd1498Szrj
310*38fd1498Szrj fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
311*38fd1498Szrj (unsigned long) nelts);
312*38fd1498Szrj fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
313*38fd1498Szrj (unsigned long) nids, nids * 100.0 / nelts);
314*38fd1498Szrj fprintf (stderr, "slots\t\t%lu\n",
315*38fd1498Szrj (unsigned long) table->nslots);
316*38fd1498Szrj fprintf (stderr, "deleted\t\t%lu\n",
317*38fd1498Szrj (unsigned long) deleted);
318*38fd1498Szrj fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
319*38fd1498Szrj SCALE (total_bytes), LABEL (total_bytes),
320*38fd1498Szrj SCALE (overhead), LABEL (overhead));
321*38fd1498Szrj fprintf (stderr, "table size\t%lu%c\n",
322*38fd1498Szrj SCALE (headers), LABEL (headers));
323*38fd1498Szrj
324*38fd1498Szrj exp_len = (double)total_bytes / (double)nelts;
325*38fd1498Szrj exp2_len = exp_len * exp_len;
326*38fd1498Szrj exp_len2 = (double) sum_of_squares / (double) nelts;
327*38fd1498Szrj
328*38fd1498Szrj fprintf (stderr, "coll/search\t%.4f\n",
329*38fd1498Szrj (double) table->collisions / (double) table->searches);
330*38fd1498Szrj fprintf (stderr, "ins/search\t%.4f\n",
331*38fd1498Szrj (double) nelts / (double) table->searches);
332*38fd1498Szrj fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
333*38fd1498Szrj exp_len, approx_sqrt (exp_len2 - exp2_len));
334*38fd1498Szrj fprintf (stderr, "longest entry\t%lu\n",
335*38fd1498Szrj (unsigned long) longest);
336*38fd1498Szrj #undef SCALE
337*38fd1498Szrj #undef LABEL
338*38fd1498Szrj }
339*38fd1498Szrj
340*38fd1498Szrj /* Return the approximate positive square root of a number N. This is for
341*38fd1498Szrj statistical reports, not code generation. */
342*38fd1498Szrj static double
approx_sqrt(double x)343*38fd1498Szrj approx_sqrt (double x)
344*38fd1498Szrj {
345*38fd1498Szrj double s, d;
346*38fd1498Szrj
347*38fd1498Szrj if (x < 0)
348*38fd1498Szrj abort ();
349*38fd1498Szrj if (x == 0)
350*38fd1498Szrj return 0;
351*38fd1498Szrj
352*38fd1498Szrj s = x;
353*38fd1498Szrj do
354*38fd1498Szrj {
355*38fd1498Szrj d = (s * s - x) / (2 * s);
356*38fd1498Szrj s -= d;
357*38fd1498Szrj }
358*38fd1498Szrj while (d > .0001);
359*38fd1498Szrj return s;
360*38fd1498Szrj }
361