xref: /netbsd-src/external/gpl3/gcc/dist/libcpp/symtab.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1*b1e83836Smrg /* Hash tables.
2*b1e83836Smrg    Copyright (C) 2000-2022 Free Software Foundation, Inc.
3*b1e83836Smrg 
4*b1e83836Smrg This program is free software; you can redistribute it and/or modify it
5*b1e83836Smrg under the terms of the GNU General Public License as published by the
6*b1e83836Smrg Free Software Foundation; either version 3, or (at your option) any
7*b1e83836Smrg later version.
8*b1e83836Smrg 
9*b1e83836Smrg This program is distributed in the hope that it will be useful,
10*b1e83836Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
11*b1e83836Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*b1e83836Smrg GNU General Public License for more details.
13*b1e83836Smrg 
14*b1e83836Smrg You should have received a copy of the GNU General Public License
15*b1e83836Smrg along with this program; see the file COPYING3.  If not see
16*b1e83836Smrg <http://www.gnu.org/licenses/>.
17*b1e83836Smrg 
18*b1e83836Smrg  In other words, you are welcome to use, share and improve this program.
19*b1e83836Smrg  You are forbidden to forbid anyone else to use, share and improve
20*b1e83836Smrg  what you give them.   Help stamp out software-hoarding!  */
21*b1e83836Smrg 
22*b1e83836Smrg #include "config.h"
23*b1e83836Smrg #include "system.h"
24*b1e83836Smrg #include "symtab.h"
25*b1e83836Smrg 
26*b1e83836Smrg /* The code below is a specialization of Vladimir Makarov's expandable
27*b1e83836Smrg    hash tables (see libiberty/hashtab.c).  The abstraction penalty was
28*b1e83836Smrg    too high to continue using the generic form.  This code knows
29*b1e83836Smrg    intrinsically how to calculate a hash value, and how to compare an
30*b1e83836Smrg    existing entry with a potential new one.  */
31*b1e83836Smrg 
32*b1e83836Smrg static unsigned int calc_hash (const unsigned char *, size_t);
33*b1e83836Smrg static void ht_expand (cpp_hash_table *);
34*b1e83836Smrg static double approx_sqrt (double);
35*b1e83836Smrg 
36*b1e83836Smrg /* A deleted entry.  */
37*b1e83836Smrg #define DELETED ((hashnode) -1)
38*b1e83836Smrg 
39*b1e83836Smrg /* Calculate the hash of the string STR of length LEN.  */
40*b1e83836Smrg 
41*b1e83836Smrg static unsigned int
calc_hash(const unsigned char * str,size_t len)42*b1e83836Smrg calc_hash (const unsigned char *str, size_t len)
43*b1e83836Smrg {
44*b1e83836Smrg   size_t n = len;
45*b1e83836Smrg   unsigned int r = 0;
46*b1e83836Smrg 
47*b1e83836Smrg   while (n--)
48*b1e83836Smrg     r = HT_HASHSTEP (r, *str++);
49*b1e83836Smrg 
50*b1e83836Smrg   return HT_HASHFINISH (r, len);
51*b1e83836Smrg }
52*b1e83836Smrg 
53*b1e83836Smrg /* Initialize an identifier hashtable.  */
54*b1e83836Smrg 
55*b1e83836Smrg cpp_hash_table *
ht_create(unsigned int order)56*b1e83836Smrg ht_create (unsigned int order)
57*b1e83836Smrg {
58*b1e83836Smrg   unsigned int nslots = 1 << order;
59*b1e83836Smrg   cpp_hash_table *table;
60*b1e83836Smrg 
61*b1e83836Smrg   table = XCNEW (cpp_hash_table);
62*b1e83836Smrg 
63*b1e83836Smrg   /* Strings need no alignment.  */
64*b1e83836Smrg   obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65*b1e83836Smrg 
66*b1e83836Smrg   obstack_alignment_mask (&table->stack) = 0;
67*b1e83836Smrg 
68*b1e83836Smrg   table->entries = XCNEWVEC (hashnode, nslots);
69*b1e83836Smrg   table->entries_owned = true;
70*b1e83836Smrg   table->nslots = nslots;
71*b1e83836Smrg   return table;
72*b1e83836Smrg }
73*b1e83836Smrg 
74*b1e83836Smrg /* Frees all memory associated with a hash table.  */
75*b1e83836Smrg 
76*b1e83836Smrg void
ht_destroy(cpp_hash_table * table)77*b1e83836Smrg ht_destroy (cpp_hash_table *table)
78*b1e83836Smrg {
79*b1e83836Smrg   obstack_free (&table->stack, NULL);
80*b1e83836Smrg   if (table->entries_owned)
81*b1e83836Smrg     free (table->entries);
82*b1e83836Smrg   free (table);
83*b1e83836Smrg }
84*b1e83836Smrg 
85*b1e83836Smrg /* Returns the hash entry for the a STR of length LEN.  If that string
86*b1e83836Smrg    already exists in the table, returns the existing entry.  If the
87*b1e83836Smrg    identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88*b1e83836Smrg    returns NULL.  Otherwise insert and returns a new entry.  A new
89*b1e83836Smrg    string is allocated.  */
90*b1e83836Smrg hashnode
ht_lookup(cpp_hash_table * table,const unsigned char * str,size_t len,enum ht_lookup_option insert)91*b1e83836Smrg ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92*b1e83836Smrg 	   enum ht_lookup_option insert)
93*b1e83836Smrg {
94*b1e83836Smrg   return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95*b1e83836Smrg 			      insert);
96*b1e83836Smrg }
97*b1e83836Smrg 
98*b1e83836Smrg hashnode
ht_lookup_with_hash(cpp_hash_table * table,const unsigned char * str,size_t len,unsigned int hash,enum ht_lookup_option insert)99*b1e83836Smrg ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100*b1e83836Smrg 		     size_t len, unsigned int hash,
101*b1e83836Smrg 		     enum ht_lookup_option insert)
102*b1e83836Smrg {
103*b1e83836Smrg   unsigned int hash2;
104*b1e83836Smrg   unsigned int index;
105*b1e83836Smrg   unsigned int deleted_index = table->nslots;
106*b1e83836Smrg   size_t sizemask;
107*b1e83836Smrg   hashnode node;
108*b1e83836Smrg 
109*b1e83836Smrg   sizemask = table->nslots - 1;
110*b1e83836Smrg   index = hash & sizemask;
111*b1e83836Smrg   table->searches++;
112*b1e83836Smrg 
113*b1e83836Smrg   node = table->entries[index];
114*b1e83836Smrg 
115*b1e83836Smrg   if (node != NULL)
116*b1e83836Smrg     {
117*b1e83836Smrg       if (node == DELETED)
118*b1e83836Smrg 	deleted_index = index;
119*b1e83836Smrg       else if (node->hash_value == hash
120*b1e83836Smrg 	       && HT_LEN (node) == (unsigned int) len
121*b1e83836Smrg 	       && !memcmp (HT_STR (node), str, len))
122*b1e83836Smrg 	return node;
123*b1e83836Smrg 
124*b1e83836Smrg       /* hash2 must be odd, so we're guaranteed to visit every possible
125*b1e83836Smrg 	 location in the table during rehashing.  */
126*b1e83836Smrg       hash2 = ((hash * 17) & sizemask) | 1;
127*b1e83836Smrg 
128*b1e83836Smrg       for (;;)
129*b1e83836Smrg 	{
130*b1e83836Smrg 	  table->collisions++;
131*b1e83836Smrg 	  index = (index + hash2) & sizemask;
132*b1e83836Smrg 	  node = table->entries[index];
133*b1e83836Smrg 	  if (node == NULL)
134*b1e83836Smrg 	    break;
135*b1e83836Smrg 
136*b1e83836Smrg 	  if (node == DELETED)
137*b1e83836Smrg 	    {
138*b1e83836Smrg 	      if (deleted_index != table->nslots)
139*b1e83836Smrg 		deleted_index = index;
140*b1e83836Smrg 	    }
141*b1e83836Smrg 	  else if (node->hash_value == hash
142*b1e83836Smrg 		   && HT_LEN (node) == (unsigned int) len
143*b1e83836Smrg 		   && !memcmp (HT_STR (node), str, len))
144*b1e83836Smrg 	    return node;
145*b1e83836Smrg 	}
146*b1e83836Smrg     }
147*b1e83836Smrg 
148*b1e83836Smrg   if (insert == HT_NO_INSERT)
149*b1e83836Smrg     return NULL;
150*b1e83836Smrg 
151*b1e83836Smrg   /* We prefer to overwrite the first deleted slot we saw.  */
152*b1e83836Smrg   if (deleted_index != table->nslots)
153*b1e83836Smrg     index = deleted_index;
154*b1e83836Smrg 
155*b1e83836Smrg   node = (*table->alloc_node) (table);
156*b1e83836Smrg   table->entries[index] = node;
157*b1e83836Smrg 
158*b1e83836Smrg   HT_LEN (node) = (unsigned int) len;
159*b1e83836Smrg   node->hash_value = hash;
160*b1e83836Smrg 
161*b1e83836Smrg   if (table->alloc_subobject)
162*b1e83836Smrg     {
163*b1e83836Smrg       char *chars = (char *) table->alloc_subobject (len + 1);
164*b1e83836Smrg       memcpy (chars, str, len);
165*b1e83836Smrg       chars[len] = '\0';
166*b1e83836Smrg       HT_STR (node) = (const unsigned char *) chars;
167*b1e83836Smrg     }
168*b1e83836Smrg   else
169*b1e83836Smrg     HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170*b1e83836Smrg 							   str, len);
171*b1e83836Smrg 
172*b1e83836Smrg   if (++table->nelements * 4 >= table->nslots * 3)
173*b1e83836Smrg     /* Must expand the string table.  */
174*b1e83836Smrg     ht_expand (table);
175*b1e83836Smrg 
176*b1e83836Smrg   return node;
177*b1e83836Smrg }
178*b1e83836Smrg 
179*b1e83836Smrg /* Double the size of a hash table, re-hashing existing entries.  */
180*b1e83836Smrg 
181*b1e83836Smrg static void
ht_expand(cpp_hash_table * table)182*b1e83836Smrg ht_expand (cpp_hash_table *table)
183*b1e83836Smrg {
184*b1e83836Smrg   hashnode *nentries, *p, *limit;
185*b1e83836Smrg   unsigned int size, sizemask;
186*b1e83836Smrg 
187*b1e83836Smrg   size = table->nslots * 2;
188*b1e83836Smrg   nentries = XCNEWVEC (hashnode, size);
189*b1e83836Smrg   sizemask = size - 1;
190*b1e83836Smrg 
191*b1e83836Smrg   p = table->entries;
192*b1e83836Smrg   limit = p + table->nslots;
193*b1e83836Smrg   do
194*b1e83836Smrg     if (*p && *p != DELETED)
195*b1e83836Smrg       {
196*b1e83836Smrg 	unsigned int index, hash, hash2;
197*b1e83836Smrg 
198*b1e83836Smrg 	hash = (*p)->hash_value;
199*b1e83836Smrg 	index = hash & sizemask;
200*b1e83836Smrg 
201*b1e83836Smrg 	if (nentries[index])
202*b1e83836Smrg 	  {
203*b1e83836Smrg 	    hash2 = ((hash * 17) & sizemask) | 1;
204*b1e83836Smrg 	    do
205*b1e83836Smrg 	      {
206*b1e83836Smrg 		index = (index + hash2) & sizemask;
207*b1e83836Smrg 	      }
208*b1e83836Smrg 	    while (nentries[index]);
209*b1e83836Smrg 	  }
210*b1e83836Smrg 	nentries[index] = *p;
211*b1e83836Smrg       }
212*b1e83836Smrg   while (++p < limit);
213*b1e83836Smrg 
214*b1e83836Smrg   if (table->entries_owned)
215*b1e83836Smrg     free (table->entries);
216*b1e83836Smrg   table->entries_owned = true;
217*b1e83836Smrg   table->entries = nentries;
218*b1e83836Smrg   table->nslots = size;
219*b1e83836Smrg }
220*b1e83836Smrg 
221*b1e83836Smrg /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222*b1e83836Smrg    the node, and V.  */
223*b1e83836Smrg void
ht_forall(cpp_hash_table * table,ht_cb cb,const void * v)224*b1e83836Smrg ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225*b1e83836Smrg {
226*b1e83836Smrg   hashnode *p, *limit;
227*b1e83836Smrg 
228*b1e83836Smrg   p = table->entries;
229*b1e83836Smrg   limit = p + table->nslots;
230*b1e83836Smrg   do
231*b1e83836Smrg     if (*p && *p != DELETED)
232*b1e83836Smrg       {
233*b1e83836Smrg 	if ((*cb) (table->pfile, *p, v) == 0)
234*b1e83836Smrg 	  break;
235*b1e83836Smrg       }
236*b1e83836Smrg   while (++p < limit);
237*b1e83836Smrg }
238*b1e83836Smrg 
239*b1e83836Smrg /* Like ht_forall, but a nonzero return from the callback means that
240*b1e83836Smrg    the entry should be removed from the table.  */
241*b1e83836Smrg void
ht_purge(cpp_hash_table * table,ht_cb cb,const void * v)242*b1e83836Smrg ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243*b1e83836Smrg {
244*b1e83836Smrg   hashnode *p, *limit;
245*b1e83836Smrg 
246*b1e83836Smrg   p = table->entries;
247*b1e83836Smrg   limit = p + table->nslots;
248*b1e83836Smrg   do
249*b1e83836Smrg     if (*p && *p != DELETED)
250*b1e83836Smrg       {
251*b1e83836Smrg 	if ((*cb) (table->pfile, *p, v))
252*b1e83836Smrg 	  *p = DELETED;
253*b1e83836Smrg       }
254*b1e83836Smrg   while (++p < limit);
255*b1e83836Smrg }
256*b1e83836Smrg 
257*b1e83836Smrg /* Restore the hash table.  */
258*b1e83836Smrg void
ht_load(cpp_hash_table * ht,hashnode * entries,unsigned int nslots,unsigned int nelements,bool own)259*b1e83836Smrg ht_load (cpp_hash_table *ht, hashnode *entries,
260*b1e83836Smrg 	 unsigned int nslots, unsigned int nelements,
261*b1e83836Smrg 	 bool own)
262*b1e83836Smrg {
263*b1e83836Smrg   if (ht->entries_owned)
264*b1e83836Smrg     free (ht->entries);
265*b1e83836Smrg   ht->entries = entries;
266*b1e83836Smrg   ht->nslots = nslots;
267*b1e83836Smrg   ht->nelements = nelements;
268*b1e83836Smrg   ht->entries_owned = own;
269*b1e83836Smrg }
270*b1e83836Smrg 
271*b1e83836Smrg /* Dump allocation statistics to stderr.  */
272*b1e83836Smrg 
273*b1e83836Smrg void
ht_dump_statistics(cpp_hash_table * table)274*b1e83836Smrg ht_dump_statistics (cpp_hash_table *table)
275*b1e83836Smrg {
276*b1e83836Smrg   size_t nelts, nids, overhead, headers;
277*b1e83836Smrg   size_t total_bytes, longest, deleted = 0;
278*b1e83836Smrg   double sum_of_squares, exp_len, exp_len2, exp2_len;
279*b1e83836Smrg   hashnode *p, *limit;
280*b1e83836Smrg 
281*b1e83836Smrg #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282*b1e83836Smrg 		  ? (x) \
283*b1e83836Smrg 		  : ((x) < 1024*1024*10 \
284*b1e83836Smrg 		     ? (x) / 1024 \
285*b1e83836Smrg 		     : (x) / (1024*1024))))
286*b1e83836Smrg #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287*b1e83836Smrg 
288*b1e83836Smrg   total_bytes = longest = sum_of_squares = nids = 0;
289*b1e83836Smrg   p = table->entries;
290*b1e83836Smrg   limit = p + table->nslots;
291*b1e83836Smrg   do
292*b1e83836Smrg     if (*p == DELETED)
293*b1e83836Smrg       ++deleted;
294*b1e83836Smrg     else if (*p)
295*b1e83836Smrg       {
296*b1e83836Smrg 	size_t n = HT_LEN (*p);
297*b1e83836Smrg 
298*b1e83836Smrg 	total_bytes += n;
299*b1e83836Smrg 	sum_of_squares += (double) n * n;
300*b1e83836Smrg 	if (n > longest)
301*b1e83836Smrg 	  longest = n;
302*b1e83836Smrg 	nids++;
303*b1e83836Smrg       }
304*b1e83836Smrg   while (++p < limit);
305*b1e83836Smrg 
306*b1e83836Smrg   nelts = table->nelements;
307*b1e83836Smrg   headers = table->nslots * sizeof (hashnode);
308*b1e83836Smrg 
309*b1e83836Smrg   fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:",
310*b1e83836Smrg 	   (unsigned long) nelts);
311*b1e83836Smrg   fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:",
312*b1e83836Smrg 	   (unsigned long) nids, nids * 100.0 / nelts);
313*b1e83836Smrg   fprintf (stderr, "%-32s%lu\n", "slots:",
314*b1e83836Smrg 	   (unsigned long) table->nslots);
315*b1e83836Smrg   fprintf (stderr, "%-32s%lu\n", "deleted:",
316*b1e83836Smrg 	   (unsigned long) deleted);
317*b1e83836Smrg 
318*b1e83836Smrg   if (table->alloc_subobject)
319*b1e83836Smrg     fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:",
320*b1e83836Smrg 	     SCALE (total_bytes), LABEL (total_bytes));
321*b1e83836Smrg   else
322*b1e83836Smrg     {
323*b1e83836Smrg       overhead = obstack_memory_used (&table->stack) - total_bytes;
324*b1e83836Smrg       fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n",
325*b1e83836Smrg 	       "obstack bytes:",
326*b1e83836Smrg 	       SCALE (total_bytes), LABEL (total_bytes),
327*b1e83836Smrg 	       SCALE (overhead), LABEL (overhead));
328*b1e83836Smrg     }
329*b1e83836Smrg   fprintf (stderr, "%-32s%lu%c\n", "table size:",
330*b1e83836Smrg 	   SCALE (headers), LABEL (headers));
331*b1e83836Smrg 
332*b1e83836Smrg   exp_len = (double)total_bytes / (double)nelts;
333*b1e83836Smrg   exp2_len = exp_len * exp_len;
334*b1e83836Smrg   exp_len2 = (double) sum_of_squares / (double) nelts;
335*b1e83836Smrg 
336*b1e83836Smrg   fprintf (stderr, "%-32s%.4f\n", "coll/search:",
337*b1e83836Smrg 	   (double) table->collisions / (double) table->searches);
338*b1e83836Smrg   fprintf (stderr, "%-32s%.4f\n", "ins/search:",
339*b1e83836Smrg 	   (double) nelts / (double) table->searches);
340*b1e83836Smrg   fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n",
341*b1e83836Smrg 	   "avg. entry:",
342*b1e83836Smrg 	   exp_len, approx_sqrt (exp_len2 - exp2_len));
343*b1e83836Smrg   fprintf (stderr, "%-32s%lu\n", "longest entry:",
344*b1e83836Smrg 	   (unsigned long) longest);
345*b1e83836Smrg #undef SCALE
346*b1e83836Smrg #undef LABEL
347*b1e83836Smrg }
348*b1e83836Smrg 
349*b1e83836Smrg /* Return the approximate positive square root of a number N.  This is for
350*b1e83836Smrg    statistical reports, not code generation.  */
351*b1e83836Smrg static double
approx_sqrt(double x)352*b1e83836Smrg approx_sqrt (double x)
353*b1e83836Smrg {
354*b1e83836Smrg   double s, d;
355*b1e83836Smrg 
356*b1e83836Smrg   if (x < 0)
357*b1e83836Smrg     abort ();
358*b1e83836Smrg   if (x == 0)
359*b1e83836Smrg     return 0;
360*b1e83836Smrg 
361*b1e83836Smrg   s = x;
362*b1e83836Smrg   do
363*b1e83836Smrg     {
364*b1e83836Smrg       d = (s * s - x) / (2 * s);
365*b1e83836Smrg       s -= d;
366*b1e83836Smrg     }
367*b1e83836Smrg   while (d > .0001);
368*b1e83836Smrg   return s;
369*b1e83836Smrg }
370