xref: /dflybsd-src/contrib/gcc-8.0/libcpp/symtab.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Hash tables.
2*38fd1498Szrj    Copyright (C) 2000-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This program is free software; you can redistribute it and/or modify it
5*38fd1498Szrj under the terms of the GNU General Public License as published by the
6*38fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
7*38fd1498Szrj later version.
8*38fd1498Szrj 
9*38fd1498Szrj This program is distributed in the hope that it will be useful,
10*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
11*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*38fd1498Szrj GNU General Public License for more details.
13*38fd1498Szrj 
14*38fd1498Szrj You should have received a copy of the GNU General Public License
15*38fd1498Szrj along with this program; see the file COPYING3.  If not see
16*38fd1498Szrj <http://www.gnu.org/licenses/>.
17*38fd1498Szrj 
18*38fd1498Szrj  In other words, you are welcome to use, share and improve this program.
19*38fd1498Szrj  You are forbidden to forbid anyone else to use, share and improve
20*38fd1498Szrj  what you give them.   Help stamp out software-hoarding!  */
21*38fd1498Szrj 
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "symtab.h"
25*38fd1498Szrj 
26*38fd1498Szrj /* The code below is a specialization of Vladimir Makarov's expandable
27*38fd1498Szrj    hash tables (see libiberty/hashtab.c).  The abstraction penalty was
28*38fd1498Szrj    too high to continue using the generic form.  This code knows
29*38fd1498Szrj    intrinsically how to calculate a hash value, and how to compare an
30*38fd1498Szrj    existing entry with a potential new one.  */
31*38fd1498Szrj 
32*38fd1498Szrj static unsigned int calc_hash (const unsigned char *, size_t);
33*38fd1498Szrj static void ht_expand (cpp_hash_table *);
34*38fd1498Szrj static double approx_sqrt (double);
35*38fd1498Szrj 
36*38fd1498Szrj /* A deleted entry.  */
37*38fd1498Szrj #define DELETED ((hashnode) -1)
38*38fd1498Szrj 
39*38fd1498Szrj /* Calculate the hash of the string STR of length LEN.  */
40*38fd1498Szrj 
41*38fd1498Szrj static unsigned int
calc_hash(const unsigned char * str,size_t len)42*38fd1498Szrj calc_hash (const unsigned char *str, size_t len)
43*38fd1498Szrj {
44*38fd1498Szrj   size_t n = len;
45*38fd1498Szrj   unsigned int r = 0;
46*38fd1498Szrj 
47*38fd1498Szrj   while (n--)
48*38fd1498Szrj     r = HT_HASHSTEP (r, *str++);
49*38fd1498Szrj 
50*38fd1498Szrj   return HT_HASHFINISH (r, len);
51*38fd1498Szrj }
52*38fd1498Szrj 
53*38fd1498Szrj /* Initialize an identifier hashtable.  */
54*38fd1498Szrj 
55*38fd1498Szrj cpp_hash_table *
ht_create(unsigned int order)56*38fd1498Szrj ht_create (unsigned int order)
57*38fd1498Szrj {
58*38fd1498Szrj   unsigned int nslots = 1 << order;
59*38fd1498Szrj   cpp_hash_table *table;
60*38fd1498Szrj 
61*38fd1498Szrj   table = XCNEW (cpp_hash_table);
62*38fd1498Szrj 
63*38fd1498Szrj   /* Strings need no alignment.  */
64*38fd1498Szrj   obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65*38fd1498Szrj 
66*38fd1498Szrj   obstack_alignment_mask (&table->stack) = 0;
67*38fd1498Szrj 
68*38fd1498Szrj   table->entries = XCNEWVEC (hashnode, nslots);
69*38fd1498Szrj   table->entries_owned = true;
70*38fd1498Szrj   table->nslots = nslots;
71*38fd1498Szrj   return table;
72*38fd1498Szrj }
73*38fd1498Szrj 
74*38fd1498Szrj /* Frees all memory associated with a hash table.  */
75*38fd1498Szrj 
76*38fd1498Szrj void
ht_destroy(cpp_hash_table * table)77*38fd1498Szrj ht_destroy (cpp_hash_table *table)
78*38fd1498Szrj {
79*38fd1498Szrj   obstack_free (&table->stack, NULL);
80*38fd1498Szrj   if (table->entries_owned)
81*38fd1498Szrj     free (table->entries);
82*38fd1498Szrj   free (table);
83*38fd1498Szrj }
84*38fd1498Szrj 
85*38fd1498Szrj /* Returns the hash entry for the a STR of length LEN.  If that string
86*38fd1498Szrj    already exists in the table, returns the existing entry.  If the
87*38fd1498Szrj    identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88*38fd1498Szrj    returns NULL.  Otherwise insert and returns a new entry.  A new
89*38fd1498Szrj    string is allocated.  */
90*38fd1498Szrj hashnode
ht_lookup(cpp_hash_table * table,const unsigned char * str,size_t len,enum ht_lookup_option insert)91*38fd1498Szrj ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92*38fd1498Szrj 	   enum ht_lookup_option insert)
93*38fd1498Szrj {
94*38fd1498Szrj   return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95*38fd1498Szrj 			      insert);
96*38fd1498Szrj }
97*38fd1498Szrj 
98*38fd1498Szrj hashnode
ht_lookup_with_hash(cpp_hash_table * table,const unsigned char * str,size_t len,unsigned int hash,enum ht_lookup_option insert)99*38fd1498Szrj ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100*38fd1498Szrj 		     size_t len, unsigned int hash,
101*38fd1498Szrj 		     enum ht_lookup_option insert)
102*38fd1498Szrj {
103*38fd1498Szrj   unsigned int hash2;
104*38fd1498Szrj   unsigned int index;
105*38fd1498Szrj   unsigned int deleted_index = table->nslots;
106*38fd1498Szrj   size_t sizemask;
107*38fd1498Szrj   hashnode node;
108*38fd1498Szrj 
109*38fd1498Szrj   sizemask = table->nslots - 1;
110*38fd1498Szrj   index = hash & sizemask;
111*38fd1498Szrj   table->searches++;
112*38fd1498Szrj 
113*38fd1498Szrj   node = table->entries[index];
114*38fd1498Szrj 
115*38fd1498Szrj   if (node != NULL)
116*38fd1498Szrj     {
117*38fd1498Szrj       if (node == DELETED)
118*38fd1498Szrj 	deleted_index = index;
119*38fd1498Szrj       else if (node->hash_value == hash
120*38fd1498Szrj 	       && HT_LEN (node) == (unsigned int) len
121*38fd1498Szrj 	       && !memcmp (HT_STR (node), str, len))
122*38fd1498Szrj 	return node;
123*38fd1498Szrj 
124*38fd1498Szrj       /* hash2 must be odd, so we're guaranteed to visit every possible
125*38fd1498Szrj 	 location in the table during rehashing.  */
126*38fd1498Szrj       hash2 = ((hash * 17) & sizemask) | 1;
127*38fd1498Szrj 
128*38fd1498Szrj       for (;;)
129*38fd1498Szrj 	{
130*38fd1498Szrj 	  table->collisions++;
131*38fd1498Szrj 	  index = (index + hash2) & sizemask;
132*38fd1498Szrj 	  node = table->entries[index];
133*38fd1498Szrj 	  if (node == NULL)
134*38fd1498Szrj 	    break;
135*38fd1498Szrj 
136*38fd1498Szrj 	  if (node == DELETED)
137*38fd1498Szrj 	    {
138*38fd1498Szrj 	      if (deleted_index != table->nslots)
139*38fd1498Szrj 		deleted_index = index;
140*38fd1498Szrj 	    }
141*38fd1498Szrj 	  else if (node->hash_value == hash
142*38fd1498Szrj 		   && HT_LEN (node) == (unsigned int) len
143*38fd1498Szrj 		   && !memcmp (HT_STR (node), str, len))
144*38fd1498Szrj 	    return node;
145*38fd1498Szrj 	}
146*38fd1498Szrj     }
147*38fd1498Szrj 
148*38fd1498Szrj   if (insert == HT_NO_INSERT)
149*38fd1498Szrj     return NULL;
150*38fd1498Szrj 
151*38fd1498Szrj   /* We prefer to overwrite the first deleted slot we saw.  */
152*38fd1498Szrj   if (deleted_index != table->nslots)
153*38fd1498Szrj     index = deleted_index;
154*38fd1498Szrj 
155*38fd1498Szrj   node = (*table->alloc_node) (table);
156*38fd1498Szrj   table->entries[index] = node;
157*38fd1498Szrj 
158*38fd1498Szrj   HT_LEN (node) = (unsigned int) len;
159*38fd1498Szrj   node->hash_value = hash;
160*38fd1498Szrj 
161*38fd1498Szrj   if (table->alloc_subobject)
162*38fd1498Szrj     {
163*38fd1498Szrj       char *chars = (char *) table->alloc_subobject (len + 1);
164*38fd1498Szrj       memcpy (chars, str, len);
165*38fd1498Szrj       chars[len] = '\0';
166*38fd1498Szrj       HT_STR (node) = (const unsigned char *) chars;
167*38fd1498Szrj     }
168*38fd1498Szrj   else
169*38fd1498Szrj     HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170*38fd1498Szrj 							   str, len);
171*38fd1498Szrj 
172*38fd1498Szrj   if (++table->nelements * 4 >= table->nslots * 3)
173*38fd1498Szrj     /* Must expand the string table.  */
174*38fd1498Szrj     ht_expand (table);
175*38fd1498Szrj 
176*38fd1498Szrj   return node;
177*38fd1498Szrj }
178*38fd1498Szrj 
179*38fd1498Szrj /* Double the size of a hash table, re-hashing existing entries.  */
180*38fd1498Szrj 
181*38fd1498Szrj static void
ht_expand(cpp_hash_table * table)182*38fd1498Szrj ht_expand (cpp_hash_table *table)
183*38fd1498Szrj {
184*38fd1498Szrj   hashnode *nentries, *p, *limit;
185*38fd1498Szrj   unsigned int size, sizemask;
186*38fd1498Szrj 
187*38fd1498Szrj   size = table->nslots * 2;
188*38fd1498Szrj   nentries = XCNEWVEC (hashnode, size);
189*38fd1498Szrj   sizemask = size - 1;
190*38fd1498Szrj 
191*38fd1498Szrj   p = table->entries;
192*38fd1498Szrj   limit = p + table->nslots;
193*38fd1498Szrj   do
194*38fd1498Szrj     if (*p && *p != DELETED)
195*38fd1498Szrj       {
196*38fd1498Szrj 	unsigned int index, hash, hash2;
197*38fd1498Szrj 
198*38fd1498Szrj 	hash = (*p)->hash_value;
199*38fd1498Szrj 	index = hash & sizemask;
200*38fd1498Szrj 
201*38fd1498Szrj 	if (nentries[index])
202*38fd1498Szrj 	  {
203*38fd1498Szrj 	    hash2 = ((hash * 17) & sizemask) | 1;
204*38fd1498Szrj 	    do
205*38fd1498Szrj 	      {
206*38fd1498Szrj 		index = (index + hash2) & sizemask;
207*38fd1498Szrj 	      }
208*38fd1498Szrj 	    while (nentries[index]);
209*38fd1498Szrj 	  }
210*38fd1498Szrj 	nentries[index] = *p;
211*38fd1498Szrj       }
212*38fd1498Szrj   while (++p < limit);
213*38fd1498Szrj 
214*38fd1498Szrj   if (table->entries_owned)
215*38fd1498Szrj     free (table->entries);
216*38fd1498Szrj   table->entries_owned = true;
217*38fd1498Szrj   table->entries = nentries;
218*38fd1498Szrj   table->nslots = size;
219*38fd1498Szrj }
220*38fd1498Szrj 
221*38fd1498Szrj /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222*38fd1498Szrj    the node, and V.  */
223*38fd1498Szrj void
ht_forall(cpp_hash_table * table,ht_cb cb,const void * v)224*38fd1498Szrj ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225*38fd1498Szrj {
226*38fd1498Szrj   hashnode *p, *limit;
227*38fd1498Szrj 
228*38fd1498Szrj   p = table->entries;
229*38fd1498Szrj   limit = p + table->nslots;
230*38fd1498Szrj   do
231*38fd1498Szrj     if (*p && *p != DELETED)
232*38fd1498Szrj       {
233*38fd1498Szrj 	if ((*cb) (table->pfile, *p, v) == 0)
234*38fd1498Szrj 	  break;
235*38fd1498Szrj       }
236*38fd1498Szrj   while (++p < limit);
237*38fd1498Szrj }
238*38fd1498Szrj 
239*38fd1498Szrj /* Like ht_forall, but a nonzero return from the callback means that
240*38fd1498Szrj    the entry should be removed from the table.  */
241*38fd1498Szrj void
ht_purge(cpp_hash_table * table,ht_cb cb,const void * v)242*38fd1498Szrj ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243*38fd1498Szrj {
244*38fd1498Szrj   hashnode *p, *limit;
245*38fd1498Szrj 
246*38fd1498Szrj   p = table->entries;
247*38fd1498Szrj   limit = p + table->nslots;
248*38fd1498Szrj   do
249*38fd1498Szrj     if (*p && *p != DELETED)
250*38fd1498Szrj       {
251*38fd1498Szrj 	if ((*cb) (table->pfile, *p, v))
252*38fd1498Szrj 	  *p = DELETED;
253*38fd1498Szrj       }
254*38fd1498Szrj   while (++p < limit);
255*38fd1498Szrj }
256*38fd1498Szrj 
257*38fd1498Szrj /* Restore the hash table.  */
258*38fd1498Szrj void
ht_load(cpp_hash_table * ht,hashnode * entries,unsigned int nslots,unsigned int nelements,bool own)259*38fd1498Szrj ht_load (cpp_hash_table *ht, hashnode *entries,
260*38fd1498Szrj 	 unsigned int nslots, unsigned int nelements,
261*38fd1498Szrj 	 bool own)
262*38fd1498Szrj {
263*38fd1498Szrj   if (ht->entries_owned)
264*38fd1498Szrj     free (ht->entries);
265*38fd1498Szrj   ht->entries = entries;
266*38fd1498Szrj   ht->nslots = nslots;
267*38fd1498Szrj   ht->nelements = nelements;
268*38fd1498Szrj   ht->entries_owned = own;
269*38fd1498Szrj }
270*38fd1498Szrj 
271*38fd1498Szrj /* Dump allocation statistics to stderr.  */
272*38fd1498Szrj 
273*38fd1498Szrj void
ht_dump_statistics(cpp_hash_table * table)274*38fd1498Szrj ht_dump_statistics (cpp_hash_table *table)
275*38fd1498Szrj {
276*38fd1498Szrj   size_t nelts, nids, overhead, headers;
277*38fd1498Szrj   size_t total_bytes, longest, deleted = 0;
278*38fd1498Szrj   double sum_of_squares, exp_len, exp_len2, exp2_len;
279*38fd1498Szrj   hashnode *p, *limit;
280*38fd1498Szrj 
281*38fd1498Szrj #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282*38fd1498Szrj 		  ? (x) \
283*38fd1498Szrj 		  : ((x) < 1024*1024*10 \
284*38fd1498Szrj 		     ? (x) / 1024 \
285*38fd1498Szrj 		     : (x) / (1024*1024))))
286*38fd1498Szrj #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287*38fd1498Szrj 
288*38fd1498Szrj   total_bytes = longest = sum_of_squares = nids = 0;
289*38fd1498Szrj   p = table->entries;
290*38fd1498Szrj   limit = p + table->nslots;
291*38fd1498Szrj   do
292*38fd1498Szrj     if (*p == DELETED)
293*38fd1498Szrj       ++deleted;
294*38fd1498Szrj     else if (*p)
295*38fd1498Szrj       {
296*38fd1498Szrj 	size_t n = HT_LEN (*p);
297*38fd1498Szrj 
298*38fd1498Szrj 	total_bytes += n;
299*38fd1498Szrj 	sum_of_squares += (double) n * n;
300*38fd1498Szrj 	if (n > longest)
301*38fd1498Szrj 	  longest = n;
302*38fd1498Szrj 	nids++;
303*38fd1498Szrj       }
304*38fd1498Szrj   while (++p < limit);
305*38fd1498Szrj 
306*38fd1498Szrj   nelts = table->nelements;
307*38fd1498Szrj   overhead = obstack_memory_used (&table->stack) - total_bytes;
308*38fd1498Szrj   headers = table->nslots * sizeof (hashnode);
309*38fd1498Szrj 
310*38fd1498Szrj   fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
311*38fd1498Szrj 	   (unsigned long) nelts);
312*38fd1498Szrj   fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
313*38fd1498Szrj 	   (unsigned long) nids, nids * 100.0 / nelts);
314*38fd1498Szrj   fprintf (stderr, "slots\t\t%lu\n",
315*38fd1498Szrj 	   (unsigned long) table->nslots);
316*38fd1498Szrj   fprintf (stderr, "deleted\t\t%lu\n",
317*38fd1498Szrj 	   (unsigned long) deleted);
318*38fd1498Szrj   fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
319*38fd1498Szrj 	   SCALE (total_bytes), LABEL (total_bytes),
320*38fd1498Szrj 	   SCALE (overhead), LABEL (overhead));
321*38fd1498Szrj   fprintf (stderr, "table size\t%lu%c\n",
322*38fd1498Szrj 	   SCALE (headers), LABEL (headers));
323*38fd1498Szrj 
324*38fd1498Szrj   exp_len = (double)total_bytes / (double)nelts;
325*38fd1498Szrj   exp2_len = exp_len * exp_len;
326*38fd1498Szrj   exp_len2 = (double) sum_of_squares / (double) nelts;
327*38fd1498Szrj 
328*38fd1498Szrj   fprintf (stderr, "coll/search\t%.4f\n",
329*38fd1498Szrj 	   (double) table->collisions / (double) table->searches);
330*38fd1498Szrj   fprintf (stderr, "ins/search\t%.4f\n",
331*38fd1498Szrj 	   (double) nelts / (double) table->searches);
332*38fd1498Szrj   fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
333*38fd1498Szrj 	   exp_len, approx_sqrt (exp_len2 - exp2_len));
334*38fd1498Szrj   fprintf (stderr, "longest entry\t%lu\n",
335*38fd1498Szrj 	   (unsigned long) longest);
336*38fd1498Szrj #undef SCALE
337*38fd1498Szrj #undef LABEL
338*38fd1498Szrj }
339*38fd1498Szrj 
340*38fd1498Szrj /* Return the approximate positive square root of a number N.  This is for
341*38fd1498Szrj    statistical reports, not code generation.  */
342*38fd1498Szrj static double
approx_sqrt(double x)343*38fd1498Szrj approx_sqrt (double x)
344*38fd1498Szrj {
345*38fd1498Szrj   double s, d;
346*38fd1498Szrj 
347*38fd1498Szrj   if (x < 0)
348*38fd1498Szrj     abort ();
349*38fd1498Szrj   if (x == 0)
350*38fd1498Szrj     return 0;
351*38fd1498Szrj 
352*38fd1498Szrj   s = x;
353*38fd1498Szrj   do
354*38fd1498Szrj     {
355*38fd1498Szrj       d = (s * s - x) / (2 * s);
356*38fd1498Szrj       s -= d;
357*38fd1498Szrj     }
358*38fd1498Szrj   while (d > .0001);
359*38fd1498Szrj   return s;
360*38fd1498Szrj }
361