1*e4b17023SJohn Marino /* Hash tables. 2*e4b17023SJohn Marino Copyright (C) 2000, 2001, 2003, 2004, 2008, 2009 3*e4b17023SJohn Marino Free Software Foundation, Inc. 4*e4b17023SJohn Marino 5*e4b17023SJohn Marino This program is free software; you can redistribute it and/or modify it 6*e4b17023SJohn Marino under the terms of the GNU General Public License as published by the 7*e4b17023SJohn Marino Free Software Foundation; either version 3, or (at your option) any 8*e4b17023SJohn Marino later version. 9*e4b17023SJohn Marino 10*e4b17023SJohn Marino This program is distributed in the hope that it will be useful, 11*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of 12*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13*e4b17023SJohn Marino GNU General Public License for more details. 14*e4b17023SJohn Marino 15*e4b17023SJohn Marino You should have received a copy of the GNU General Public License 16*e4b17023SJohn Marino along with this program; see the file COPYING3. If not see 17*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. 18*e4b17023SJohn Marino 19*e4b17023SJohn Marino In other words, you are welcome to use, share and improve this program. 20*e4b17023SJohn Marino You are forbidden to forbid anyone else to use, share and improve 21*e4b17023SJohn Marino what you give them. Help stamp out software-hoarding! */ 22*e4b17023SJohn Marino 23*e4b17023SJohn Marino #include "config.h" 24*e4b17023SJohn Marino #include "system.h" 25*e4b17023SJohn Marino #include "symtab.h" 26*e4b17023SJohn Marino 27*e4b17023SJohn Marino /* The code below is a specialization of Vladimir Makarov's expandable 28*e4b17023SJohn Marino hash tables (see libiberty/hashtab.c). The abstraction penalty was 29*e4b17023SJohn Marino too high to continue using the generic form. This code knows 30*e4b17023SJohn Marino intrinsically how to calculate a hash value, and how to compare an 31*e4b17023SJohn Marino existing entry with a potential new one. */ 32*e4b17023SJohn Marino 33*e4b17023SJohn Marino static unsigned int calc_hash (const unsigned char *, size_t); 34*e4b17023SJohn Marino static void ht_expand (hash_table *); 35*e4b17023SJohn Marino static double approx_sqrt (double); 36*e4b17023SJohn Marino 37*e4b17023SJohn Marino /* A deleted entry. */ 38*e4b17023SJohn Marino #define DELETED ((hashnode) -1) 39*e4b17023SJohn Marino 40*e4b17023SJohn Marino /* Calculate the hash of the string STR of length LEN. */ 41*e4b17023SJohn Marino 42*e4b17023SJohn Marino static unsigned int 43*e4b17023SJohn Marino calc_hash (const unsigned char *str, size_t len) 44*e4b17023SJohn Marino { 45*e4b17023SJohn Marino size_t n = len; 46*e4b17023SJohn Marino unsigned int r = 0; 47*e4b17023SJohn Marino 48*e4b17023SJohn Marino while (n--) 49*e4b17023SJohn Marino r = HT_HASHSTEP (r, *str++); 50*e4b17023SJohn Marino 51*e4b17023SJohn Marino return HT_HASHFINISH (r, len); 52*e4b17023SJohn Marino } 53*e4b17023SJohn Marino 54*e4b17023SJohn Marino /* Initialize an identifier hashtable. */ 55*e4b17023SJohn Marino 56*e4b17023SJohn Marino hash_table * 57*e4b17023SJohn Marino ht_create (unsigned int order) 58*e4b17023SJohn Marino { 59*e4b17023SJohn Marino unsigned int nslots = 1 << order; 60*e4b17023SJohn Marino hash_table *table; 61*e4b17023SJohn Marino 62*e4b17023SJohn Marino table = XCNEW (hash_table); 63*e4b17023SJohn Marino 64*e4b17023SJohn Marino /* Strings need no alignment. */ 65*e4b17023SJohn Marino _obstack_begin (&table->stack, 0, 0, 66*e4b17023SJohn Marino (void *(*) (long)) xmalloc, 67*e4b17023SJohn Marino (void (*) (void *)) free); 68*e4b17023SJohn Marino 69*e4b17023SJohn Marino obstack_alignment_mask (&table->stack) = 0; 70*e4b17023SJohn Marino 71*e4b17023SJohn Marino table->entries = XCNEWVEC (hashnode, nslots); 72*e4b17023SJohn Marino table->entries_owned = true; 73*e4b17023SJohn Marino table->nslots = nslots; 74*e4b17023SJohn Marino return table; 75*e4b17023SJohn Marino } 76*e4b17023SJohn Marino 77*e4b17023SJohn Marino /* Frees all memory associated with a hash table. */ 78*e4b17023SJohn Marino 79*e4b17023SJohn Marino void 80*e4b17023SJohn Marino ht_destroy (hash_table *table) 81*e4b17023SJohn Marino { 82*e4b17023SJohn Marino obstack_free (&table->stack, NULL); 83*e4b17023SJohn Marino if (table->entries_owned) 84*e4b17023SJohn Marino free (table->entries); 85*e4b17023SJohn Marino free (table); 86*e4b17023SJohn Marino } 87*e4b17023SJohn Marino 88*e4b17023SJohn Marino /* Returns the hash entry for the a STR of length LEN. If that string 89*e4b17023SJohn Marino already exists in the table, returns the existing entry. If the 90*e4b17023SJohn Marino identifier hasn't been seen before, and INSERT is CPP_NO_INSERT, 91*e4b17023SJohn Marino returns NULL. Otherwise insert and returns a new entry. A new 92*e4b17023SJohn Marino string is allocated. */ 93*e4b17023SJohn Marino hashnode 94*e4b17023SJohn Marino ht_lookup (hash_table *table, const unsigned char *str, size_t len, 95*e4b17023SJohn Marino enum ht_lookup_option insert) 96*e4b17023SJohn Marino { 97*e4b17023SJohn Marino return ht_lookup_with_hash (table, str, len, calc_hash (str, len), 98*e4b17023SJohn Marino insert); 99*e4b17023SJohn Marino } 100*e4b17023SJohn Marino 101*e4b17023SJohn Marino hashnode 102*e4b17023SJohn Marino ht_lookup_with_hash (hash_table *table, const unsigned char *str, 103*e4b17023SJohn Marino size_t len, unsigned int hash, 104*e4b17023SJohn Marino enum ht_lookup_option insert) 105*e4b17023SJohn Marino { 106*e4b17023SJohn Marino unsigned int hash2; 107*e4b17023SJohn Marino unsigned int index; 108*e4b17023SJohn Marino unsigned int deleted_index = table->nslots; 109*e4b17023SJohn Marino size_t sizemask; 110*e4b17023SJohn Marino hashnode node; 111*e4b17023SJohn Marino 112*e4b17023SJohn Marino sizemask = table->nslots - 1; 113*e4b17023SJohn Marino index = hash & sizemask; 114*e4b17023SJohn Marino table->searches++; 115*e4b17023SJohn Marino 116*e4b17023SJohn Marino node = table->entries[index]; 117*e4b17023SJohn Marino 118*e4b17023SJohn Marino if (node != NULL) 119*e4b17023SJohn Marino { 120*e4b17023SJohn Marino if (node == DELETED) 121*e4b17023SJohn Marino deleted_index = index; 122*e4b17023SJohn Marino else if (node->hash_value == hash 123*e4b17023SJohn Marino && HT_LEN (node) == (unsigned int) len 124*e4b17023SJohn Marino && !memcmp (HT_STR (node), str, len)) 125*e4b17023SJohn Marino return node; 126*e4b17023SJohn Marino 127*e4b17023SJohn Marino /* hash2 must be odd, so we're guaranteed to visit every possible 128*e4b17023SJohn Marino location in the table during rehashing. */ 129*e4b17023SJohn Marino hash2 = ((hash * 17) & sizemask) | 1; 130*e4b17023SJohn Marino 131*e4b17023SJohn Marino for (;;) 132*e4b17023SJohn Marino { 133*e4b17023SJohn Marino table->collisions++; 134*e4b17023SJohn Marino index = (index + hash2) & sizemask; 135*e4b17023SJohn Marino node = table->entries[index]; 136*e4b17023SJohn Marino if (node == NULL) 137*e4b17023SJohn Marino break; 138*e4b17023SJohn Marino 139*e4b17023SJohn Marino if (node == DELETED) 140*e4b17023SJohn Marino { 141*e4b17023SJohn Marino if (deleted_index != table->nslots) 142*e4b17023SJohn Marino deleted_index = index; 143*e4b17023SJohn Marino } 144*e4b17023SJohn Marino else if (node->hash_value == hash 145*e4b17023SJohn Marino && HT_LEN (node) == (unsigned int) len 146*e4b17023SJohn Marino && !memcmp (HT_STR (node), str, len)) 147*e4b17023SJohn Marino return node; 148*e4b17023SJohn Marino } 149*e4b17023SJohn Marino } 150*e4b17023SJohn Marino 151*e4b17023SJohn Marino if (insert == HT_NO_INSERT) 152*e4b17023SJohn Marino return NULL; 153*e4b17023SJohn Marino 154*e4b17023SJohn Marino /* We prefer to overwrite the first deleted slot we saw. */ 155*e4b17023SJohn Marino if (deleted_index != table->nslots) 156*e4b17023SJohn Marino index = deleted_index; 157*e4b17023SJohn Marino 158*e4b17023SJohn Marino node = (*table->alloc_node) (table); 159*e4b17023SJohn Marino table->entries[index] = node; 160*e4b17023SJohn Marino 161*e4b17023SJohn Marino HT_LEN (node) = (unsigned int) len; 162*e4b17023SJohn Marino node->hash_value = hash; 163*e4b17023SJohn Marino 164*e4b17023SJohn Marino if (table->alloc_subobject) 165*e4b17023SJohn Marino { 166*e4b17023SJohn Marino char *chars = (char *) table->alloc_subobject (len + 1); 167*e4b17023SJohn Marino memcpy (chars, str, len); 168*e4b17023SJohn Marino chars[len] = '\0'; 169*e4b17023SJohn Marino HT_STR (node) = (const unsigned char *) chars; 170*e4b17023SJohn Marino } 171*e4b17023SJohn Marino else 172*e4b17023SJohn Marino HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack, 173*e4b17023SJohn Marino str, len); 174*e4b17023SJohn Marino 175*e4b17023SJohn Marino if (++table->nelements * 4 >= table->nslots * 3) 176*e4b17023SJohn Marino /* Must expand the string table. */ 177*e4b17023SJohn Marino ht_expand (table); 178*e4b17023SJohn Marino 179*e4b17023SJohn Marino return node; 180*e4b17023SJohn Marino } 181*e4b17023SJohn Marino 182*e4b17023SJohn Marino /* Double the size of a hash table, re-hashing existing entries. */ 183*e4b17023SJohn Marino 184*e4b17023SJohn Marino static void 185*e4b17023SJohn Marino ht_expand (hash_table *table) 186*e4b17023SJohn Marino { 187*e4b17023SJohn Marino hashnode *nentries, *p, *limit; 188*e4b17023SJohn Marino unsigned int size, sizemask; 189*e4b17023SJohn Marino 190*e4b17023SJohn Marino size = table->nslots * 2; 191*e4b17023SJohn Marino nentries = XCNEWVEC (hashnode, size); 192*e4b17023SJohn Marino sizemask = size - 1; 193*e4b17023SJohn Marino 194*e4b17023SJohn Marino p = table->entries; 195*e4b17023SJohn Marino limit = p + table->nslots; 196*e4b17023SJohn Marino do 197*e4b17023SJohn Marino if (*p && *p != DELETED) 198*e4b17023SJohn Marino { 199*e4b17023SJohn Marino unsigned int index, hash, hash2; 200*e4b17023SJohn Marino 201*e4b17023SJohn Marino hash = (*p)->hash_value; 202*e4b17023SJohn Marino index = hash & sizemask; 203*e4b17023SJohn Marino 204*e4b17023SJohn Marino if (nentries[index]) 205*e4b17023SJohn Marino { 206*e4b17023SJohn Marino hash2 = ((hash * 17) & sizemask) | 1; 207*e4b17023SJohn Marino do 208*e4b17023SJohn Marino { 209*e4b17023SJohn Marino index = (index + hash2) & sizemask; 210*e4b17023SJohn Marino } 211*e4b17023SJohn Marino while (nentries[index]); 212*e4b17023SJohn Marino } 213*e4b17023SJohn Marino nentries[index] = *p; 214*e4b17023SJohn Marino } 215*e4b17023SJohn Marino while (++p < limit); 216*e4b17023SJohn Marino 217*e4b17023SJohn Marino if (table->entries_owned) 218*e4b17023SJohn Marino free (table->entries); 219*e4b17023SJohn Marino table->entries_owned = true; 220*e4b17023SJohn Marino table->entries = nentries; 221*e4b17023SJohn Marino table->nslots = size; 222*e4b17023SJohn Marino } 223*e4b17023SJohn Marino 224*e4b17023SJohn Marino /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE, 225*e4b17023SJohn Marino the node, and V. */ 226*e4b17023SJohn Marino void 227*e4b17023SJohn Marino ht_forall (hash_table *table, ht_cb cb, const void *v) 228*e4b17023SJohn Marino { 229*e4b17023SJohn Marino hashnode *p, *limit; 230*e4b17023SJohn Marino 231*e4b17023SJohn Marino p = table->entries; 232*e4b17023SJohn Marino limit = p + table->nslots; 233*e4b17023SJohn Marino do 234*e4b17023SJohn Marino if (*p && *p != DELETED) 235*e4b17023SJohn Marino { 236*e4b17023SJohn Marino if ((*cb) (table->pfile, *p, v) == 0) 237*e4b17023SJohn Marino break; 238*e4b17023SJohn Marino } 239*e4b17023SJohn Marino while (++p < limit); 240*e4b17023SJohn Marino } 241*e4b17023SJohn Marino 242*e4b17023SJohn Marino /* Like ht_forall, but a nonzero return from the callback means that 243*e4b17023SJohn Marino the entry should be removed from the table. */ 244*e4b17023SJohn Marino void 245*e4b17023SJohn Marino ht_purge (hash_table *table, ht_cb cb, const void *v) 246*e4b17023SJohn Marino { 247*e4b17023SJohn Marino hashnode *p, *limit; 248*e4b17023SJohn Marino 249*e4b17023SJohn Marino p = table->entries; 250*e4b17023SJohn Marino limit = p + table->nslots; 251*e4b17023SJohn Marino do 252*e4b17023SJohn Marino if (*p && *p != DELETED) 253*e4b17023SJohn Marino { 254*e4b17023SJohn Marino if ((*cb) (table->pfile, *p, v)) 255*e4b17023SJohn Marino *p = DELETED; 256*e4b17023SJohn Marino } 257*e4b17023SJohn Marino while (++p < limit); 258*e4b17023SJohn Marino } 259*e4b17023SJohn Marino 260*e4b17023SJohn Marino /* Restore the hash table. */ 261*e4b17023SJohn Marino void 262*e4b17023SJohn Marino ht_load (hash_table *ht, hashnode *entries, 263*e4b17023SJohn Marino unsigned int nslots, unsigned int nelements, 264*e4b17023SJohn Marino bool own) 265*e4b17023SJohn Marino { 266*e4b17023SJohn Marino if (ht->entries_owned) 267*e4b17023SJohn Marino free (ht->entries); 268*e4b17023SJohn Marino ht->entries = entries; 269*e4b17023SJohn Marino ht->nslots = nslots; 270*e4b17023SJohn Marino ht->nelements = nelements; 271*e4b17023SJohn Marino ht->entries_owned = own; 272*e4b17023SJohn Marino } 273*e4b17023SJohn Marino 274*e4b17023SJohn Marino /* Dump allocation statistics to stderr. */ 275*e4b17023SJohn Marino 276*e4b17023SJohn Marino void 277*e4b17023SJohn Marino ht_dump_statistics (hash_table *table) 278*e4b17023SJohn Marino { 279*e4b17023SJohn Marino size_t nelts, nids, overhead, headers; 280*e4b17023SJohn Marino size_t total_bytes, longest, deleted = 0; 281*e4b17023SJohn Marino double sum_of_squares, exp_len, exp_len2, exp2_len; 282*e4b17023SJohn Marino hashnode *p, *limit; 283*e4b17023SJohn Marino 284*e4b17023SJohn Marino #define SCALE(x) ((unsigned long) ((x) < 1024*10 \ 285*e4b17023SJohn Marino ? (x) \ 286*e4b17023SJohn Marino : ((x) < 1024*1024*10 \ 287*e4b17023SJohn Marino ? (x) / 1024 \ 288*e4b17023SJohn Marino : (x) / (1024*1024)))) 289*e4b17023SJohn Marino #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) 290*e4b17023SJohn Marino 291*e4b17023SJohn Marino total_bytes = longest = sum_of_squares = nids = 0; 292*e4b17023SJohn Marino p = table->entries; 293*e4b17023SJohn Marino limit = p + table->nslots; 294*e4b17023SJohn Marino do 295*e4b17023SJohn Marino if (*p == DELETED) 296*e4b17023SJohn Marino ++deleted; 297*e4b17023SJohn Marino else if (*p) 298*e4b17023SJohn Marino { 299*e4b17023SJohn Marino size_t n = HT_LEN (*p); 300*e4b17023SJohn Marino 301*e4b17023SJohn Marino total_bytes += n; 302*e4b17023SJohn Marino sum_of_squares += (double) n * n; 303*e4b17023SJohn Marino if (n > longest) 304*e4b17023SJohn Marino longest = n; 305*e4b17023SJohn Marino nids++; 306*e4b17023SJohn Marino } 307*e4b17023SJohn Marino while (++p < limit); 308*e4b17023SJohn Marino 309*e4b17023SJohn Marino nelts = table->nelements; 310*e4b17023SJohn Marino overhead = obstack_memory_used (&table->stack) - total_bytes; 311*e4b17023SJohn Marino headers = table->nslots * sizeof (hashnode); 312*e4b17023SJohn Marino 313*e4b17023SJohn Marino fprintf (stderr, "\nString pool\nentries\t\t%lu\n", 314*e4b17023SJohn Marino (unsigned long) nelts); 315*e4b17023SJohn Marino fprintf (stderr, "identifiers\t%lu (%.2f%%)\n", 316*e4b17023SJohn Marino (unsigned long) nids, nids * 100.0 / nelts); 317*e4b17023SJohn Marino fprintf (stderr, "slots\t\t%lu\n", 318*e4b17023SJohn Marino (unsigned long) table->nslots); 319*e4b17023SJohn Marino fprintf (stderr, "deleted\t\t%lu\n", 320*e4b17023SJohn Marino (unsigned long) deleted); 321*e4b17023SJohn Marino fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n", 322*e4b17023SJohn Marino SCALE (total_bytes), LABEL (total_bytes), 323*e4b17023SJohn Marino SCALE (overhead), LABEL (overhead)); 324*e4b17023SJohn Marino fprintf (stderr, "table size\t%lu%c\n", 325*e4b17023SJohn Marino SCALE (headers), LABEL (headers)); 326*e4b17023SJohn Marino 327*e4b17023SJohn Marino exp_len = (double)total_bytes / (double)nelts; 328*e4b17023SJohn Marino exp2_len = exp_len * exp_len; 329*e4b17023SJohn Marino exp_len2 = (double) sum_of_squares / (double) nelts; 330*e4b17023SJohn Marino 331*e4b17023SJohn Marino fprintf (stderr, "coll/search\t%.4f\n", 332*e4b17023SJohn Marino (double) table->collisions / (double) table->searches); 333*e4b17023SJohn Marino fprintf (stderr, "ins/search\t%.4f\n", 334*e4b17023SJohn Marino (double) nelts / (double) table->searches); 335*e4b17023SJohn Marino fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n", 336*e4b17023SJohn Marino exp_len, approx_sqrt (exp_len2 - exp2_len)); 337*e4b17023SJohn Marino fprintf (stderr, "longest entry\t%lu\n", 338*e4b17023SJohn Marino (unsigned long) longest); 339*e4b17023SJohn Marino #undef SCALE 340*e4b17023SJohn Marino #undef LABEL 341*e4b17023SJohn Marino } 342*e4b17023SJohn Marino 343*e4b17023SJohn Marino /* Return the approximate positive square root of a number N. This is for 344*e4b17023SJohn Marino statistical reports, not code generation. */ 345*e4b17023SJohn Marino static double 346*e4b17023SJohn Marino approx_sqrt (double x) 347*e4b17023SJohn Marino { 348*e4b17023SJohn Marino double s, d; 349*e4b17023SJohn Marino 350*e4b17023SJohn Marino if (x < 0) 351*e4b17023SJohn Marino abort (); 352*e4b17023SJohn Marino if (x == 0) 353*e4b17023SJohn Marino return 0; 354*e4b17023SJohn Marino 355*e4b17023SJohn Marino s = x; 356*e4b17023SJohn Marino do 357*e4b17023SJohn Marino { 358*e4b17023SJohn Marino d = (s * s - x) / (2 * s); 359*e4b17023SJohn Marino s -= d; 360*e4b17023SJohn Marino } 361*e4b17023SJohn Marino while (d > .0001); 362*e4b17023SJohn Marino return s; 363*e4b17023SJohn Marino } 364