1 /* $NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Joerg Sonnenberger. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #if HAVE_NBTOOL_CONFIG_H 35 #include "nbtool_config.h" 36 #endif 37 38 #include <sys/cdefs.h> 39 __RCSID("$NetBSD: cdbw.c,v 1.5 2012/07/21 22:49:37 joerg Exp $"); 40 41 #include "namespace.h" 42 43 #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H 44 #include <sys/endian.h> 45 #endif 46 #include <sys/queue.h> 47 #include <cdbw.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 52 #ifdef __weak_alias 53 __weak_alias(cdbw_close,_cdbw_close) 54 __weak_alias(cdbw_open,_cdbw_open) 55 __weak_alias(cdbw_output,_cdbw_output) 56 __weak_alias(cdbw_put,_cdbw_put) 57 __weak_alias(cdbw_put_data,_cdbw_put_data) 58 __weak_alias(cdbw_put_key,_cdbw_put_key) 59 #endif 60 61 struct key_hash { 62 SLIST_ENTRY(key_hash) link; 63 uint32_t hashes[3]; 64 uint32_t idx; 65 void *key; 66 size_t keylen; 67 }; 68 69 SLIST_HEAD(key_hash_head, key_hash); 70 71 struct cdbw { 72 size_t data_counter; 73 size_t data_allocated; 74 size_t data_size; 75 size_t *data_len; 76 void **data_ptr; 77 78 size_t hash_size; 79 struct key_hash_head *hash; 80 size_t key_counter; 81 }; 82 83 /* Max. data counter that allows the index size to be 32bit. */ 84 static const uint32_t max_data_counter = 0xccccccccU; 85 86 struct cdbw * 87 cdbw_open(void) 88 { 89 struct cdbw *cdbw; 90 size_t i; 91 92 cdbw = calloc(sizeof(*cdbw), 1); 93 if (cdbw == NULL) 94 return NULL; 95 96 cdbw->hash_size = 1024; 97 cdbw->hash = calloc(cdbw->hash_size, sizeof(*cdbw->hash)); 98 if (cdbw->hash == NULL) { 99 free(cdbw); 100 return NULL; 101 } 102 103 for (i = 0; i < cdbw->hash_size; ++i) 104 SLIST_INIT(cdbw->hash + i); 105 106 return cdbw; 107 } 108 109 int 110 cdbw_put(struct cdbw *cdbw, const void *key, size_t keylen, 111 const void *data, size_t datalen) 112 { 113 uint32_t idx; 114 int rv; 115 116 rv = cdbw_put_data(cdbw, data, datalen, &idx); 117 if (rv) 118 return rv; 119 rv = cdbw_put_key(cdbw, key, keylen, idx); 120 if (rv) { 121 --cdbw->data_counter; 122 free(cdbw->data_ptr[cdbw->data_counter]); 123 cdbw->data_size -= datalen; 124 return rv; 125 } 126 return 0; 127 } 128 129 int 130 cdbw_put_data(struct cdbw *cdbw, const void *data, size_t datalen, 131 uint32_t *idx) 132 { 133 134 if (cdbw->data_counter == max_data_counter) 135 return -1; 136 137 if (cdbw->data_size + datalen < cdbw->data_size || 138 cdbw->data_size + datalen > 0xffffffffU) 139 return -1; /* Overflow */ 140 141 if (cdbw->data_allocated == cdbw->data_counter) { 142 void **new_data_ptr; 143 size_t *new_data_len; 144 size_t new_allocated; 145 146 if (cdbw->data_allocated == 0) 147 new_allocated = 256; 148 else 149 new_allocated = cdbw->data_allocated * 2; 150 151 new_data_ptr = realloc(cdbw->data_ptr, 152 sizeof(*cdbw->data_ptr) * new_allocated); 153 if (new_data_ptr == NULL) 154 return -1; 155 cdbw->data_ptr = new_data_ptr; 156 157 new_data_len = realloc(cdbw->data_len, 158 sizeof(*cdbw->data_len) * new_allocated); 159 if (new_data_len == NULL) 160 return -1; 161 cdbw->data_len = new_data_len; 162 163 cdbw->data_allocated = new_allocated; 164 } 165 166 cdbw->data_ptr[cdbw->data_counter] = malloc(datalen); 167 if (cdbw->data_ptr[cdbw->data_counter] == NULL) 168 return -1; 169 memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen); 170 cdbw->data_len[cdbw->data_counter] = datalen; 171 cdbw->data_size += datalen; 172 *idx = cdbw->data_counter++; 173 return 0; 174 } 175 176 int 177 cdbw_put_key(struct cdbw *cdbw, const void *key, size_t keylen, uint32_t idx) 178 { 179 uint32_t hashes[3]; 180 struct key_hash_head *head, *head2, *new_head; 181 struct key_hash *key_hash; 182 size_t new_hash_size, i; 183 184 if (idx >= cdbw->data_counter || 185 cdbw->key_counter == max_data_counter) 186 return -1; 187 188 mi_vector_hash(key, keylen, 0, hashes); 189 190 head = cdbw->hash + (hashes[0] & (cdbw->hash_size - 1)); 191 SLIST_FOREACH(key_hash, head, link) { 192 if (key_hash->keylen != keylen) 193 continue; 194 if (key_hash->hashes[0] != hashes[0]) 195 continue; 196 if (key_hash->hashes[1] != hashes[1]) 197 continue; 198 if (key_hash->hashes[2] != hashes[2]) 199 continue; 200 if (memcmp(key, key_hash->key, keylen)) 201 continue; 202 return -1; 203 } 204 key_hash = malloc(sizeof(*key_hash)); 205 if (key_hash == NULL) 206 return -1; 207 key_hash->key = malloc(keylen); 208 if (key_hash->key == NULL) { 209 free(key_hash); 210 return -1; 211 } 212 memcpy(key_hash->key, key, keylen); 213 key_hash->hashes[0] = hashes[0]; 214 key_hash->hashes[1] = hashes[1]; 215 key_hash->hashes[2] = hashes[2]; 216 key_hash->keylen = keylen; 217 key_hash->idx = idx; 218 SLIST_INSERT_HEAD(head, key_hash, link); 219 ++cdbw->key_counter; 220 221 if (cdbw->key_counter <= cdbw->hash_size) 222 return 0; 223 224 /* Try to resize the hash table, but ignore errors. */ 225 new_hash_size = cdbw->hash_size * 2; 226 new_head = calloc(sizeof(*new_head), new_hash_size); 227 if (new_head == NULL) 228 return 0; 229 230 head = &cdbw->hash[hashes[0] & (cdbw->hash_size - 1)]; 231 for (i = 0; i < new_hash_size; ++i) 232 SLIST_INIT(new_head + i); 233 234 for (i = 0; i < cdbw->hash_size; ++i) { 235 head = cdbw->hash + i; 236 237 while ((key_hash = SLIST_FIRST(head)) != NULL) { 238 SLIST_REMOVE_HEAD(head, link); 239 head2 = new_head + 240 (key_hash->hashes[0] & (new_hash_size - 1)); 241 SLIST_INSERT_HEAD(head2, key_hash, link); 242 } 243 } 244 free(cdbw->hash); 245 cdbw->hash_size = new_hash_size; 246 cdbw->hash = new_head; 247 248 return 0; 249 } 250 251 void 252 cdbw_close(struct cdbw *cdbw) 253 { 254 struct key_hash_head *head; 255 struct key_hash *key_hash; 256 size_t i; 257 258 for (i = 0; i < cdbw->hash_size; ++i) { 259 head = cdbw->hash + i; 260 while ((key_hash = SLIST_FIRST(head)) != NULL) { 261 SLIST_REMOVE_HEAD(head, link); 262 free(key_hash->key); 263 free(key_hash); 264 } 265 } 266 267 for (i = 0; i < cdbw->data_counter; ++i) 268 free(cdbw->data_ptr[i]); 269 free(cdbw->data_ptr); 270 free(cdbw->data_len); 271 free(cdbw->hash); 272 free(cdbw); 273 } 274 275 uint32_t 276 cdbw_stable_seeder(void) 277 { 278 return 0; 279 } 280 281 #define unused 0xffffffffU 282 283 struct vertex { 284 uint32_t l_edge, m_edge, r_edge; 285 }; 286 287 struct edge { 288 uint32_t idx; 289 290 uint32_t left, middle, right; 291 uint32_t l_prev, m_prev, l_next; 292 uint32_t r_prev, m_next, r_next; 293 }; 294 295 struct state { 296 uint32_t data_entries; 297 uint32_t entries; 298 uint32_t keys; 299 uint32_t seed; 300 301 uint32_t *g; 302 char *visited; 303 304 struct vertex *verts; 305 struct edge *edges; 306 uint32_t output_index; 307 uint32_t *output_order; 308 }; 309 310 static void 311 remove_vertex(struct state *state, struct vertex *v) 312 { 313 struct edge *e; 314 struct vertex *vl, *vm, *vr; 315 316 if (v->l_edge != unused && v->m_edge != unused) 317 return; 318 if (v->l_edge != unused && v->r_edge != unused) 319 return; 320 if (v->m_edge != unused && v->r_edge != unused) 321 return; 322 if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused) 323 return; 324 325 if (v->l_edge != unused) { 326 e = &state->edges[v->l_edge]; 327 if (e->l_next != unused) 328 return; 329 } else if (v->m_edge != unused) { 330 e = &state->edges[v->m_edge]; 331 if (e->m_next != unused) 332 return; 333 } else { 334 if (v->r_edge == unused) 335 abort(); 336 e = &state->edges[v->r_edge]; 337 if (e->r_next != unused) 338 return; 339 } 340 341 state->output_order[--state->output_index] = e - state->edges; 342 343 vl = &state->verts[e->left]; 344 vm = &state->verts[e->middle]; 345 vr = &state->verts[e->right]; 346 347 if (e->l_prev == unused) 348 vl->l_edge = e->l_next; 349 else 350 state->edges[e->l_prev].l_next = e->l_next; 351 if (e->l_next != unused) 352 state->edges[e->l_next].l_prev = e->l_prev; 353 354 if (e->m_prev == unused) 355 vm->m_edge = e->m_next; 356 else 357 state->edges[e->m_prev].m_next = e->m_next; 358 if (e->m_next != unused) 359 state->edges[e->m_next].m_prev = e->m_prev; 360 361 if (e->r_prev == unused) 362 vr->r_edge = e->r_next; 363 else 364 state->edges[e->r_prev].r_next = e->r_next; 365 if (e->r_next != unused) 366 state->edges[e->r_next].r_prev = e->r_prev; 367 } 368 369 static int 370 build_graph(struct cdbw *cdbw, struct state *state) 371 { 372 struct key_hash_head *head; 373 struct key_hash *key_hash; 374 struct vertex *v; 375 struct edge *e; 376 uint32_t hashes[3]; 377 size_t i; 378 379 e = state->edges; 380 for (i = 0; i < cdbw->hash_size; ++i) { 381 head = &cdbw->hash[i]; 382 SLIST_FOREACH(key_hash, head, link) { 383 e->idx = key_hash->idx; 384 mi_vector_hash(key_hash->key, key_hash->keylen, 385 state->seed, hashes); 386 e->left = hashes[0] % state->entries; 387 e->middle = hashes[1] % state->entries; 388 e->right = hashes[2] % state->entries; 389 390 if (e->left == e->middle) 391 return -1; 392 if (e->left == e->right) 393 return -1; 394 if (e->middle == e->right) 395 return -1; 396 397 ++e; 398 } 399 } 400 401 for (i = 0; i < state->entries; ++i) { 402 v = state->verts + i; 403 v->l_edge = unused; 404 v->m_edge = unused; 405 v->r_edge = unused; 406 } 407 408 for (i = 0; i < state->keys; ++i) { 409 e = state->edges + i; 410 v = state->verts + e->left; 411 if (v->l_edge != unused) 412 state->edges[v->l_edge].l_prev = i; 413 e->l_next = v->l_edge; 414 e->l_prev = unused; 415 v->l_edge = i; 416 417 v = &state->verts[e->middle]; 418 if (v->m_edge != unused) 419 state->edges[v->m_edge].m_prev = i; 420 e->m_next = v->m_edge; 421 e->m_prev = unused; 422 v->m_edge = i; 423 424 v = &state->verts[e->right]; 425 if (v->r_edge != unused) 426 state->edges[v->r_edge].r_prev = i; 427 e->r_next = v->r_edge; 428 e->r_prev = unused; 429 v->r_edge = i; 430 } 431 432 state->output_index = state->keys; 433 for (i = 0; i < state->entries; ++i) 434 remove_vertex(state, state->verts + i); 435 436 i = state->keys; 437 while (i > 0 && i > state->output_index) { 438 --i; 439 e = state->edges + state->output_order[i]; 440 remove_vertex(state, state->verts + e->left); 441 remove_vertex(state, state->verts + e->middle); 442 remove_vertex(state, state->verts + e->right); 443 } 444 445 return state->output_index == 0 ? 0 : -1; 446 } 447 448 static void 449 assign_nodes(struct state *state) 450 { 451 struct edge *e; 452 size_t i; 453 454 for (i = 0; i < state->keys; ++i) { 455 e = state->edges + state->output_order[i]; 456 457 if (!state->visited[e->left]) { 458 state->g[e->left] = 459 (2 * state->data_entries + e->idx 460 - state->g[e->middle] - state->g[e->right]) 461 % state->data_entries; 462 } else if (!state->visited[e->middle]) { 463 state->g[e->middle] = 464 (2 * state->data_entries + e->idx 465 - state->g[e->left] - state->g[e->right]) 466 % state->data_entries; 467 } else { 468 state->g[e->right] = 469 (2 * state->data_entries + e->idx 470 - state->g[e->left] - state->g[e->middle]) 471 % state->data_entries; 472 } 473 state->visited[e->left] = 1; 474 state->visited[e->middle] = 1; 475 state->visited[e->right] = 1; 476 } 477 } 478 479 static size_t 480 compute_size(uint32_t size) 481 { 482 if (size < 0x100) 483 return 1; 484 else if (size < 0x10000) 485 return 2; 486 else 487 return 4; 488 } 489 490 #define COND_FLUSH_BUFFER(n) do { \ 491 if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \ 492 ret = write(fd, buf, cur_pos); \ 493 if (ret == -1 || (size_t)ret != cur_pos) \ 494 return -1; \ 495 cur_pos = 0; \ 496 } \ 497 } while (/* CONSTCOND */ 0) 498 499 static int 500 print_hash(struct cdbw *cdbw, struct state *state, int fd, const char *descr) 501 { 502 uint32_t data_size; 503 uint8_t buf[90000]; 504 size_t i, size, size2, cur_pos; 505 ssize_t ret; 506 507 memcpy(buf, "NBCDB\n\0", 7); 508 buf[7] = 1; 509 strncpy((char *)buf + 8, descr, 16); 510 le32enc(buf + 24, cdbw->data_size); 511 le32enc(buf + 28, cdbw->data_counter); 512 le32enc(buf + 32, state->entries); 513 le32enc(buf + 36, state->seed); 514 cur_pos = 40; 515 516 size = compute_size(state->entries); 517 for (i = 0; i < state->entries; ++i) { 518 COND_FLUSH_BUFFER(4); 519 le32enc(buf + cur_pos, state->g[i]); 520 cur_pos += size; 521 } 522 size2 = compute_size(cdbw->data_size); 523 size = size * state->entries % size2; 524 if (size != 0) { 525 size = size2 - size; 526 COND_FLUSH_BUFFER(4); 527 le32enc(buf + cur_pos, 0); 528 cur_pos += size; 529 } 530 for (data_size = 0, i = 0; i < cdbw->data_counter; ++i) { 531 COND_FLUSH_BUFFER(4); 532 le32enc(buf + cur_pos, data_size); 533 cur_pos += size2; 534 data_size += cdbw->data_len[i]; 535 } 536 COND_FLUSH_BUFFER(4); 537 le32enc(buf + cur_pos, data_size); 538 cur_pos += size2; 539 540 for (i = 0; i < cdbw->data_counter; ++i) { 541 COND_FLUSH_BUFFER(cdbw->data_len[i]); 542 if (cdbw->data_len[i] < sizeof(buf)) { 543 memcpy(buf + cur_pos, cdbw->data_ptr[i], 544 cdbw->data_len[i]); 545 cur_pos += cdbw->data_len[i]; 546 } else { 547 ret = write(fd, cdbw->data_ptr[i], cdbw->data_len[i]); 548 if (ret == -1 || (size_t)ret != cdbw->data_len[i]) 549 return -1; 550 } 551 } 552 if (cur_pos != 0) { 553 ret = write(fd, buf, cur_pos); 554 if (ret == -1 || (size_t)ret != cur_pos) 555 return -1; 556 } 557 return 0; 558 } 559 560 int 561 cdbw_output(struct cdbw *cdbw, int fd, const char descr[16], 562 uint32_t (*seedgen)(void)) 563 { 564 struct state state; 565 int rv; 566 567 if (cdbw->data_counter == 0 || cdbw->key_counter == 0) { 568 state.entries = 0; 569 state.seed = 0; 570 print_hash(cdbw, &state, fd, descr); 571 return 0; 572 } 573 574 #if HAVE_NBTOOL_CONFIG_H 575 if (seedgen == NULL) 576 seedgen = cdbw_stable_seeder; 577 #else 578 if (seedgen == NULL) 579 seedgen = arc4random; 580 #endif 581 582 rv = 0; 583 584 state.keys = cdbw->key_counter; 585 state.data_entries = cdbw->data_counter; 586 state.entries = state.keys + (state.keys + 3) / 4; 587 if (state.entries < 10) 588 state.entries = 10; 589 590 #define NALLOC(var, n) var = calloc(sizeof(*var), n) 591 NALLOC(state.g, state.entries); 592 NALLOC(state.visited, state.entries); 593 NALLOC(state.verts, state.entries); 594 NALLOC(state.edges, state.entries); 595 NALLOC(state.output_order, state.keys); 596 #undef NALLOC 597 598 if (state.g == NULL || state.visited == NULL || state.verts == NULL || 599 state.edges == NULL || state.output_order == NULL) { 600 rv = -1; 601 goto release; 602 } 603 604 state.seed = 0; 605 do { 606 if (seedgen == cdbw_stable_seeder) 607 ++state.seed; 608 else 609 state.seed = (*seedgen)(); 610 } while (build_graph(cdbw, &state)); 611 612 assign_nodes(&state); 613 rv = print_hash(cdbw, &state, fd, descr); 614 615 release: 616 free(state.g); 617 free(state.visited); 618 free(state.verts); 619 free(state.edges); 620 free(state.output_order); 621 622 return rv; 623 } 624