/* udb.c - u(micro) data base.
 * By W.C.A. Wijngaards
 * Copyright 2010, NLnet Labs.
 * BSD, see LICENSE.
 */
#include "config.h"
#include "udb.h"
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include "lookup3.h"
#include "util.h"

/* mmap and friends */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>

/* for systems without, portable definition, failed-1 and async is a flag */
#ifndef MAP_FAILED
#define MAP_FAILED ((void*)-1)
#endif
#ifndef MS_SYNC
#define MS_SYNC 0
#endif

/** move and fixup xl segment */
static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
	udb_void n, uint64_t sz, uint64_t startseg);
/** attempt to compact the data and move free space to the end */
static int udb_alloc_compact(void* base, udb_alloc* alloc);

/** convert pointer to the data part to a pointer to the base of the chunk */
static udb_void
chunk_from_dataptr(udb_void data)
{
	/* we use that sizeof(udb_chunk_d) != sizeof(udb_xl_chunk_d) and
	 * that xl chunks are aligned on UDB_ALLOC_CHUNK_SIZE boundaries. */
	udb_void xl = data - sizeof(udb_xl_chunk_d);
	if( (xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
		return xl;
	return data - sizeof(udb_chunk_d);
}

udb_void chunk_from_dataptr_ext(udb_void data) {
	return chunk_from_dataptr(data);
}

#ifndef NDEBUG
/** read last octet from a chunk */
static uint8_t
chunk_get_last(void* base, udb_void chunk, int exp)
{
	return *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1));
}
#endif

/** write last octet of a chunk */
static void
chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
{
	*((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1)) = value;
}

/** create udb_base from a file descriptor (must be at start of file) */
udb_base*
udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	ssize_t r;
	udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
	if(!udb) {
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	udb->fname = strdup(fname);
	if(!udb->fname) {
		log_msg(LOG_ERR, "out of memory");
		free(udb);
		close(fd);
		return NULL;
	}
	udb->walkfunc = walkfunc;
	udb->walkarg = arg;
	udb->fd = fd;
	udb->ram_size = 1024;
	udb->ram_mask = (int)udb->ram_size - 1;
	udb->ram_hash = (udb_ptr**)xalloc_zero(sizeof(udb_ptr*)*udb->ram_size);
	if(!udb->ram_hash) {
		free(udb->fname);
		free(udb);
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}

	/* read magic */
	if((r=read(fd, &m, sizeof(m))) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(m)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* TODO : what if bigendian and littleendian file, see magic */
	if(m != UDB_MAGIC) {
		log_msg(LOG_ERR, "%s: wrong type of file", fname);
		goto fail;
	}
	/* read header */
	if((r=read(fd, &g, sizeof(g))) == -1) {
		log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(g)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.version != 0) {
		log_msg(LOG_ERR, "%s: unknown file version %d", fname,
			(int)g.version);
		goto fail;
	}
	if(g.hsize < UDB_HEADER_SIZE) {
		log_msg(LOG_ERR, "%s: header size too small %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.hsize > UDB_HEADER_SIZE) {
		log_msg(LOG_WARNING, "%s: header size too large %d", fname,
			(int)g.hsize);
		log_msg(LOG_WARNING, "attempting to continue...");
	}
	if(g.clean_close != 0) {
		log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
			(int)g.clean_close);
		log_msg(LOG_WARNING, "attempting to continue...");
	}
	/* TODO check if too large (>4g on 32bit); mmap-usage would fail */

	/* mmap it */
	if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	udb->base_size = (size_t)g.fsize;
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	udb->base = mmap(NULL, (size_t)udb->base_size,
		(int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
		(int)udb->fd, (off_t)0);
	if(udb->base == MAP_FAILED) {
		udb->base = NULL;
		log_msg(LOG_ERR, "mmap(size %u) error: %s",
			(unsigned)udb->base_size, strerror(errno));
	fail:
		close(fd);
		free(udb->fname);
		free(udb->ram_hash);
		free(udb);
		return NULL;
	}

	/* init completion */
	udb->glob_data = (udb_glob_d*)(udb->base+sizeof(uint64_t));
	r = 0;
	if(udb->glob_data->dirty_alloc != udb_dirty_clean)
		r = 1;
	udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
		(void*)udb->glob_data+sizeof(*udb->glob_data)));
	if(!udb->alloc) {
		log_msg(LOG_ERR, "out of memory");
		udb_base_free(udb);
		return NULL;
	}
	if(r) {
		/* and compact now, or resume compacting */
		udb_alloc_compact(udb, udb->alloc);
		udb_base_sync(udb, 1);
	}

	return udb;
}

udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	int fd = open(fname, O_RDWR);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}

/** init new udb_global structure */
static void udb_glob_init_new(udb_glob_d* g)
{
	memset(g, 0, sizeof(*g));
	g->hsize = UDB_HEADER_SIZE;
	g->fsize = UDB_HEADER_SIZE;
}

/** write data to file and check result */
static int
write_fdata(const char* fname, int fd, void* data, size_t len)
{
	ssize_t w;
	if((w=write(fd, data, len)) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		close(fd);
		return 0;
	} else if(w != (ssize_t)len) {
		log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
		close(fd);
		return 0;
	}
	return 1;
}
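/*
 * Rough usage sketch (not part of this file; my_walkfunc is a hypothetical
 * application-supplied udb_walk_relptr_func that lists the relative
 * pointers inside application chunks):
 *
 *	udb_base* u = udb_base_create_new("example.udb", my_walkfunc, NULL);
 *	if(!u) { ... }
 *	...
 *	udb_base_free(u);
 *
 * An existing file is opened with udb_base_create_read instead.
 */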
udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	udb_alloc_d a;
	uint64_t endsize = UDB_HEADER_SIZE;
	uint64_t endexp = 0;
	int fd = open(fname, O_CREAT|O_RDWR, 0600);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	m = UDB_MAGIC;
	udb_glob_init_new(&g);
	udb_alloc_init_new(&a);

	/* write new data to file (closes fd on error) */
	if(!write_fdata(fname, fd, &m, sizeof(m)))
		return NULL;
	if(!write_fdata(fname, fd, &g, sizeof(g)))
		return NULL;
	if(!write_fdata(fname, fd, &a, sizeof(a)))
		return NULL;
	if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
		return NULL;
	if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
		return NULL;
	/* rewind to start */
	if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
		close(fd);
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}

/** shrink the udb base if it has unused space at the end */
static void
udb_base_shrink(udb_base* udb, uint64_t nsize)
{
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	udb->glob_data->fsize = nsize;
	/* sync, does not *seem* to be required on Linux, but it is
	 * certainly required on OpenBSD.  Otherwise changed data is lost. */
	msync(udb->base, udb->base_size, MS_ASYNC);
	if(ftruncate(udb->fd, (off_t)nsize) != 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
			(unsigned)nsize, strerror(errno));
	}
	udb->glob_data->dirty_alloc = udb_dirty_clean;
}

void udb_base_close(udb_base* udb)
{
	if(!udb)
		return;
	if(udb->fd != -1 && udb->base && udb->alloc) {
		uint64_t nsize = udb->alloc->disk->nextgrow;
		if(nsize < udb->base_size)
			udb_base_shrink(udb, nsize);
	}
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	if(udb->base) {
		if(munmap(udb->base, udb->base_size) == -1) {
			log_msg(LOG_ERR, "munmap: %s", strerror(errno));
		}
		udb->base = NULL;
	}
}

void udb_base_free(udb_base* udb)
{
	if(!udb)
		return;
	udb_base_close(udb);
	udb_alloc_delete(udb->alloc);
	free(udb->ram_hash);
	free(udb->fname);
	free(udb);
}

void udb_base_free_keep_mmap(udb_base* udb)
{
	if(!udb) return;
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	udb->base = NULL;
	udb_alloc_delete(udb->alloc);
	free(udb->ram_hash);
	free(udb->fname);
	free(udb);
}

void udb_base_sync(udb_base* udb, int wait)
{
	if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
		log_msg(LOG_ERR, "msync(%s) error %s",
			udb->fname, strerror(errno));
	}
}

/** hash a chunk pointer */
static uint32_t
chunk_hash_ptr(udb_void p)
{
	/* put p into an array of uint32 */
	uint32_t h[sizeof(p)/sizeof(uint32_t)];
	memcpy(&h, &p, sizeof(h));
	return hashword(h, sizeof(p)/sizeof(uint32_t), 0x8763);
}

/** check that the given pointer is on the bucket for the given offset */
int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
{
	uint32_t i = chunk_hash_ptr(to) & udb->ram_mask;
	udb_ptr* p;
	assert((size_t)i < udb->ram_size);
	for(p = udb->ram_hash[i]; p; p=p->next) {
		if(p == ptr)
			return 1;
	}
	return 0;
}

/** grow the ram array */
static void
grow_ram_hash(udb_base* udb, udb_ptr** newhash)
{
	size_t i;
	size_t osize = udb->ram_size;
	udb_ptr* p, *np;
	udb_ptr** oldhash = udb->ram_hash;
	udb->ram_size *= 2;
	udb->ram_mask <<= 1;
	udb->ram_mask |= 1;
	udb->ram_hash = newhash;
	/* have to link in every element in the old list into the new list*/
	for(i=0; i<osize; i++) {
		p = oldhash[i];
		while(p) {
			np = p->next;
			/* link into newhash */
			p->prev=NULL;
			p->next=newhash[chunk_hash_ptr(p->data)&udb->ram_mask];
			if(p->next) p->next->prev = p;
			/* go to next element of oldhash */
			p = np;
		}
	}
	free(oldhash);
}
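/* Note: the ram_hash above is an open hash (chained buckets) of udb_ptr
 * structures that live in process RAM, keyed by the chunk offset they
 * point at.  It exists so that when a chunk moves (compaction, realloc)
 * the RAM-side pointers can be found and updated, see
 * udb_base_ram_ptr_edit below. */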
void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
{
	uint32_t i;
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* must be to whole chunk*/
#endif
	udb->ram_num++;
	if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0xefffffff) {
		/* grow the array, if allocation succeeds */
		udb_ptr** newram = (udb_ptr**)xalloc_zero(sizeof(udb_ptr*)*
			udb->ram_size*2);
		if(newram) {
			grow_ram_hash(udb, newram);
		}
	}
	/* compute the bucket after a possible grow, so the mask is right */
	i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
	assert((size_t)i < udb->ram_size);
	ptr->prev = NULL;
	ptr->next = udb->ram_hash[i];
	udb->ram_hash[i] = ptr;
	if(ptr->next)
		ptr->next->prev = ptr;
}

void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
{
	assert(ptr->data);
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* ptr must be inited */
	assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
#endif
	udb->ram_num--;
	if(ptr->next)
		ptr->next->prev = ptr->prev;
	if(ptr->prev)
		ptr->prev->next = ptr->next;
	else {
		uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
		assert((size_t)i < udb->ram_size);
		udb->ram_hash[i] = ptr->next;
	}
}

/** change a set of ram ptrs to a new value */
static void
udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
{
	uint32_t io = chunk_hash_ptr(old) & udb->ram_mask;
	udb_ptr* p, *np;
	/* edit them and move them into the new position */
	p = udb->ram_hash[io];
	while(p) {
		np = p->next;
		if(p->data == old) {
			udb_base_unlink_ptr(udb, p);
			p->data = newd;
			udb_base_link_ptr(udb, p);
		}
		p = np;
	}
}

udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
{
	return &udb->glob_data->user_global;
}

void udb_base_set_userdata(udb_base* udb, udb_void user)
{
#ifdef UDB_CHECK
	if(user) { assert(udb_valid_dataptr(udb, user)); }
#endif
	udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
}

void udb_base_set_userflags(udb_base* udb, uint8_t v)
{
	udb->glob_data->userflags = v;
}

uint8_t udb_base_get_userflags(udb_base* udb)
{
	return udb->glob_data->userflags;
}
/** re-mmap the udb to specified size */
static void*
udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
{
	void* nb;
	/* for use with valgrind, do not use mremap, but the other version */
#ifdef MREMAP_MAYMOVE
	nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	}
#else /* !HAVE MREMAP */
	/* use munmap-mmap to simulate mremap */
	if(munmap(udb->base, udb->base_size) != 0) {
		log_msg(LOG_ERR, "munmap(%s) error %s",
			udb->fname, strerror(errno));
	}
	/* provide hint for new location */
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
		(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	/* retry the mmap without basept in case of ENOMEM (FreeBSD8),
	 * the kernel can then try to mmap it at a different location
	 * where more memory is available */
	if(nb == MAP_FAILED && errno == ENOMEM) {
		nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
			(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	}
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		udb->base = NULL;
		return 0;
	}
#endif /* HAVE MREMAP */
	if(nb != udb->base) {
		/* fix up realpointers in udb and alloc */
		/* but mremap may have been nice and not move the base */
		udb->base = nb;
		udb->glob_data = (udb_glob_d*)(nb+sizeof(uint64_t));
		/* use passed alloc pointer because the udb->alloc may not
		 * be initialized yet */
		alloc->disk = (udb_alloc_d*)((void*)udb->glob_data
			+sizeof(*udb->glob_data));
	}
	udb->base_size = nsize;
	return nb;
}

void
udb_base_remap_process(udb_base* udb)
{
	/* assume that fsize is still accessible */
	udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
}

/** grow file to specified size and re-mmap, return new base */
static void*
udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
{
	/* grow file by writing a single zero at that spot, the
	 * rest is filled in with zeroes. */
	uint8_t z = 0;
	ssize_t w;

	assert(nsize > 0);
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
#ifdef HAVE_PWRITE
	if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
#else
	if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", udb->fname, strerror(errno));
		return 0;
	}
	if((w=write(udb->fd, &z, sizeof(z))) == -1) {
#endif
		log_msg(LOG_ERR, "grow(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	} else if(w != (ssize_t)sizeof(z)) {
		log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
			udb->fname, (unsigned)nsize);
		return 0;
	}
	udb->glob_data->fsize = nsize;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	return udb_base_remap(udb, udb->alloc, nsize);
}

int udb_exp_size(uint64_t a)
{
	/* find enclosing value such that 2**x >= a */
	int x = 0;
	uint64_t i = a;
	assert(a != 0);

	i --;
	/* could optimise this with uint8* access, depends on endianness */
	/* first whole bytes */
	while( (i&(~(uint64_t)0xff)) ) {
		i >>= 8;
		x += 8;
	}
	/* now details */
	while(i) {
		i >>= 1;
		x ++;
	}
	assert( ((uint64_t)1<<x) >= a);
	assert( x==0 || ((uint64_t)1<<(x-1)) < a);
	return x;
}

int udb_exp_offset(uint64_t o)
{
	/* this means measuring the number of 0 bits on the right */
	/* so, if exp zero bits then (o&(2**x-1))==0 */
	int x = 0;
	uint64_t i = o;
	assert(o != 0);
	/* first whole bytes */
	while( (i&(uint64_t)0xff) == 0) {
		i >>= 8;
		x += 8;
	}
	/* now details */
	while( (i&(uint64_t)0x1) == 0) {
		i >>= 1;
		x ++;
	}
	assert( o % ((uint64_t)1<<x) == 0);
	assert( o % ((uint64_t)1<<(x+1)) != 0);
	return x;
}
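/* Examples (illustration only, not from the original source):
 * udb_exp_size(10) == 4, because 2**4 = 16 is the smallest power of two
 * that is >= 10; udb_exp_size(16) == 4 as well.
 * udb_exp_offset(24) == 3, because 24 = 3 * 2**3 and 2**3 is the largest
 * power of two that divides 24. */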
void udb_alloc_init_new(udb_alloc_d* a)
{
	assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
	memset(a, 0, sizeof(*a));
	/* set new allocations after header, as if allocated in a sequence
	 * of minsize allocations */
	a->nextgrow = UDB_HEADER_SIZE;
}

/** fsck the file size, false if failed and file is useless */
static int
fsck_fsize(udb_base* udb, udb_alloc* alloc)
{
	off_t realsize;
	log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
	realsize = lseek(udb->fd, (off_t)0, SEEK_END);
	if(realsize == (off_t)-1) {
		log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
		return 0;
	}
	udb->glob_data->fsize = (uint64_t)realsize;
	if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
		return 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
	udb_base_sync(udb, 1);
	return 1;
}

/** regenerate freelist add a new free chunk, return next todo */
static udb_void
regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	cp->type = udb_chunk_type_free;
	cp->flags = 0;
	chunk_set_last(base, c, exp, (uint8_t)exp);
	cp->prev = 0;
	cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(cp->next)
		UDB_FREE_CHUNK(cp->next)->prev = c;
	regen->stat_free += esz;
	return c + esz;
}

/** regenerate xl chunk, return next todo */
static udb_void
regen_xl(void* base, udb_void c, udb_alloc_d* regen)
{
	udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
	uint64_t xlsz = cp->size;
	if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	if( (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	/* fixup end-size and end-expmarker */
	regen->stat_alloc += xlsz;
	return c + xlsz;
}

/** regenerate data chunk, return next todo */
static udb_void
regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	chunk_set_last(base, c, exp, (uint8_t)exp);
	regen->stat_alloc += esz;
	return c + esz;
}

/** regenerate a relptr structure inside a data segment */
static void
regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
{
	udb_void* a = (udb_void*)arg;
	/* ignore 0 pointers */
	if(!rp->data)
		return;

	/* edit relptrs that point to oldmoved to point to newmoved. */
	if(rp->data == a[0])
		rp->data = a[1];

	/* regenerate relptr lists, add this item to the relptr list for
	 * the data that it points to */
	udb_rel_ptr_link(base, rp, rp->data);
}

/** regenerate the relptrs stored in this data segment */
static void
regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
	void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
{
	udb_void arg[2];
	arg[0] = rb_old; arg[1] = rb_new;
	/* walk through the structs here and put them on their respective
	 * relptr lists */
	(*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
		&regen_relptr_func, arg);
}
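/* Note: the relptr regeneration below works in two passes over the chunks:
 * first every chunk's ptrlist head is cleared, then every relptr found by
 * the walkfunc is relinked onto the list of the chunk it points to (and
 * redirected from rb_old to rb_new if a move was in progress). */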
/** regenerate relptrlists in the file */
static void
regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
	udb_void rb_old, udb_void rb_new)
{
	udb_void at = alloc->udb->glob_data->hsize;
	/* clear all ptrlist start pointers in the file. */
	while(at < alloc->disk->nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(exp == UDB_EXP_XL) {
			UDB_XL_CHUNK(at)->ptrlist = 0;
			at += UDB_XL_CHUNK(at)->size;
		} else if(tp == udb_chunk_type_free) {
			at += (uint64_t)1<<exp;
		} else { /* data chunk */
			UDB_CHUNK(at)->ptrlist = 0;
			at += (uint64_t)1<<exp;
		}
	}
	/* walk through all relptr structs and put on the right list. */
	at = alloc->udb->glob_data->hsize;
	while(at < alloc->disk->nextgrow) {
		udb_chunk_d* atp = UDB_CHUNK(at);
		int exp = (int)atp->exp;
		udb_chunk_type tp = (udb_chunk_type)atp->type;
		uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
			(uint64_t)1<<exp);
		if(exp == UDB_EXP_XL) {
			assert(at != rb_old); /* should have been freed */
			regen_its_ptrs(base, udb, atp,
				((void*)atp)+sizeof(udb_xl_chunk_d),
				sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
				rb_old, rb_new);
			at += sz;
		} else if(tp == udb_chunk_type_free) {
			at += sz;
		} else { /* data chunk */
			assert(at != rb_old); /* should have been freed */
			regen_its_ptrs(base, udb, atp,
				((void*)atp)+sizeof(udb_chunk_d),
				sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
			at += sz;
		}
	}
}

/** mark free elements from ex XL chunk space and later fixups pick that up */
static void
rb_mark_free_segs(void* base, udb_void s, uint64_t m)
{
	udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
	/* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
	assert(s >= UDB_ALLOC_CHUNK_SIZE);
	while(q >= s) {
		UDB_CHUNK(q)->exp = UDB_ALLOC_CHUNKS_MAX;
		UDB_CHUNK(q)->type = udb_chunk_type_free;
		q -= UDB_ALLOC_CHUNK_SIZE;
	}
}
/** fsck rollback or rollforward XL move results */
static int
fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
	uint64_t rb_size, uint64_t rb_seg)
{
	if(rb_old <= rb_new)
		return 0; /* XL move one way */
	if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if(rb_new + rb_size <= rb_old) {
		/* not overlapping: resume copy */
		memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
		/* and free up old piece(s) */
		rb_mark_free_segs(base, rb_old, rb_size);
	} else {
		/* overlapping, see what segment we stopped at
		 * and continue there. */
		move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
		/* free up old piece(s); from the end of the moved segment,
		 * until the end of the old segment */
		rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
			(rb_new+rb_size));
	}
	/* do not call fix_ptrs, regenptrs does the job */
	return 1;
}

/** fsck rollback or rollforward move results */
static int
fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
	udb_void* make_free)
{
	if( (rb_size&(rb_size-1)) != 0)
		return 0; /* not powerof2 */
	if( (rb_old&(rb_size-1)) != 0)
		return 0; /* not aligned */
	if( (rb_new&(rb_size-1)) != 0)
		return 0; /* not aligned */
	/* resume copy */
	memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
	/* do not call fix_ptrs, regenptrs does the job */
	/* make sure udb_old is freed */
	*make_free = rb_old;
	return 1;
}

/** fsck the file and salvage, false if failed and file is useless */
static int
fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
{
	void* base = udb->base;
	udb_alloc_d regen;
	udb_void at = udb->glob_data->hsize;
	udb_void rb_old = udb->glob_data->rb_old;
	udb_void rb_new = udb->glob_data->rb_new;
	udb_void rb_seg = udb->glob_data->rb_seg;
	udb_void make_free = 0;
	uint64_t rb_size = udb->glob_data->rb_size;
	log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
	/* walk through the file, use the exp values to see what can be
	 * salvaged */
	if(moved && rb_old && rb_new && rb_size) {
		if(rb_old+rb_size <= alloc->disk->nextgrow
			&& rb_new+rb_size <= alloc->disk->nextgrow) {
			/* we can use the move information to fix up the
			 * duplicate element (or partially moved element) */
			if(rb_size > 1024*1024) {
				/* XL chunk */
				if(!fsck_rb_xl(base, udb, rb_old, rb_new,
					rb_size, rb_seg))
					return 0;
			} else {
				if(!fsck_rb(base, rb_old, rb_new, rb_size,
					&make_free))
					return 0;
			}
		}
	}

	/* rebuild freelists */
	/* recalculate stats in alloc (except 'stat_data') */
	/* possibly new end 'nextgrow' value */
	memset(&regen, 0, sizeof(regen));
	regen.nextgrow = alloc->disk->nextgrow;
	while(at < regen.nextgrow) {
		/* figure out this chunk */
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		/* consistency check possible here with end-exp */
		if(tp == udb_chunk_type_free || at == make_free) {
			at = regen_free(base, at, exp, &regen);
			if(!at) return 0;
		} else if(exp == UDB_EXP_XL) {
			/* allocated data of XL size */
			at = regen_xl(base, at, &regen);
			if(!at) return 0;
		} else if(exp >= UDB_ALLOC_CHUNK_MINEXP
			&& exp <= UDB_ALLOC_CHUNKS_MAX) {
			/* allocated data */
			at = regen_data(base, at, exp, &regen);
			if(!at) return 0;
		} else {
			/* garbage; this must be EOF then */
			regen.nextgrow = at;
			break;
		}
	}
	*alloc->disk = regen;

	/* rebuild relptr lists */
	regen_ptrlist(base, udb, alloc, rb_old, rb_new);

	log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
		udb->fname);
	udb->glob_data->rb_old = 0;
	udb->glob_data->rb_new = 0;
	udb->glob_data->rb_size = 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	udb_base_sync(udb, 1);
	return 1;
}
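/* Recovery note: dirty_alloc records which on-disk invariant may have been
 * violated when the process stopped: udb_dirty_fsize (file size does not
 * match the header), udb_dirty_fl (freelists possibly inconsistent) and
 * udb_dirty_compact (a chunk move was in progress; rb_old, rb_new, rb_size
 * describe it).  udb_alloc_create below repairs each of these cases. */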
udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
{
	udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
	if(!alloc)
		return NULL;
	alloc->udb = udb;
	alloc->disk = disk;
	/* see if committed but uncompleted actions need to be done */
	/* preserves the alloc state */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
		if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
			if(fsck_fsize(udb, alloc))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
			if(fsck_file(udb, alloc, 0))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
			if(fsck_file(udb, alloc, 1))
				return alloc;
		}
		log_msg(LOG_ERR, "error: file allocation dirty (%d)",
			(int)udb->glob_data->dirty_alloc);
		free(alloc);
		return NULL;
	}
	return alloc;
}

void udb_alloc_delete(udb_alloc* alloc)
{
	if(!alloc) return;
	free(alloc);
}

/** unlink this element from its freelist */
static void
udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
	assert(chunk);
	/* chunk is a free chunk */
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
	/* and thus freelist not empty */
	assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
	/* unlink */
	if(fp->prev)
		UDB_FREE_CHUNK(fp->prev)->next = fp->next;
	else	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
}

/** pop first element off freelist, list may not be empty */
static udb_void
udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
{
	udb_void f = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
	if(fp->next) {
		UDB_FREE_CHUNK(fp->next)->prev = 0;
	}
	return f;
}

/** push new element onto freelist */
static void
udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	fp->exp = (uint8_t)exp;
	fp->type = udb_chunk_type_free;
	fp->flags = 0;
	fp->prev = 0;
	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = f;
	chunk_set_last(base, f, exp, (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}

/** push new element onto freelist - do not initialize the elt */
static void
udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
	fp->prev = 0;
	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = f;
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}
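/* Worked example for grow_align below (illustration only): if nextgrow is
 * 0x3000 and the wanted alignment esz is 0x4000, then one free chunk of
 * 2**12 = 0x1000 bytes is pushed at offset 0x3000 and nextgrow becomes
 * 0x4000, which is aligned, so the loop stops. */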
/** add free chunks at end until specified alignment occurs */
static void
grow_align(void* base, udb_alloc* alloc, uint64_t esz)
{
	while( (alloc->disk->nextgrow & (esz-1)) != 0) {
		/* the nextgrow is not a whole multiple of esz. */
		/* grow a free chunk of max allowed size */
		int fexp = udb_exp_offset(alloc->disk->nextgrow);
		uint64_t fsz = (uint64_t)1<<fexp;
		udb_void f = alloc->disk->nextgrow;
		udb_void fn = alloc->disk->nextgrow+fsz;
		assert(fn <= alloc->udb->base_size);
		alloc->disk->stat_free += fsz;
		udb_alloc_push_fl(base, alloc, f, fexp);
		/* now increase nextgrow to commit that free chunk */
		alloc->disk->nextgrow = fn;
	}
}

/** append chunks at end of memory space to get size exp, return dataptr */
static udb_void
grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	uint64_t esz = (uint64_t)1<<exp;
	udb_void ret;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	grow_align(base, alloc, esz);
	/* free chunks are grown, grow the one we want to use */
	ret = alloc->disk->nextgrow;
	/* take a new alloced chunk into use */
	UDB_CHUNK(ret)->exp = (uint8_t)exp;
	UDB_CHUNK(ret)->flags = 0;
	UDB_CHUNK(ret)->ptrlist = 0;
	UDB_CHUNK(ret)->type = udb_chunk_type_data;
	/* store last octet */
	chunk_set_last(base, ret, exp, (uint8_t)exp);
	/* update stats */
	alloc->disk->stat_alloc += esz;
	alloc->disk->stat_data += sz;
	/* now increase nextgrow to commit this newly allocated chunk */
	alloc->disk->nextgrow += esz;
	assert(alloc->disk->nextgrow <= alloc->udb->base_size);
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_chunk_d); /* ptr to data */
}

/** calculate how much space is necessary to grow for this exp */
static uint64_t
grow_end_calc(udb_alloc* alloc, int exp)
{
	uint64_t sz = (uint64_t)1<<exp;
	uint64_t ng = alloc->disk->nextgrow;
	uint64_t res;
	/* if nextgrow is 2**expness, no extra growth needed, only size */
	if( (ng & (sz-1)) == 0) {
		/* sz-1 is like 0xfff, and checks if ng is whole 2**exp */
		return ng+sz; /* must grow exactly 2**exp */
	}
	/* grow until 2**expness and then we need 2**exp as well */
	/* so, round ng down to whole sz (basically ng-ng%sz, or ng/sz*sz)
	 * and then add the sz twice (go up to whole sz, and to allocate) */
	res = (ng & ~(sz-1)) + 2*sz;
	return res;
}
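/* Example for grow_end_calc above (illustration only): with
 * nextgrow = 0x2800 and exp = 12 (sz = 0x1000), nextgrow is not aligned,
 * so the result is (0x2800 & ~0xfff) + 2*0x1000 = 0x4000; if nextgrow were
 * already 0x3000, the result would simply be 0x3000 + 0x1000 = 0x4000. */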
/** see if we need to grow more than specified to enable sustained growth */
static uint64_t
grow_extra_check(udb_alloc* alloc, uint64_t ge)
{
	const uint64_t mb = 1024*1024;
	uint64_t bsz = alloc->udb->base_size;
	if(bsz <= mb) {
		/* below 1 Mb, double sizes for exponential growth */
		/* takes about 15 times to grow to 1Mb */
		if(ge < bsz*2)
			return bsz*2;
	} else {
		uint64_t gnow = ge - bsz;
		/* above 1Mb, grow at least 1 Mb, or 12.5% of current size,
		 * in whole megabytes rounded up. */
		uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
		if(gnow < want)
			return bsz + want;
	}
	return ge;
}

/** see if free space is enough to warrant shrink (while file is open) */
static int
enough_free(udb_alloc* alloc)
{
	if(alloc->udb->base_size <= 2*1024*1024) {
		/* below 1 Mb, grown by double size, (so up to 2 mb),
		 * do not shrink unless we can go to 1/3 of the size */
		if(((size_t)alloc->disk->nextgrow)*3 <= alloc->udb->base_size)
			return 1;
	} else {
		/* grown 12.5%, shrink 25% if possible, at least one mb */
		/* between 1mb and 4mb size, it shrinks by 1mb if possible */
		uint64_t space = alloc->udb->base_size - alloc->disk->nextgrow;
		if(space >= 1024*1024 && (space*4 >= alloc->udb->base_size
			|| alloc->udb->base_size < 4*1024*1024))
			return 1;
	}
	return 0;
}

/** grow space for a chunk of 2**exp and return dataptr */
static udb_void
udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	/* commit the grow action
	 * - the file grow only changes filesize, but not the nextgrow.
	 * - taking space after nextgrow into use (as free space),
	 *   is like free-ing a chunk (one at a time).
	 * - and the last chunk taken into use is like alloc.
	 */
	/* predict how much free space is needed for this */
	uint64_t grow_end = grow_end_calc(alloc, exp);
	assert(alloc->udb->base_size >= alloc->disk->nextgrow);
	if(grow_end <= alloc->udb->base_size) {
		/* we can do this with the available space */
		return grow_chunks(base, alloc, sz, exp);
	}
	/* we have to grow the file, re-mmap */
	/* see if we need to grow a little more, to avoid endless grow
	 * efforts on adding data */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* we have enough space now */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_chunks(base, alloc, sz, exp);
}

/** take XL allocation into use at end of file, return dataptr */
static udb_void
grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
{
	udb_void ret;
	udb_xl_chunk_d* p;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;

	/* align growth to whole mbs */
	grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);

	/* grow XL segment */
	ret = alloc->disk->nextgrow;
	p = UDB_XL_CHUNK(ret);
	p->exp = UDB_EXP_XL;
	p->size = xlsz;
	p->flags = 0;
	p->ptrlist = 0;
	p->type = udb_chunk_type_data;

	/* also put size and marker at end for compaction */
	*((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
	*((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;

	/* stats */
	alloc->disk->stat_data += sz;
	alloc->disk->stat_alloc += xlsz;
	/* now increase the nextgrow to commit this xl chunk */
	alloc->disk->nextgrow += xlsz;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_xl_chunk_d); /* data ptr */
}
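/* XL chunk layout, as written by grow_xl above: a udb_xl_chunk_d header at
 * the start, the user data after it, and at the very end a copy of the
 * total size (uint64_t) plus the UDB_EXP_XL marker byte, so the chunk can
 * also be recognized while walking backwards from the end of the file
 * during compaction. */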
/** make space for XL allocation */
static udb_void
udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
{
	/* allocate whole mbs of space, at end of space */
	uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
	uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
	uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
	assert(need >= asz);
	if(grow_end <= alloc->udb->base_size) {
		/* can do this in available space */
		return grow_xl(base, alloc, need, sz);
	}
	/* have to grow file and re-mmap */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* we have enough space now */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_xl(base, alloc, need, sz);
}

/** divide big(2**e2) into pieces so 2**exp fits */
static udb_void
udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
	int exp)
{
	int e = e2;
	uint64_t sz = (uint64_t)1<<e2;
	assert(big && e2 > exp);
	/* so the returned piece to use is the first piece,
	 * offload the later half until it fits */
	do {
		sz >>= 1; /* divide size of big by two */
		e--;      /* that means its exp is one smaller */
		udb_alloc_push_fl(base, alloc, big+sz, e);
	} while(e != exp);
	/* exit loop when last pushed is same size as what we want */
	return big;
}

/** returns the exponent size of the chunk needed for data sz */
static int
udb_alloc_exp_needed(size_t sz)
{
	uint64_t asz = sz + sizeof(udb_chunk_d) + 1;
	if(asz > UDB_ALLOC_CHUNK_SIZE) {
		return UDB_EXP_XL;
	} else if(asz <= UDB_ALLOC_CHUNK_MINSIZE) {
		return UDB_ALLOC_CHUNK_MINEXP;
	}
	return udb_exp_size(asz);
}
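/* Example (illustration only): a request that needs a 2**8 chunk while only
 * a 2**12 free chunk exists is served by udb_alloc_subdivide above: the
 * second halves of size 2**11, 2**10, 2**9 and 2**8 are pushed onto their
 * freelists and the front 2**8 piece is returned for use. */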
udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
{
	void* base = alloc->udb->base;
	/* calculate actual allocation size */
	int e2, exp = udb_alloc_exp_needed(sz);
	if(exp == UDB_EXP_XL)
		return udb_alloc_xl_space(base, alloc, sz);
	/* see if there is a free chunk of that size exactly */
	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
		/* snip from freelist, udb_chunk_d */
		udb_void ret;
		alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
		ret = udb_alloc_pop_fl(base, alloc, exp);
		/* use it - size octets already OK */
		UDB_CHUNK(ret)->flags = 0;
		UDB_CHUNK(ret)->ptrlist = 0;
		UDB_CHUNK(ret)->type = udb_chunk_type_data;
		/* update stats */
		alloc->disk->stat_data += sz;
		alloc->disk->stat_alloc += (1<<exp);
		assert(alloc->disk->stat_free >= (1u<<exp));
		alloc->disk->stat_free -= (1<<exp);
		alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		return ret + sizeof(udb_chunk_d); /* ptr to data */
	}
	/* see if we can subdivide a larger chunk */
	for(e2 = exp+1; e2 < UDB_ALLOC_CHUNKS_MAX; e2++)
		if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
			udb_void big, ret; /* udb_chunk_d */
			alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
			big = udb_alloc_pop_fl(base, alloc, e2);
			/* push other parts onto freelists (needs inited) */
			ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
			/* use final part (needs inited) */
			UDB_CHUNK(ret)->exp = (uint8_t)exp;
			/* if we stop here, the new exp marks it as a smaller
			 * free chunk, which is consistent for recovery */
			UDB_CHUNK(ret)->flags = 0;
			UDB_CHUNK(ret)->ptrlist = 0;
			/* set type to commit data chunk */
			UDB_CHUNK(ret)->type = udb_chunk_type_data;
			/* store last octet */
			chunk_set_last(base, ret, exp, (uint8_t)exp);
			/* update stats */
			alloc->disk->stat_data += sz;
			alloc->disk->stat_alloc += (1<<exp);
			assert(alloc->disk->stat_free >= (1u<<exp));
			alloc->disk->stat_free -= (1<<exp);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			return ret + sizeof(udb_chunk_d); /* ptr to data */
		}
	/* we need to grow an extra chunk */
	return udb_alloc_grow_space(base, alloc, sz, exp);
}

/** see if there is free space to allocate a chunk into */
static int
have_free_for(udb_alloc* alloc, int exp)
{
	int e2;
	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP])
		return exp;
	for(e2 = exp+1; e2 < UDB_ALLOC_CHUNKS_MAX; e2++)
		if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
			return e2;
		}
	return 0;
}

/** fix relptr prev and next for moved relptr structures */
static void
chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
{
	udb_void* data = (udb_void*)arg;
	udb_void r;
	if(!rp->data)
		return;
	r = UDB_SYSTOREL(base, rp);
	if(rp->next)
		UDB_REL_PTR(rp->next)->prev = r;
	if(rp->prev)
		UDB_REL_PTR(rp->prev)->next = r;
	else {
		/* if this is a pointer to its own chunk, fix it up;
		 * the data ptr gets set by relptr_edit later. */
		if(rp->data == data[0])
			UDB_CHUNK(data[1])->ptrlist = r;
		else	UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
	}
}

/** fix pointers from and to a moved chunk */
static void
chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
	uint64_t dsz, udb_void olddata)
{
	udb_void d[2];
	d[0] = olddata;
	d[1] = data;
	(*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
		dsz, &chunk_fix_ptr_each, d);
	udb_rel_ptr_edit(base, cp->ptrlist, data);
	udb_base_ram_ptr_edit(udb, olddata, data);
}

/** move an allocated chunk to use a free chunk */
static void
move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
	int e2)
{
	udb_void res = udb_alloc_pop_fl(base, alloc, e2);
	udb_chunk_d* rp;
	udb_chunk_d* fp;
	if(exp != e2) {
		/* it is bigger, subdivide it */
		res = udb_alloc_subdivide(base, alloc, res, e2, exp);
	}
	assert(res != f);
	/* setup rollback information */
	alloc->udb->glob_data->rb_old = f;
	alloc->udb->glob_data->rb_new = res;
	alloc->udb->glob_data->rb_size = esz;
	/* take the res, exp into use */
	rp = UDB_CHUNK(res);
	fp = UDB_CHUNK(f);
	/* copy over the data */
	memcpy(rp, fp, esz);
	/* adjust rel ptrs */
	chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
		esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));

	/* do not freeup the fp; caller does that */
}

/** unlink several free elements to overwrite with xl chunk */
static void
free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
{
	udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
	/* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
	assert(s >= UDB_ALLOC_CHUNK_SIZE);
	while(q >= s) {
		assert(UDB_CHUNK(q)->exp == UDB_ALLOC_CHUNKS_MAX);
		assert(UDB_CHUNK(q)->type == udb_chunk_type_free);
		udb_alloc_unlink_fl(base, alloc, q, UDB_ALLOC_CHUNKS_MAX);
		q -= UDB_ALLOC_CHUNK_SIZE;
	}
}
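/* Rollback bookkeeping: before any chunk is moved, rb_old, rb_new and
 * rb_size (and rb_seg for XL moves) are stored in the global header, so
 * that fsck_file can roll an interrupted move forward after a crash; they
 * are cleared again once the move has been committed. */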
/** move an XL chunk, and keep track of segments for rollback */
static void
move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
	uint64_t sz, uint64_t startseg)
{
	udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
	udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
	uint64_t amount = xl - n;
	assert(n < xl); /* move to compact */

	/* setup move rollback */
	udb->glob_data->rb_old = xl;
	udb->glob_data->rb_new = n;
	udb->glob_data->rb_size = sz;

	/* is it overlapping? */
	if(sz <= amount) {
		memcpy(np, xlp, sz);
	} else {
		/* move and commit per 1M segment to avoid data loss */
		uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
		for(seg = startseg; seg<maxseg; seg++) {
			udb->glob_data->rb_seg = seg;
			/* byte offsets, hence the char* casts */
			memcpy((char*)np+seg*UDB_ALLOC_CHUNK_SIZE,
				(char*)xlp+seg*UDB_ALLOC_CHUNK_SIZE,
				UDB_ALLOC_CHUNK_SIZE);
		}
	}
}

/** move list of XL chunks to the front by the shift amount */
static void
move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
	uint64_t amount)
{
	udb_void xl = xl_start;
	assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
	assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
	assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
	while(xl < xl_start+xl_sz) {
		udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
		udb_void n = xl-amount;
		uint64_t sz = xlp->size;
		assert(xlp->exp == UDB_EXP_XL);
		move_xl_segment(base, alloc->udb, xl, n, sz, 0);
		chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
			n+sizeof(udb_xl_chunk_d),
			sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
			xl+sizeof(udb_xl_chunk_d));
		/* go to the next XL chunk in the (old) list */
		xl += sz;
	}
	alloc->disk->stat_free -= amount;
	alloc->disk->nextgrow -= amount;
	alloc->udb->glob_data->rb_old = 0;
	alloc->udb->glob_data->rb_new = 0;
	alloc->udb->glob_data->rb_size = 0;
}

/** see if free chunk can coagulate with another chunk, return other chunk */
static udb_void
coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
	uint64_t esz)
{
	udb_void other = f^esz;
	if(exp == UDB_ALLOC_CHUNKS_MAX)
		return 0; /* no further merges */
	if(other >= alloc->udb->base_size)
		return 0; /* not allocated */
	if(other >= alloc->disk->nextgrow)
		return 0; /* not in use */
	if(other < alloc->udb->glob_data->hsize)
		return 0; /* cannot merge with header */
	/* the header is also protected by the special exp marker */
	/* see if the other chunk is a free chunk */

	/* check closest marker to avoid large memory churn */
	/* and also it makes XL allocations and header special markers work */
	if(f > other) {
		assert(f > 1); /* this is certain because of header */
		if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
			/* can do it if the other part is a free chunk */
			assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
				return other;
		}
	} else {
		if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
			/* can do it if the other part is a free chunk */
			assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
				return other;
		}
	}
	return 0;
}
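/* Buddy example for coagulate_possible above (illustration only): the buddy
 * of a chunk is found with f ^ esz, e.g. a 0x1000-byte chunk at offset
 * 0x3000 has its buddy at 0x3000 ^ 0x1000 = 0x2000, and the chunk at 0x2000
 * has its buddy at 0x3000; merging them yields one aligned 0x2000-byte
 * chunk at 0x2000. */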
/** coagulate and then add new free segment, return final free segment */
static udb_void
coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
	uint64_t esz)
{
	/* new free chunk here, attempt coagulate */
	udb_void other;
	while( (other=coagulate_possible(base, alloc, last, exp, esz)) ) {
		/* unlink that other chunk */
		udb_alloc_unlink_fl(base, alloc, other, exp);
		/* merge up */
		if(other < last)
			last = other;
		exp++;
		esz <<= 1;
	}
	/* free the final segment */
	udb_alloc_push_fl(base, alloc, last, exp);
	return last;
}

/** attempt to compact the data and move free space to the end */
static int
udb_alloc_compact(void* base, udb_alloc* alloc)
{
	udb_void last;
	int exp, e2;
	uint64_t esz;
	uint64_t at = alloc->disk->nextgrow;
	udb_void xl_start = 0;
	uint64_t xl_sz = 0;
	while(at > alloc->udb->glob_data->hsize) {
		/* grab last entry */
		exp = (int)*((uint8_t*)UDB_REL(base, at-1));
		if(exp == UDB_EXP_XL) {
			/* for XL chunks:
			 * - inspect the size of the XLchunklist at end
			 * - attempt to compact in front of the XLchunklist
			 */
			uint64_t xlsz = *((uint64_t*)UDB_REL(base,
				at-sizeof(uint64_t)*2));
			udb_void xl = at-xlsz;
#ifndef NDEBUG
			udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
			assert(xlp->exp == UDB_EXP_XL);
			assert(xlp->type != udb_chunk_type_free);
#endif
			/* got the segment, add it to the xl chunk list */
			if(xl_start != 0 && xl+xlsz != xl_start) {
				/* nonadjoining XL part, but they are aligned,
				 * so the space in between is whole Mbs,
				 * shift the later part(s) and continue */
				uint64_t m = xl_start - (xl+xlsz);
				assert(xl_start > xl+xlsz);
				alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
				free_xl_space(base, alloc, xl+xlsz, m);
				move_xl_list(base, alloc, xl_start, xl_sz, m);
				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			}
			xl_start = xl;
			xl_sz += xlsz;
			at = xl;
			continue;
			/* end of XL if */
		} else if(exp < UDB_ALLOC_CHUNK_MINEXP
			|| exp > UDB_ALLOC_CHUNKS_MAX)
			break; /* special chunk or garbage */
		esz = (uint64_t)1<<exp;
		last = at - esz;
		assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
		if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
			/* if xlstart continue looking to move stuff, but do
			 * not unlink this free segment */
			if(!xl_start) {
				/* it is a free chunk, remove it */
				alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
				udb_alloc_unlink_fl(base, alloc, last, exp);
				alloc->disk->stat_free -= esz;
				alloc->disk->nextgrow = last;
				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
				/* and continue at this point */
			}
			at = last;
		} else if( (e2=have_free_for(alloc, exp)) ) {
			/* last entry can be allocated in free chunks
			 * move it to its new position, adjust rel_ptrs */
			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
			move_chunk(base, alloc, last, exp, esz, e2);
			if(xl_start) {
				last = coagulate_and_push(base, alloc,
					last, exp, esz);
			} else {
				/* shorten usage */
				alloc->disk->stat_free -= esz;
				alloc->disk->nextgrow = last;
			}
			alloc->udb->glob_data->rb_old = 0;
			alloc->udb->glob_data->rb_new = 0;
			alloc->udb->glob_data->rb_size = 0;
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			/* and continue in front of it */
			at = last;
		} else {
			/* cannot compact this block, stop compacting */
			break;
		}
		/* if that worked, repeat it */
	}
	/* if we passed xl chunks, see if XL-chunklist can move */
	if(xl_start) {
		/* calculate free space in front of the XLchunklist. */
		/* has to be whole mbs of free space */
		/* if so, we can move the XL chunks. Move them all back
		 * by the new free space. */
		/* this compacts very well, but the XL chunks can be moved
		 * multiple times; worst case for every mb freed a huge sized
		 * xlchunklist gets moved. */
		/* free space must be, since aligned and coagulated, in
		 * chunks of a whole MB */
		udb_void at = xl_start;
		uint64_t m = 0;
		while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
			udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
			if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
				break;
			assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
			m += UDB_ALLOC_CHUNK_SIZE;
			at = chunk;
		}
		if(m != 0) {
			assert(at+m == xl_start);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
			free_xl_space(base, alloc, at, m);
			move_xl_list(base, alloc, xl_start, xl_sz, m);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		}
	}

	/* if enough free, shrink the file; re-mmap */
	if(enough_free(alloc)) {
		uint64_t nsize = alloc->disk->nextgrow;
		udb_base_shrink(alloc->udb, nsize);
		if(!udb_base_remap(alloc->udb, alloc, nsize))
			return 0;
	}
	return 1;
}

#ifdef UDB_CHECK
/** check that rptrs are really zero before free */
void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
{
	(void)base;
	(void)arg;
	assert(p->data == 0);
}
#endif /* UDB_CHECK */

/** free XL chunk as multiples of CHUNK_SIZE free segments */
static void
udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
	size_t sz)
{
	uint64_t xlsz = fp->size;
	uint64_t c;
	/* lightweight check for buffer overflow in xl data */
	assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
	assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
	assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* whole mbs */
	assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
#ifdef UDB_CHECK
	/* check that relptrs in this chunk have been zeroed */
	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
		UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
		&udb_check_rptr_zero, NULL);
#endif
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	/* update stats */
	alloc->disk->stat_data -= sz;
	alloc->disk->stat_alloc -= xlsz;
	alloc->disk->stat_free += xlsz;
	/* walk in reverse, so the front blocks go first on the list */
	c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
	/* because of header and alignment we know f >= UDB_ALLOC_CHUNK_SIZE*/
	assert(f >= UDB_ALLOC_CHUNK_SIZE);
	while(c >= f) {
		/* free a block of CHUNK_SIZE (1 Mb) */
		udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
		c -= UDB_ALLOC_CHUNK_SIZE;
	}
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
{
	void* base;
	/* lookup chunk ptr */
	udb_void f;
	udb_chunk_d* fp;
	uint64_t esz;
	int exp;
	udb_void other;
	int coagulated = 0;
	if(!r)
		return 1; /* free(NULL) does nothing */

	/* lookup size of chunk */
	base = alloc->udb->base;
	/* fails for XL blocks */
	f = chunk_from_dataptr(r);
	fp = UDB_CHUNK(f);
	assert(fp->type != udb_chunk_type_free);

	/* see if it has a ptrlist, if so: trouble, the list is not properly
	 * cleaned up. (although you can imagine a wholesale delete where
	 * it does not matter) */
	assert(fp->ptrlist == 0);

	/* set ptrlist to 0 to stop relptr from using it, robustness. */
	fp->ptrlist = 0;

	if(fp->exp == UDB_EXP_XL) {
		udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
		/* compact */
		return udb_alloc_compact(base, alloc);
	}
	/* it is a regular chunk of 2**exp size */
	exp = (int)fp->exp;
	esz = (uint64_t)1<<exp;
	/* light check for e.g. buffer overflow of the data */
	assert(sz < esz);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
#ifdef UDB_CHECK
	/* check that relptrs in this chunk have been zeroed */
	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
		UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
#endif

	/* update the stats */
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	alloc->disk->stat_data -= sz;
	alloc->disk->stat_free += esz;
	alloc->disk->stat_alloc -= esz;

	/* if it can be merged with other free chunks, do so */
	while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
		coagulated = 1;
		/* unlink that other chunk and expand it (it has same size) */
		udb_alloc_unlink_fl(base, alloc, other, exp);
		/* merge up */
		if(other < f)
			f = other;
		exp++;
		esz <<= 1;
	}
	if(coagulated) {
		/* put big free chunk into freelist, and init it */
		udb_alloc_push_fl(base, alloc, f, exp);
	} else {
		/* we do not need to touch the last-exp-byte, which may save
		 * a reference to that page of memory */
		fp->type = udb_chunk_type_free;
		fp->flags = 0;
		udb_alloc_push_fl_noinit(base, alloc, f, exp);
	}
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	/* compact */
	return udb_alloc_compact(base, alloc);
}
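/* Rough usage sketch for the allocator (illustration only, mydata and the
 * sizes are made up):
 *
 *	udb_void d = udb_alloc_space(alloc, 100);
 *	if(d) {
 *		memcpy(UDB_REL(alloc->udb->base, d), mydata, 100);
 *		...
 *		udb_alloc_free(alloc, d, 100);
 *	}
 *
 * The same size must be passed to udb_alloc_free, and the relptrs inside
 * the chunk must already be zero (see the asserts above). */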
udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
{
	/* could be faster maybe, if grown? */
	udb_void r = udb_alloc_space(alloc, sz);
	if(!r) return r;
	memcpy(UDB_REL(alloc->udb->base, r), d, sz);
	return r;
}

udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
{
	void* base = alloc->udb->base;
	udb_void c, n, newd;
	udb_chunk_d* cp, *np;
	uint64_t avail;
	uint8_t cp_type;
	/* emulate some posix realloc stuff */
	if(r == 0)
		return udb_alloc_space(alloc, sz);
	if(sz == 0) {
		if(!udb_alloc_free(alloc, r, osz))
			log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
		return 0;
	}
	c = chunk_from_dataptr(r);
	cp = UDB_CHUNK(c);
	cp_type = cp->type;
	if(cp->exp == UDB_EXP_XL) {
		avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
			- sizeof(uint64_t)*2;
	} else {
		avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
	}
	if(sz <= avail)
		return r;
	/* reallocate it, and copy */
	newd = udb_alloc_space(alloc, sz);
	if(!newd) return 0;
	/* re-base after alloc, since re-mmap may have happened */
	base = alloc->udb->base;
	cp = NULL; /* may be invalid now, robustness */
	n = chunk_from_dataptr(newd);
	np = UDB_CHUNK(n);
	np->type = cp_type;
	memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
	/* fixup ptrs */
	chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);

	if(!udb_alloc_free(alloc, r, osz))
		log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
	return newd;
}

int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
{
	const uint64_t mb = 1024*1024;
	int exp = udb_alloc_exp_needed(sz);
	uint64_t esz;
	uint64_t want;
	if(exp == UDB_EXP_XL)
		esz = (sz&(mb-1))+mb;
	else	esz = (uint64_t)1<<exp;
	/* we need grow_end_calc to take into account alignment */
	want = grow_end_calc(alloc, exp) + esz*(num-1);
	assert(want >= alloc->udb->base_size);
	if(!udb_base_grow_and_remap(alloc->udb, want)) {
		log_msg(LOG_ERR, "failed to grow the specified amount");
		return 0;
	}
	return 1;
}

void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
{
	void* base = alloc->udb->base;
	udb_void f = chunk_from_dataptr(r);
	udb_chunk_d* fp = UDB_CHUNK(f);
	/* not the 'free' type, that must be set by allocation routines */
	assert(fp->type != udb_chunk_type_free);
	assert(tp != udb_chunk_type_free);
	fp->type = tp;
}

int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
{
	/* pointers are not valid before the header-size or after the
	 * used-region of the mmap */
	return ( (to+destsize) <= udb->base_size &&
		to >= (udb->glob_data->hsize-2*sizeof(udb_rel_ptr)) &&
		(to+destsize) <= udb->alloc->disk->nextgrow);
}

int udb_valid_dataptr(udb_base* udb, udb_void to)
{
	void* base = udb->base;
	udb_void ch;
	int exp;
	uint64_t esz;
	/* our data chunks are aligned and at least 8 bytes */
	if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
		return 0;
	/* get the chunk pointer */
	ch = chunk_from_dataptr(to);
	if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
		return 0;
	/* check its size */
	exp = UDB_CHUNK(ch)->exp;
	if(exp == UDB_EXP_XL) {
		/* check XL chunk: its size is repeated at the end */
		uint64_t xlsz;
		if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
			return 0;
		xlsz = UDB_XL_CHUNK(ch)->size;
		if(!udb_valid_offset(udb, ch+xlsz-1, 1))
			return 0;
		if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
			return 0;
		if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
			!= xlsz)
			return 0;
		return 1;
	}
	/* check if the regular chunk has a matching end byte */
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
		return 0; /* cannot be a valid chunk */
	esz = 1<<exp;
	if(!udb_valid_offset(udb, ch+esz-1, 1))
		return 0;
	if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
		return 0;
	return 1;
}

int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
{
	void* base = udb->base;
	udb_void p;
	if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
		return 0;
	if(!udb_valid_dataptr(udb, to))
		return 0;
	/* the rptr must be on the ptrlist of the chunk it points to */
	p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist;
	while(p) {
		if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
			return 0;
		if(p == rptr)
			return 1;
		p = UDB_REL_PTR(p)->next;
	}
	return 0;
}

void udb_rel_ptr_init(udb_rel_ptr* ptr)
{
	memset(ptr, 0, sizeof(*ptr));
}

void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
{
	if(!ptr->data)
		return;
	/* remove the ptr from the ptrlist of the chunk it points to */
	if(ptr->prev) {
		UDB_REL_PTR(ptr->prev)->next = ptr->next;
	} else {
		UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist = ptr->next;
	}
	if(ptr->next) {
		UDB_REL_PTR(ptr->next)->prev = ptr->prev;
	}
}

void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
{
	/* insert the ptr at the head of the destination chunk's ptrlist */
	udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
	ptr->prev = 0;
	ptr->next = chunk->ptrlist;
	if(ptr->next)
		UDB_REL_PTR(ptr->next)->prev = UDB_SYSTOREL(base, ptr);
	chunk->ptrlist = UDB_SYSTOREL(base, ptr);
	ptr->data = to;
}

void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
{
	assert(to == 0 || to > 64);
	udb_rel_ptr_unlink(base, ptr);
	if(to)
		udb_rel_ptr_link(base, ptr, to);
	else ptr->data = to;
}

void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
{
	/* make every relptr on the list refer to the new location */
	udb_void p = list;
	while(p) {
		UDB_REL_PTR(p)->data = to;
		p = UDB_REL_PTR(p)->next;
	}
}

#ifdef UDB_CHECK
/** check that all pointers are validly chained */
static void
udb_check_ptrs_valid(udb_base* udb)
{
	size_t i;
	udb_ptr* p, *prev;
	for(i=0; i<udb->ram_size; i++) {
		prev = NULL;
		for(p=udb->ram_hash[i]; p; p=p->next) {
			assert(p->prev == prev);
			assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
				== i);
			assert(p->base == &udb->base);
			prev = p;
		}
	}
}
#endif /* UDB_CHECK */

void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
{
#ifdef UDB_CHECK
	udb_check_ptrs_valid(udb); /* previous ptrs have been unlinked */
#endif
	memset(ptr, 0, sizeof(*ptr));
	ptr->base = &udb->base;
}

void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
{
	assert(newval == 0 || newval > 64);
	if(ptr->data)
		udb_base_unlink_ptr(udb, ptr);
	ptr->data = newval;
	if(newval)
		udb_base_link_ptr(udb, ptr);
}
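
/*
 * Sketch of the relptr discipline implemented above (editor's addition,
 * not in the original source): a udb_rel_ptr stored inside the mmap is
 * always changed through udb_rel_ptr_set(), which keeps it linked on the
 * ptrlist of the chunk it refers to; when that chunk later moves, the
 * mover can walk the list with udb_rel_ptr_edit() and repoint every
 * referrer.  The parent_field and child offsets below are hypothetical.
 *
 *	udb_rel_ptr* rp = (udb_rel_ptr*)UDB_REL(udb->base, parent_field);
 *	udb_rel_ptr_set(udb->base, rp, child);   unlinks the old target, links the new one
 *	...
 *	udb_rel_ptr_set(udb->base, rp, 0);       cleared before the child chunk is freed
 */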

int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
	size_t sz)
{
	udb_void r;
	r = udb_alloc_space(udb->alloc, sz);
	if(!r) return 0;
	udb_alloc_set_type(udb->alloc, r, type);
	udb_ptr_init(ptr, udb);
	udb_ptr_set(ptr, udb, r);
	return 1;
}

void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
{
	if(ptr->data) {
		udb_void d = ptr->data;
		udb_ptr_set(ptr, udb, 0);
		udb_alloc_free(udb->alloc, d, sz);
	}
}

udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
{
	udb_void f;
	if(!ptr || ptr->data == 0) return udb_chunk_type_internal; /* something bad */
	f = chunk_from_dataptr(ptr->data);
	return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
}
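
/*
 * Usage sketch for the udb_ptr helpers above (editor's addition, not in
 * the original source): a udb_ptr is a RAM-side handle that the udb_base
 * keeps in its ram_hash, so pointers held in RAM can be found and updated
 * when the data they refer to moves.  It should be set back to 0 with
 * udb_ptr_set() (which unlinks it) before it goes out of scope;
 * udb_ptr_free_space() does that itself.  my_type is a hypothetical
 * application chunk type.
 *
 *	udb_ptr p;
 *	if(udb_ptr_alloc_space(&p, udb, my_type, 128)) {
 *		memset(UDB_REL(udb->base, p.data), 0, 128);
 *		...
 *		udb_ptr_free_space(&p, udb, 128);
 *	}
 */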