/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/diskslice.h>
#include <sys/diskmbr.h>

#include "hammer_util.h"

static void check_volume(struct volume_info *vol);
static void get_buffer_readahead(struct buffer_info *base);
static void *get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp,
			int isnew);
static __inline int readhammervol(struct volume_info *vol);
static __inline int readhammerbuf(struct buffer_info *buf);
static __inline int writehammervol(struct volume_info *vol);
static __inline int writehammerbuf(struct buffer_info *buf);

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int UseReadBehind = -4;
int UseReadAhead = 4;
int DebugOpt;

TAILQ_HEAD(volume_list, volume_info);
static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
static int valid_hammer_volumes;

static __inline
int
buffer_hash(hammer_off_t buf_offset)
{
	int hi;

	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}

static struct buffer_info*
find_buffer(struct volume_info *volume, hammer_off_t buf_offset)
{
	int hi;
	struct buffer_info *buf;

	hi = buffer_hash(buf_offset);
	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry)
		if (buf->buf_offset == buf_offset)
			return(buf);
	return(NULL);
}

static
struct volume_info *
__alloc_volume(const char *volname, int oflags)
{
	struct volume_info *vol;
	int i;

	vol = malloc(sizeof(*vol));
	if (vol == NULL)
		err(1, "alloc_volume");
	bzero(vol, sizeof(*vol));

	vol->vol_no = -1;
	vol->rdonly = (oflags == O_RDONLY);
	vol->name = strdup(volname);
	vol->fd = open(vol->name, oflags);
	if (vol->fd < 0)
		err(1, "alloc_volume: Failed to open %s", vol->name);
	check_volume(vol);

	vol->ondisk = malloc(HAMMER_BUFSIZE);
	if (vol->ondisk == NULL)
		err(1, "alloc_volume");
	bzero(vol->ondisk, HAMMER_BUFSIZE);

	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&vol->buffer_lists[i]);

	return(vol);
}
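
/*
 * Illustrative example (not part of the original source): how a buffer
 * offset maps into the per-volume hash lists initialized above, assuming
 * HAMMER_BUFSIZE is 16384 (the 16KB filesystem buffer size noted in
 * check_volume()) and HAMMER_BUFLISTMASK is HAMMER_BUFLISTS - 1:
 *
 *	buf_offset = 0x0000000000030000
 *	0x30000 / 16384 = 12		(buffer index within the volume)
 *	12 & HAMMER_BUFLISTMASK = 12	(with 64 lists the mask is 63)
 *
 * Consecutive 16KB buffers thus land on consecutive lists, spreading
 * sequential scans evenly across the hash table.
 */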
static void
__add_volume(struct volume_info *vol)
{
	struct volume_info *scan;
	struct stat st1, st2;

	if (fstat(vol->fd, &st1) != 0)
		errx(1, "add_volume: %s: Failed to stat", vol->name);

	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == vol->vol_no) {
			errx(1, "add_volume: %s: Duplicate volume number %d "
				"against %s",
				vol->name, vol->vol_no, scan->name);
		}
		if (fstat(scan->fd, &st2) != 0) {
			errx(1, "add_volume: %s: Failed to stat %s",
				vol->name, scan->name);
		}
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "add_volume: %s: Specified more than once",
				vol->name);
		}
	}

	TAILQ_INSERT_TAIL(&VolList, vol, entry);
}

/*
 * Initialize a volume structure and ondisk vol_no field.
 */
struct volume_info *
init_volume(int32_t vol_no, const char *filename, int oflags)
{
	struct volume_info *vol;

	vol = __alloc_volume(filename, oflags);
	vol->vol_no = vol->ondisk->vol_no = vol_no;

	__add_volume(vol);

	return(vol);
}

/*
 * Initialize a volume structure and read ondisk volume header.
 */
struct volume_info*
load_volume(const char *filename, int oflags)
{
	struct volume_info *vol;
	hammer_volume_ondisk_t ondisk;
	int n;

	vol = __alloc_volume(filename, oflags);

	n = readhammervol(vol);
	if (n == -1) {
		err(1, "load_volume: %s: Read failed at offset 0", vol->name);
	}
	ondisk = vol->ondisk;
	vol->vol_no = ondisk->vol_no;

	if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
		errx(1, "load_volume: Invalid root volume# %d",
			ondisk->vol_rootvol);
	}

	if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) {
		errx(1, "load_volume: %s: Header does not indicate "
			"that this is a hammer volume", vol->name);
	}

	if (valid_hammer_volumes++ == 0) {
		Hammer_FSId = ondisk->vol_fsid;
	} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) {
		errx(1, "load_volume: %s: FSId does not match other volumes!",
			vol->name);
	}

	__add_volume(vol);

	return(vol);
}
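
/*
 * Usage sketch (illustrative, not part of the original source): a typical
 * caller opens every volume of a filesystem read-only and then fetches the
 * root volume.  load_volume() verifies vol_fstype and cross-checks vol_fsid
 * against the first volume loaded, so mixing volumes from different
 * filesystems fails early.  The names nvols and vol_names are hypothetical:
 *
 *	for (i = 0; i < nvols; ++i)
 *		load_volume(vol_names[i], O_RDONLY);
 *	root_vol = get_root_volume();
 */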
/*
 * Check basic volume characteristics.
 */
static void
check_volume(struct volume_info *vol)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Get basic information about the volume
	 */
	if (ioctl(vol->fd, DIOCGPART, &pinfo) < 0) {
		/*
		 * Allow the formatting of regular files as HAMMER volumes
		 */
		if (fstat(vol->fd, &st) < 0)
			err(1, "Unable to stat %s", vol->name);
		vol->size = st.st_size;
		vol->type = "REGFILE";
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
		}
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
				pinfo.media_blksize);
		}

		vol->size = pinfo.media_size;
		vol->device_offset = pinfo.media_offset;
		vol->type = "DEVICE";
	}
}

void
assert_volume_offset(struct volume_info *vol)
{
	assert(hammer_is_zone_raw_buffer(vol->vol_free_off));
	assert(hammer_is_zone_raw_buffer(vol->vol_free_end));
}

struct volume_info *
get_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}

	return(vol);
}

struct volume_info *
get_root_volume(void)
{
	struct volume_info *root_vol;

	root_vol = get_volume(HAMMER_ROOT_VOLNO);
	assert(root_vol != NULL);

	return(root_vol);
}

/*
 * Acquire the specified buffer.  isnew is -1 only when called
 * via get_buffer_readahead() to prevent another readahead.
 */
static struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	struct buffer_info *buf;
	struct volume_info *volume;
	int vol_no;
	int zone;
	int hi;
	int dora = 0;
	int error = 0;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX)
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, &error);
	if (error || buf_offset == HAMMER_OFF_BAD)
		return(NULL);
	assert(hammer_is_zone_raw_buffer(buf_offset));

	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = get_volume(vol_no);
	assert(volume != NULL);

	buf_offset &= ~HAMMER_BUFMASK64;
	buf = find_buffer(volume, buf_offset);

	if (buf == NULL) {
		buf = malloc(sizeof(*buf));
		if (buf == NULL)
			err(1, "get_buffer");
		bzero(buf, sizeof(*buf));
		buf->buf_offset = buf_offset;
		buf->raw_offset = hammer_xlate_to_phys(volume->ondisk,
							buf_offset);
		buf->volume = volume;
		buf->ondisk = malloc(HAMMER_BUFSIZE);
		if (buf->ondisk == NULL)
			err(1, "get_buffer");
		if (isnew <= 0) {
			if (readhammerbuf(buf) == -1) {
				err(1, "get_buffer: %s:%016jx "
					"Read failed at offset %016jx",
					volume->name,
					(intmax_t)buf->buf_offset,
					(intmax_t)buf->raw_offset);
			}
		}

		hi = buffer_hash(buf_offset);
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		hammer_cache_add(&buf->cache);
		dora = (isnew == 0);
	} else {
		assert(buf->ondisk != NULL);
		assert(isnew != -1);
		hammer_cache_used(&buf->cache);
	}

	++buf->cache.refs;
	hammer_cache_flush();

	if (isnew > 0) {
		assert(buf->cache.modified == 0);
		bzero(buf->ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}
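
/*
 * Illustrative example (an assumption, not original source): get_buffer()
 * accepts both zone-2 (raw buffer) offsets and higher-zone offsets; the
 * latter are first translated through blockmap_lookup().  A reference is
 * taken on success and must be dropped with rel_buffer():
 *
 *	struct buffer_info *buf;
 *
 *	buf = get_buffer(buf_offset, 0);	// read existing contents
 *	if (buf != NULL) {
 *		// ... inspect buf->ondisk ...
 *		rel_buffer(buf);
 *	}
 *
 * Passing isnew > 0 skips the disk read and returns a zeroed buffer
 * already marked modified.
 */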
static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		if (raw_offset < vol->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no,
				raw_offset - vol->ondisk->vol_buf_beg);
		buf = find_buffer(vol, buf_offset);
		if (buf == NULL) {
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

void
rel_buffer(struct buffer_info *buffer)
{
	struct volume_info *volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->buf_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a buffer offset.  The underlying
 * *bufferp is released if isnew is set or if the offset is outside the range
 * of the cached data, in which case a referenced buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
		int isnew)
{
	if (*bufferp != NULL) {
		if (isnew > 0 ||
		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}
	return(get_ondisk(buf_offset, bufferp, isnew));
}

/*
 * Retrieve a pointer to a B-Tree node given a zone offset.  The underlying
 * *bufferp is released if non-NULL and a referenced buffer is loaded into it.
 */
hammer_node_ondisk_t
get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
{
	if (*bufferp != NULL) {
		rel_buffer(*bufferp);
		*bufferp = NULL;
	}
	return(get_ondisk(node_offset, bufferp, 0));
}

/*
 * Return a pointer to buffer data given a buffer offset.
 * If *bufferp is NULL acquire the buffer, otherwise use that buffer.
 */
static void *
get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp, int isnew)
{
	if (*bufferp == NULL) {
		*bufferp = get_buffer(buf_offset, isnew);
		if (*bufferp == NULL)
			return(NULL);
	}

	return(((char *)(*bufferp)->ondisk) +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements - B-Tree nodes
 */
void *
alloc_btree_element(hammer_off_t *offp, struct buffer_info **data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return (node);
}

/*
 * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
 */
void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return (data);
}
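
/*
 * Usage sketch (illustrative only): the alloc_*_element() helpers return a
 * pointer into a referenced buffer and report the allocation's zone offset
 * through *offp.  Callers keep data_bufferp across calls so consecutive
 * allocations from the same 16KB buffer reuse a single reference:
 *
 *	struct buffer_info *data_buffer = NULL;
 *	hammer_off_t node_off;
 *	hammer_node_ondisk_t node;
 *
 *	node = alloc_btree_element(&node_off, &data_buffer);
 *	// ... fill in *node ...
 *	rel_buffer(data_buffer);
 */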
/*
 * Allocate HAMMER elements - data storage
 *
 * The only data_len supported by HAMMER userspace for the large data zone
 * (zone 10) is HAMMER_BUFSIZE which is 16KB.  >16KB data does not fit
 * in a buffer allocated by get_buffer().  Also alloc_blockmap() does
 * not consider >16KB buffer sizes.
 */
void *
alloc_data_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;
	int zone;

	if (data_len == 0)
		return(NULL);

	zone = hammer_data_zone_index(data_len);
	assert(data_len <= HAMMER_BUFSIZE);	/* just one buffer */
	assert(zone == HAMMER_ZONE_LARGE_DATA_INDEX ||
	       zone == HAMMER_ZONE_SMALL_DATA_INDEX);

	data = alloc_blockmap(zone, data_len, offp, data_bufferp);
	bzero(data, data_len);
	return(data);
}

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(struct volume_info *root_vol, int zone, hammer_off_t offset)
{
	hammer_blockmap_t blockmap;
	hammer_off_t zone_base;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	assert(hammer_is_zone2_mapped_index(zone));

	blockmap = &root_vol->ondisk->vol0_blockmap[zone];
	zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
 * code will load each volume's freemap.
 */
void
format_freemap(struct volume_info *root_vol)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	hammer_blockmap_layer1_t layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
		isnew = ((i % HAMMER_BUFSIZE) == 0);
		layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	assert(i == HAMMER_BIGBLOCK_SIZE);
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}
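
/*
 * Worked example for the layer1 loop above (the sizes are assumptions
 * based on HAMMER's on-disk layout: an 8MB big-block and a 32-byte layer1
 * entry): the loop walks 8MB / 32 = 262144 layer1 entries, and isnew is
 * 1 on every HAMMER_BUFSIZE (16KB) boundary, i.e. once per 512 entries,
 * so each underlying 16KB buffer is zero-created exactly once instead of
 * being read back from the not-yet-formatted disk.
 */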
/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_root_volume();

	assert_volume_offset(vol);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < vol->vol_free_off) {
				/*
				 * Fixups XXX - big-blocks already allocated
				 * as part of the freemap bootstrap.
				 */
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			} else if (phys_offset + block_offset < vol->vol_free_end) {
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	return(count);
}
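
/*
 * Geometry note (derived from the constants used above; the exact entry
 * sizes are assumptions from HAMMER's on-disk format): one layer1 entry
 * covers HAMMER_BLOCKMAP_LAYER2 bytes of zone-2 space -- one 8MB
 * big-block full of layer2 entries, each describing one 8MB big-block.
 * With a 16-byte layer2 entry that is (8MB / 16) * 8MB = 4TB per layer1
 * entry, which is why the outer loops step by HAMMER_BLOCKMAP_LAYER2
 * while the inner loop steps by HAMMER_BIGBLOCK_SIZE.
 */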
/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(struct volume_info *vol)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);

	assert_volume_offset(vol);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	if (vol->vol_no == HAMMER_ROOT_VOLNO)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off) {
			;
		} else if (phys_offset < vol->vol_free_end) {
			++count;
		}
	}

	return(count);
}
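
/*
 * Sizing example for format_undomap() below (illustrative arithmetic
 * only): with no explicit undo size requested, *undo_buffer_size is 0,
 * so a volume with a 100GB buffer area gets 100GB / 1000 = ~100MB, which
 * is below the floor and is raised to 500MB, then rounded up to a
 * multiple of the 8MB big-block size.  A 1TB buffer area gets roughly
 * 1GB of UNDO FIFO.
 */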
/*
 * Format the undomap for the root volume.
 */
void
format_undomap(struct volume_info *root_vol, int64_t *undo_buffer_size)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	uint32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big-blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 500MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = *undo_buffer_size;
	if (undo_limit == 0) {
		undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	*undo_buffer_size = undo_limit;

	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_UNDO_LAYER2);

	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(root_vol,
						HAMMER_ZONE_UNDO_INDEX);
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	rel_buffer(buffer);
}

const char *zone_labels[] = {
	"",		/* 0 */
	"raw_volume",	/* 1 */
	"raw_buffer",	/* 2 */
	"undo",		/* 3 */
	"freemap",	/* 4 */
	"",		/* 5 */
	"",		/* 6 */
	"",		/* 7 */
	"btree",	/* 8 */
	"meta",		/* 9 */
	"large_data",	/* 10 */
	"small_data",	/* 11 */
	"",		/* 12 */
	"",		/* 13 */
	"",		/* 14 */
	"unavail",	/* 15 */
};
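
/*
 * Note on the undo_used computation in print_blockmap() below: the UNDO
 * zone is a circular FIFO, so when next_offset has wrapped behind
 * first_offset the in-use span is the tail (alloc_offset - first_offset)
 * plus the wrapped head (next_offset relative to the zone base), which
 * is what the else branch computes.
 */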
void
print_blockmap(const struct volume_info *root_vol)
{
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	int64_t size, used;
	int i;
#define INDENT ""

	ondisk = root_vol->ondisk;
	printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
	printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
	printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
	printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
	printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
	printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
	printf(INDENT"vol0_next_tid\t%016jx\n",
	       (uintmax_t)ondisk->vol0_next_tid);

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	size = blockmap->alloc_offset & HAMMER_OFF_LONG_MASK;
	if (blockmap->first_offset <= blockmap->next_offset)
		used = blockmap->next_offset - blockmap->first_offset;
	else
		used = blockmap->alloc_offset - blockmap->first_offset +
			(blockmap->next_offset & HAMMER_OFF_LONG_MASK);
	printf(INDENT"undo_size\t%s\n", sizetostr(size));
	printf(INDENT"undo_used\t%s\n", sizetostr(used));

	printf(INDENT"zone #             "
	       "phys             first            next             alloc\n");
	for (i = 0; i < HAMMER_MAX_ZONES; i++) {
		blockmap = &ondisk->vol0_blockmap[i];
		printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
			i, zone_labels[i],
			(uintmax_t)blockmap->phys_offset,
			(uintmax_t)blockmap->first_offset,
			(uintmax_t)blockmap->next_offset,
			(uintmax_t)blockmap->alloc_offset);
	}
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry)
		flush_volume(vol);
}

void
flush_volume(struct volume_info *volume)
{
	struct buffer_info *buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	if (writehammervol(volume) == -1)
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
}

void
flush_buffer(struct buffer_info *buffer)
{
	struct volume_info *vol;

	vol = buffer->volume;
	if (writehammerbuf(buffer) == -1)
		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static int
__read(struct volume_info *vol, void *data, int64_t offset, int size)
{
	ssize_t n;

	n = pread(vol->fd, data, size, offset);
	if (n != size)
		return(-1);
	return(0);
}

static __inline int
readhammervol(struct volume_info *vol)
{
	return(__read(vol, vol->ondisk, 0, HAMMER_BUFSIZE));
}

static __inline int
readhammerbuf(struct buffer_info *buf)
{
	return(__read(buf->volume, buf->ondisk, buf->raw_offset,
		      HAMMER_BUFSIZE));
}

static int
__write(struct volume_info *vol, const void *data, int64_t offset, int size)
{
	ssize_t n;

	if (vol->rdonly)
		return(0);

	n = pwrite(vol->fd, data, size, offset);
	if (n != size)
		return(-1);
	return(0);
}

static __inline int
writehammervol(struct volume_info *vol)
{
	return(__write(vol, vol->ondisk, 0, HAMMER_BUFSIZE));
}

static __inline int
writehammerbuf(struct buffer_info *buf)
{
	return(__write(buf->volume, buf->ondisk, buf->raw_offset,
		       HAMMER_BUFSIZE));
}

int64_t
init_boot_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_BOOT_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_BOOT_MINBYTES)
			value = 0;
	} else if (value < HAMMER_BOOT_MINBYTES) {
		value = HAMMER_BOOT_MINBYTES;
	}

	return(value);
}

int64_t
init_mem_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_MEM_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_MEM_MINBYTES)
			value = 0;
	} else if (value < HAMMER_MEM_MINBYTES) {
		value = HAMMER_MEM_MINBYTES;
	}

	return(value);
}
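
/*
 * Behavior note for init_boot_area_size()/init_mem_area_size() above
 * (illustrative; the constant values are not restated here): when no size
 * is requested the nominal size (HAMMER_BOOT_NOMBYTES or
 * HAMMER_MEM_NOMBYTES) is halved until it no longer exceeds
 * avg_vol_size / HAMMER_MAX_VOLUMES, and if it then falls below the
 * corresponding *_MINBYTES floor the area is disabled entirely (0).
 * An explicitly requested size is only clamped up to the floor.
 */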