/*
 * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include "hammer2.h"

#define FREEMAP_DEBUG	0

struct hammer2_fiterate {
	hammer2_off_t	bpref;
	hammer2_off_t	bnext;
	int		loops;
};

typedef struct hammer2_fiterate hammer2_fiterate_t;

static int hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
			hammer2_blockref_t *bref,
			int radix, hammer2_fiterate_t *iter);
static void hammer2_freemap_init(hammer2_dev_t *hmp,
			hammer2_key_t key, hammer2_chain_t *chain);
static int hammer2_bmap_alloc(hammer2_dev_t *hmp,
			hammer2_bmap_data_t *bmap, uint16_t class,
			int n, int radix, hammer2_key_t *basep);
static int hammer2_freemap_iterate(hammer2_chain_t **parentp,
			hammer2_chain_t **chainp,
			hammer2_fiterate_t *iter);

static __inline
int
hammer2_freemapradix(int radix)
{
	return(radix);
}

/*
 * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF
 * bref.  Return a combined media offset and physical size radix.  Freemap
 * chains use fixed storage offsets in the 4MB reserved area at the
 * beginning of each 2GB zone.
 *
 * Rotate between four possibilities.  Theoretically this means we have three
 * good freemaps in case of a crash which we can use as a base for the fixup
 * scan at mount-time.
 */
#define H2FMBASE(key, radix)	((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
#define H2FMSHIFT(radix)	((hammer2_off_t)1 << (radix))
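
/*
 * Example (illustrative): H2FMSHIFT(radix) is just 2^radix expressed as a
 * 64-bit media offset, and H2FMBASE(key, radix) rounds key down to that
 * alignment.  H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) therefore yields
 * the base of the 2GB zone containing key, which is also where that zone's
 * 4MB reserved area begins.
 */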

static
int
hammer2_freemap_reserve(hammer2_chain_t *chain, int radix)
{
	hammer2_blockref_t *bref = &chain->bref;
	hammer2_off_t off;
	int index;
	int index_inc;
	size_t bytes;

	/*
	 * Physical allocation size.
	 */
	bytes = (size_t)1 << radix;

	/*
	 * Calculate block selection index 0..7 of current block.  If this
	 * is the first allocation of the block (versus a modification of an
	 * existing block), we use index 0, otherwise we use the next rotating
	 * index.
	 */
	if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) {
		index = 0;
	} else {
		off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX &
		      (((hammer2_off_t)1 <<
			HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
		off = off / HAMMER2_PBUFSIZE;
		KKASSERT(off >= HAMMER2_ZONE_FREEMAP_00 &&
			 off < HAMMER2_ZONE_FREEMAP_END);
		index = (int)(off - HAMMER2_ZONE_FREEMAP_00) /
			HAMMER2_ZONE_FREEMAP_INC;
		KKASSERT(index >= 0 && index < HAMMER2_NFREEMAPS);
		if (++index == HAMMER2_NFREEMAPS)
			index = 0;
	}

	/*
	 * Calculate the block offset of the reserved block.  This will
	 * point into the 4MB reserved area at the base of the appropriate
	 * 2GB zone, once added to the FREEMAP_x selection above.
	 */
	index_inc = index * HAMMER2_ZONE_FREEMAP_INC;

	switch(bref->keybits) {
	/* case HAMMER2_FREEMAP_LEVEL6_RADIX: not applicable */
	case HAMMER2_FREEMAP_LEVEL5_RADIX:	/* 2EB */
		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
		off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL5_RADIX) +
		      (index_inc + HAMMER2_ZONE_FREEMAP_00 +
		       HAMMER2_ZONEFM_LEVEL5) * HAMMER2_PBUFSIZE;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:	/* 2EB */
		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
		off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) +
		      (index_inc + HAMMER2_ZONE_FREEMAP_00 +
		       HAMMER2_ZONEFM_LEVEL4) * HAMMER2_PBUFSIZE;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:	/* 2PB */
		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
		off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) +
		      (index_inc + HAMMER2_ZONE_FREEMAP_00 +
		       HAMMER2_ZONEFM_LEVEL3) * HAMMER2_PBUFSIZE;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:	/* 2TB */
		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
		off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) +
		      (index_inc + HAMMER2_ZONE_FREEMAP_00 +
		       HAMMER2_ZONEFM_LEVEL2) * HAMMER2_PBUFSIZE;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:	/* 2GB */
		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
		off = H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
		      (index_inc + HAMMER2_ZONE_FREEMAP_00 +
		       HAMMER2_ZONEFM_LEVEL1) * HAMMER2_PBUFSIZE;
		break;
	default:
		panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits);
		/* NOT REACHED */
		off = (hammer2_off_t)-1;
		break;
	}
	bref->data_off = off | radix;
#if FREEMAP_DEBUG
	kprintf("FREEMAP BLOCK TYPE %d %016jx/%d DATA_OFF=%016jx\n",
		bref->type, bref->key, bref->keybits, bref->data_off);
#endif
	return (0);
}
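
/*
 * Illustrative note: because the index selection above advances by one on
 * every modification (wrapping at HAMMER2_NFREEMAPS), successive flushes of
 * the same freemap block land in different reserved slots of the zone.  The
 * copies written previously are left intact and can serve as the base for
 * the mount-time fixup scan described above.
 */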

/*
 * Normal freemap allocator
 *
 * Use available hints to allocate space using the freemap.  Create missing
 * freemap infrastructure on-the-fly as needed (including marking initial
 * allocations using the iterator as allocated, instantiating new 2GB zones,
 * and dealing with the end-of-media edge case).
 *
 * ip and bpref are only used as a heuristic to determine locality of
 * reference.  bref->key may also be used heuristically.
 */
int
hammer2_freemap_alloc(hammer2_chain_t *chain, size_t bytes)
{
	hammer2_dev_t *hmp = chain->hmp;
	hammer2_blockref_t *bref = &chain->bref;
	hammer2_chain_t *parent;
	int radix;
	int error;
	unsigned int hindex;
	hammer2_fiterate_t iter;

	/*
	 * Validate the allocation size.  It must be a power of 2.
	 *
	 * For now require that the caller be aware of the minimum
	 * allocation (1K).
	 */
	radix = hammer2_getradix(bytes);
	KKASSERT((size_t)1 << radix == bytes);

	if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		/*
		 * Freemap blocks themselves are assigned from the reserve
		 * area, not allocated from the freemap.
		 */
		error = hammer2_freemap_reserve(chain, radix);
		return error;
	}

	KKASSERT(bytes >= HAMMER2_ALLOC_MIN && bytes <= HAMMER2_ALLOC_MAX);

	/*
	 * Calculate the starting point for our allocation search.
	 *
	 * Each freemap leaf is dedicated to a specific freemap_radix.
	 * The freemap_radix can be more fine-grained than the device buffer
	 * radix which results in inodes being grouped together in their
	 * own segment, terminal-data (16K or less) and initial indirect
	 * block being grouped together, and then full-indirect and full-data
	 * blocks (64K) being grouped together.
	 *
	 * The single most important aspect of this is the inode grouping
	 * because that is what allows 'find' and 'ls' and other filesystem
	 * topology operations to run fast.
	 */
#if 0
	if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX)
		bpref = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
	else if (trans->tmp_bpref)
		bpref = trans->tmp_bpref;
	else if (trans->tmp_ip)
		bpref = trans->tmp_ip->chain->bref.data_off;
	else
#endif
	/*
	 * Heuristic tracking index.  We would like one for each distinct
	 * bref type if possible.  heur_freemap[] has room for two classes
	 * for each type.  At a minimum we have to break up our heuristic
	 * by device block sizes.
	 */
	hindex = hammer2_devblkradix(radix) - HAMMER2_MINIORADIX;
	KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_NRADIX);
	hindex += bref->type * HAMMER2_FREEMAP_HEUR_NRADIX;
	hindex &= HAMMER2_FREEMAP_HEUR_TYPES * HAMMER2_FREEMAP_HEUR_NRADIX - 1;
	KKASSERT(hindex < HAMMER2_FREEMAP_HEUR);

	iter.bpref = hmp->heur_freemap[hindex];
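
	/*
	 * Illustration: hindex selects one heuristic slot per combination
	 * of bref type and device block radix, so e.g. inode allocations
	 * and large data allocations keep independent starting hints and
	 * do not continually reset each other's locality.
	 */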

	/*
	 * Make sure bpref is in-bounds.  It's ok if bpref covers a zone's
	 * reserved area, the try code will iterate past it.
	 */
	if (iter.bpref > hmp->voldata.volu_size)
		iter.bpref = hmp->voldata.volu_size - 1;

	/*
	 * Iterate the freemap looking for free space before and after.
	 */
	parent = &hmp->fchain;
	hammer2_chain_ref(parent);
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	error = EAGAIN;
	iter.bnext = iter.bpref;
	iter.loops = 0;

	while (error == EAGAIN) {
		error = hammer2_freemap_try_alloc(&parent, bref, radix, &iter);
	}
	hmp->heur_freemap[hindex] = iter.bnext;
	hammer2_chain_unlock(parent);
	hammer2_chain_drop(parent);

	return (error);
}

static int
hammer2_freemap_try_alloc(hammer2_chain_t **parentp,
			  hammer2_blockref_t *bref, int radix,
			  hammer2_fiterate_t *iter)
{
	hammer2_dev_t *hmp = (*parentp)->hmp;
	hammer2_off_t l0size;
	hammer2_off_t l1size;
	hammer2_off_t l1mask;
	hammer2_key_t key_dummy;
	hammer2_chain_t *chain;
	hammer2_off_t key;
	size_t bytes;
	uint16_t class;
	int error = 0;
	int cache_index = -1;

	/*
	 * Calculate the number of bytes being allocated, the number
	 * of contiguous bits of bitmap being allocated, and the bitmap
	 * mask.
	 *
	 * WARNING! cpu hardware may mask bits == 64 -> 0 and blow up the
	 *	    mask calculation.
	 */
	bytes = (size_t)1 << radix;
	class = (bref->type << 8) | hammer2_devblkradix(radix);

	/*
	 * Lookup the level1 freemap chain, creating and initializing one
	 * if necessary.  Intermediate levels will be created automatically
	 * when necessary by hammer2_chain_create().
	 */
	key = H2FMBASE(iter->bnext, HAMMER2_FREEMAP_LEVEL1_RADIX);
	l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
	l1mask = l1size - 1;

	chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask,
				     &cache_index,
				     HAMMER2_LOOKUP_ALWAYS |
				     HAMMER2_LOOKUP_MATCHIND);

	if (chain == NULL) {
		/*
		 * Create the missing leaf, be sure to initialize
		 * the auxiliary freemap tracking information in
		 * the bref.check.freemap structure.
		 */
#if 0
		kprintf("freemap create L1 @ %016jx bpref %016jx\n",
			key, iter->bpref);
#endif
		error = hammer2_chain_create(parentp, &chain, hmp->spmp,
				     key, HAMMER2_FREEMAP_LEVEL1_RADIX,
				     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
				     HAMMER2_FREEMAP_LEVELN_PSIZE,
				     0);
		KKASSERT(error == 0);
		if (error == 0) {
			hammer2_chain_modify(chain, 0);
			bzero(&chain->data->bmdata[0],
			      HAMMER2_FREEMAP_LEVELN_PSIZE);
			chain->bref.check.freemap.bigmask = (uint32_t)-1;
			chain->bref.check.freemap.avail = l1size;
			/* bref.methods should already be inherited */

			hammer2_freemap_init(hmp, key, chain);
		}
	} else if (chain->error) {
		/*
		 * Error during lookup.
		 */
		kprintf("hammer2_freemap_try_alloc: %016jx: error %s\n",
			(intmax_t)bref->data_off,
			hammer2_error_str(chain->error));
		error = EIO;
	} else if ((chain->bref.check.freemap.bigmask &
		   ((size_t)1 << radix)) == 0) {
		/*
		 * Already flagged as not having enough space
		 */
		error = ENOSPC;
	} else {
		/*
		 * Modify existing chain to setup for adjustment.
		 */
		hammer2_chain_modify(chain, 0);
	}
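
	/*
	 * Note (illustrative): bref.check.freemap.bigmask is a coarse
	 * per-leaf hint with one bit per allocation radix.  The test above
	 * rejects the leaf outright when a previous scan already failed
	 * for this radix; the bit is cleared below when a full scan of the
	 * leaf comes up empty.
	 */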

	/*
	 * Scan 2MB entries.
	 */
	if (error == 0) {
		hammer2_bmap_data_t *bmap;
		hammer2_key_t base_key;
		int count;
		int start;
		int n;

		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
		start = (int)((iter->bnext - key) >>
			      HAMMER2_FREEMAP_LEVEL0_RADIX);
		KKASSERT(start >= 0 && start < HAMMER2_FREEMAP_COUNT);
		hammer2_chain_modify(chain, 0);

		error = ENOSPC;
		for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
			int availchk;

			if (start + count >= HAMMER2_FREEMAP_COUNT &&
			    start - count < 0) {
				break;
			}

			/*
			 * Calculate bmap pointer
			 *
			 * NOTE: bmap pointer is invalid if n >= FREEMAP_COUNT.
			 */
			n = start + count;
			bmap = &chain->data->bmdata[n];

			if (n >= HAMMER2_FREEMAP_COUNT) {
				availchk = 0;
			} else if (bmap->avail) {
				availchk = 1;
			} else if (radix < HAMMER2_FREEMAP_BLOCK_RADIX &&
				   (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK)) {
				availchk = 1;
			} else {
				availchk = 0;
			}

			if (availchk &&
			    (bmap->class == 0 || bmap->class == class)) {
				base_key = key + n * l0size;
				error = hammer2_bmap_alloc(hmp, bmap,
							   class, n, radix,
							   &base_key);
				if (error != ENOSPC) {
					key = base_key;
					break;
				}
			}

			/*
			 * Must recalculate after potentially having called
			 * hammer2_bmap_alloc() above in case chain was
			 * reallocated.
			 *
			 * NOTE: bmap pointer is invalid if n < 0.
			 */
			n = start - count;
			bmap = &chain->data->bmdata[n];
			if (n < 0) {
				availchk = 0;
			} else if (bmap->avail) {
				availchk = 1;
			} else if (radix < HAMMER2_FREEMAP_BLOCK_RADIX &&
				   (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK)) {
				availchk = 1;
			} else {
				availchk = 0;
			}

			if (availchk &&
			    (bmap->class == 0 || bmap->class == class)) {
				base_key = key + n * l0size;
				error = hammer2_bmap_alloc(hmp, bmap,
							   class, n, radix,
							   &base_key);
				if (error != ENOSPC) {
					key = base_key;
					break;
				}
			}
		}
		if (error == ENOSPC) {
			chain->bref.check.freemap.bigmask &=
				(uint32_t)~((size_t)1 << radix);
		}
		/* XXX also scan down from original count */
	}

	if (error == 0) {
		/*
		 * Assert validity.  Must be beyond the static allocator used
		 * by newfs_hammer2 (and thus also beyond the aux area),
		 * not go past the volume size, and must not be in the
		 * reserved segment area for a zone.
		 */
		KKASSERT(key >= hmp->voldata.allocator_beg &&
			 key + bytes <= hmp->voldata.volu_size);
		KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);
		bref->data_off = key | radix;

#if 0
		kprintf("alloc cp=%p %016jx %016jx using %016jx\n",
			chain,
			bref->key, bref->data_off, chain->bref.data_off);
#endif
	} else if (error == ENOSPC) {
		/*
		 * Return EAGAIN with next iteration in iter->bnext, or
		 * return ENOSPC if the allocation map has been exhausted.
		 */
		error = hammer2_freemap_iterate(parentp, &chain, iter);
	}

	/*
	 * Cleanup
	 */
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	return (error);
}
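
/*
 * Note (illustrative): when the current 2GB leaf is exhausted the ENOSPC
 * result is converted by hammer2_freemap_iterate() into EAGAIN with
 * iter->bnext advanced to the next zone, which causes the retry loop in
 * hammer2_freemap_alloc() to try again there.  A hard ENOSPC is only
 * returned once the iterator has wrapped the entire volume.
 */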

/*
 * Allocate (1<<radix) bytes from the bmap whose base data offset is (*basep).
 *
 * If the linear iterator is mid-block we use it directly (the bitmap should
 * already be marked allocated), otherwise we search for a block in the bitmap
 * that fits the allocation request.
 *
 * A partial bitmap allocation sets the minimum bitmap granularity (16KB)
 * to fully allocated and adjusts the linear allocator to allow the
 * remaining space to be allocated.
 */
static
int
hammer2_bmap_alloc(hammer2_dev_t *hmp, hammer2_bmap_data_t *bmap,
		   uint16_t class, int n, int radix, hammer2_key_t *basep)
{
	hammer2_io_t *dio;
	size_t size;
	size_t bgsize;
	int bmradix;
	hammer2_bitmap_t bmmask;
	int offset;
	int error;
	int i;
	int j;

	/*
	 * Take into account 2-bits per block when calculating bmradix.
	 */
	size = (size_t)1 << radix;

	if (radix <= HAMMER2_FREEMAP_BLOCK_RADIX) {
		bmradix = 2;
		/* (16K) 2 bits per allocation block */
	} else {
		bmradix = (hammer2_bitmap_t)2 <<
			  (radix - HAMMER2_FREEMAP_BLOCK_RADIX);
		/* (32K-256K) 4, 8, 16, 32 bits per allocation block */
	}

	/*
	 * Use the linear iterator to pack small allocations, otherwise
	 * fall back to finding a free 16KB chunk.  The linear iterator
	 * is only valid when *NOT* on a freemap chunking boundary (16KB).
	 * If it is the bitmap must be scanned.  It can become invalid
	 * once we pack to the boundary.  We adjust it after a bitmap
	 * allocation only for sub-16KB allocations (so the perfectly good
	 * previous value can still be used for fragments when 16KB+
	 * allocations are made).
	 *
	 * Beware of hardware artifacts when bmradix == 64 (intermediate
	 * result can wind up being '1' instead of '0' if hardware masks
	 * bit-count & 31).
	 *
	 * NOTE: j needs to be even in the j= calculation.  As an artifact
	 *	 of the /2 division, our bitmask has to clear bit 0.
	 *
	 * NOTE: TODO this can leave little unallocatable fragments lying
	 *	 around.
	 */
	if (((uint32_t)bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) + size <=
	    HAMMER2_FREEMAP_BLOCK_SIZE &&
	    (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) &&
	    bmap->linear < HAMMER2_SEGSIZE) {
		KKASSERT(bmap->linear >= 0 &&
			 bmap->linear + size <= HAMMER2_SEGSIZE &&
			 (bmap->linear & (HAMMER2_ALLOC_MIN - 1)) == 0);
		offset = bmap->linear;
		i = offset / (HAMMER2_SEGSIZE / 8);
		j = (offset / (HAMMER2_FREEMAP_BLOCK_SIZE / 2)) & 30;
		bmmask = (bmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ?
			 HAMMER2_BMAP_ALLONES :
			 ((hammer2_bitmap_t)1 << bmradix) - 1;
		bmmask <<= j;
		bmap->linear = offset + size;
	} else {
		for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) {
			bmmask = (bmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ?
				 HAMMER2_BMAP_ALLONES :
				 ((hammer2_bitmap_t)1 << bmradix) - 1;
			for (j = 0;
			     j < HAMMER2_BMAP_BITS_PER_ELEMENT;
			     j += bmradix) {
				if ((bmap->bitmapq[i] & bmmask) == 0)
					goto success;
				bmmask <<= bmradix;
			}
		}
		/* fragments might remain */
		/* KKASSERT(bmap->avail == 0); */
		return (ENOSPC);
success:
		offset = i * (HAMMER2_SEGSIZE / HAMMER2_BMAP_ELEMENTS) +
			 (j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2));
		if (size & HAMMER2_FREEMAP_BLOCK_MASK)
			bmap->linear = offset + size;
	}

	/* 8 x (64/2) -> 256 x 16K -> 4MB */
	KKASSERT(i >= 0 && i < HAMMER2_BMAP_ELEMENTS);
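
	/*
	 * Worked example (using the 16KB HAMMER2_FREEMAP_BLOCK_SIZE noted
	 * above): a 64KB allocation uses bmradix = 8, i.e. four 2-bit
	 * pairs in a single bitmapq[] element.  A successful scan at
	 * element i, bit position j, yields the segment-relative offset
	 * i * (HAMMER2_SEGSIZE / 8) + j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2),
	 * with bmmask covering exactly the four 16KB sub-blocks allocated.
	 */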

	/*
	 * Optimize the buffer cache to avoid unnecessary read-before-write
	 * operations.
	 *
	 * The device block size could be larger than the allocation size
	 * so the actual bitmap test is somewhat more involved.  We have
	 * to use a compatible buffer size for this operation.
	 */
	if ((bmap->bitmapq[i] & bmmask) == 0 &&
	    hammer2_devblksize(size) != size) {
		size_t psize = hammer2_devblksize(size);
		hammer2_off_t pmask = (hammer2_off_t)psize - 1;
		int pbmradix = (hammer2_bitmap_t)2 <<
			       (hammer2_devblkradix(radix) -
				HAMMER2_FREEMAP_BLOCK_RADIX);
		hammer2_bitmap_t pbmmask;
		int pradix = hammer2_getradix(psize);

		pbmmask = (pbmradix == HAMMER2_BMAP_BITS_PER_ELEMENT) ?
			HAMMER2_BMAP_ALLONES :
			((hammer2_bitmap_t)1 << pbmradix) - 1;
		while ((pbmmask & bmmask) == 0)
			pbmmask <<= pbmradix;

#if 0
		kprintf("%016jx mask %016jx %016jx %016jx (%zd/%zd)\n",
			*basep + offset, bmap->bitmapq[i],
			pbmmask, bmmask, size, psize);
#endif

		if ((bmap->bitmapq[i] & pbmmask) == 0) {
			error = hammer2_io_newq(hmp,
						(*basep + (offset & ~pmask)) |
						 pradix,
						psize, &dio);
			hammer2_io_bqrelse(&dio);
		}
	}

#if 0
	/*
	 * When initializing a new inode segment also attempt to initialize
	 * an adjacent segment.  Be careful not to index beyond the array
	 * bounds.
	 *
	 * We do this to try to localize inode accesses to improve
	 * directory scan rates.  XXX doesn't improve scan rates.
	 */
	if (size == HAMMER2_INODE_BYTES) {
		if (n & 1) {
			if (bmap[-1].radix == 0 && bmap[-1].avail)
				bmap[-1].radix = radix;
		} else {
			if (bmap[1].radix == 0 && bmap[1].avail)
				bmap[1].radix = radix;
		}
	}
#endif

	/*
	 * Calculate the bitmap-granular change in bgsize for the volume
	 * header.  We cannot use the fine-grained change here because
	 * the bulkfree code can't undo it.  If the bitmap element is already
	 * marked allocated it has already been accounted for.
	 */
	if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) {
		if (bmap->bitmapq[i] & bmmask)
			bgsize = 0;
		else
			bgsize = HAMMER2_FREEMAP_BLOCK_SIZE;
	} else {
		bgsize = size;
	}
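
	/*
	 * Example (illustrative): a 1KB allocation landing in a fresh 16KB
	 * bitmap block charges a full HAMMER2_FREEMAP_BLOCK_SIZE to bgsize
	 * because bulkfree can only account at bitmap granularity.
	 * Follow-on sub-16KB allocations packed into the same block by the
	 * linear iterator find the bitmap bits already set and charge
	 * nothing further.
	 */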

	/*
	 * Adjust the bitmap, set the class (it might have been 0),
	 * and available bytes, update the allocation offset (*basep)
	 * from the L0 base to the actual offset.
	 *
	 * avail must reflect the bitmap-granular availability.  The allocator
	 * tests will also check the linear iterator.
	 */
	bmap->bitmapq[i] |= bmmask;
	bmap->class = class;
	bmap->avail -= bgsize;
	*basep += offset;

	/*
	 * Adjust the volume header's allocator_free parameter.  This
	 * parameter has to be fixed up by bulkfree which has no way to
	 * figure out sub-16K chunking, so it must be adjusted by the
	 * bitmap-granular size.
	 */
	if (bgsize) {
		hammer2_voldata_lock(hmp);
		hammer2_voldata_modify(hmp);
		hmp->voldata.allocator_free -= bgsize;
		hammer2_voldata_unlock(hmp);
	}

	return(0);
}

static
void
hammer2_freemap_init(hammer2_dev_t *hmp, hammer2_key_t key,
		     hammer2_chain_t *chain)
{
	hammer2_off_t l1size;
	hammer2_off_t lokey;
	hammer2_off_t hikey;
	hammer2_bmap_data_t *bmap;
	int count;

	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);

	/*
	 * Calculate the portion of the 2GB map that should be initialized
	 * as free.  Portions below or after will be initialized as allocated.
	 * SEGMASK-align the areas so we don't have to worry about sub-scans
	 * or endianness when using memset.
	 *
	 * (1) Ensure that all statically allocated space from newfs_hammer2
	 *     is marked allocated.
	 *
	 * (2) Ensure that the reserved area is marked allocated (typically
	 *     the first 4MB of the 2GB area being represented).
	 *
	 * (3) Ensure that any trailing space at the end-of-volume is marked
	 *     allocated.
	 *
	 * WARNING! It is possible for lokey to be larger than hikey if the
	 *	    entire 2GB segment is within the static allocation.
	 */
	lokey = (hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
		~HAMMER2_SEGMASK64;

	if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
		    HAMMER2_ZONE_SEG64) {
		lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
			HAMMER2_ZONE_SEG64;
	}

	hikey = key + H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
	if (hikey > hmp->voldata.volu_size) {
		hikey = hmp->voldata.volu_size & ~HAMMER2_SEGMASK64;
	}

	chain->bref.check.freemap.avail =
		H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
	bmap = &chain->data->bmdata[0];

	for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
		if (key < lokey || key >= hikey) {
			memset(bmap->bitmapq, -1,
			       sizeof(bmap->bitmapq));
			bmap->avail = 0;
			bmap->linear = HAMMER2_SEGSIZE;
			chain->bref.check.freemap.avail -=
				H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
		} else {
			bmap->avail = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
		}
		key += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
		++bmap;
	}
}

/*
 * The current Level 1 freemap has been exhausted, iterate to the next
 * one, return ENOSPC if no freemaps remain.
 *
 * XXX this should rotate back to the beginning to handle freed-up space
 * XXX or use intermediate entries to locate free space.  TODO
 */
static int
hammer2_freemap_iterate(hammer2_chain_t **parentp, hammer2_chain_t **chainp,
			hammer2_fiterate_t *iter)
{
	hammer2_dev_t *hmp = (*parentp)->hmp;

	iter->bnext &= ~(H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
	iter->bnext += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
	if (iter->bnext >= hmp->voldata.volu_size) {
		iter->bnext = 0;
		if (++iter->loops == 2)
			return (ENOSPC);
	}
	return(EAGAIN);
}

/*
 * Adjust the bit-pattern for data in the freemap bitmap according to
 * (how).  This code is called from on-mount recovery to fixup (mark
 * as allocated) blocks whose freemap updates might not have been committed
 * in the last crash and is used by the bulk freemap scan to stage frees.
 *
 * XXX currently disabled when how == 0 (the normal real-time case).  At
 * the moment we depend on the bulk freescan to actually free blocks.  It
 * will still call this routine with a non-zero how to stage possible frees
 * and to do the actual free.
 */
void
hammer2_freemap_adjust(hammer2_dev_t *hmp, hammer2_blockref_t *bref, int how)
{
	hammer2_off_t data_off = bref->data_off;
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	hammer2_bmap_data_t *bmap;
	hammer2_key_t key;
	hammer2_key_t key_dummy;
	hammer2_off_t l0size;
	hammer2_off_t l1size;
	hammer2_off_t l1mask;
	hammer2_bitmap_t *bitmap;
	const hammer2_bitmap_t bmmask00 = 0;
	hammer2_bitmap_t bmmask01;
	hammer2_bitmap_t bmmask10;
	hammer2_bitmap_t bmmask11;
	size_t bytes;
	uint16_t class;
	int radix;
	int start;
	int count;
	int modified = 0;
	int cache_index = -1;
	int error;

	KKASSERT(how == HAMMER2_FREEMAP_DORECOVER);

	radix = (int)data_off & HAMMER2_OFF_MASK_RADIX;
	data_off &= ~HAMMER2_OFF_MASK_RADIX;
	KKASSERT(radix <= HAMMER2_RADIX_MAX);

	bytes = (size_t)1 << radix;
	class = (bref->type << 8) | hammer2_devblkradix(radix);

	/*
	 * We can't adjust the freemap for data allocations made by
	 * newfs_hammer2.
	 */
	if (data_off < hmp->voldata.allocator_beg)
		return;

	KKASSERT((data_off & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);

	/*
	 * Lookup the level1 freemap chain.  The chain must exist.
	 */
	key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL1_RADIX);
	l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
	l1mask = l1size - 1;

	parent = &hmp->fchain;
	hammer2_chain_ref(parent);
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);

	chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask,
				     &cache_index,
				     HAMMER2_LOOKUP_ALWAYS |
				     HAMMER2_LOOKUP_MATCHIND);

	/*
	 * Stop early if we are trying to free something but no leaf exists.
	 */
	if (chain == NULL && how != HAMMER2_FREEMAP_DORECOVER) {
		kprintf("hammer2_freemap_adjust: %016jx: no chain\n",
			(intmax_t)bref->data_off);
		goto done;
	}
	if (chain && chain->error) {
		kprintf("hammer2_freemap_adjust: %016jx: error %s\n",
			(intmax_t)bref->data_off,
			hammer2_error_str(chain->error));
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto done;
	}

	/*
	 * Create any missing leaf(s) if we are doing a recovery (marking
	 * the block(s) as being allocated instead of being freed).  Be sure
	 * to initialize the auxiliary freemap tracking info in the
	 * bref.check.freemap structure.
	 */
	if (chain == NULL && how == HAMMER2_FREEMAP_DORECOVER) {
		error = hammer2_chain_create(&parent, &chain, hmp->spmp,
				     key, HAMMER2_FREEMAP_LEVEL1_RADIX,
				     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
				     HAMMER2_FREEMAP_LEVELN_PSIZE,
				     0);

		if (hammer2_debug & 0x0040) {
			kprintf("fixup create chain %p %016jx:%d\n",
				chain, chain->bref.key, chain->bref.keybits);
		}

		if (error == 0) {
			hammer2_chain_modify(chain, 0);
			bzero(&chain->data->bmdata[0],
			      HAMMER2_FREEMAP_LEVELN_PSIZE);
			chain->bref.check.freemap.bigmask = (uint32_t)-1;
			chain->bref.check.freemap.avail = l1size;
			/* bref.methods should already be inherited */

			hammer2_freemap_init(hmp, key, chain);
		}
		/* XXX handle error */
	}

#if FREEMAP_DEBUG
	kprintf("FREEMAP ADJUST TYPE %d %016jx/%d DATA_OFF=%016jx\n",
		chain->bref.type, chain->bref.key,
		chain->bref.keybits, chain->bref.data_off);
#endif
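
	/*
	 * Note (per the disabled code further below): each 16KB block is
	 * tracked by a 2-bit pair -- 00 free, 10 possibly free, 11 fully
	 * allocated, with 01 currently unused -- so bmmask01/10/11 below
	 * select those states for the run of blocks covered by this bref.
	 */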

	/*
	 * Calculate the bitmask (runs in 2-bit pairs).
	 */
	start = ((int)(data_off >> HAMMER2_FREEMAP_BLOCK_RADIX) & 15) * 2;
	bmmask01 = (hammer2_bitmap_t)1 << start;
	bmmask10 = (hammer2_bitmap_t)2 << start;
	bmmask11 = (hammer2_bitmap_t)3 << start;

	/*
	 * Fixup the bitmap.  Partial blocks cannot be fully freed unless
	 * a bulk scan is able to roll them up.
	 */
	if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) {
		count = 1;
		if (how == HAMMER2_FREEMAP_DOREALFREE)
			how = HAMMER2_FREEMAP_DOMAYFREE;
	} else {
		count = 1 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX);
	}

	/*
	 * [re]load the bmap and bitmap pointers.  Each bmap entry covers
	 * a 2MB swath.  The bmap itself (LEVEL1) covers 2GB.
	 *
	 * Be sure to reset the linear iterator to ensure that the adjustment
	 * is not ignored.
	 */
again:
	bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) &
				    (HAMMER2_FREEMAP_COUNT - 1)];
	bitmap = &bmap->bitmapq[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7];

	if (modified)
		bmap->linear = 0;
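
	/*
	 * Note (illustrative): the goto back to 'again' after the first
	 * hammer2_chain_modify() call re-derives bmap and bitmap because
	 * modifying the chain may give it a fresh copy of its data, and
	 * the adjustment must be applied to the writable copy.
	 */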

	while (count) {
		KKASSERT(bmmask11);
		if (how == HAMMER2_FREEMAP_DORECOVER) {
			/*
			 * Recovery request, mark as allocated.
			 */
			if ((*bitmap & bmmask11) != bmmask11) {
				if (modified == 0) {
					hammer2_chain_modify(chain, 0);
					modified = 1;
					goto again;
				}
				if ((*bitmap & bmmask11) == bmmask00) {
					bmap->avail -=
						HAMMER2_FREEMAP_BLOCK_SIZE;
				}
				if (bmap->class == 0)
					bmap->class = class;
				*bitmap |= bmmask11;
				if (hammer2_debug & 0x0040) {
					kprintf("hammer2_freemap_recover: "
						"fixup type=%02x "
						"block=%016jx/%zd\n",
						bref->type, data_off, bytes);
				}
			} else {
				/*
				kprintf("hammer2_freemap_recover: good "
					"type=%02x block=%016jx/%zd\n",
					bref->type, data_off, bytes);
				*/
			}
		}
#if 0
		/*
		 * XXX this stuff doesn't work, avail is miscalculated and
		 * code 10 means something else now.
		 */
		else if ((*bitmap & bmmask11) == bmmask11) {
			/*
			 * Mayfree/Realfree request and bitmap is currently
			 * marked as being fully allocated.
			 */
			if (!modified) {
				hammer2_chain_modify(chain, 0);
				modified = 1;
				goto again;
			}
			if (how == HAMMER2_FREEMAP_DOREALFREE)
				*bitmap &= ~bmmask11;
			else
				*bitmap = (*bitmap & ~bmmask11) | bmmask10;
		} else if ((*bitmap & bmmask11) == bmmask10) {
			/*
			 * Mayfree/Realfree request and bitmap is currently
			 * marked as being possibly freeable.
			 */
			if (how == HAMMER2_FREEMAP_DOREALFREE) {
				if (!modified) {
					hammer2_chain_modify(chain, 0);
					modified = 1;
					goto again;
				}
				*bitmap &= ~bmmask11;
			}
		} else {
			/*
			 * 01 - Not implemented, currently illegal state
			 * 00 - Not allocated at all, illegal free.
			 */
			panic("hammer2_freemap_adjust: "
			      "Illegal state %08x(%08x)",
			      *bitmap, *bitmap & bmmask11);
		}
#endif
		--count;
		bmmask01 <<= 2;
		bmmask10 <<= 2;
		bmmask11 <<= 2;
	}
#if HAMMER2_BMAP_ELEMENTS != 8
#error "hammer2_freemap.c: HAMMER2_BMAP_ELEMENTS expected to be 8"
#endif
	if (how == HAMMER2_FREEMAP_DOREALFREE && modified) {
		bmap->avail += 1 << radix;
		KKASSERT(bmap->avail <= HAMMER2_SEGSIZE);
		if (bmap->avail == HAMMER2_SEGSIZE &&
		    bmap->bitmapq[0] == 0 &&
		    bmap->bitmapq[1] == 0 &&
		    bmap->bitmapq[2] == 0 &&
		    bmap->bitmapq[3] == 0 &&
		    bmap->bitmapq[4] == 0 &&
		    bmap->bitmapq[5] == 0 &&
		    bmap->bitmapq[6] == 0 &&
		    bmap->bitmapq[7] == 0) {
			key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL0_RADIX);
			kprintf("Freeseg %016jx\n", (intmax_t)key);
			bmap->class = 0;
		}
	}

	/*
	 * chain->bref.check.freemap.bigmask (XXX)
	 *
	 * Setting bigmask is a hint to the allocation code that there might
	 * be something allocatable.  We also set this in recovery... it
	 * doesn't hurt and we might want to use the hint for other validation
	 * operations later on.
	 */
	if (modified)
		chain->bref.check.freemap.bigmask |= 1 << radix;

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
done:
	hammer2_chain_unlock(parent);
	hammer2_chain_drop(parent);
}

/*
 * Validate the freemap, in three stages.
 *
 * stage-1	ALLOCATED     -> POSSIBLY FREE
 *		POSSIBLY FREE -> POSSIBLY FREE (type corrected)
 *
 *	This transitions bitmap entries from ALLOCATED to POSSIBLY FREE.
 *	The POSSIBLY FREE state does not mean that a block is actually free
 *	and may be transitioned back to ALLOCATED in stage-2.
 *
 *	This is typically done during normal filesystem operations when
 *	something is deleted or a block is replaced.
 *
 *	This is done by bulkfree in-bulk after a memory-bounded meta-data
 *	scan to try to determine what might be freeable.
 *
 *	This can be done unconditionally through a freemap scan when the
 *	intention is to brute-force recover the proper state of the freemap.
 *
 * stage-2	POSSIBLY FREE -> ALLOCATED	(scan metadata topology)
 *
 *	This is done by bulkfree during a meta-data scan to ensure that
 *	all blocks still actually allocated by the filesystem are marked
 *	as such.
 *
 *	NOTE! Live filesystem transitions to POSSIBLY FREE can occur while
 *	      the bulkfree stage-2 and stage-3 are running.  The live
 *	      filesystem will use the alternative POSSIBLY FREE type (2) to
 *	      prevent stage-3 from improperly transitioning unvetted
 *	      possibly-free blocks to FREE.
 *
 * stage-3	POSSIBLY FREE (type 1) -> FREE	(scan freemap)
 *
 *	This is done by bulkfree to finalize POSSIBLY FREE states.
 */
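
/*
 * Summary of the stages described above (illustrative):
 *
 *	ALLOCATED	  --(stage-1)--> POSSIBLY FREE
 *	POSSIBLY FREE	  --(stage-2)--> ALLOCATED	(still referenced)
 *	POSSIBLY FREE (1) --(stage-3)--> FREE
 *
 * Deletions made by the live filesystem while stage-2/3 are running use the
 * alternate POSSIBLY FREE type (2), which stage-3 leaves alone until a later
 * bulkfree pass has vetted it.
 */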