1 /* $NetBSD: chfs_gc.c,v 1.8 2015/01/11 17:28:22 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2010 Department of Software Engineering, 5 * University of Szeged, Hungary 6 * Copyright (c) 2010 Tamas Toth <ttoth@inf.u-szeged.hu> 7 * Copyright (c) 2010 Adam Hoka <ahoka@NetBSD.org> 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to The NetBSD Foundation 11 * by the Department of Software Engineering, University of Szeged, Hungary 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cprng.h> 36 #include "chfs.h" 37 38 void chfs_gc_release_inode(struct chfs_mount *, 39 struct chfs_inode *); 40 struct chfs_inode *chfs_gc_fetch_inode(struct chfs_mount *, 41 ino_t, uint32_t); 42 int chfs_check(struct chfs_mount *, struct chfs_vnode_cache *); 43 void chfs_clear_inode(struct chfs_mount *, struct chfs_inode *); 44 45 46 struct chfs_eraseblock *find_gc_block(struct chfs_mount *); 47 int chfs_gcollect_pristine(struct chfs_mount *, 48 struct chfs_eraseblock *, 49 struct chfs_vnode_cache *, struct chfs_node_ref *); 50 int chfs_gcollect_live(struct chfs_mount *, 51 struct chfs_eraseblock *, struct chfs_node_ref *, 52 struct chfs_inode *); 53 int chfs_gcollect_vnode(struct chfs_mount *, struct chfs_inode *); 54 int chfs_gcollect_dirent(struct chfs_mount *, 55 struct chfs_eraseblock *, struct chfs_inode *, 56 struct chfs_dirent *); 57 int chfs_gcollect_deletion_dirent(struct chfs_mount *, 58 struct chfs_eraseblock *, struct chfs_inode *, 59 struct chfs_dirent *); 60 int chfs_gcollect_dnode(struct chfs_mount *, 61 struct chfs_eraseblock *, struct chfs_inode *, 62 struct chfs_full_dnode *, uint32_t, uint32_t); 63 64 /* 65 * chfs_gc_trigger - wakes up GC thread, if it should run 66 * Must be called with chm_lock_mountfields held. 67 */ 68 void 69 chfs_gc_trigger(struct chfs_mount *chmp) 70 { 71 struct garbage_collector_thread *gc = &chmp->chm_gc_thread; 72 73 if (gc->gcth_running && 74 chfs_gc_thread_should_wake(chmp)) { 75 cv_signal(&gc->gcth_wakeup); 76 } 77 } 78 79 80 /* chfs_gc_thread - garbage collector's thread */ 81 void 82 chfs_gc_thread(void *data) 83 { 84 struct chfs_mount *chmp = data; 85 struct garbage_collector_thread *gc = &chmp->chm_gc_thread; 86 87 dbg_gc("[GC THREAD] thread started\n"); 88 89 mutex_enter(&chmp->chm_lock_mountfields); 90 while (gc->gcth_running) { 91 /* we must call chfs_gc_thread_should_wake with chm_lock_mountfields 92 * held, which is a bit awkwardly done here, but we cant relly 93 * do it otherway with the current design... 94 */ 95 if (chfs_gc_thread_should_wake(chmp)) { 96 if (chfs_gcollect_pass(chmp) == ENOSPC) { 97 mutex_exit(&chmp->chm_lock_mountfields); 98 panic("No space for garbage collection\n"); 99 /* XXX why break here? i have added a panic 100 * here to see if it gets triggered -ahoka 101 */ 102 break; 103 } 104 /* XXX gcollect_pass drops the mutex */ 105 } 106 107 cv_timedwait_sig(&gc->gcth_wakeup, 108 &chmp->chm_lock_mountfields, mstohz(100)); 109 } 110 mutex_exit(&chmp->chm_lock_mountfields); 111 112 dbg_gc("[GC THREAD] thread stopped\n"); 113 kthread_exit(0); 114 } 115 116 /* chfs_gc_thread_start - starts GC */ 117 void 118 chfs_gc_thread_start(struct chfs_mount *chmp) 119 { 120 struct garbage_collector_thread *gc = &chmp->chm_gc_thread; 121 122 cv_init(&gc->gcth_wakeup, "chfsgccv"); 123 124 gc->gcth_running = true; 125 kthread_create(PRI_NONE, /*KTHREAD_MPSAFE |*/ KTHREAD_MUSTJOIN, 126 NULL, chfs_gc_thread, chmp, &gc->gcth_thread, 127 "chfsgcth"); 128 } 129 130 /* chfs_gc_thread_start - stops GC */ 131 void 132 chfs_gc_thread_stop(struct chfs_mount *chmp) 133 { 134 struct garbage_collector_thread *gc = &chmp->chm_gc_thread; 135 136 /* check if it is actually running */ 137 if (gc->gcth_running) { 138 gc->gcth_running = false; 139 } else { 140 return; 141 } 142 cv_signal(&gc->gcth_wakeup); 143 dbg_gc("[GC THREAD] stop signal sent\n"); 144 145 kthread_join(gc->gcth_thread); 146 #ifdef BROKEN_KTH_JOIN 147 kpause("chfsthjoin", false, mstohz(1000), NULL); 148 #endif 149 150 cv_destroy(&gc->gcth_wakeup); 151 } 152 153 /* 154 * chfs_gc_thread_should_wake - checks if GC thread should wake up 155 * Must be called with chm_lock_mountfields held. 156 * Returns 1, if GC should wake up and 0 else. 157 */ 158 int 159 chfs_gc_thread_should_wake(struct chfs_mount *chmp) 160 { 161 int nr_very_dirty = 0; 162 struct chfs_eraseblock *cheb; 163 uint32_t dirty; 164 165 KASSERT(mutex_owned(&chmp->chm_lock_mountfields)); 166 167 /* Erase pending queue is not empty. */ 168 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) { 169 dbg_gc("erase_pending\n"); 170 return 1; 171 } 172 173 /* There is something unchecked in the filesystem. */ 174 if (chmp->chm_unchecked_size) { 175 dbg_gc("unchecked\n"); 176 return 1; 177 } 178 179 dirty = chmp->chm_dirty_size - chmp->chm_nr_erasable_blocks * 180 chmp->chm_ebh->eb_size; 181 182 /* Number of free and erasable blocks are critical. */ 183 if (chmp->chm_nr_free_blocks + chmp->chm_nr_erasable_blocks < 184 chmp->chm_resv_blocks_gctrigger && (dirty > chmp->chm_nospc_dirty)) { 185 dbg_gc("free: %d + erasable: %d < resv: %d\n", 186 chmp->chm_nr_free_blocks, chmp->chm_nr_erasable_blocks, 187 chmp->chm_resv_blocks_gctrigger); 188 dbg_gc("dirty: %d > nospc_dirty: %d\n", 189 dirty, chmp->chm_nospc_dirty); 190 191 return 1; 192 } 193 194 /* There is too much very dirty blocks. */ 195 TAILQ_FOREACH(cheb, &chmp->chm_very_dirty_queue, queue) { 196 nr_very_dirty++; 197 if (nr_very_dirty == chmp->chm_vdirty_blocks_gctrigger) { 198 dbg_gc("nr_very_dirty\n"); 199 return 1; 200 } 201 } 202 203 /* Everythin OK, GC shouldn't run. */ 204 return 0; 205 } 206 207 /* chfs_gc_release_inode - does nothing yet */ 208 void 209 chfs_gc_release_inode(struct chfs_mount *chmp, 210 struct chfs_inode *ip) 211 { 212 dbg_gc("release inode\n"); 213 } 214 215 /* chfs_gc_fetch_inode - assign the given inode to the GC */ 216 struct chfs_inode * 217 chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno, 218 uint32_t unlinked) 219 { 220 struct vnode *vp = NULL; 221 struct chfs_vnode_cache *vc; 222 struct chfs_inode *ip; 223 dbg_gc("fetch inode %llu\n", (unsigned long long)vno); 224 225 if (unlinked) { 226 dbg_gc("unlinked\n"); 227 vp = chfs_vnode_lookup(chmp, vno); 228 if (!vp) { 229 mutex_enter(&chmp->chm_lock_vnocache); 230 vc = chfs_vnode_cache_get(chmp, vno); 231 if (!vc) { 232 mutex_exit(&chmp->chm_lock_vnocache); 233 return NULL; 234 } 235 if (vc->state != VNO_STATE_CHECKEDABSENT) { 236 mutex_exit(&chmp->chm_lock_vnocache); 237 /* XXX why do we need the delay here?! */ 238 KASSERT(mutex_owned(&chmp->chm_lock_mountfields)); 239 cv_timedwait_sig( 240 &chmp->chm_gc_thread.gcth_wakeup, 241 &chmp->chm_lock_mountfields, mstohz(50)); 242 } else { 243 mutex_exit(&chmp->chm_lock_vnocache); 244 } 245 return NULL; 246 } 247 } else { 248 dbg_gc("vnode lookup\n"); 249 vp = chfs_vnode_lookup(chmp, vno); 250 } 251 dbg_gc("vp to ip\n"); 252 ip = VTOI(vp); 253 KASSERT(ip); 254 vrele(vp); 255 256 return ip; 257 } 258 259 extern rb_tree_ops_t frag_rbtree_ops; 260 261 /* chfs_check - checks an inode with minimal initialization */ 262 int 263 chfs_check(struct chfs_mount *chmp, struct chfs_vnode_cache *chvc) 264 { 265 KASSERT(mutex_owned(&chmp->chm_lock_vnocache)); 266 267 struct chfs_inode *ip; 268 struct vnode *vp; 269 int ret; 270 271 /* Get a new inode. */ 272 ip = pool_get(&chfs_inode_pool, PR_WAITOK); 273 if (!ip) { 274 return ENOMEM; 275 } 276 277 vp = kmem_zalloc(sizeof(struct vnode), KM_SLEEP); 278 279 /* Minimal initialization. */ 280 ip->chvc = chvc; 281 ip->vp = vp; 282 283 vp->v_data = ip; 284 285 rb_tree_init(&ip->fragtree, &frag_rbtree_ops); 286 TAILQ_INIT(&ip->dents); 287 288 /* Build the node. */ 289 mutex_exit(&chmp->chm_lock_vnocache); 290 ret = chfs_read_inode_internal(chmp, ip); 291 mutex_enter(&chmp->chm_lock_vnocache); 292 if (!ret) { 293 chfs_clear_inode(chmp, ip); 294 } 295 296 /* Release inode. */ 297 pool_put(&chfs_inode_pool, ip); 298 299 return ret; 300 } 301 302 /* chfs_clear_inode - kills a minimal inode */ 303 void 304 chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip) 305 { 306 KASSERT(mutex_owned(&chmp->chm_lock_vnocache)); 307 308 struct chfs_dirent *fd, *tmpfd; 309 struct chfs_vnode_cache *chvc; 310 struct chfs_node_ref *nref; 311 312 chvc = ip->chvc; 313 /* shouldnt this be: */ 314 //bool deleted = (chvc && !(chvc->pvno || chvc->nlink)); 315 int deleted = (chvc && !(chvc->pvno | chvc->nlink)); 316 317 /* Set actual state. */ 318 if (chvc && chvc->state != VNO_STATE_CHECKING) { 319 chvc->state = VNO_STATE_CLEARING; 320 } 321 322 /* Remove vnode information. */ 323 while (deleted && chvc->v != (struct chfs_node_ref *)chvc) { 324 nref = chvc->v; 325 chfs_remove_and_obsolete(chmp, chvc, nref, &chvc->v); 326 } 327 328 /* Destroy data. */ 329 chfs_kill_fragtree(chmp, &ip->fragtree); 330 331 /* Clear dirents. */ 332 TAILQ_FOREACH_SAFE(fd, &ip->dents, fds, tmpfd) { 333 chfs_free_dirent(fd); 334 } 335 336 /* Remove node from vnode cache. */ 337 if (chvc && chvc->state == VNO_STATE_CHECKING) { 338 chvc->state = VNO_STATE_CHECKEDABSENT; 339 if ((struct chfs_vnode_cache *)chvc->v == chvc && 340 (struct chfs_vnode_cache *)chvc->dirents == chvc && 341 (struct chfs_vnode_cache *)chvc->dnode == chvc) 342 chfs_vnode_cache_remove(chmp, chvc); 343 } 344 } 345 346 /* find_gc_block - finds the next block for GC */ 347 struct chfs_eraseblock * 348 find_gc_block(struct chfs_mount *chmp) 349 { 350 struct chfs_eraseblock *ret; 351 struct chfs_eraseblock_queue *nextqueue; 352 353 KASSERT(mutex_owned(&chmp->chm_lock_mountfields)); 354 355 /* Get a random number. */ 356 uint32_t n = cprng_fast32() % 128; 357 358 again: 359 /* Find an eraseblock queue. */ 360 if (n<50 && !TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) { 361 dbg_gc("Picking block from erase_pending_queue to GC next\n"); 362 nextqueue = &chmp->chm_erase_pending_queue; 363 } else if (n<110 && !TAILQ_EMPTY(&chmp->chm_very_dirty_queue) ) { 364 dbg_gc("Picking block from very_dirty_queue to GC next\n"); 365 nextqueue = &chmp->chm_very_dirty_queue; 366 } else if (n<126 && !TAILQ_EMPTY(&chmp->chm_dirty_queue) ) { 367 dbg_gc("Picking block from dirty_queue to GC next\n"); 368 nextqueue = &chmp->chm_dirty_queue; 369 } else if (!TAILQ_EMPTY(&chmp->chm_clean_queue)) { 370 dbg_gc("Picking block from clean_queue to GC next\n"); 371 nextqueue = &chmp->chm_clean_queue; 372 } else if (!TAILQ_EMPTY(&chmp->chm_dirty_queue)) { 373 dbg_gc("Picking block from dirty_queue to GC next" 374 " (clean_queue was empty)\n"); 375 nextqueue = &chmp->chm_dirty_queue; 376 } else if (!TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) { 377 dbg_gc("Picking block from very_dirty_queue to GC next" 378 " (clean_queue and dirty_queue were empty)\n"); 379 nextqueue = &chmp->chm_very_dirty_queue; 380 } else if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) { 381 dbg_gc("Picking block from erase_pending_queue to GC next" 382 " (clean_queue and {very_,}dirty_queue were empty)\n"); 383 nextqueue = &chmp->chm_erase_pending_queue; 384 } else if (!TAILQ_EMPTY(&chmp->chm_erasable_pending_wbuf_queue)) { 385 dbg_gc("Synching wbuf in order to reuse " 386 "erasable_pendig_wbuf_queue blocks\n"); 387 rw_enter(&chmp->chm_lock_wbuf, RW_WRITER); 388 chfs_flush_pending_wbuf(chmp); 389 rw_exit(&chmp->chm_lock_wbuf); 390 goto again; 391 } else { 392 dbg_gc("CHFS: no clean, dirty _or_ erasable" 393 " blocks to GC from! Where are they all?\n"); 394 return NULL; 395 } 396 397 /* Get the first block of the queue. */ 398 ret = TAILQ_FIRST(nextqueue); 399 if (chmp->chm_nextblock) { 400 dbg_gc("nextblock num: %u - gcblock num: %u\n", 401 chmp->chm_nextblock->lnr, ret->lnr); 402 if (ret == chmp->chm_nextblock) 403 goto again; 404 } 405 TAILQ_REMOVE(nextqueue, ret, queue); 406 407 /* Set GC block. */ 408 chmp->chm_gcblock = ret; 409 /* Set GC node. */ 410 ret->gc_node = ret->first_node; 411 412 if (!ret->gc_node) { 413 dbg_gc("Oops! ret->gc_node at LEB: %u is NULL\n", ret->lnr); 414 panic("CHFS BUG - one LEB's gc_node is NULL\n"); 415 } 416 417 /* TODO wasted size? */ 418 return ret; 419 } 420 421 /* chfs_gcollect_pass - this is the main function of GC */ 422 int 423 chfs_gcollect_pass(struct chfs_mount *chmp) 424 { 425 struct chfs_vnode_cache *vc; 426 struct chfs_eraseblock *eb; 427 struct chfs_node_ref *nref; 428 uint32_t gcblock_dirty; 429 struct chfs_inode *ip; 430 ino_t vno, pvno; 431 uint32_t nlink; 432 int ret = 0; 433 434 KASSERT(mutex_owned(&chmp->chm_lock_mountfields)); 435 436 /* Check all vnodes. */ 437 for (;;) { 438 mutex_enter(&chmp->chm_lock_sizes); 439 440 /* Check unchecked size. */ 441 dbg_gc("unchecked size == %u\n", chmp->chm_unchecked_size); 442 if (!chmp->chm_unchecked_size) 443 break; 444 445 /* Compare vnode number to the maximum. */ 446 if (chmp->chm_checked_vno > chmp->chm_max_vno) { 447 mutex_exit(&chmp->chm_lock_sizes); 448 dbg_gc("checked_vno (#%llu) > max_vno (#%llu)\n", 449 (unsigned long long)chmp->chm_checked_vno, 450 (unsigned long long)chmp->chm_max_vno); 451 return ENOSPC; 452 } 453 454 mutex_exit(&chmp->chm_lock_sizes); 455 456 mutex_enter(&chmp->chm_lock_vnocache); 457 dbg_gc("checking vno #%llu\n", 458 (unsigned long long)chmp->chm_checked_vno); 459 dbg_gc("get vnode cache\n"); 460 461 /* OK, Get and check the vnode cache. */ 462 vc = chfs_vnode_cache_get(chmp, chmp->chm_checked_vno++); 463 464 if (!vc) { 465 dbg_gc("!vc\n"); 466 mutex_exit(&chmp->chm_lock_vnocache); 467 continue; 468 } 469 470 if ((vc->pvno | vc->nlink) == 0) { 471 dbg_gc("(pvno | nlink) == 0\n"); 472 mutex_exit(&chmp->chm_lock_vnocache); 473 continue; 474 } 475 476 /* Find out the state of the vnode. */ 477 dbg_gc("switch\n"); 478 switch (vc->state) { 479 case VNO_STATE_CHECKEDABSENT: 480 /* FALLTHROUGH */ 481 case VNO_STATE_PRESENT: 482 mutex_exit(&chmp->chm_lock_vnocache); 483 continue; 484 485 case VNO_STATE_GC: 486 /* FALLTHROUGH */ 487 case VNO_STATE_CHECKING: 488 mutex_exit(&chmp->chm_lock_vnocache); 489 dbg_gc("VNO_STATE GC or CHECKING\n"); 490 panic("CHFS BUG - vc state gc or checking\n"); 491 492 case VNO_STATE_READING: 493 chmp->chm_checked_vno--; 494 mutex_exit(&chmp->chm_lock_vnocache); 495 /* XXX why do we need the delay here?! */ 496 kpause("chvncrea", true, mstohz(50), NULL); 497 498 return 0; 499 500 default: 501 mutex_exit(&chmp->chm_lock_vnocache); 502 dbg_gc("default\n"); 503 panic("CHFS BUG - vc state is other what we" 504 " checked\n"); 505 506 case VNO_STATE_UNCHECKED: 507 ; 508 } 509 510 /* We found an unchecked vnode. */ 511 512 vc->state = VNO_STATE_CHECKING; 513 514 /* XXX check if this is too heavy to call under 515 * chm_lock_vnocache 516 */ 517 ret = chfs_check(chmp, vc); 518 vc->state = VNO_STATE_CHECKEDABSENT; 519 520 mutex_exit(&chmp->chm_lock_vnocache); 521 return ret; 522 } 523 524 /* Get GC block. */ 525 eb = chmp->chm_gcblock; 526 527 if (!eb) { 528 eb = find_gc_block(chmp); 529 } 530 531 if (!eb) { 532 dbg_gc("!eb\n"); 533 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) { 534 mutex_exit(&chmp->chm_lock_sizes); 535 return EAGAIN; 536 } 537 mutex_exit(&chmp->chm_lock_sizes); 538 return EIO; 539 } 540 541 if (!eb->used_size) { 542 dbg_gc("!eb->used_size\n"); 543 goto eraseit; 544 } 545 546 /* Get GC node. */ 547 nref = eb->gc_node; 548 gcblock_dirty = eb->dirty_size; 549 550 /* Find a node which wasn't obsoleted yet. 551 * Obsoleted nodes will be simply deleted after the whole block has checked. */ 552 while(CHFS_REF_OBSOLETE(nref)) { 553 #ifdef DBG_MSG_GC 554 if (nref == chmp->chm_blocks[nref->nref_lnr].last_node) { 555 dbg_gc("THIS NODE IS THE LAST NODE OF ITS EB\n"); 556 } 557 #endif 558 nref = node_next(nref); 559 if (!nref) { 560 eb->gc_node = nref; 561 mutex_exit(&chmp->chm_lock_sizes); 562 panic("CHFS BUG - nref is NULL)\n"); 563 } 564 } 565 566 /* We found a "not obsoleted" node. */ 567 eb->gc_node = nref; 568 KASSERT(nref->nref_lnr == chmp->chm_gcblock->lnr); 569 570 /* Check if node is in any chain. */ 571 if (!nref->nref_next) { 572 /* This node is not in any chain. Simply collect it, or obsolete. */ 573 mutex_exit(&chmp->chm_lock_sizes); 574 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) { 575 chfs_gcollect_pristine(chmp, eb, NULL, nref); 576 } else { 577 chfs_mark_node_obsolete(chmp, nref); 578 } 579 goto lock_size; 580 } 581 582 mutex_exit(&chmp->chm_lock_sizes); 583 584 mutex_enter(&chmp->chm_lock_vnocache); 585 586 dbg_gc("nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset); 587 vc = chfs_nref_to_vc(nref); 588 589 /* Check the state of the node. */ 590 dbg_gc("switch\n"); 591 switch(vc->state) { 592 case VNO_STATE_CHECKEDABSENT: 593 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) { 594 vc->state = VNO_STATE_GC; 595 } 596 break; 597 598 case VNO_STATE_PRESENT: 599 break; 600 601 case VNO_STATE_UNCHECKED: 602 /* FALLTHROUGH */ 603 case VNO_STATE_CHECKING: 604 /* FALLTHROUGH */ 605 case VNO_STATE_GC: 606 mutex_exit(&chmp->chm_lock_vnocache); 607 panic("CHFS BUG - vc state unchecked," 608 " checking or gc (vno #%llu, num #%d)\n", 609 (unsigned long long)vc->vno, vc->state); 610 611 case VNO_STATE_READING: 612 /* Node is in use at this time. */ 613 mutex_exit(&chmp->chm_lock_vnocache); 614 kpause("chvncrea", true, mstohz(50), NULL); 615 return 0; 616 } 617 618 if (vc->state == VNO_STATE_GC) { 619 dbg_gc("vc->state == VNO_STATE_GC\n"); 620 vc->state = VNO_STATE_CHECKEDABSENT; 621 mutex_exit(&chmp->chm_lock_vnocache); 622 ret = chfs_gcollect_pristine(chmp, eb, NULL, nref); 623 624 //TODO wake_up(&chmp->chm_vnocache_wq); 625 if (ret != EBADF) 626 goto test_gcnode; 627 mutex_enter(&chmp->chm_lock_vnocache); 628 } 629 630 /* Collect living node. */ 631 vno = vc->vno; 632 pvno = vc->pvno; 633 nlink = vc->nlink; 634 mutex_exit(&chmp->chm_lock_vnocache); 635 636 ip = chfs_gc_fetch_inode(chmp, vno, !(pvno | nlink)); 637 638 if (!ip) { 639 dbg_gc("!ip\n"); 640 ret = 0; 641 goto lock_size; 642 } 643 644 chfs_gcollect_live(chmp, eb, nref, ip); 645 646 chfs_gc_release_inode(chmp, ip); 647 648 test_gcnode: 649 if (eb->dirty_size == gcblock_dirty && 650 !CHFS_REF_OBSOLETE(eb->gc_node)) { 651 dbg_gc("ERROR collecting node at %u failed.\n", 652 CHFS_GET_OFS(eb->gc_node->nref_offset)); 653 654 ret = ENOSPC; 655 } 656 657 lock_size: 658 KASSERT(mutex_owned(&chmp->chm_lock_mountfields)); 659 mutex_enter(&chmp->chm_lock_sizes); 660 eraseit: 661 dbg_gc("eraseit\n"); 662 663 if (chmp->chm_gcblock) { 664 /* This is only for debugging. */ 665 dbg_gc("eb used size = %u\n", chmp->chm_gcblock->used_size); 666 dbg_gc("eb free size = %u\n", chmp->chm_gcblock->free_size); 667 dbg_gc("eb dirty size = %u\n", chmp->chm_gcblock->dirty_size); 668 dbg_gc("eb unchecked size = %u\n", 669 chmp->chm_gcblock->unchecked_size); 670 dbg_gc("eb wasted size = %u\n", chmp->chm_gcblock->wasted_size); 671 672 KASSERT(chmp->chm_gcblock->used_size + chmp->chm_gcblock->free_size + 673 chmp->chm_gcblock->dirty_size + 674 chmp->chm_gcblock->unchecked_size + 675 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size); 676 677 } 678 679 /* Check the state of GC block. */ 680 if (chmp->chm_gcblock && chmp->chm_gcblock->dirty_size + 681 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size) { 682 dbg_gc("Block at leb #%u completely obsoleted by GC, " 683 "Moving to erase_pending_queue\n", chmp->chm_gcblock->lnr); 684 TAILQ_INSERT_TAIL(&chmp->chm_erase_pending_queue, 685 chmp->chm_gcblock, queue); 686 chmp->chm_gcblock = NULL; 687 chmp->chm_nr_erasable_blocks++; 688 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) { 689 ret = chfs_remap_leb(chmp); 690 } 691 } 692 693 mutex_exit(&chmp->chm_lock_sizes); 694 dbg_gc("return\n"); 695 return ret; 696 } 697 698 699 /* chfs_gcollect_pristine - collects a pristine node */ 700 int 701 chfs_gcollect_pristine(struct chfs_mount *chmp, struct chfs_eraseblock *cheb, 702 struct chfs_vnode_cache *chvc, struct chfs_node_ref *nref) 703 { 704 struct chfs_node_ref *newnref; 705 struct chfs_flash_node_hdr *nhdr; 706 struct chfs_flash_vnode *fvnode; 707 struct chfs_flash_dirent_node *fdirent; 708 struct chfs_flash_data_node *fdata; 709 int ret, retries = 0; 710 uint32_t ofs, crc; 711 size_t totlen = chfs_nref_len(chmp, cheb, nref); 712 char *data; 713 struct iovec vec; 714 size_t retlen; 715 716 dbg_gc("gcollect_pristine\n"); 717 718 data = kmem_alloc(totlen, KM_SLEEP); 719 if (!data) 720 return ENOMEM; 721 722 ofs = CHFS_GET_OFS(nref->nref_offset); 723 724 /* Read header. */ 725 ret = chfs_read_leb(chmp, nref->nref_lnr, data, ofs, totlen, &retlen); 726 if (ret) { 727 dbg_gc("reading error\n"); 728 goto err_out; 729 } 730 if (retlen != totlen) { 731 dbg_gc("read size error\n"); 732 ret = EIO; 733 goto err_out; 734 } 735 nhdr = (struct chfs_flash_node_hdr *)data; 736 737 /* Check the header. */ 738 if (le16toh(nhdr->magic) != CHFS_FS_MAGIC_BITMASK) { 739 dbg_gc("node header magic number error\n"); 740 ret = EBADF; 741 goto err_out; 742 } 743 crc = crc32(0, (uint8_t *)nhdr, CHFS_NODE_HDR_SIZE - 4); 744 if (crc != le32toh(nhdr->hdr_crc)) { 745 dbg_gc("node header crc error\n"); 746 ret = EBADF; 747 goto err_out; 748 } 749 750 /* Read the remaining parts. */ 751 switch(le16toh(nhdr->type)) { 752 case CHFS_NODETYPE_VNODE: 753 /* vnode information node */ 754 fvnode = (struct chfs_flash_vnode *)data; 755 crc = crc32(0, (uint8_t *)fvnode, sizeof(struct chfs_flash_vnode) - 4); 756 if (crc != le32toh(fvnode->node_crc)) { 757 dbg_gc("vnode crc error\n"); 758 ret = EBADF; 759 goto err_out; 760 } 761 break; 762 case CHFS_NODETYPE_DIRENT: 763 /* dirent node */ 764 fdirent = (struct chfs_flash_dirent_node *)data; 765 crc = crc32(0, (uint8_t *)fdirent, sizeof(struct chfs_flash_dirent_node) - 4); 766 if (crc != le32toh(fdirent->node_crc)) { 767 dbg_gc("dirent crc error\n"); 768 ret = EBADF; 769 goto err_out; 770 } 771 crc = crc32(0, fdirent->name, fdirent->nsize); 772 if (crc != le32toh(fdirent->name_crc)) { 773 dbg_gc("dirent name crc error\n"); 774 ret = EBADF; 775 goto err_out; 776 } 777 break; 778 case CHFS_NODETYPE_DATA: 779 /* data node */ 780 fdata = (struct chfs_flash_data_node *)data; 781 crc = crc32(0, (uint8_t *)fdata, sizeof(struct chfs_flash_data_node) - 4); 782 if (crc != le32toh(fdata->node_crc)) { 783 dbg_gc("data node crc error\n"); 784 ret = EBADF; 785 goto err_out; 786 } 787 break; 788 default: 789 /* unknown node */ 790 if (chvc) { 791 dbg_gc("unknown node have vnode cache\n"); 792 ret = EBADF; 793 goto err_out; 794 } 795 } 796 /* CRC's OK, write node to its new place */ 797 retry: 798 ret = chfs_reserve_space_gc(chmp, totlen); 799 if (ret) 800 goto err_out; 801 802 newnref = chfs_alloc_node_ref(chmp->chm_nextblock); 803 if (!newnref) { 804 ret = ENOMEM; 805 goto err_out; 806 } 807 808 ofs = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size; 809 newnref->nref_offset = ofs; 810 811 /* write out the whole node */ 812 vec.iov_base = (void *)data; 813 vec.iov_len = totlen; 814 mutex_enter(&chmp->chm_lock_sizes); 815 ret = chfs_write_wbuf(chmp, &vec, 1, ofs, &retlen); 816 817 if (ret || retlen != totlen) { 818 /* error while writing */ 819 chfs_err("error while writing out to the media\n"); 820 chfs_err("err: %d | size: %zu | retlen : %zu\n", 821 ret, totlen, retlen); 822 823 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen); 824 if (retries) { 825 mutex_exit(&chmp->chm_lock_sizes); 826 ret = EIO; 827 goto err_out; 828 } 829 830 /* try again */ 831 retries++; 832 mutex_exit(&chmp->chm_lock_sizes); 833 goto retry; 834 } 835 836 /* update vnode information */ 837 mutex_exit(&chmp->chm_lock_sizes); 838 //TODO should we set free_size? 839 mutex_enter(&chmp->chm_lock_vnocache); 840 chfs_add_vnode_ref_to_vc(chmp, chvc, newnref); 841 mutex_exit(&chmp->chm_lock_vnocache); 842 ret = 0; 843 /* FALLTHROUGH */ 844 err_out: 845 kmem_free(data, totlen); 846 return ret; 847 } 848 849 850 /* chfs_gcollect_live - collects a living node */ 851 int 852 chfs_gcollect_live(struct chfs_mount *chmp, 853 struct chfs_eraseblock *cheb, struct chfs_node_ref *nref, 854 struct chfs_inode *ip) 855 { 856 struct chfs_node_frag *frag; 857 struct chfs_full_dnode *fn = NULL; 858 int start = 0, end = 0, nrfrags = 0; 859 struct chfs_dirent *fd = NULL; 860 int ret = 0; 861 bool is_dirent; 862 863 dbg_gc("gcollect_live\n"); 864 865 if (chmp->chm_gcblock != cheb) { 866 dbg_gc("GC block is no longer gcblock. Restart.\n"); 867 goto upnout; 868 } 869 870 if (CHFS_REF_OBSOLETE(nref)) { 871 dbg_gc("node to be GC'd was obsoleted in the meantime.\n"); 872 goto upnout; 873 } 874 875 /* It's a vnode? */ 876 if (ip->chvc->v == nref) { 877 chfs_gcollect_vnode(chmp, ip); 878 goto upnout; 879 } 880 881 /* Find data node. */ 882 dbg_gc("find full dnode\n"); 883 for(frag = frag_first(&ip->fragtree); 884 frag; frag = frag_next(&ip->fragtree, frag)) { 885 if (frag->node && frag->node->nref == nref) { 886 fn = frag->node; 887 end = frag->ofs + frag->size; 888 if (!nrfrags++) 889 start = frag->ofs; 890 if (nrfrags == frag->node->frags) 891 break; 892 } 893 } 894 895 /* It's a pristine node, or dnode (or hole? XXX have we hole nodes?) */ 896 if (fn) { 897 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) { 898 ret = chfs_gcollect_pristine(chmp, 899 cheb, ip->chvc, nref); 900 if (!ret) { 901 frag->node->nref = ip->chvc->v; 902 } 903 if (ret != EBADF) 904 goto upnout; 905 } 906 ret = chfs_gcollect_dnode(chmp, cheb, ip, fn, start, end); 907 goto upnout; 908 } 909 910 /* Is it a dirent? */ 911 dbg_gc("find full dirent\n"); 912 is_dirent = false; 913 TAILQ_FOREACH(fd, &ip->dents, fds) { 914 if (fd->nref == nref) { 915 is_dirent = true; 916 break; 917 } 918 } 919 920 if (is_dirent && fd->vno) { 921 /* Living dirent. */ 922 ret = chfs_gcollect_dirent(chmp, cheb, ip, fd); 923 } else if (is_dirent) { 924 /* Already deleted dirent. */ 925 ret = chfs_gcollect_deletion_dirent(chmp, cheb, ip, fd); 926 } else { 927 dbg_gc("Nref at leb #%u offset 0x%08x wasn't in node list" 928 " for ino #%llu\n", 929 nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), 930 (unsigned long long)ip->ino); 931 if (CHFS_REF_OBSOLETE(nref)) { 932 dbg_gc("But it's obsolete so we don't mind" 933 " too much.\n"); 934 } 935 } 936 937 upnout: 938 return ret; 939 } 940 941 /* chfs_gcollect_vnode - collects a vnode information node */ 942 int 943 chfs_gcollect_vnode(struct chfs_mount *chmp, struct chfs_inode *ip) 944 { 945 int ret; 946 dbg_gc("gcollect_vnode\n"); 947 948 /* Simply write the new vnode information to the flash 949 * with GC's space allocation */ 950 ret = chfs_write_flash_vnode(chmp, ip, ALLOC_GC); 951 952 return ret; 953 } 954 955 /* chfs_gcollect_dirent - collects a dirent */ 956 int 957 chfs_gcollect_dirent(struct chfs_mount *chmp, 958 struct chfs_eraseblock *cheb, struct chfs_inode *parent, 959 struct chfs_dirent *fd) 960 { 961 struct vnode *vnode = NULL; 962 struct chfs_inode *ip; 963 dbg_gc("gcollect_dirent\n"); 964 965 /* Find vnode. */ 966 vnode = chfs_vnode_lookup(chmp, fd->vno); 967 968 /* XXX maybe KASSERT or panic on this? */ 969 if (vnode == NULL) { 970 return ENOENT; 971 } 972 973 ip = VTOI(vnode); 974 vrele(vnode); 975 976 /* Remove and obsolete the previous version. */ 977 mutex_enter(&chmp->chm_lock_vnocache); 978 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref, 979 &parent->chvc->dirents); 980 mutex_exit(&chmp->chm_lock_vnocache); 981 982 /* Write the new dirent to the flash. */ 983 return chfs_write_flash_dirent(chmp, 984 parent, ip, fd, fd->vno, ALLOC_GC); 985 } 986 987 /* 988 * chfs_gcollect_deletion_dirent - 989 * collects a dirent what was marked as deleted 990 */ 991 int 992 chfs_gcollect_deletion_dirent(struct chfs_mount *chmp, 993 struct chfs_eraseblock *cheb, struct chfs_inode *parent, 994 struct chfs_dirent *fd) 995 { 996 struct chfs_flash_dirent_node chfdn; 997 struct chfs_node_ref *nref; 998 size_t retlen, name_len, nref_len; 999 uint32_t name_crc; 1000 1001 int ret; 1002 1003 dbg_gc("gcollect_deletion_dirent\n"); 1004 1005 /* Check node. */ 1006 name_len = strlen(fd->name); 1007 name_crc = crc32(0, fd->name, name_len); 1008 1009 nref_len = chfs_nref_len(chmp, cheb, fd->nref); 1010 1011 /* XXX This was a noop (void)chfs_vnode_lookup(chmp, fd->vno); */ 1012 1013 /* Find it in parent dirents. */ 1014 for (nref = parent->chvc->dirents; 1015 nref != (void*)parent->chvc; 1016 nref = nref->nref_next) { 1017 1018 if (!CHFS_REF_OBSOLETE(nref)) 1019 continue; 1020 1021 /* if node refs have different length, skip */ 1022 if (chfs_nref_len(chmp, NULL, nref) != nref_len) 1023 continue; 1024 1025 if (CHFS_GET_OFS(nref->nref_offset) == 1026 CHFS_GET_OFS(fd->nref->nref_offset)) { 1027 continue; 1028 } 1029 1030 /* read it from flash */ 1031 ret = chfs_read_leb(chmp, 1032 nref->nref_lnr, (void*)&chfdn, CHFS_GET_OFS(nref->nref_offset), 1033 nref_len, &retlen); 1034 1035 if (ret) { 1036 dbg_gc("Read error: %d\n", ret); 1037 continue; 1038 } 1039 1040 if (retlen != nref_len) { 1041 dbg_gc("Error reading node:" 1042 " read: %zu insted of: %zu\n", retlen, nref_len); 1043 continue; 1044 } 1045 1046 /* if node type doesn't match, skip */ 1047 if (le16toh(chfdn.type) != CHFS_NODETYPE_DIRENT) 1048 continue; 1049 1050 /* if crc doesn't match, skip */ 1051 if (le32toh(chfdn.name_crc) != name_crc) 1052 continue; 1053 1054 /* if length of name different, or this is an another deletion 1055 * dirent, skip 1056 */ 1057 if (chfdn.nsize != name_len || !le64toh(chfdn.vno)) 1058 continue; 1059 1060 /* check actual name */ 1061 if (memcmp(chfdn.name, fd->name, name_len)) 1062 continue; 1063 1064 mutex_enter(&chmp->chm_lock_vnocache); 1065 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref, 1066 &parent->chvc->dirents); 1067 mutex_exit(&chmp->chm_lock_vnocache); 1068 return chfs_write_flash_dirent(chmp, 1069 parent, NULL, fd, fd->vno, ALLOC_GC); 1070 } 1071 1072 /* Simply remove it from the parent dirents. */ 1073 TAILQ_REMOVE(&parent->dents, fd, fds); 1074 chfs_free_dirent(fd); 1075 return 0; 1076 } 1077 1078 /* chfs_gcollect_dnode - */ 1079 int 1080 chfs_gcollect_dnode(struct chfs_mount *chmp, 1081 struct chfs_eraseblock *orig_cheb, struct chfs_inode *ip, 1082 struct chfs_full_dnode *fn, uint32_t orig_start, uint32_t orig_end) 1083 { 1084 struct chfs_node_ref *nref; 1085 struct chfs_full_dnode *newfn; 1086 struct chfs_flash_data_node *fdnode; 1087 int ret = 0, retries = 0; 1088 uint32_t totlen; 1089 char *data = NULL; 1090 struct iovec vec; 1091 size_t retlen; 1092 dbg_gc("gcollect_dnode\n"); 1093 1094 //TODO merge frags 1095 1096 KASSERT(orig_cheb->lnr == fn->nref->nref_lnr); 1097 totlen = chfs_nref_len(chmp, orig_cheb, fn->nref); 1098 data = kmem_alloc(totlen, KM_SLEEP); 1099 1100 /* Read the node from the flash. */ 1101 ret = chfs_read_leb(chmp, fn->nref->nref_lnr, data, fn->nref->nref_offset, 1102 totlen, &retlen); 1103 1104 fdnode = (struct chfs_flash_data_node *)data; 1105 fdnode->version = htole64(++ip->chvc->highest_version); 1106 fdnode->node_crc = htole32(crc32(0, (uint8_t *)fdnode, 1107 sizeof(*fdnode) - 4)); 1108 1109 vec.iov_base = (void *)data; 1110 vec.iov_len = totlen; 1111 1112 retry: 1113 /* Set the next block where we can write. */ 1114 ret = chfs_reserve_space_gc(chmp, totlen); 1115 if (ret) 1116 goto out; 1117 1118 nref = chfs_alloc_node_ref(chmp->chm_nextblock); 1119 if (!nref) { 1120 ret = ENOMEM; 1121 goto out; 1122 } 1123 1124 mutex_enter(&chmp->chm_lock_sizes); 1125 1126 nref->nref_offset = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size; 1127 KASSERT(nref->nref_offset % 4 == 0); 1128 chfs_change_size_free(chmp, chmp->chm_nextblock, -totlen); 1129 1130 /* Write it to the writebuffer. */ 1131 ret = chfs_write_wbuf(chmp, &vec, 1, nref->nref_offset, &retlen); 1132 if (ret || retlen != totlen) { 1133 /* error during writing */ 1134 chfs_err("error while writing out to the media\n"); 1135 chfs_err("err: %d | size: %d | retlen : %zu\n", 1136 ret, totlen, retlen); 1137 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen); 1138 if (retries) { 1139 ret = EIO; 1140 mutex_exit(&chmp->chm_lock_sizes); 1141 goto out; 1142 } 1143 1144 /* try again */ 1145 retries++; 1146 mutex_exit(&chmp->chm_lock_sizes); 1147 goto retry; 1148 } 1149 1150 dbg_gc("new nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset); 1151 1152 chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen); 1153 mutex_exit(&chmp->chm_lock_sizes); 1154 KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size); 1155 1156 /* Set fields of the new node. */ 1157 newfn = chfs_alloc_full_dnode(); 1158 newfn->nref = nref; 1159 newfn->ofs = fn->ofs; 1160 newfn->size = fn->size; 1161 newfn->frags = 0; 1162 1163 mutex_enter(&chmp->chm_lock_vnocache); 1164 /* Remove every part of the old node. */ 1165 chfs_remove_frags_of_node(chmp, &ip->fragtree, fn->nref); 1166 chfs_remove_and_obsolete(chmp, ip->chvc, fn->nref, &ip->chvc->dnode); 1167 1168 /* Add the new nref to inode. */ 1169 chfs_add_full_dnode_to_inode(chmp, ip, newfn); 1170 chfs_add_node_to_list(chmp, 1171 ip->chvc, newfn->nref, &ip->chvc->dnode); 1172 mutex_exit(&chmp->chm_lock_vnocache); 1173 1174 out: 1175 kmem_free(data, totlen); 1176 return ret; 1177 } 1178