1 /* $NetBSD: lfs_subr.c,v 1.40 2003/04/23 07:20:38 perseant Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /* 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed by the University of 53 * California, Berkeley and its contributors. 54 * 4. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95 71 */ 72 73 #include <sys/cdefs.h> 74 __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.40 2003/04/23 07:20:38 perseant Exp $"); 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/vnode.h> 80 #include <sys/buf.h> 81 #include <sys/mount.h> 82 #include <sys/malloc.h> 83 #include <sys/proc.h> 84 85 #include <ufs/ufs/inode.h> 86 #include <ufs/lfs/lfs.h> 87 #include <ufs/lfs/lfs_extern.h> 88 89 #include <uvm/uvm.h> 90 91 /* 92 * Return buffer with the contents of block "offset" from the beginning of 93 * directory "ip". If "res" is non-zero, fill it in with a pointer to the 94 * remaining space in the directory. 95 */ 96 int 97 lfs_blkatoff(void *v) 98 { 99 struct vop_blkatoff_args /* { 100 struct vnode *a_vp; 101 off_t a_offset; 102 char **a_res; 103 struct buf **a_bpp; 104 } */ *ap = v; 105 struct lfs *fs; 106 struct inode *ip; 107 struct buf *bp; 108 daddr_t lbn; 109 int bsize, error; 110 111 ip = VTOI(ap->a_vp); 112 fs = ip->i_lfs; 113 lbn = lblkno(fs, ap->a_offset); 114 bsize = blksize(fs, ip, lbn); 115 116 *ap->a_bpp = NULL; 117 if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) { 118 brelse(bp); 119 return (error); 120 } 121 if (ap->a_res) 122 *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); 123 *ap->a_bpp = bp; 124 return (0); 125 } 126 127 #ifdef LFS_DEBUG_MALLOC 128 char *lfs_res_names[LFS_NB_COUNT] = { 129 "summary", 130 "superblock", 131 "ifile block", 132 "cluster", 133 "clean", 134 }; 135 #endif 136 137 int lfs_res_qty[LFS_NB_COUNT] = { 138 LFS_N_SUMMARIES, 139 LFS_N_SBLOCKS, 140 LFS_N_IBLOCKS, 141 LFS_N_CLUSTERS, 142 LFS_N_CLEAN, 143 }; 144 145 void 146 lfs_setup_resblks(struct lfs *fs) 147 { 148 int i, j; 149 int maxbpp; 150 151 fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, 152 M_WAITOK); 153 for (i = 0; i < LFS_N_TOTAL; i++) { 154 fs->lfs_resblk[i].inuse = 0; 155 fs->lfs_resblk[i].p = NULL; 156 } 157 for (i = 0; i < LFS_RESHASH_WIDTH; i++) 158 LIST_INIT(fs->lfs_reshash + i); 159 160 /* 161 * These types of allocations can be larger than a page, 162 * so we can't use the pool subsystem for them. 163 */ 164 for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) 165 fs->lfs_resblk[i].size = fs->lfs_sumsize; 166 for (j = 0; j < LFS_N_SBLOCKS; j++, i++) 167 fs->lfs_resblk[i].size = LFS_SBPAD; 168 for (j = 0; j < LFS_N_IBLOCKS; j++, i++) 169 fs->lfs_resblk[i].size = fs->lfs_bsize; 170 for (j = 0; j < LFS_N_CLUSTERS; j++, i++) 171 fs->lfs_resblk[i].size = MAXPHYS; 172 for (j = 0; j < LFS_N_CLEAN; j++, i++) 173 fs->lfs_resblk[i].size = MAXPHYS; 174 175 for (i = 0; i < LFS_N_TOTAL; i++) { 176 fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size, 177 M_SEGMENT, M_WAITOK); 178 } 179 180 /* 181 * Initialize pools for small types (XXX is BPP small?) 182 */ 183 pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 184 LFS_N_CL, "lfsclpl", &pool_allocator_nointr); 185 pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 186 LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); 187 maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); 188 maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); 189 pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 190 LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); 191 } 192 193 void 194 lfs_free_resblks(struct lfs *fs) 195 { 196 int i; 197 198 pool_destroy(&fs->lfs_bpppool); 199 pool_destroy(&fs->lfs_segpool); 200 pool_destroy(&fs->lfs_clpool); 201 202 for (i = 0; i < LFS_N_TOTAL; i++) { 203 while (fs->lfs_resblk[i].inuse) 204 tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); 205 if (fs->lfs_resblk[i].p != NULL) 206 free(fs->lfs_resblk[i].p, M_SEGMENT); 207 } 208 free(fs->lfs_resblk, M_SEGMENT); 209 } 210 211 static unsigned int 212 lfs_mhash(void *vp) 213 { 214 return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; 215 } 216 217 /* 218 * Return memory of the given size for the given purpose, or use one of a 219 * number of spare last-resort buffers, if malloc returns NULL. 220 */ 221 void * 222 lfs_malloc(struct lfs *fs, size_t size, int type) 223 { 224 struct lfs_res_blk *re; 225 void *r; 226 int i, s, start; 227 unsigned int h; 228 229 r = NULL; 230 231 /* If no mem allocated for this type, it just waits */ 232 if (lfs_res_qty[type] == 0) { 233 r = malloc(size, M_SEGMENT, M_WAITOK); 234 return r; 235 } 236 237 /* Otherwise try a quick malloc, and if it works, great */ 238 if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) { 239 return r; 240 } 241 242 /* 243 * If malloc returned NULL, we are forced to use one of our 244 * reserve blocks. We have on hand at least one summary block, 245 * at least one cluster block, at least one superblock, 246 * and several indirect blocks. 247 */ 248 /* skip over blocks of other types */ 249 for (i = 0, start = 0; i < type; i++) 250 start += lfs_res_qty[i]; 251 while (r == NULL) { 252 for (i = 0; i < lfs_res_qty[type]; i++) { 253 if (fs->lfs_resblk[start + i].inuse == 0) { 254 re = fs->lfs_resblk + start + i; 255 re->inuse = 1; 256 r = re->p; 257 KASSERT(re->size >= size); 258 h = lfs_mhash(r); 259 s = splbio(); 260 LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); 261 splx(s); 262 return r; 263 } 264 } 265 #ifdef LFS_DEBUG_MALLOC 266 printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); 267 #endif 268 tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); 269 #ifdef LFS_DEBUG_MALLOC 270 printf("done sleeping on %s\n", lfs_res_names[type]); 271 #endif 272 } 273 /* NOTREACHED */ 274 return r; 275 } 276 277 void 278 lfs_free(struct lfs *fs, void *p, int type) 279 { 280 int s; 281 unsigned int h; 282 res_t *re; 283 #ifdef DEBUG 284 int i; 285 #endif 286 287 h = lfs_mhash(p); 288 s = splbio(); 289 LIST_FOREACH(re, &fs->lfs_reshash[h], res) { 290 if (re->p == p) { 291 KASSERT(re->inuse == 1); 292 LIST_REMOVE(re, res); 293 re->inuse = 0; 294 wakeup(&fs->lfs_resblk); 295 splx(s); 296 return; 297 } 298 } 299 #ifdef DEBUG 300 for (i = 0; i < LFS_N_TOTAL; i++) { 301 if (fs->lfs_resblk[i].p == p) 302 panic("lfs_free: inconsistent reserved block"); 303 } 304 #endif 305 splx(s); 306 307 /* 308 * If we didn't find it, free it. 309 */ 310 free(p, M_SEGMENT); 311 } 312 313 /* 314 * lfs_seglock -- 315 * Single thread the segment writer. 316 */ 317 int 318 lfs_seglock(struct lfs *fs, unsigned long flags) 319 { 320 struct segment *sp; 321 322 simple_lock(&fs->lfs_interlock); 323 if (fs->lfs_seglock) { 324 if (fs->lfs_lockpid == curproc->p_pid) { 325 simple_unlock(&fs->lfs_interlock); 326 ++fs->lfs_seglock; 327 fs->lfs_sp->seg_flags |= flags; 328 return 0; 329 } else if (flags & SEGM_PAGEDAEMON) { 330 simple_unlock(&fs->lfs_interlock); 331 return EWOULDBLOCK; 332 } else while (fs->lfs_seglock) 333 (void)ltsleep(&fs->lfs_seglock, PRIBIO + 1, 334 "lfs seglock", 0, &fs->lfs_interlock); 335 } 336 337 fs->lfs_seglock = 1; 338 fs->lfs_lockpid = curproc->p_pid; 339 simple_unlock(&fs->lfs_interlock); 340 fs->lfs_cleanind = 0; 341 342 /* Drain fragment size changes out */ 343 lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); 344 345 sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); 346 sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); 347 sp->seg_flags = flags; 348 sp->vp = NULL; 349 sp->seg_iocount = 0; 350 (void) lfs_initseg(fs); 351 352 /* 353 * Keep a cumulative count of the outstanding I/O operations. If the 354 * disk drive catches up with us it could go to zero before we finish, 355 * so we artificially increment it by one until we've scheduled all of 356 * the writes we intend to do. 357 */ 358 ++fs->lfs_iocount; 359 return 0; 360 } 361 362 static void lfs_unmark_dirop(struct lfs *); 363 364 static void 365 lfs_unmark_dirop(struct lfs *fs) 366 { 367 struct inode *ip, *nip; 368 struct vnode *vp; 369 int doit; 370 extern int lfs_dirvcount; 371 372 simple_lock(&fs->lfs_interlock); 373 doit = !(fs->lfs_flags & LFS_UNDIROP); 374 if (doit) 375 fs->lfs_flags |= LFS_UNDIROP; 376 simple_unlock(&fs->lfs_interlock); 377 if (!doit) 378 return; 379 380 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 381 nip = TAILQ_NEXT(ip, i_lfs_dchain); 382 vp = ITOV(ip); 383 384 if (VOP_ISLOCKED(vp) && 385 vp->v_lock.lk_lockholder != curproc->p_pid) { 386 continue; 387 } 388 if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { 389 --lfs_dirvcount; 390 vp->v_flag &= ~VDIROP; 391 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 392 wakeup(&lfs_dirvcount); 393 fs->lfs_unlockvp = vp; 394 vrele(vp); 395 fs->lfs_unlockvp = NULL; 396 } 397 } 398 399 simple_lock(&fs->lfs_interlock); 400 fs->lfs_flags &= ~LFS_UNDIROP; 401 simple_unlock(&fs->lfs_interlock); 402 } 403 404 static void 405 lfs_auto_segclean(struct lfs *fs) 406 { 407 int i, error; 408 409 /* 410 * Now that we've swapped lfs_activesb, but while we still 411 * hold the segment lock, run through the segment list marking 412 * the empty ones clean. 413 * XXX - do we really need to do them all at once? 414 */ 415 for (i = 0; i < fs->lfs_nseg; i++) { 416 if ((fs->lfs_suflags[0][i] & 417 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 418 (SEGUSE_DIRTY | SEGUSE_EMPTY) && 419 (fs->lfs_suflags[1][i] & 420 (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == 421 (SEGUSE_DIRTY | SEGUSE_EMPTY)) { 422 423 if ((error = lfs_do_segclean(fs, i)) != 0) { 424 #ifdef DEBUG 425 printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); 426 #endif /* DEBUG */ 427 } 428 } 429 fs->lfs_suflags[1 - fs->lfs_activesb][i] = 430 fs->lfs_suflags[fs->lfs_activesb][i]; 431 } 432 } 433 434 /* 435 * lfs_segunlock -- 436 * Single thread the segment writer. 437 */ 438 void 439 lfs_segunlock(struct lfs *fs) 440 { 441 struct segment *sp; 442 unsigned long sync, ckp; 443 struct buf *bp; 444 int do_unmark_dirop = 0; 445 extern int locked_queue_count; 446 extern long locked_queue_bytes; 447 448 sp = fs->lfs_sp; 449 450 simple_lock(&fs->lfs_interlock); 451 if (fs->lfs_seglock == 1) { 452 if ((sp->seg_flags & SEGM_PROT) == 0) 453 do_unmark_dirop = 1; 454 simple_unlock(&fs->lfs_interlock); 455 sync = sp->seg_flags & SEGM_SYNC; 456 ckp = sp->seg_flags & SEGM_CKP; 457 if (sp->bpp != sp->cbpp) { 458 /* Free allocated segment summary */ 459 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 460 bp = *sp->bpp; 461 lfs_freebuf(fs, bp); 462 } else 463 printf ("unlock to 0 with no summary"); 464 465 pool_put(&fs->lfs_bpppool, sp->bpp); 466 sp->bpp = NULL; 467 468 /* 469 * If we're not sync, we're done with sp, get rid of it. 470 * Otherwise, we keep a local copy around but free 471 * fs->lfs_sp so another process can use it (we have to 472 * wait but they don't have to wait for us). 473 */ 474 if (!sync) 475 pool_put(&fs->lfs_segpool, sp); 476 fs->lfs_sp = NULL; 477 478 /* 479 * If the I/O count is non-zero, sleep until it reaches zero. 480 * At the moment, the user's process hangs around so we can 481 * sleep. 482 */ 483 if (--fs->lfs_iocount == 0) { 484 lfs_countlocked(&locked_queue_count, 485 &locked_queue_bytes, "lfs_segunlock"); 486 wakeup(&locked_queue_count); 487 } 488 if (fs->lfs_iocount <= 1) 489 wakeup(&fs->lfs_iocount); 490 /* 491 * If we're not checkpointing, we don't have to block 492 * other processes to wait for a synchronous write 493 * to complete. 494 */ 495 if (!ckp) { 496 simple_lock(&fs->lfs_interlock); 497 --fs->lfs_seglock; 498 fs->lfs_lockpid = 0; 499 simple_unlock(&fs->lfs_interlock); 500 wakeup(&fs->lfs_seglock); 501 } 502 /* 503 * We let checkpoints happen asynchronously. That means 504 * that during recovery, we have to roll forward between 505 * the two segments described by the first and second 506 * superblocks to make sure that the checkpoint described 507 * by a superblock completed. 508 */ 509 while (ckp && sync && fs->lfs_iocount) 510 (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, 511 "lfs_iocount", 0); 512 while (sync && sp->seg_iocount) { 513 (void)tsleep(&sp->seg_iocount, PRIBIO + 1, 514 "seg_iocount", 0); 515 /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ 516 } 517 if (sync) 518 pool_put(&fs->lfs_segpool, sp); 519 520 if (ckp) { 521 fs->lfs_nactive = 0; 522 /* If we *know* everything's on disk, write both sbs */ 523 /* XXX should wait for this one */ 524 if (sync) 525 lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); 526 lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); 527 if (!(fs->lfs_ivnode->v_mount->mnt_flag & MNT_UNMOUNT)) 528 lfs_auto_segclean(fs); 529 fs->lfs_activesb = 1 - fs->lfs_activesb; 530 simple_lock(&fs->lfs_interlock); 531 --fs->lfs_seglock; 532 fs->lfs_lockpid = 0; 533 simple_unlock(&fs->lfs_interlock); 534 wakeup(&fs->lfs_seglock); 535 } 536 /* Reenable fragment size changes */ 537 lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); 538 if (do_unmark_dirop) 539 lfs_unmark_dirop(fs); 540 } else if (fs->lfs_seglock == 0) { 541 simple_unlock(&fs->lfs_interlock); 542 panic ("Seglock not held"); 543 } else { 544 --fs->lfs_seglock; 545 simple_unlock(&fs->lfs_interlock); 546 } 547 } 548