1 /* $NetBSD: lfs_vnops.c,v 1.226 2009/12/07 04:12:10 eeh Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
58 * 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.226 2009/12/07 04:12:10 eeh Exp $"); 64 65 #ifdef _KERNEL_OPT 66 #include "opt_compat_netbsd.h" 67 #endif 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/namei.h> 72 #include <sys/resourcevar.h> 73 #include <sys/kernel.h> 74 #include <sys/file.h> 75 #include <sys/stat.h> 76 #include <sys/buf.h> 77 #include <sys/proc.h> 78 #include <sys/mount.h> 79 #include <sys/vnode.h> 80 #include <sys/pool.h> 81 #include <sys/signalvar.h> 82 #include <sys/kauth.h> 83 #include <sys/syslog.h> 84 #include <sys/fstrans.h> 85 86 #include <miscfs/fifofs/fifo.h> 87 #include <miscfs/genfs/genfs.h> 88 #include <miscfs/specfs/specdev.h> 89 90 #include <ufs/ufs/inode.h> 91 #include <ufs/ufs/dir.h> 92 #include <ufs/ufs/ufsmount.h> 93 #include <ufs/ufs/ufs_extern.h> 94 95 #include <uvm/uvm.h> 96 #include <uvm/uvm_pmap.h> 97 #include <uvm/uvm_stat.h> 98 #include <uvm/uvm_pager.h> 99 100 #include <ufs/lfs/lfs.h> 101 #include <ufs/lfs/lfs_extern.h> 102 103 extern pid_t lfs_writer_daemon; 104 int lfs_ignore_lazy_sync = 1; 105 106 /* Global vfs data structures for lfs. */ 107 int (**lfs_vnodeop_p)(void *); 108 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { 109 { &vop_default_desc, vn_default_error }, 110 { &vop_lookup_desc, ufs_lookup }, /* lookup */ 111 { &vop_create_desc, lfs_create }, /* create */ 112 { &vop_whiteout_desc, ufs_whiteout }, /* whiteout */ 113 { &vop_mknod_desc, lfs_mknod }, /* mknod */ 114 { &vop_open_desc, ufs_open }, /* open */ 115 { &vop_close_desc, lfs_close }, /* close */ 116 { &vop_access_desc, ufs_access }, /* access */ 117 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 118 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 119 { &vop_read_desc, lfs_read }, /* read */ 120 { &vop_write_desc, lfs_write }, /* write */ 121 { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ 122 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */ 123 { &vop_poll_desc, ufs_poll }, /* poll */ 124 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 125 { &vop_revoke_desc, ufs_revoke }, /* revoke */ 126 { &vop_mmap_desc, lfs_mmap }, /* mmap */ 127 { &vop_fsync_desc, lfs_fsync }, /* fsync */ 128 { &vop_seek_desc, ufs_seek }, /* seek */ 129 { &vop_remove_desc, lfs_remove }, /* remove */ 130 { &vop_link_desc, lfs_link }, /* link */ 131 { &vop_rename_desc, lfs_rename }, /* rename */ 132 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */ 133 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */ 134 { &vop_symlink_desc, lfs_symlink }, /* symlink */ 135 { &vop_readdir_desc, ufs_readdir }, /* readdir */ 136 { &vop_readlink_desc, ufs_readlink }, /* readlink */ 137 { &vop_abortop_desc, ufs_abortop }, /* abortop */ 138 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 139 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 140 { &vop_lock_desc, ufs_lock }, /* lock */ 141 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 142 { &vop_bmap_desc, ufs_bmap }, /* bmap */ 143 { &vop_strategy_desc, lfs_strategy }, /* strategy */ 144 { &vop_print_desc, ufs_print }, /* print */ 145 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 146 { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ 147 { &vop_advlock_desc, ufs_advlock }, /* advlock */ 148 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 149 { &vop_getpages_desc, lfs_getpages }, /* getpages */ 150 { &vop_putpages_desc, lfs_putpages }, /* putpages */ 151 { NULL, NULL } 152 }; 153 const struct vnodeopv_desc lfs_vnodeop_opv_desc = 154 { &lfs_vnodeop_p, 
lfs_vnodeop_entries }; 155 156 int (**lfs_specop_p)(void *); 157 const struct vnodeopv_entry_desc lfs_specop_entries[] = { 158 { &vop_default_desc, vn_default_error }, 159 { &vop_lookup_desc, spec_lookup }, /* lookup */ 160 { &vop_create_desc, spec_create }, /* create */ 161 { &vop_mknod_desc, spec_mknod }, /* mknod */ 162 { &vop_open_desc, spec_open }, /* open */ 163 { &vop_close_desc, lfsspec_close }, /* close */ 164 { &vop_access_desc, ufs_access }, /* access */ 165 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 166 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 167 { &vop_read_desc, ufsspec_read }, /* read */ 168 { &vop_write_desc, ufsspec_write }, /* write */ 169 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 170 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 171 { &vop_poll_desc, spec_poll }, /* poll */ 172 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 173 { &vop_revoke_desc, spec_revoke }, /* revoke */ 174 { &vop_mmap_desc, spec_mmap }, /* mmap */ 175 { &vop_fsync_desc, spec_fsync }, /* fsync */ 176 { &vop_seek_desc, spec_seek }, /* seek */ 177 { &vop_remove_desc, spec_remove }, /* remove */ 178 { &vop_link_desc, spec_link }, /* link */ 179 { &vop_rename_desc, spec_rename }, /* rename */ 180 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 181 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 182 { &vop_symlink_desc, spec_symlink }, /* symlink */ 183 { &vop_readdir_desc, spec_readdir }, /* readdir */ 184 { &vop_readlink_desc, spec_readlink }, /* readlink */ 185 { &vop_abortop_desc, spec_abortop }, /* abortop */ 186 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 187 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 188 { &vop_lock_desc, ufs_lock }, /* lock */ 189 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 190 { &vop_bmap_desc, spec_bmap }, /* bmap */ 191 { &vop_strategy_desc, spec_strategy }, /* strategy */ 192 { &vop_print_desc, ufs_print }, /* print */ 193 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 194 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 195 { &vop_advlock_desc, spec_advlock }, /* advlock */ 196 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 197 { &vop_getpages_desc, spec_getpages }, /* getpages */ 198 { &vop_putpages_desc, spec_putpages }, /* putpages */ 199 { NULL, NULL } 200 }; 201 const struct vnodeopv_desc lfs_specop_opv_desc = 202 { &lfs_specop_p, lfs_specop_entries }; 203 204 int (**lfs_fifoop_p)(void *); 205 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = { 206 { &vop_default_desc, vn_default_error }, 207 { &vop_lookup_desc, fifo_lookup }, /* lookup */ 208 { &vop_create_desc, fifo_create }, /* create */ 209 { &vop_mknod_desc, fifo_mknod }, /* mknod */ 210 { &vop_open_desc, fifo_open }, /* open */ 211 { &vop_close_desc, lfsfifo_close }, /* close */ 212 { &vop_access_desc, ufs_access }, /* access */ 213 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 214 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 215 { &vop_read_desc, ufsfifo_read }, /* read */ 216 { &vop_write_desc, ufsfifo_write }, /* write */ 217 { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ 218 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 219 { &vop_poll_desc, fifo_poll }, /* poll */ 220 { &vop_kqfilter_desc, fifo_kqfilter }, /* kqfilter */ 221 { &vop_revoke_desc, fifo_revoke }, /* revoke */ 222 { &vop_mmap_desc, fifo_mmap }, /* mmap */ 223 { &vop_fsync_desc, fifo_fsync }, /* fsync */ 224 { &vop_seek_desc, fifo_seek }, /* seek */ 225 { &vop_remove_desc, fifo_remove }, /* remove */ 226 { &vop_link_desc, fifo_link }, /* link */ 227 { &vop_rename_desc, fifo_rename }, /* 
rename */ 228 { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ 229 { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ 230 { &vop_symlink_desc, fifo_symlink }, /* symlink */ 231 { &vop_readdir_desc, fifo_readdir }, /* readdir */ 232 { &vop_readlink_desc, fifo_readlink }, /* readlink */ 233 { &vop_abortop_desc, fifo_abortop }, /* abortop */ 234 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 235 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 236 { &vop_lock_desc, ufs_lock }, /* lock */ 237 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 238 { &vop_bmap_desc, fifo_bmap }, /* bmap */ 239 { &vop_strategy_desc, fifo_strategy }, /* strategy */ 240 { &vop_print_desc, ufs_print }, /* print */ 241 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 242 { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ 243 { &vop_advlock_desc, fifo_advlock }, /* advlock */ 244 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 245 { &vop_putpages_desc, fifo_putpages }, /* putpages */ 246 { NULL, NULL } 247 }; 248 const struct vnodeopv_desc lfs_fifoop_opv_desc = 249 { &lfs_fifoop_p, lfs_fifoop_entries }; 250 251 static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); 252 253 #define LFS_READWRITE 254 #include <ufs/ufs/ufs_readwrite.c> 255 #undef LFS_READWRITE 256 257 /* 258 * Synch an open file. 259 */ 260 /* ARGSUSED */ 261 int 262 lfs_fsync(void *v) 263 { 264 struct vop_fsync_args /* { 265 struct vnode *a_vp; 266 kauth_cred_t a_cred; 267 int a_flags; 268 off_t a_offlo; 269 off_t a_offhi; 270 } */ *ap = v; 271 struct vnode *vp = ap->a_vp; 272 int error, wait; 273 struct inode *ip = VTOI(vp); 274 struct lfs *fs = ip->i_lfs; 275 276 /* If we're mounted read-only, don't try to sync. */ 277 if (fs->lfs_ronly) 278 return 0; 279 280 /* 281 * Trickle sync simply adds this vnode to the pager list, as if 282 * the pagedaemon had requested a pageout. 283 */ 284 if (ap->a_flags & FSYNC_LAZY) { 285 if (lfs_ignore_lazy_sync == 0) { 286 mutex_enter(&lfs_lock); 287 if (!(ip->i_flags & IN_PAGING)) { 288 ip->i_flags |= IN_PAGING; 289 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, 290 i_lfs_pchain); 291 } 292 wakeup(&lfs_writer_daemon); 293 mutex_exit(&lfs_lock); 294 } 295 return 0; 296 } 297 298 /* 299 * If a vnode is being cleaned, flush it out before we try to 300 * reuse it. This prevents the cleaner from writing files twice 301 * in the same partial segment, causing an accounting underflow. 302 */ 303 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) { 304 lfs_vflush(vp); 305 } 306 307 wait = (ap->a_flags & FSYNC_WAIT); 308 do { 309 mutex_enter(&vp->v_interlock); 310 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 311 round_page(ap->a_offhi), 312 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); 313 if (error == EAGAIN) { 314 mutex_enter(&lfs_lock); 315 mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", 316 hz / 100 + 1, &lfs_lock); 317 mutex_exit(&lfs_lock); 318 } 319 } while (error == EAGAIN); 320 if (error) 321 return error; 322 323 if ((ap->a_flags & FSYNC_DATAONLY) == 0) 324 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0); 325 326 if (error == 0 && ap->a_flags & FSYNC_CACHE) { 327 int l = 0; 328 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE, 329 curlwp->l_cred); 330 } 331 if (wait && !VPISEMPTY(vp)) 332 LFS_SET_UINO(ip, IN_MODIFIED); 333 334 return error; 335 } 336 337 /* 338 * Take IN_ADIROP off, then call ufs_inactive.
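 * (lfs_unmark_vnode() below clears IN_ADIROP and drops the per-fs count of
 * active dirops that lfs_mark_vnode() took out.)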
339 */ 340 int 341 lfs_inactive(void *v) 342 { 343 struct vop_inactive_args /* { 344 struct vnode *a_vp; 345 } */ *ap = v; 346 347 lfs_unmark_vnode(ap->a_vp); 348 349 /* 350 * The Ifile is only ever inactivated on unmount. 351 * Streamline this process by not giving it more dirty blocks. 352 */ 353 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { 354 mutex_enter(&lfs_lock); 355 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); 356 mutex_exit(&lfs_lock); 357 VOP_UNLOCK(ap->a_vp, 0); 358 return 0; 359 } 360 361 return ufs_inactive(v); 362 } 363 364 /* 365 * These macros are used to bracket UFS directory ops, so that we can 366 * identify all the pages touched during directory ops which need to 367 * be ordered and flushed atomically, so that they may be recovered. 368 * 369 * Because we have to mark nodes VU_DIROP in order to prevent 370 * the cache from reclaiming them while a dirop is in progress, we must 371 * also manage the number of nodes so marked (otherwise we can run out). 372 * We do this by setting lfs_dirvcount to the number of marked vnodes; it 373 * is decremented during segment write, when VU_DIROP is taken off. 374 */ 375 #define MARK_VNODE(vp) lfs_mark_vnode(vp) 376 #define UNMARK_VNODE(vp) lfs_unmark_vnode(vp) 377 #define SET_DIROP_CREATE(dvp, vpp) lfs_set_dirop_create((dvp), (vpp)) 378 #define SET_DIROP_REMOVE(dvp, vp) lfs_set_dirop((dvp), (vp)) 379 static int lfs_set_dirop_create(struct vnode *, struct vnode **); 380 static int lfs_set_dirop(struct vnode *, struct vnode *); 381 382 static int 383 lfs_set_dirop(struct vnode *dvp, struct vnode *vp) 384 { 385 struct lfs *fs; 386 int error; 387 388 KASSERT(VOP_ISLOCKED(dvp)); 389 KASSERT(vp == NULL || VOP_ISLOCKED(vp)); 390 391 fs = VTOI(dvp)->i_lfs; 392 393 ASSERT_NO_SEGLOCK(fs); 394 /* 395 * LFS_NRESERVE calculates direct and indirect blocks as well 396 * as an inode block; an overestimate in most cases. 397 */ 398 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) 399 return (error); 400 401 restart: 402 mutex_enter(&lfs_lock); 403 if (fs->lfs_dirops == 0) { 404 mutex_exit(&lfs_lock); 405 lfs_check(dvp, LFS_UNUSED_LBN, 0); 406 mutex_enter(&lfs_lock); 407 } 408 while (fs->lfs_writer) { 409 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, 410 "lfs_sdirop", 0, &lfs_lock); 411 if (error == EINTR) { 412 mutex_exit(&lfs_lock); 413 goto unreserve; 414 } 415 } 416 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { 417 wakeup(&lfs_writer_daemon); 418 mutex_exit(&lfs_lock); 419 preempt(); 420 goto restart; 421 } 422 423 if (lfs_dirvcount > LFS_MAX_DIROP) { 424 mutex_exit(&lfs_lock); 425 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " 426 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); 427 if ((error = mtsleep(&lfs_dirvcount, 428 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, 429 &lfs_lock)) != 0) { 430 goto unreserve; 431 } 432 goto restart; 433 } 434 435 ++fs->lfs_dirops; 436 fs->lfs_doifile = 1; 437 mutex_exit(&lfs_lock); 438 439 /* Hold a reference so SET_ENDOP will be happy */ 440 vref(dvp); 441 if (vp) { 442 vref(vp); 443 MARK_VNODE(vp); 444 } 445 446 MARK_VNODE(dvp); 447 return 0; 448 449 unreserve: 450 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs)); 451 return error; 452 } 453 454 /* 455 * Get a new vnode *before* adjusting the dirop count, to avoid a deadlock 456 * in getnewvnode(), if we have a stacked filesystem mounted on top 457 * of us. 458 * 459 * NB: this means we have to clear the new vnodes on error. Fortunately 460 * SET_ENDOP is there to do that for us. 
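 * (SET_ENDOP_CREATE notices that the unused vnode never had VU_DIROP set
 * on it and hands it back with ungetnewvnode().)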
461 */ 462 static int 463 lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) 464 { 465 int error; 466 struct lfs *fs; 467 468 fs = VFSTOUFS(dvp->v_mount)->um_lfs; 469 ASSERT_NO_SEGLOCK(fs); 470 if (fs->lfs_ronly) 471 return EROFS; 472 if (vpp && (error = getnewvnode(VT_LFS, dvp->v_mount, lfs_vnodeop_p, vpp))) { 473 DLOG((DLOG_ALLOC, "lfs_set_dirop_create: dvp %p error %d\n", 474 dvp, error)); 475 return error; 476 } 477 if ((error = lfs_set_dirop(dvp, NULL)) != 0) { 478 if (vpp) { 479 ungetnewvnode(*vpp); 480 *vpp = NULL; 481 } 482 return error; 483 } 484 return 0; 485 } 486 487 #define SET_ENDOP_BASE(fs, dvp, str) \ 488 do { \ 489 mutex_enter(&lfs_lock); \ 490 --(fs)->lfs_dirops; \ 491 if (!(fs)->lfs_dirops) { \ 492 if ((fs)->lfs_nadirop) { \ 493 panic("SET_ENDOP: %s: no dirops but " \ 494 " nadirop=%d", (str), \ 495 (fs)->lfs_nadirop); \ 496 } \ 497 wakeup(&(fs)->lfs_writer); \ 498 mutex_exit(&lfs_lock); \ 499 lfs_check((dvp), LFS_UNUSED_LBN, 0); \ 500 } else \ 501 mutex_exit(&lfs_lock); \ 502 } while(0) 503 #define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \ 504 do { \ 505 UNMARK_VNODE(dvp); \ 506 if (nvpp && *nvpp) \ 507 UNMARK_VNODE(*nvpp); \ 508 /* Check for error return to stem vnode leakage */ \ 509 if (nvpp && *nvpp && !((*nvpp)->v_uflag & VU_DIROP)) \ 510 ungetnewvnode(*(nvpp)); \ 511 SET_ENDOP_BASE((fs), (dvp), (str)); \ 512 lfs_reserve((fs), (dvp), NULL, -LFS_NRESERVE(fs)); \ 513 vrele(dvp); \ 514 } while(0) 515 #define SET_ENDOP_CREATE_AP(ap, str) \ 516 SET_ENDOP_CREATE(VTOI((ap)->a_dvp)->i_lfs, (ap)->a_dvp, \ 517 (ap)->a_vpp, (str)) 518 #define SET_ENDOP_REMOVE(fs, dvp, ovp, str) \ 519 do { \ 520 UNMARK_VNODE(dvp); \ 521 if (ovp) \ 522 UNMARK_VNODE(ovp); \ 523 SET_ENDOP_BASE((fs), (dvp), (str)); \ 524 lfs_reserve((fs), (dvp), (ovp), -LFS_NRESERVE(fs)); \ 525 vrele(dvp); \ 526 if (ovp) \ 527 vrele(ovp); \ 528 } while(0) 529 530 void 531 lfs_mark_vnode(struct vnode *vp) 532 { 533 struct inode *ip = VTOI(vp); 534 struct lfs *fs = ip->i_lfs; 535 536 mutex_enter(&lfs_lock); 537 if (!(ip->i_flag & IN_ADIROP)) { 538 if (!(vp->v_uflag & VU_DIROP)) { 539 mutex_enter(&vp->v_interlock); 540 (void)lfs_vref(vp); 541 ++lfs_dirvcount; 542 ++fs->lfs_dirvcount; 543 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); 544 vp->v_uflag |= VU_DIROP; 545 } 546 ++fs->lfs_nadirop; 547 ip->i_flag |= IN_ADIROP; 548 } else 549 KASSERT(vp->v_uflag & VU_DIROP); 550 mutex_exit(&lfs_lock); 551 } 552 553 void 554 lfs_unmark_vnode(struct vnode *vp) 555 { 556 struct inode *ip = VTOI(vp); 557 558 if (ip && (ip->i_flag & IN_ADIROP)) { 559 KASSERT(vp->v_uflag & VU_DIROP); 560 mutex_enter(&lfs_lock); 561 --ip->i_lfs->lfs_nadirop; 562 mutex_exit(&lfs_lock); 563 ip->i_flag &= ~IN_ADIROP; 564 } 565 } 566 567 int 568 lfs_symlink(void *v) 569 { 570 struct vop_symlink_args /* { 571 struct vnode *a_dvp; 572 struct vnode **a_vpp; 573 struct componentname *a_cnp; 574 struct vattr *a_vap; 575 char *a_target; 576 } */ *ap = v; 577 int error; 578 579 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 580 vput(ap->a_dvp); 581 return error; 582 } 583 error = ufs_symlink(ap); 584 SET_ENDOP_CREATE_AP(ap, "symlink"); 585 return (error); 586 } 587 588 int 589 lfs_mknod(void *v) 590 { 591 struct vop_mknod_args /* { 592 struct vnode *a_dvp; 593 struct vnode **a_vpp; 594 struct componentname *a_cnp; 595 struct vattr *a_vap; 596 } */ *ap = v; 597 struct vattr *vap = ap->a_vap; 598 struct vnode **vpp = ap->a_vpp; 599 struct inode *ip; 600 int error; 601 struct mount *mp; 602 ino_t ino; 603 604 if ((error = 
SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 605 vput(ap->a_dvp); 606 return error; 607 } 608 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 609 ap->a_dvp, vpp, ap->a_cnp); 610 611 /* Either way we're done with the dirop at this point */ 612 SET_ENDOP_CREATE_AP(ap, "mknod"); 613 614 if (error) 615 return (error); 616 617 ip = VTOI(*vpp); 618 mp = (*vpp)->v_mount; 619 ino = ip->i_number; 620 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 621 if (vap->va_rdev != VNOVAL) { 622 /* 623 * Want to be able to use this to make badblock 624 * inodes, so don't truncate the dev number. 625 */ 626 #if 0 627 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 628 UFS_MPNEEDSWAP((*vpp)->v_mount)); 629 #else 630 ip->i_ffs1_rdev = vap->va_rdev; 631 #endif 632 } 633 634 /* 635 * Call fsync to write the vnode so that we don't have to deal with 636 * flushing it when it's marked VU_DIROP|VI_XLOCK. 637 * 638 * XXX KS - If we can't flush we also can't call vgone(), so must 639 * return. But, that leaves this vnode in limbo, also not good. 640 * Can this ever happen (barring hardware failure)? 641 */ 642 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) { 643 panic("lfs_mknod: couldn't fsync (ino %llu)", 644 (unsigned long long)ino); 645 /* return (error); */ 646 } 647 /* 648 * Remove vnode so that it will be reloaded by VFS_VGET and 649 * checked to see if it is an alias of an existing entry in 650 * the inode cache. 651 */ 652 /* Used to be vput, but that causes us to call VOP_INACTIVE twice. */ 653 654 VOP_UNLOCK(*vpp, 0); 655 (*vpp)->v_type = VNON; 656 vgone(*vpp); 657 error = VFS_VGET(mp, ino, vpp); 658 659 if (error != 0) { 660 *vpp = NULL; 661 return (error); 662 } 663 return (0); 664 } 665 666 int 667 lfs_create(void *v) 668 { 669 struct vop_create_args /* { 670 struct vnode *a_dvp; 671 struct vnode **a_vpp; 672 struct componentname *a_cnp; 673 struct vattr *a_vap; 674 } */ *ap = v; 675 int error; 676 677 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 678 vput(ap->a_dvp); 679 return error; 680 } 681 error = ufs_create(ap); 682 SET_ENDOP_CREATE_AP(ap, "create"); 683 return (error); 684 } 685 686 int 687 lfs_mkdir(void *v) 688 { 689 struct vop_mkdir_args /* { 690 struct vnode *a_dvp; 691 struct vnode **a_vpp; 692 struct componentname *a_cnp; 693 struct vattr *a_vap; 694 } */ *ap = v; 695 int error; 696 697 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 698 vput(ap->a_dvp); 699 return error; 700 } 701 error = ufs_mkdir(ap); 702 SET_ENDOP_CREATE_AP(ap, "mkdir"); 703 return (error); 704 } 705 706 int 707 lfs_remove(void *v) 708 { 709 struct vop_remove_args /* { 710 struct vnode *a_dvp; 711 struct vnode *a_vp; 712 struct componentname *a_cnp; 713 } */ *ap = v; 714 struct vnode *dvp, *vp; 715 struct inode *ip; 716 int error; 717 718 dvp = ap->a_dvp; 719 vp = ap->a_vp; 720 ip = VTOI(vp); 721 if ((error = SET_DIROP_REMOVE(dvp, vp)) != 0) { 722 if (dvp == vp) 723 vrele(vp); 724 else 725 vput(vp); 726 vput(dvp); 727 return error; 728 } 729 error = ufs_remove(ap); 730 if (ip->i_nlink == 0) 731 lfs_orphan(ip->i_lfs, ip->i_number); 732 SET_ENDOP_REMOVE(ip->i_lfs, dvp, ap->a_vp, "remove"); 733 return (error); 734 } 735 736 int 737 lfs_rmdir(void *v) 738 { 739 struct vop_rmdir_args /* { 740 struct vnodeop_desc *a_desc; 741 struct vnode *a_dvp; 742 struct vnode *a_vp; 743 struct componentname *a_cnp; 744 } */ *ap = v; 745 struct vnode *vp; 746 struct inode *ip; 747 int error; 748 749 vp = ap->a_vp; 750 ip = VTOI(vp); 751 if ((error = SET_DIROP_REMOVE(ap->a_dvp, ap->a_vp)) 
!= 0) { 752 if (ap->a_dvp == vp) 753 vrele(ap->a_dvp); 754 else 755 vput(ap->a_dvp); 756 vput(vp); 757 return error; 758 } 759 error = ufs_rmdir(ap); 760 if (ip->i_nlink == 0) 761 lfs_orphan(ip->i_lfs, ip->i_number); 762 SET_ENDOP_REMOVE(ip->i_lfs, ap->a_dvp, ap->a_vp, "rmdir"); 763 return (error); 764 } 765 766 int 767 lfs_link(void *v) 768 { 769 struct vop_link_args /* { 770 struct vnode *a_dvp; 771 struct vnode *a_vp; 772 struct componentname *a_cnp; 773 } */ *ap = v; 774 int error; 775 struct vnode **vpp = NULL; 776 777 if ((error = SET_DIROP_CREATE(ap->a_dvp, vpp)) != 0) { 778 vput(ap->a_dvp); 779 return error; 780 } 781 error = ufs_link(ap); 782 SET_ENDOP_CREATE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vpp, "link"); 783 return (error); 784 } 785 786 int 787 lfs_rename(void *v) 788 { 789 struct vop_rename_args /* { 790 struct vnode *a_fdvp; 791 struct vnode *a_fvp; 792 struct componentname *a_fcnp; 793 struct vnode *a_tdvp; 794 struct vnode *a_tvp; 795 struct componentname *a_tcnp; 796 } */ *ap = v; 797 struct vnode *tvp, *fvp, *tdvp, *fdvp; 798 struct componentname *tcnp, *fcnp; 799 int error; 800 struct lfs *fs; 801 802 fs = VTOI(ap->a_fdvp)->i_lfs; 803 tvp = ap->a_tvp; 804 tdvp = ap->a_tdvp; 805 tcnp = ap->a_tcnp; 806 fvp = ap->a_fvp; 807 fdvp = ap->a_fdvp; 808 fcnp = ap->a_fcnp; 809 810 /* 811 * Check for cross-device rename. 812 * If it is, we don't want to set dirops, just error out. 813 * (In particular note that MARK_VNODE(tdvp) will DTWT on 814 * a cross-device rename.) 815 * 816 * Copied from ufs_rename. 817 */ 818 if ((fvp->v_mount != tdvp->v_mount) || 819 (tvp && (fvp->v_mount != tvp->v_mount))) { 820 error = EXDEV; 821 goto errout; 822 } 823 824 /* 825 * Check to make sure we're not renaming a vnode onto itself 826 * (deleting a hard link by renaming one name onto another); 827 * if we are we can't recursively call VOP_REMOVE since that 828 * would leave us with an unaccounted-for number of live dirops. 829 * 830 * Inline the relevant section of ufs_rename here, *before* 831 * calling SET_DIROP_REMOVE. 832 */ 833 if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || 834 (VTOI(tdvp)->i_flags & APPEND))) { 835 error = EPERM; 836 goto errout; 837 } 838 if (fvp == tvp) { 839 if (fvp->v_type == VDIR) { 840 error = EINVAL; 841 goto errout; 842 } 843 844 /* Release destination completely. */ 845 VOP_ABORTOP(tdvp, tcnp); 846 vput(tdvp); 847 vput(tvp); 848 849 /* Delete source. */ 850 vrele(fvp); 851 fcnp->cn_flags &= ~(MODMASK | SAVESTART); 852 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 853 fcnp->cn_nameiop = DELETE; 854 vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); 855 if ((error = relookup(fdvp, &fvp, fcnp))) { 856 vput(fdvp); 857 return (error); 858 } 859 return (VOP_REMOVE(fdvp, fvp, fcnp)); 860 } 861 862 if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0) 863 goto errout; 864 MARK_VNODE(fdvp); 865 MARK_VNODE(fvp); 866 867 error = ufs_rename(ap); 868 UNMARK_VNODE(fdvp); 869 UNMARK_VNODE(fvp); 870 SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename"); 871 return (error); 872 873 errout: 874 VOP_ABORTOP(tdvp, ap->a_tcnp); /* XXX, why not in NFS? */ 875 if (tdvp == tvp) 876 vrele(tdvp); 877 else 878 vput(tdvp); 879 if (tvp) 880 vput(tvp); 881 VOP_ABORTOP(fdvp, ap->a_fcnp); /* XXX, why not in NFS? 
*/ 882 vrele(fdvp); 883 vrele(fvp); 884 return (error); 885 } 886 887 /* XXX hack to avoid calling ITIMES in getattr */ 888 int 889 lfs_getattr(void *v) 890 { 891 struct vop_getattr_args /* { 892 struct vnode *a_vp; 893 struct vattr *a_vap; 894 kauth_cred_t a_cred; 895 } */ *ap = v; 896 struct vnode *vp = ap->a_vp; 897 struct inode *ip = VTOI(vp); 898 struct vattr *vap = ap->a_vap; 899 struct lfs *fs = ip->i_lfs; 900 /* 901 * Copy from inode table 902 */ 903 vap->va_fsid = ip->i_dev; 904 vap->va_fileid = ip->i_number; 905 vap->va_mode = ip->i_mode & ~IFMT; 906 vap->va_nlink = ip->i_nlink; 907 vap->va_uid = ip->i_uid; 908 vap->va_gid = ip->i_gid; 909 vap->va_rdev = (dev_t)ip->i_ffs1_rdev; 910 vap->va_size = vp->v_size; 911 vap->va_atime.tv_sec = ip->i_ffs1_atime; 912 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 913 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 914 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 915 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 916 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 917 vap->va_flags = ip->i_flags; 918 vap->va_gen = ip->i_gen; 919 /* this doesn't belong here */ 920 if (vp->v_type == VBLK) 921 vap->va_blocksize = BLKDEV_IOSIZE; 922 else if (vp->v_type == VCHR) 923 vap->va_blocksize = MAXBSIZE; 924 else 925 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 926 vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks); 927 vap->va_type = vp->v_type; 928 vap->va_filerev = ip->i_modrev; 929 return (0); 930 } 931 932 /* 933 * Check to make sure the inode blocks won't choke the buffer 934 * cache, then call ufs_setattr as usual. 935 */ 936 int 937 lfs_setattr(void *v) 938 { 939 struct vop_setattr_args /* { 940 struct vnode *a_vp; 941 struct vattr *a_vap; 942 kauth_cred_t a_cred; 943 } */ *ap = v; 944 struct vnode *vp = ap->a_vp; 945 946 lfs_check(vp, LFS_UNUSED_LBN, 0); 947 return ufs_setattr(v); 948 } 949 950 /* 951 * Release the block we hold on lfs_newseg wrapping. Called on file close, 952 * or explicitly from LFCNWRAPGO. Called with the interlock held. 953 */ 954 static int 955 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) 956 { 957 if (fs->lfs_stoplwp != curlwp) 958 return EBUSY; 959 960 fs->lfs_stoplwp = NULL; 961 cv_signal(&fs->lfs_stopcv); 962 963 KASSERT(fs->lfs_nowrap > 0); 964 if (fs->lfs_nowrap <= 0) { 965 return 0; 966 } 967 968 if (--fs->lfs_nowrap == 0) { 969 log(LOG_NOTICE, "%s: re-enabled log wrap\n", fs->lfs_fsmnt); 970 wakeup(&fs->lfs_wrappass); 971 lfs_wakeup_cleaner(fs); 972 } 973 if (waitfor) { 974 mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment", 975 0, &lfs_lock); 976 } 977 978 return 0; 979 } 980 981 /* 982 * Close called 983 */ 984 /* ARGSUSED */ 985 int 986 lfs_close(void *v) 987 { 988 struct vop_close_args /* { 989 struct vnode *a_vp; 990 int a_fflag; 991 kauth_cred_t a_cred; 992 } */ *ap = v; 993 struct vnode *vp = ap->a_vp; 994 struct inode *ip = VTOI(vp); 995 struct lfs *fs = ip->i_lfs; 996 997 if ((ip->i_number == ROOTINO || ip->i_number == LFS_IFILE_INUM) && 998 fs->lfs_stoplwp == curlwp) { 999 mutex_enter(&lfs_lock); 1000 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); 1001 lfs_wrapgo(fs, ip, 0); 1002 mutex_exit(&lfs_lock); 1003 } 1004 1005 if (vp == ip->i_lfs->lfs_ivnode && 1006 vp->v_mount->mnt_iflag & IMNT_UNMOUNT) 1007 return 0; 1008 1009 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) { 1010 LFS_ITIMES(ip, NULL, NULL, NULL); 1011 } 1012 return (0); 1013 } 1014 1015 /* 1016 * Close wrapper for special devices. 1017 * 1018 * Update the times on the inode then do device close. 
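 * (The times are updated here; the close itself is passed on to the specfs
 * vnode operations via VOCALL below.)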
1019 */ 1020 int 1021 lfsspec_close(void *v) 1022 { 1023 struct vop_close_args /* { 1024 struct vnode *a_vp; 1025 int a_fflag; 1026 kauth_cred_t a_cred; 1027 } */ *ap = v; 1028 struct vnode *vp; 1029 struct inode *ip; 1030 1031 vp = ap->a_vp; 1032 ip = VTOI(vp); 1033 if (vp->v_usecount > 1) { 1034 LFS_ITIMES(ip, NULL, NULL, NULL); 1035 } 1036 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 1037 } 1038 1039 /* 1040 * Close wrapper for fifo's. 1041 * 1042 * Update the times on the inode then do device close. 1043 */ 1044 int 1045 lfsfifo_close(void *v) 1046 { 1047 struct vop_close_args /* { 1048 struct vnode *a_vp; 1049 int a_fflag; 1050 kauth_cred_t a_cred; 1051 } */ *ap = v; 1052 struct vnode *vp; 1053 struct inode *ip; 1054 1055 vp = ap->a_vp; 1056 ip = VTOI(vp); 1057 if (ap->a_vp->v_usecount > 1) { 1058 LFS_ITIMES(ip, NULL, NULL, NULL); 1059 } 1060 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 1061 } 1062 1063 /* 1064 * Reclaim an inode so that it can be used for other purposes. 1065 */ 1066 1067 int 1068 lfs_reclaim(void *v) 1069 { 1070 struct vop_reclaim_args /* { 1071 struct vnode *a_vp; 1072 } */ *ap = v; 1073 struct vnode *vp = ap->a_vp; 1074 struct inode *ip = VTOI(vp); 1075 struct lfs *fs = ip->i_lfs; 1076 int error; 1077 1078 mutex_enter(&lfs_lock); 1079 LFS_CLR_UINO(ip, IN_ALLMOD); 1080 mutex_exit(&lfs_lock); 1081 if ((error = ufs_reclaim(vp))) 1082 return (error); 1083 1084 /* 1085 * Take us off the paging and/or dirop queues if we were on them. 1086 * We shouldn't be on them. 1087 */ 1088 mutex_enter(&lfs_lock); 1089 if (ip->i_flags & IN_PAGING) { 1090 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1091 fs->lfs_fsmnt); 1092 ip->i_flags &= ~IN_PAGING; 1093 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1094 } 1095 if (vp->v_uflag & VU_DIROP) { 1096 panic("reclaimed vnode is VU_DIROP"); 1097 vp->v_uflag &= ~VU_DIROP; 1098 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1099 } 1100 mutex_exit(&lfs_lock); 1101 1102 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); 1103 lfs_deregister_all(vp); 1104 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1105 ip->inode_ext.lfs = NULL; 1106 genfs_node_destroy(vp); 1107 pool_put(&lfs_inode_pool, vp->v_data); 1108 vp->v_data = NULL; 1109 return (0); 1110 } 1111 1112 /* 1113 * Read a block from a storage device. 1114 * In order to avoid reading blocks that are in the process of being 1115 * written by the cleaner---and hence are not mutexed by the normal 1116 * buffer cache / page cache mechanisms---check for collisions before 1117 * reading. 1118 * 1119 * We inline ufs_strategy to make sure that the VOP_BMAP occurs *before* 1120 * the active cleaner test. 1121 * 1122 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
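 * If the block we want lies in a segment interval the cleaner is currently
 * rewriting (fs->lfs_cleanint[]), we sleep until the seglock is released
 * before issuing the read.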
1123 */ 1124 int 1125 lfs_strategy(void *v) 1126 { 1127 struct vop_strategy_args /* { 1128 struct vnode *a_vp; 1129 struct buf *a_bp; 1130 } */ *ap = v; 1131 struct buf *bp; 1132 struct lfs *fs; 1133 struct vnode *vp; 1134 struct inode *ip; 1135 daddr_t tbn; 1136 int i, sn, error, slept; 1137 1138 bp = ap->a_bp; 1139 vp = ap->a_vp; 1140 ip = VTOI(vp); 1141 fs = ip->i_lfs; 1142 1143 /* lfs uses its strategy routine only for read */ 1144 KASSERT(bp->b_flags & B_READ); 1145 1146 if (vp->v_type == VBLK || vp->v_type == VCHR) 1147 panic("lfs_strategy: spec"); 1148 KASSERT(bp->b_bcount != 0); 1149 if (bp->b_blkno == bp->b_lblkno) { 1150 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1151 NULL); 1152 if (error) { 1153 bp->b_error = error; 1154 bp->b_resid = bp->b_bcount; 1155 biodone(bp); 1156 return (error); 1157 } 1158 if ((long)bp->b_blkno == -1) /* no valid data */ 1159 clrbuf(bp); 1160 } 1161 if ((long)bp->b_blkno < 0) { /* block is not on disk */ 1162 bp->b_resid = bp->b_bcount; 1163 biodone(bp); 1164 return (0); 1165 } 1166 1167 slept = 1; 1168 mutex_enter(&lfs_lock); 1169 while (slept && fs->lfs_seglock) { 1170 mutex_exit(&lfs_lock); 1171 /* 1172 * Look through list of intervals. 1173 * There will only be intervals to look through 1174 * if the cleaner holds the seglock. 1175 * Since the cleaner is synchronous, we can trust 1176 * the list of intervals to be current. 1177 */ 1178 tbn = dbtofsb(fs, bp->b_blkno); 1179 sn = dtosn(fs, tbn); 1180 slept = 0; 1181 for (i = 0; i < fs->lfs_cleanind; i++) { 1182 if (sn == dtosn(fs, fs->lfs_cleanint[i]) && 1183 tbn >= fs->lfs_cleanint[i]) { 1184 DLOG((DLOG_CLEAN, 1185 "lfs_strategy: ino %d lbn %" PRId64 1186 " ind %d sn %d fsb %" PRIx32 1187 " given sn %d fsb %" PRIx64 "\n", 1188 ip->i_number, bp->b_lblkno, i, 1189 dtosn(fs, fs->lfs_cleanint[i]), 1190 fs->lfs_cleanint[i], sn, tbn)); 1191 DLOG((DLOG_CLEAN, 1192 "lfs_strategy: sleeping on ino %d lbn %" 1193 PRId64 "\n", ip->i_number, bp->b_lblkno)); 1194 mutex_enter(&lfs_lock); 1195 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { 1196 /* Cleaner can't wait for itself */ 1197 mtsleep(&fs->lfs_iocount, 1198 (PRIBIO + 1) | PNORELOCK, 1199 "clean2", 0, 1200 &lfs_lock); 1201 slept = 1; 1202 break; 1203 } else if (fs->lfs_seglock) { 1204 mtsleep(&fs->lfs_seglock, 1205 (PRIBIO + 1) | PNORELOCK, 1206 "clean1", 0, 1207 &lfs_lock); 1208 slept = 1; 1209 break; 1210 } 1211 mutex_exit(&lfs_lock); 1212 } 1213 } 1214 mutex_enter(&lfs_lock); 1215 } 1216 mutex_exit(&lfs_lock); 1217 1218 vp = ip->i_devvp; 1219 VOP_STRATEGY(vp, bp); 1220 return (0); 1221 } 1222 1223 void 1224 lfs_flush_dirops(struct lfs *fs) 1225 { 1226 struct inode *ip, *nip; 1227 struct vnode *vp; 1228 extern int lfs_dostats; 1229 struct segment *sp; 1230 int waslocked; 1231 1232 ASSERT_MAYBE_SEGLOCK(fs); 1233 KASSERT(fs->lfs_nadirop == 0); 1234 1235 if (fs->lfs_ronly) 1236 return; 1237 1238 mutex_enter(&lfs_lock); 1239 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { 1240 mutex_exit(&lfs_lock); 1241 return; 1242 } else 1243 mutex_exit(&lfs_lock); 1244 1245 if (lfs_dostats) 1246 ++lfs_stats.flush_invoked; 1247 1248 /* 1249 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops. 1250 * Technically this is a checkpoint (the on-disk state is valid) 1251 * even though we are leaving out all the file data. 1252 */ 1253 lfs_imtime(fs); 1254 lfs_seglock(fs, SEGM_CKP); 1255 sp = fs->lfs_sp; 1256 1257 /* 1258 * lfs_writevnodes, optimized to get dirops out of the way. 
1259 * Only write dirops, and don't flush files' pages, only 1260 * blocks from the directories. 1261 * 1262 * We don't need to vref these files because they are 1263 * dirops and so hold an extra reference until the 1264 * segunlock clears them of that status. 1265 * 1266 * We don't need to check for IN_ADIROP because we know that 1267 * no dirops are active. 1268 * 1269 */ 1270 mutex_enter(&lfs_lock); 1271 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 1272 nip = TAILQ_NEXT(ip, i_lfs_dchain); 1273 mutex_exit(&lfs_lock); 1274 vp = ITOV(ip); 1275 1276 KASSERT((ip->i_flag & IN_ADIROP) == 0); 1277 1278 /* 1279 * All writes to directories come from dirops; all 1280 * writes to files' direct blocks go through the page 1281 * cache, which we're not touching. Reads to files 1282 * and/or directories will not be affected by writing 1283 * directory blocks inodes and file inodes. So we don't 1284 * really need to lock. If we don't lock, though, 1285 * make sure that we don't clear IN_MODIFIED 1286 * unnecessarily. 1287 */ 1288 if (vp->v_iflag & VI_XLOCK) { 1289 mutex_enter(&lfs_lock); 1290 continue; 1291 } 1292 waslocked = VOP_ISLOCKED(vp); 1293 if (vp->v_type != VREG && 1294 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) { 1295 lfs_writefile(fs, sp, vp); 1296 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1297 !(ip->i_flag & IN_ALLMOD)) { 1298 mutex_enter(&lfs_lock); 1299 LFS_SET_UINO(ip, IN_MODIFIED); 1300 mutex_exit(&lfs_lock); 1301 } 1302 } 1303 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1304 (void) lfs_writeinode(fs, sp, ip); 1305 mutex_enter(&lfs_lock); 1306 if (waslocked == LK_EXCLOTHER) 1307 LFS_SET_UINO(ip, IN_MODIFIED); 1308 } 1309 mutex_exit(&lfs_lock); 1310 /* We've written all the dirops there are */ 1311 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); 1312 lfs_finalize_fs_seguse(fs); 1313 (void) lfs_writeseg(fs, sp); 1314 lfs_segunlock(fs); 1315 } 1316 1317 /* 1318 * Flush all vnodes for which the pagedaemon has requested pageouts. 1319 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop() 1320 * has just run, this would be an error). If we have to skip a vnode 1321 * for any reason, just skip it; if we have to wait for the cleaner, 1322 * abort. The writer daemon will call us again later. 1323 */ 1324 void 1325 lfs_flush_pchain(struct lfs *fs) 1326 { 1327 struct inode *ip, *nip; 1328 struct vnode *vp; 1329 extern int lfs_dostats; 1330 struct segment *sp; 1331 int error; 1332 1333 ASSERT_NO_SEGLOCK(fs); 1334 1335 if (fs->lfs_ronly) 1336 return; 1337 1338 mutex_enter(&lfs_lock); 1339 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { 1340 mutex_exit(&lfs_lock); 1341 return; 1342 } else 1343 mutex_exit(&lfs_lock); 1344 1345 /* Get dirops out of the way */ 1346 lfs_flush_dirops(fs); 1347 1348 if (lfs_dostats) 1349 ++lfs_stats.flush_invoked; 1350 1351 /* 1352 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts. 1353 */ 1354 lfs_imtime(fs); 1355 lfs_seglock(fs, 0); 1356 sp = fs->lfs_sp; 1357 1358 /* 1359 * lfs_writevnodes, optimized to clear pageout requests. 1360 * Only write non-dirop files that are in the pageout queue. 1361 * We're very conservative about what we write; we want to be 1362 * fast and async. 
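 * Vnodes that are locked, being revoked (VI_XLOCK), not regular files,
 * or marked VU_DIROP are simply skipped.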
1363 */ 1364 mutex_enter(&lfs_lock); 1365 top: 1366 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { 1367 nip = TAILQ_NEXT(ip, i_lfs_pchain); 1368 vp = ITOV(ip); 1369 1370 if (!(ip->i_flags & IN_PAGING)) 1371 goto top; 1372 1373 mutex_enter(&vp->v_interlock); 1374 if ((vp->v_iflag & VI_XLOCK) || (vp->v_uflag & VU_DIROP) != 0) { 1375 mutex_exit(&vp->v_interlock); 1376 continue; 1377 } 1378 if (vp->v_type != VREG) { 1379 mutex_exit(&vp->v_interlock); 1380 continue; 1381 } 1382 if (lfs_vref(vp)) 1383 continue; 1384 mutex_exit(&lfs_lock); 1385 1386 if (VOP_ISLOCKED(vp)) { 1387 lfs_vunref(vp); 1388 mutex_enter(&lfs_lock); 1389 continue; 1390 } 1391 1392 error = lfs_writefile(fs, sp, vp); 1393 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1394 !(ip->i_flag & IN_ALLMOD)) { 1395 mutex_enter(&lfs_lock); 1396 LFS_SET_UINO(ip, IN_MODIFIED); 1397 mutex_exit(&lfs_lock); 1398 } 1399 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1400 (void) lfs_writeinode(fs, sp, ip); 1401 1402 lfs_vunref(vp); 1403 1404 if (error == EAGAIN) { 1405 lfs_writeseg(fs, sp); 1406 mutex_enter(&lfs_lock); 1407 break; 1408 } 1409 mutex_enter(&lfs_lock); 1410 } 1411 mutex_exit(&lfs_lock); 1412 (void) lfs_writeseg(fs, sp); 1413 lfs_segunlock(fs); 1414 } 1415 1416 /* 1417 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}. 1418 */ 1419 int 1420 lfs_fcntl(void *v) 1421 { 1422 struct vop_fcntl_args /* { 1423 struct vnode *a_vp; 1424 u_int a_command; 1425 void * a_data; 1426 int a_fflag; 1427 kauth_cred_t a_cred; 1428 } */ *ap = v; 1429 struct timeval tv; 1430 struct timeval *tvp; 1431 BLOCK_INFO *blkiov; 1432 CLEANERINFO *cip; 1433 SEGUSE *sup; 1434 int blkcnt, error, oclean; 1435 size_t fh_size; 1436 struct lfs_fcntl_markv blkvp; 1437 struct lwp *l; 1438 fsid_t *fsidp; 1439 struct lfs *fs; 1440 struct buf *bp; 1441 fhandle_t *fhp; 1442 daddr_t off; 1443 1444 /* Only respect LFS fcntls on fs root or Ifile */ 1445 if (VTOI(ap->a_vp)->i_number != ROOTINO && 1446 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) { 1447 return ufs_fcntl(v); 1448 } 1449 1450 /* Avoid locking a draining lock */ 1451 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) { 1452 return ESHUTDOWN; 1453 } 1454 1455 /* LFS control and monitoring fcntls are available only to root */ 1456 l = curlwp; 1457 if (((ap->a_command & 0xff00) >> 8) == 'L' && 1458 (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 1459 NULL)) != 0) 1460 return (error); 1461 1462 fs = VTOI(ap->a_vp)->i_lfs; 1463 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx; 1464 1465 error = 0; 1466 switch ((int)ap->a_command) { 1467 case LFCNSEGWAITALL_COMPAT_50: 1468 case LFCNSEGWAITALL_COMPAT: 1469 fsidp = NULL; 1470 /* FALLSTHROUGH */ 1471 case LFCNSEGWAIT_COMPAT_50: 1472 case LFCNSEGWAIT_COMPAT: 1473 { 1474 struct timeval50 *tvp50 1475 = (struct timeval50 *)ap->a_data; 1476 timeval50_to_timeval(tvp50, &tv); 1477 tvp = &tv; 1478 } 1479 goto segwait_common; 1480 case LFCNSEGWAITALL: 1481 fsidp = NULL; 1482 /* FALLSTHROUGH */ 1483 case LFCNSEGWAIT: 1484 tvp = (struct timeval *)ap->a_data; 1485 segwait_common: 1486 mutex_enter(&lfs_lock); 1487 ++fs->lfs_sleepers; 1488 mutex_exit(&lfs_lock); 1489 1490 error = lfs_segwait(fsidp, tvp); 1491 1492 mutex_enter(&lfs_lock); 1493 if (--fs->lfs_sleepers == 0) 1494 wakeup(&fs->lfs_sleepers); 1495 mutex_exit(&lfs_lock); 1496 return error; 1497 1498 case LFCNBMAPV: 1499 case LFCNMARKV: 1500 blkvp = *(struct lfs_fcntl_markv *)ap->a_data; 1501 1502 blkcnt = blkvp.blkcnt; 1503 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1504 return (EINVAL); 1505 
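/* Copy the caller's BLOCK_INFO array into the kernel; the array is copied back out below on success. */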
blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1506 if ((error = copyin(blkvp.blkiov, blkiov, 1507 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1508 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1509 return error; 1510 } 1511 1512 mutex_enter(&lfs_lock); 1513 ++fs->lfs_sleepers; 1514 mutex_exit(&lfs_lock); 1515 if (ap->a_command == LFCNBMAPV) 1516 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); 1517 else /* LFCNMARKV */ 1518 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); 1519 if (error == 0) 1520 error = copyout(blkiov, blkvp.blkiov, 1521 blkcnt * sizeof(BLOCK_INFO)); 1522 mutex_enter(&lfs_lock); 1523 if (--fs->lfs_sleepers == 0) 1524 wakeup(&fs->lfs_sleepers); 1525 mutex_exit(&lfs_lock); 1526 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1527 return error; 1528 1529 case LFCNRECLAIM: 1530 /* 1531 * Flush dirops and write Ifile, allowing empty segments 1532 * to be immediately reclaimed. 1533 */ 1534 lfs_writer_enter(fs, "pndirop"); 1535 off = fs->lfs_offset; 1536 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 1537 lfs_flush_dirops(fs); 1538 LFS_CLEANERINFO(cip, fs, bp); 1539 oclean = cip->clean; 1540 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1541 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 1542 fs->lfs_sp->seg_flags |= SEGM_PROT; 1543 lfs_segunlock(fs); 1544 lfs_writer_leave(fs); 1545 1546 #ifdef DEBUG 1547 LFS_CLEANERINFO(cip, fs, bp); 1548 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 1549 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 1550 fs->lfs_offset - off, cip->clean - oclean, 1551 fs->lfs_activesb)); 1552 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 1553 #endif 1554 1555 return 0; 1556 1557 case LFCNIFILEFH_COMPAT: 1558 /* Return the filehandle of the Ifile */ 1559 if ((error = kauth_authorize_system(l->l_cred, 1560 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0) 1561 return (error); 1562 fhp = (struct fhandle *)ap->a_data; 1563 fhp->fh_fsid = *fsidp; 1564 fh_size = 16; /* former VFS_MAXFIDSIZ */ 1565 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1566 1567 case LFCNIFILEFH_COMPAT2: 1568 case LFCNIFILEFH: 1569 /* Return the filehandle of the Ifile */ 1570 fhp = (struct fhandle *)ap->a_data; 1571 fhp->fh_fsid = *fsidp; 1572 fh_size = sizeof(struct lfs_fhandle) - 1573 offsetof(fhandle_t, fh_fid); 1574 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1575 1576 case LFCNREWIND: 1577 /* Move lfs_offset to the lowest-numbered segment */ 1578 return lfs_rewind(fs, *(int *)ap->a_data); 1579 1580 case LFCNINVAL: 1581 /* Mark a segment SEGUSE_INVAL */ 1582 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); 1583 if (sup->su_nbytes > 0) { 1584 brelse(bp, 0); 1585 lfs_unset_inval_all(fs); 1586 return EBUSY; 1587 } 1588 sup->su_flags |= SEGUSE_INVAL; 1589 VOP_BWRITE(bp); 1590 return 0; 1591 1592 case LFCNRESIZE: 1593 /* Resize the filesystem */ 1594 return lfs_resize_fs(fs, *(int *)ap->a_data); 1595 1596 case LFCNWRAPSTOP: 1597 case LFCNWRAPSTOP_COMPAT: 1598 /* 1599 * Hold lfs_newseg at segment 0; if requested, sleep until 1600 * the filesystem wraps around. To support external agents 1601 * (dump, fsck-based regression test) that need to look at 1602 * a snapshot of the filesystem, without necessarily 1603 * requiring that all fs activity stops. 
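 * Only one LWP at a time may hold the wrap lock (fs->lfs_stoplwp); it is
 * released by LFCNWRAPGO, or by lfs_close() when that LWP closes the file.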
1604 */ 1605 if (fs->lfs_stoplwp == curlwp) 1606 return EALREADY; 1607 1608 mutex_enter(&lfs_lock); 1609 while (fs->lfs_stoplwp != NULL) 1610 cv_wait(&fs->lfs_stopcv, &lfs_lock); 1611 fs->lfs_stoplwp = curlwp; 1612 if (fs->lfs_nowrap == 0) 1613 log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); 1614 ++fs->lfs_nowrap; 1615 if (*(int *)ap->a_data == 1 1616 || ap->a_command == LFCNWRAPSTOP_COMPAT) { 1617 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); 1618 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1619 "segwrap", 0, &lfs_lock); 1620 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); 1621 if (error) { 1622 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 1623 } 1624 } 1625 mutex_exit(&lfs_lock); 1626 return 0; 1627 1628 case LFCNWRAPGO: 1629 case LFCNWRAPGO_COMPAT: 1630 /* 1631 * Having done its work, the agent wakes up the writer. 1632 * If the argument is 1, it sleeps until a new segment 1633 * is selected. 1634 */ 1635 mutex_enter(&lfs_lock); 1636 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 1637 ap->a_command == LFCNWRAPGO_COMPAT ? 1 : 1638 *((int *)ap->a_data)); 1639 mutex_exit(&lfs_lock); 1640 return error; 1641 1642 case LFCNWRAPPASS: 1643 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 1644 return EALREADY; 1645 mutex_enter(&lfs_lock); 1646 if (fs->lfs_stoplwp != curlwp) { 1647 mutex_exit(&lfs_lock); 1648 return EALREADY; 1649 } 1650 if (fs->lfs_nowrap == 0) { 1651 mutex_exit(&lfs_lock); 1652 return EBUSY; 1653 } 1654 fs->lfs_wrappass = 1; 1655 wakeup(&fs->lfs_wrappass); 1656 /* Wait for the log to wrap, if asked */ 1657 if (*(int *)ap->a_data) { 1658 mutex_enter(&ap->a_vp->v_interlock); 1659 lfs_vref(ap->a_vp); 1660 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 1661 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 1662 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1663 "segwrap", 0, &lfs_lock); 1664 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 1665 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 1666 lfs_vunref(ap->a_vp); 1667 } 1668 mutex_exit(&lfs_lock); 1669 return error; 1670 1671 case LFCNWRAPSTATUS: 1672 mutex_enter(&lfs_lock); 1673 *(int *)ap->a_data = fs->lfs_wrapstatus; 1674 mutex_exit(&lfs_lock); 1675 return 0; 1676 1677 default: 1678 return ufs_fcntl(v); 1679 } 1680 return 0; 1681 } 1682 1683 int 1684 lfs_getpages(void *v) 1685 { 1686 struct vop_getpages_args /* { 1687 struct vnode *a_vp; 1688 voff_t a_offset; 1689 struct vm_page **a_m; 1690 int *a_count; 1691 int a_centeridx; 1692 vm_prot_t a_access_type; 1693 int a_advice; 1694 int a_flags; 1695 } */ *ap = v; 1696 1697 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && 1698 (ap->a_access_type & VM_PROT_WRITE) != 0) { 1699 return EPERM; 1700 } 1701 if ((ap->a_access_type & VM_PROT_WRITE) != 0) { 1702 mutex_enter(&lfs_lock); 1703 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); 1704 mutex_exit(&lfs_lock); 1705 } 1706 1707 /* 1708 * we're relying on the fact that genfs_getpages() always read in 1709 * entire filesystem blocks. 1710 */ 1711 return genfs_getpages(v); 1712 } 1713 1714 /* 1715 * Wait for a page to become unbusy, possibly printing diagnostic messages 1716 * as well. 1717 * 1718 * Called with vp->v_interlock held; return with it held. 1719 */ 1720 static void 1721 wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) 1722 { 1723 if ((pg->flags & PG_BUSY) == 0) 1724 return; /* Nothing to wait for! 
*/ 1725 1726 #if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) 1727 static struct vm_page *lastpg; 1728 1729 if (label != NULL && pg != lastpg) { 1730 if (pg->owner_tag) { 1731 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", 1732 curproc->p_pid, curlwp->l_lid, label, 1733 pg, pg->owner, pg->lowner, pg->owner_tag); 1734 } else { 1735 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", 1736 curproc->p_pid, curlwp->l_lid, label, pg); 1737 } 1738 } 1739 lastpg = pg; 1740 #endif 1741 1742 pg->flags |= PG_WANTED; 1743 UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput", 0); 1744 mutex_enter(&vp->v_interlock); 1745 } 1746 1747 /* 1748 * This routine is called by lfs_putpages() when it can't complete the 1749 * write because a page is busy. This means that either (1) someone, 1750 * possibly the pagedaemon, is looking at this page, and will give it up 1751 * presently; or (2) we ourselves are holding the page busy in the 1752 * process of being written (either gathered or actually on its way to 1753 * disk). We don't need to give up the segment lock, but we might need 1754 * to call lfs_writeseg() to expedite the page's journey to disk. 1755 * 1756 * Called with vp->v_interlock held; return with it held. 1757 */ 1758 /* #define BUSYWAIT */ 1759 static void 1760 write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, 1761 int seglocked, const char *label) 1762 { 1763 #ifndef BUSYWAIT 1764 struct inode *ip = VTOI(vp); 1765 struct segment *sp = fs->lfs_sp; 1766 int count = 0; 1767 1768 if (pg == NULL) 1769 return; 1770 1771 while (pg->flags & PG_BUSY && 1772 pg->uobject == &vp->v_uobj) { 1773 mutex_exit(&vp->v_interlock); 1774 if (sp->cbpp - sp->bpp > 1) { 1775 /* Write gathered pages */ 1776 lfs_updatemeta(sp); 1777 lfs_release_finfo(fs); 1778 (void) lfs_writeseg(fs, sp); 1779 1780 /* 1781 * Reinitialize FIP 1782 */ 1783 KASSERT(sp->vp == vp); 1784 lfs_acquire_finfo(fs, ip->i_number, 1785 ip->i_gen); 1786 } 1787 ++count; 1788 mutex_enter(&vp->v_interlock); 1789 wait_for_page(vp, pg, label); 1790 } 1791 if (label != NULL && count > 1) 1792 printf("lfs_putpages[%d]: %s: %sn = %d\n", curproc->p_pid, 1793 label, (count > 0 ? "looping, " : ""), count); 1794 #else 1795 preempt(1); 1796 #endif 1797 } 1798 1799 /* 1800 * Make sure that for all pages in every block in the given range, 1801 * either all are dirty or all are clean. If any of the pages 1802 * we've seen so far are dirty, put the vnode on the paging chain, 1803 * and mark it IN_PAGING. 1804 * 1805 * If checkfirst != 0, don't check all the pages but return at the 1806 * first dirty page. 
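 * Returns the number of dirty pages seen, or -1 if waiting for a busy page
 * could deadlock us against the pagedaemon or the cleaner.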
1807 */ 1808 static int 1809 check_dirty(struct lfs *fs, struct vnode *vp, 1810 off_t startoffset, off_t endoffset, off_t blkeof, 1811 int flags, int checkfirst, struct vm_page **pgp) 1812 { 1813 int by_list; 1814 struct vm_page *curpg = NULL; /* XXX: gcc */ 1815 struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg; 1816 off_t soff = 0; /* XXX: gcc */ 1817 voff_t off; 1818 int i; 1819 int nonexistent; 1820 int any_dirty; /* number of dirty pages */ 1821 int dirty; /* number of dirty pages in a block */ 1822 int tdirty; 1823 int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT; 1824 int pagedaemon = (curlwp == uvm.pagedaemon_lwp); 1825 1826 ASSERT_MAYBE_SEGLOCK(fs); 1827 top: 1828 by_list = (vp->v_uobj.uo_npages <= 1829 ((endoffset - startoffset) >> PAGE_SHIFT) * 1830 UVM_PAGE_TREE_PENALTY); 1831 any_dirty = 0; 1832 1833 if (by_list) { 1834 curpg = TAILQ_FIRST(&vp->v_uobj.memq); 1835 } else { 1836 soff = startoffset; 1837 } 1838 while (by_list || soff < MIN(blkeof, endoffset)) { 1839 if (by_list) { 1840 /* 1841 * Find the first page in a block. Skip 1842 * blocks outside our area of interest or beyond 1843 * the end of file. 1844 */ 1845 if (pages_per_block > 1) { 1846 while (curpg && 1847 ((curpg->offset & fs->lfs_bmask) || 1848 curpg->offset >= vp->v_size || 1849 curpg->offset >= endoffset)) 1850 curpg = TAILQ_NEXT(curpg, listq.queue); 1851 } 1852 if (curpg == NULL) 1853 break; 1854 soff = curpg->offset; 1855 } 1856 1857 /* 1858 * Mark all pages in extended range busy; find out if any 1859 * of them are dirty. 1860 */ 1861 nonexistent = dirty = 0; 1862 for (i = 0; i == 0 || i < pages_per_block; i++) { 1863 if (by_list && pages_per_block <= 1) { 1864 pgs[i] = pg = curpg; 1865 } else { 1866 off = soff + (i << PAGE_SHIFT); 1867 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); 1868 if (pg == NULL) { 1869 ++nonexistent; 1870 continue; 1871 } 1872 } 1873 KASSERT(pg != NULL); 1874 1875 /* 1876 * If we're holding the segment lock, we can deadlock 1877 * against a process that has our page and is waiting 1878 * for the cleaner, while the cleaner waits for the 1879 * segment lock. Just bail in that case. 1880 */ 1881 if ((pg->flags & PG_BUSY) && 1882 (pagedaemon || LFS_SEGLOCK_HELD(fs))) { 1883 if (i > 0) 1884 uvm_page_unbusy(pgs, i); 1885 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); 1886 if (pgp) 1887 *pgp = pg; 1888 return -1; 1889 } 1890 1891 while (pg->flags & PG_BUSY) { 1892 wait_for_page(vp, pg, NULL); 1893 if (i > 0) 1894 uvm_page_unbusy(pgs, i); 1895 goto top; 1896 } 1897 pg->flags |= PG_BUSY; 1898 UVM_PAGE_OWN(pg, "lfs_putpages"); 1899 1900 pmap_page_protect(pg, VM_PROT_NONE); 1901 tdirty = (pmap_clear_modify(pg) || 1902 (pg->flags & PG_CLEAN) == 0); 1903 dirty += tdirty; 1904 } 1905 if (pages_per_block > 0 && nonexistent >= pages_per_block) { 1906 if (by_list) { 1907 curpg = TAILQ_NEXT(curpg, listq.queue); 1908 } else { 1909 soff += fs->lfs_bsize; 1910 } 1911 continue; 1912 } 1913 1914 any_dirty += dirty; 1915 KASSERT(nonexistent == 0); 1916 1917 /* 1918 * If any are dirty make all dirty; unbusy them, 1919 * but if we were asked to clean, wire them so that 1920 * the pagedaemon doesn't bother us about them while 1921 * they're on their way to disk. 1922 */ 1923 for (i = 0; i == 0 || i < pages_per_block; i++) { 1924 pg = pgs[i]; 1925 KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); 1926 if (dirty) { 1927 pg->flags &= ~PG_CLEAN; 1928 if (flags & PGO_FREE) { 1929 /* 1930 * Wire the page so that 1931 * pdaemon doesn't see it again. 
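 * (The expectation is that the segment-write path unwires the page
 * again once it has been written and PG_DELWRI is cleared.)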
1932 */ 1933 mutex_enter(&uvm_pageqlock); 1934 uvm_pagewire(pg); 1935 mutex_exit(&uvm_pageqlock); 1936 1937 /* Suspended write flag */ 1938 pg->flags |= PG_DELWRI; 1939 } 1940 } 1941 if (pg->flags & PG_WANTED) 1942 wakeup(pg); 1943 pg->flags &= ~(PG_WANTED|PG_BUSY); 1944 UVM_PAGE_OWN(pg, NULL); 1945 } 1946 1947 if (checkfirst && any_dirty) 1948 break; 1949 1950 if (by_list) { 1951 curpg = TAILQ_NEXT(curpg, listq.queue); 1952 } else { 1953 soff += MAX(PAGE_SIZE, fs->lfs_bsize); 1954 } 1955 } 1956 1957 return any_dirty; 1958 } 1959 1960 /* 1961 * lfs_putpages functions like genfs_putpages except that 1962 * 1963 * (1) It needs to bounds-check the incoming requests to ensure that 1964 * they are block-aligned; if they are not, expand the range and 1965 * do the right thing in case, e.g., the requested range is clean 1966 * but the expanded range is dirty. 1967 * 1968 * (2) It needs to explicitly send blocks to be written when it is done. 1969 * If VOP_PUTPAGES is called without the seglock held, we simply take 1970 * the seglock and let lfs_segunlock wait for us. 1971 * XXX There might be a bad situation if we have to flush a vnode while 1972 * XXX lfs_markv is in operation. As of this writing we panic in this 1973 * XXX case. 1974 * 1975 * Assumptions: 1976 * 1977 * (1) The caller does not hold any pages in this vnode busy. If it does, 1978 * there is a danger that when we expand the page range and busy the 1979 * pages we will deadlock. 1980 * 1981 * (2) We are called with vp->v_interlock held; we must return with it 1982 * released. 1983 * 1984 * (3) We don't absolutely have to free pages right away, provided that 1985 * the request does not have PGO_SYNCIO. When the pagedaemon gives 1986 * us a request with PGO_FREE, we take the pages out of the paging 1987 * queue and wake up the writer, which will handle freeing them for us. 1988 * 1989 * We ensure that for any filesystem block, all pages for that 1990 * block are either resident or not, even if those pages are higher 1991 * than EOF; that means that we will be getting requests to free 1992 * "unused" pages above EOF all the time, and should ignore them. 1993 * 1994 * (4) If we are called with PGO_LOCKED, the finfo array we are to write 1995 * into has been set up for us by lfs_writefile. If not, we will 1996 * have to handle allocating and/or freeing an finfo entry. 1997 * 1998 * XXX note that we're (ab)using PGO_LOCKED as "seglock held". 1999 */ 2000 2001 /* How many times to loop before we should start to worry */ 2002 #define TOOMANY 4 2003 2004 int 2005 lfs_putpages(void *v) 2006 { 2007 int error; 2008 struct vop_putpages_args /* { 2009 struct vnode *a_vp; 2010 voff_t a_offlo; 2011 voff_t a_offhi; 2012 int a_flags; 2013 } */ *ap = v; 2014 struct vnode *vp; 2015 struct inode *ip; 2016 struct lfs *fs; 2017 struct segment *sp; 2018 off_t origoffset, startoffset, endoffset, origendoffset, blkeof; 2019 off_t off, max_endoffset; 2020 bool seglocked, sync, pagedaemon; 2021 struct vm_page *pg, *busypg; 2022 UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); 2023 #ifdef DEBUG 2024 int debug_n_again, debug_n_dirtyclean; 2025 #endif 2026 2027 vp = ap->a_vp; 2028 ip = VTOI(vp); 2029 fs = ip->i_lfs; 2030 sync = (ap->a_flags & PGO_SYNCIO) != 0; 2031 pagedaemon = (curlwp == uvm.pagedaemon_lwp); 2032 2033 /* Putpages does nothing for metadata. */ 2034 if (vp == fs->lfs_ivnode || vp->v_type != VREG) { 2035 mutex_exit(&vp->v_interlock); 2036 return 0; 2037 } 2038 2039 /* 2040 * If there are no pages, don't do anything. 
2041 */ 2042 if (vp->v_uobj.uo_npages == 0) { 2043 if (TAILQ_EMPTY(&vp->v_uobj.memq) && 2044 (vp->v_iflag & VI_ONWORKLST) && 2045 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 2046 vp->v_iflag &= ~VI_WRMAPDIRTY; 2047 vn_syncer_remove_from_worklist(vp); 2048 } 2049 mutex_exit(&vp->v_interlock); 2050 2051 /* Remove us from paging queue, if we were on it */ 2052 mutex_enter(&lfs_lock); 2053 if (ip->i_flags & IN_PAGING) { 2054 ip->i_flags &= ~IN_PAGING; 2055 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 2056 } 2057 mutex_exit(&lfs_lock); 2058 return 0; 2059 } 2060 2061 blkeof = blkroundup(fs, ip->i_size); 2062 2063 /* 2064 * Ignore requests to free pages past EOF but in the same block 2065 * as EOF, unless the request is synchronous. (If the request is 2066 * sync, it comes from lfs_truncate.) 2067 * XXXUBC Make these pages look "active" so the pagedaemon won't 2068 * XXXUBC bother us with them again. 2069 */ 2070 if (!sync && ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { 2071 origoffset = ap->a_offlo; 2072 for (off = origoffset; off < blkeof; off += fs->lfs_bsize) { 2073 pg = uvm_pagelookup(&vp->v_uobj, off); 2074 KASSERT(pg != NULL); 2075 while (pg->flags & PG_BUSY) { 2076 pg->flags |= PG_WANTED; 2077 UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, 2078 "lfsput2", 0); 2079 mutex_enter(&vp->v_interlock); 2080 } 2081 mutex_enter(&uvm_pageqlock); 2082 uvm_pageactivate(pg); 2083 mutex_exit(&uvm_pageqlock); 2084 } 2085 ap->a_offlo = blkeof; 2086 if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { 2087 mutex_exit(&vp->v_interlock); 2088 return 0; 2089 } 2090 } 2091 2092 /* 2093 * Extend page range to start and end at block boundaries. 2094 * (For the purposes of VOP_PUTPAGES, fragments don't exist.) 2095 */ 2096 origoffset = ap->a_offlo; 2097 origendoffset = ap->a_offhi; 2098 startoffset = origoffset & ~(fs->lfs_bmask); 2099 max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift) 2100 << fs->lfs_bshift; 2101 2102 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 2103 endoffset = max_endoffset; 2104 origendoffset = endoffset; 2105 } else { 2106 origendoffset = round_page(ap->a_offhi); 2107 endoffset = round_page(blkroundup(fs, origendoffset)); 2108 } 2109 2110 KASSERT(startoffset > 0 || endoffset >= startoffset); 2111 if (startoffset == endoffset) { 2112 /* Nothing to do, why were we called? */ 2113 mutex_exit(&vp->v_interlock); 2114 DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" 2115 PRId64 "\n", startoffset)); 2116 return 0; 2117 } 2118 2119 ap->a_offlo = startoffset; 2120 ap->a_offhi = endoffset; 2121 2122 /* 2123 * If not cleaning, just send the pages through genfs_putpages 2124 * to be returned to the pool. 2125 */ 2126 if (!(ap->a_flags & PGO_CLEANIT)) 2127 return genfs_putpages(v); 2128 2129 /* Set PGO_BUSYFAIL to avoid deadlocks */ 2130 ap->a_flags |= PGO_BUSYFAIL; 2131 2132 /* 2133 * Likewise, if we are asked to clean but the pages are not 2134 * dirty, we can just free them using genfs_putpages. 2135 */ 2136 #ifdef DEBUG 2137 debug_n_dirtyclean = 0; 2138 #endif 2139 do { 2140 int r; 2141 2142 /* Count the number of dirty pages */ 2143 r = check_dirty(fs, vp, startoffset, endoffset, blkeof, 2144 ap->a_flags, 1, NULL); 2145 if (r < 0) { 2146 /* Pages are busy with another process */ 2147 mutex_exit(&vp->v_interlock); 2148 return EDEADLK; 2149 } 2150 if (r > 0) /* Some pages are dirty */ 2151 break; 2152 2153 /* 2154 * Sometimes pages are dirtied between the time that 2155 * we check and the time we try to clean them. 
2156 * Instruct lfs_gop_write to return EDEADLK in this case 2157 * so we can write them properly. 2158 */ 2159 ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE; 2160 r = genfs_do_putpages(vp, startoffset, endoffset, 2161 ap->a_flags & ~PGO_SYNCIO, &busypg); 2162 ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE; 2163 if (r != EDEADLK) 2164 return r; 2165 2166 /* One of the pages was busy. Start over. */ 2167 mutex_enter(&vp->v_interlock); 2168 wait_for_page(vp, busypg, "dirtyclean"); 2169 #ifdef DEBUG 2170 ++debug_n_dirtyclean; 2171 #endif 2172 } while(1); 2173 2174 #ifdef DEBUG 2175 if (debug_n_dirtyclean > TOOMANY) 2176 printf("lfs_putpages: dirtyclean: looping, n = %d\n", 2177 debug_n_dirtyclean); 2178 #endif 2179 2180 /* 2181 * Dirty and asked to clean. 2182 * 2183 * Pagedaemon can't actually write LFS pages; wake up 2184 * the writer to take care of that. The writer will 2185 * notice the pager inode queue and act on that. 2186 */ 2187 if (pagedaemon) { 2188 mutex_enter(&lfs_lock); 2189 if (!(ip->i_flags & IN_PAGING)) { 2190 ip->i_flags |= IN_PAGING; 2191 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); 2192 } 2193 wakeup(&lfs_writer_daemon); 2194 mutex_exit(&lfs_lock); 2195 mutex_exit(&vp->v_interlock); 2196 preempt(); 2197 return EWOULDBLOCK; 2198 } 2199 2200 /* 2201 * If this is a file created in a recent dirop, we can't flush its 2202 * inode until the dirop is complete. Drain dirops, then flush the 2203 * filesystem (taking care of any other pending dirops while we're 2204 * at it). 2205 */ 2206 if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && 2207 (vp->v_uflag & VU_DIROP)) { 2208 int locked; 2209 2210 DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); 2211 locked = (VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 2212 mutex_exit(&vp->v_interlock); 2213 lfs_writer_enter(fs, "ppdirop"); 2214 if (locked) 2215 VOP_UNLOCK(vp, 0); /* XXX why? */ 2216 2217 mutex_enter(&lfs_lock); 2218 lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); 2219 mutex_exit(&lfs_lock); 2220 2221 mutex_enter(&vp->v_interlock); 2222 if (locked) { 2223 VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); 2224 mutex_enter(&vp->v_interlock); 2225 } 2226 lfs_writer_leave(fs); 2227 2228 /* XXX the flush should have taken care of this one too! */ 2229 } 2230 2231 /* 2232 * This is it. We are going to write some pages. From here on 2233 * down it's all just mechanics. 2234 * 2235 * Don't let genfs_putpages wait; lfs_segunlock will wait for us. 2236 */ 2237 ap->a_flags &= ~PGO_SYNCIO; 2238 2239 /* 2240 * If we've already got the seglock, flush the node and return. 2241 * The FIP has already been set up for us by lfs_writefile, 2242 * and FIP cleanup and lfs_updatemeta will also be done there, 2243 * unless genfs_putpages returns EDEADLK; then we must flush 2244 * what we have, and correct FIP and segment header accounting. 2245 */ 2246 get_seglock: 2247 /* 2248 * If we are not called with the segment locked, lock it. 2249 * Account for a new FIP in the segment header, and set sp->vp. 2250 * (This should duplicate the setup at the top of lfs_writefile().) 2251 */ 2252 seglocked = (ap->a_flags & PGO_LOCKED) != 0; 2253 if (!seglocked) { 2254 mutex_exit(&vp->v_interlock); 2255 error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); 2256 if (error != 0) 2257 return error; 2258 mutex_enter(&vp->v_interlock); 2259 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); 2260 } 2261 sp = fs->lfs_sp; 2262 KASSERT(sp->vp == NULL); 2263 sp->vp = vp; 2264 2265 /* 2266 * Ensure that the partial segment is marked SS_DIROP if this 2267 * vnode is a DIROP. 
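 * We only need to do this when we took the segment lock ourselves;
 * in the PGO_LOCKED case the segment was set up by our caller.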
2268 */ 2269 if (!seglocked && vp->v_uflag & VU_DIROP) 2270 ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT); 2271 2272 /* 2273 * Loop over genfs_putpages until all pages are gathered. 2274 * genfs_putpages() drops the interlock, so reacquire it if necessary. 2275 * Whenever we lose the interlock we have to rerun check_dirty, as 2276 * well, since more pages might have been dirtied in our absence. 2277 */ 2278 #ifdef DEBUG 2279 debug_n_again = 0; 2280 #endif 2281 do { 2282 busypg = NULL; 2283 if (check_dirty(fs, vp, startoffset, endoffset, blkeof, 2284 ap->a_flags, 0, &busypg) < 0) { 2285 mutex_exit(&vp->v_interlock); 2286 2287 mutex_enter(&vp->v_interlock); 2288 write_and_wait(fs, vp, busypg, seglocked, NULL); 2289 if (!seglocked) { 2290 mutex_exit(&vp->v_interlock); 2291 lfs_release_finfo(fs); 2292 lfs_segunlock(fs); 2293 mutex_enter(&vp->v_interlock); 2294 } 2295 sp->vp = NULL; 2296 goto get_seglock; 2297 } 2298 2299 busypg = NULL; 2300 error = genfs_do_putpages(vp, startoffset, endoffset, 2301 ap->a_flags, &busypg); 2302 2303 if (error == EDEADLK || error == EAGAIN) { 2304 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" 2305 " %d ino %d off %x (seg %d)\n", error, 2306 ip->i_number, fs->lfs_offset, 2307 dtosn(fs, fs->lfs_offset))); 2308 2309 mutex_enter(&vp->v_interlock); 2310 write_and_wait(fs, vp, busypg, seglocked, "again"); 2311 } 2312 #ifdef DEBUG 2313 ++debug_n_again; 2314 #endif 2315 } while (error == EDEADLK); 2316 #ifdef DEBUG 2317 if (debug_n_again > TOOMANY) 2318 printf("lfs_putpages: again: looping, n = %d\n", debug_n_again); 2319 #endif 2320 2321 KASSERT(sp != NULL && sp->vp == vp); 2322 if (!seglocked) { 2323 sp->vp = NULL; 2324 2325 /* Write indirect blocks as well */ 2326 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir); 2327 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir); 2328 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir); 2329 2330 KASSERT(sp->vp == NULL); 2331 sp->vp = vp; 2332 } 2333 2334 /* 2335 * Blocks are now gathered into a segment waiting to be written. 2336 * All that's left to do is update metadata, and write them. 2337 */ 2338 lfs_updatemeta(sp); 2339 KASSERT(sp->vp == vp); 2340 sp->vp = NULL; 2341 2342 /* 2343 * If we were called from lfs_writefile, we don't need to clean up 2344 * the FIP or unlock the segment lock. We're done. 2345 */ 2346 if (seglocked) 2347 return error; 2348 2349 /* Clean up FIP and send it to disk. */ 2350 lfs_release_finfo(fs); 2351 lfs_writeseg(fs, fs->lfs_sp); 2352 2353 /* 2354 * Remove us from paging queue if we wrote all our pages. 2355 */ 2356 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 2357 mutex_enter(&lfs_lock); 2358 if (ip->i_flags & IN_PAGING) { 2359 ip->i_flags &= ~IN_PAGING; 2360 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 2361 } 2362 mutex_exit(&lfs_lock); 2363 } 2364 2365 /* 2366 * XXX - with the malloc/copy writeseg, the pages are freed by now 2367 * even if we don't wait (e.g. if we hold a nested lock). This 2368 * will not be true if we stop using malloc/copy. 2369 */ 2370 KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); 2371 lfs_segunlock(fs); 2372 2373 /* 2374 * Wait for v_numoutput to drop to zero. The seglock should 2375 * take care of this, but there is a slight possibility that 2376 * aiodoned might not have got around to our buffers yet. 
2377 */ 2378 if (sync) { 2379 mutex_enter(&vp->v_interlock); 2380 while (vp->v_numoutput > 0) { 2381 DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" 2382 " num %d\n", ip->i_number, vp->v_numoutput)); 2383 cv_wait(&vp->v_cv, &vp->v_interlock); 2384 } 2385 mutex_exit(&vp->v_interlock); 2386 } 2387 return error; 2388 } 2389 2390 /* 2391 * Return the last logical file offset that should be written for this file 2392 * if we're doing a write that ends at "size". If writing, we need to know 2393 * about sizes on disk, i.e. fragments if there are any; if reading, we need 2394 * to know about entire blocks. 2395 */ 2396 void 2397 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) 2398 { 2399 struct inode *ip = VTOI(vp); 2400 struct lfs *fs = ip->i_lfs; 2401 daddr_t olbn, nlbn; 2402 2403 olbn = lblkno(fs, ip->i_size); 2404 nlbn = lblkno(fs, size); 2405 if (!(flags & GOP_SIZE_MEM) && nlbn < NDADDR && olbn <= nlbn) { 2406 *eobp = fragroundup(fs, size); 2407 } else { 2408 *eobp = blkroundup(fs, size); 2409 } 2410 } 2411 2412 #ifdef DEBUG 2413 void lfs_dump_vop(void *); 2414 2415 void 2416 lfs_dump_vop(void *v) 2417 { 2418 struct vop_putpages_args /* { 2419 struct vnode *a_vp; 2420 voff_t a_offlo; 2421 voff_t a_offhi; 2422 int a_flags; 2423 } */ *ap = v; 2424 2425 #ifdef DDB 2426 vfs_vnode_print(ap->a_vp, 0, printf); 2427 #endif 2428 lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din); 2429 } 2430 #endif 2431 2432 int 2433 lfs_mmap(void *v) 2434 { 2435 struct vop_mmap_args /* { 2436 const struct vnodeop_desc *a_desc; 2437 struct vnode *a_vp; 2438 vm_prot_t a_prot; 2439 kauth_cred_t a_cred; 2440 } */ *ap = v; 2441 2442 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) 2443 return EOPNOTSUPP; 2444 return ufs_mmap(v); 2445 } 2446