1 /* $NetBSD: lfs_vnops.c,v 1.215 2008/01/25 14:32:17 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /* 39 * Copyright (c) 1986, 1989, 1991, 1993, 1995 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.215 2008/01/25 14:32:17 ad Exp $"); 71 72 #ifdef _KERNEL_OPT 73 #include "opt_compat_netbsd.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/resourcevar.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/buf.h> 84 #include <sys/proc.h> 85 #include <sys/mount.h> 86 #include <sys/vnode.h> 87 #include <sys/pool.h> 88 #include <sys/signalvar.h> 89 #include <sys/kauth.h> 90 #include <sys/syslog.h> 91 #include <sys/fstrans.h> 92 93 #include <miscfs/fifofs/fifo.h> 94 #include <miscfs/genfs/genfs.h> 95 #include <miscfs/specfs/specdev.h> 96 97 #include <ufs/ufs/inode.h> 98 #include <ufs/ufs/dir.h> 99 #include <ufs/ufs/ufsmount.h> 100 #include <ufs/ufs/ufs_extern.h> 101 102 #include <uvm/uvm.h> 103 #include <uvm/uvm_pmap.h> 104 #include <uvm/uvm_stat.h> 105 #include <uvm/uvm_pager.h> 106 107 #include <ufs/lfs/lfs.h> 108 #include <ufs/lfs/lfs_extern.h> 109 110 extern pid_t lfs_writer_daemon; 111 int lfs_ignore_lazy_sync = 1; 112 113 /* Global vfs data structures for lfs. */ 114 int (**lfs_vnodeop_p)(void *); 115 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { 116 { &vop_default_desc, vn_default_error }, 117 { &vop_lookup_desc, ufs_lookup }, /* lookup */ 118 { &vop_create_desc, lfs_create }, /* create */ 119 { &vop_whiteout_desc, ufs_whiteout }, /* whiteout */ 120 { &vop_mknod_desc, lfs_mknod }, /* mknod */ 121 { &vop_open_desc, ufs_open }, /* open */ 122 { &vop_close_desc, lfs_close }, /* close */ 123 { &vop_access_desc, ufs_access }, /* access */ 124 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 125 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 126 { &vop_read_desc, lfs_read }, /* read */ 127 { &vop_write_desc, lfs_write }, /* write */ 128 { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ 129 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */ 130 { &vop_poll_desc, ufs_poll }, /* poll */ 131 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 132 { &vop_revoke_desc, ufs_revoke }, /* revoke */ 133 { &vop_mmap_desc, lfs_mmap }, /* mmap */ 134 { &vop_fsync_desc, lfs_fsync }, /* fsync */ 135 { &vop_seek_desc, ufs_seek }, /* seek */ 136 { &vop_remove_desc, lfs_remove }, /* remove */ 137 { &vop_link_desc, lfs_link }, /* link */ 138 { &vop_rename_desc, lfs_rename }, /* rename */ 139 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */ 140 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */ 141 { &vop_symlink_desc, lfs_symlink }, /* symlink */ 142 { &vop_readdir_desc, ufs_readdir }, /* readdir */ 143 { &vop_readlink_desc, ufs_readlink }, /* readlink */ 144 { &vop_abortop_desc, ufs_abortop }, /* abortop */ 145 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 146 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 147 { &vop_lock_desc, ufs_lock }, /* lock */ 148 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 149 { &vop_bmap_desc, ufs_bmap }, /* bmap 
*/ 150 { &vop_strategy_desc, lfs_strategy }, /* strategy */ 151 { &vop_print_desc, ufs_print }, /* print */ 152 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 153 { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ 154 { &vop_advlock_desc, ufs_advlock }, /* advlock */ 155 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 156 { &vop_getpages_desc, lfs_getpages }, /* getpages */ 157 { &vop_putpages_desc, lfs_putpages }, /* putpages */ 158 { NULL, NULL } 159 }; 160 const struct vnodeopv_desc lfs_vnodeop_opv_desc = 161 { &lfs_vnodeop_p, lfs_vnodeop_entries }; 162 163 int (**lfs_specop_p)(void *); 164 const struct vnodeopv_entry_desc lfs_specop_entries[] = { 165 { &vop_default_desc, vn_default_error }, 166 { &vop_lookup_desc, spec_lookup }, /* lookup */ 167 { &vop_create_desc, spec_create }, /* create */ 168 { &vop_mknod_desc, spec_mknod }, /* mknod */ 169 { &vop_open_desc, spec_open }, /* open */ 170 { &vop_close_desc, lfsspec_close }, /* close */ 171 { &vop_access_desc, ufs_access }, /* access */ 172 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 173 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 174 { &vop_read_desc, ufsspec_read }, /* read */ 175 { &vop_write_desc, ufsspec_write }, /* write */ 176 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 177 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 178 { &vop_poll_desc, spec_poll }, /* poll */ 179 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 180 { &vop_revoke_desc, spec_revoke }, /* revoke */ 181 { &vop_mmap_desc, spec_mmap }, /* mmap */ 182 { &vop_fsync_desc, spec_fsync }, /* fsync */ 183 { &vop_seek_desc, spec_seek }, /* seek */ 184 { &vop_remove_desc, spec_remove }, /* remove */ 185 { &vop_link_desc, spec_link }, /* link */ 186 { &vop_rename_desc, spec_rename }, /* rename */ 187 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 188 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 189 { &vop_symlink_desc, spec_symlink }, /* symlink */ 190 { &vop_readdir_desc, spec_readdir }, /* readdir */ 191 { &vop_readlink_desc, spec_readlink }, /* readlink */ 192 { &vop_abortop_desc, spec_abortop }, /* abortop */ 193 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 194 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 195 { &vop_lock_desc, ufs_lock }, /* lock */ 196 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 197 { &vop_bmap_desc, spec_bmap }, /* bmap */ 198 { &vop_strategy_desc, spec_strategy }, /* strategy */ 199 { &vop_print_desc, ufs_print }, /* print */ 200 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 201 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 202 { &vop_advlock_desc, spec_advlock }, /* advlock */ 203 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 204 { &vop_getpages_desc, spec_getpages }, /* getpages */ 205 { &vop_putpages_desc, spec_putpages }, /* putpages */ 206 { NULL, NULL } 207 }; 208 const struct vnodeopv_desc lfs_specop_opv_desc = 209 { &lfs_specop_p, lfs_specop_entries }; 210 211 int (**lfs_fifoop_p)(void *); 212 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = { 213 { &vop_default_desc, vn_default_error }, 214 { &vop_lookup_desc, fifo_lookup }, /* lookup */ 215 { &vop_create_desc, fifo_create }, /* create */ 216 { &vop_mknod_desc, fifo_mknod }, /* mknod */ 217 { &vop_open_desc, fifo_open }, /* open */ 218 { &vop_close_desc, lfsfifo_close }, /* close */ 219 { &vop_access_desc, ufs_access }, /* access */ 220 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 221 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 222 { &vop_read_desc, ufsfifo_read }, /* read */ 223 { &vop_write_desc, ufsfifo_write }, /* 
write */ 224 { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ 225 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 226 { &vop_poll_desc, fifo_poll }, /* poll */ 227 { &vop_kqfilter_desc, fifo_kqfilter }, /* kqfilter */ 228 { &vop_revoke_desc, fifo_revoke }, /* revoke */ 229 { &vop_mmap_desc, fifo_mmap }, /* mmap */ 230 { &vop_fsync_desc, fifo_fsync }, /* fsync */ 231 { &vop_seek_desc, fifo_seek }, /* seek */ 232 { &vop_remove_desc, fifo_remove }, /* remove */ 233 { &vop_link_desc, fifo_link }, /* link */ 234 { &vop_rename_desc, fifo_rename }, /* rename */ 235 { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ 236 { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ 237 { &vop_symlink_desc, fifo_symlink }, /* symlink */ 238 { &vop_readdir_desc, fifo_readdir }, /* readdir */ 239 { &vop_readlink_desc, fifo_readlink }, /* readlink */ 240 { &vop_abortop_desc, fifo_abortop }, /* abortop */ 241 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 242 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 243 { &vop_lock_desc, ufs_lock }, /* lock */ 244 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 245 { &vop_bmap_desc, fifo_bmap }, /* bmap */ 246 { &vop_strategy_desc, fifo_strategy }, /* strategy */ 247 { &vop_print_desc, ufs_print }, /* print */ 248 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 249 { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ 250 { &vop_advlock_desc, fifo_advlock }, /* advlock */ 251 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 252 { &vop_putpages_desc, fifo_putpages }, /* putpages */ 253 { NULL, NULL } 254 }; 255 const struct vnodeopv_desc lfs_fifoop_opv_desc = 256 { &lfs_fifoop_p, lfs_fifoop_entries }; 257 258 static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); 259 260 #define LFS_READWRITE 261 #include <ufs/ufs/ufs_readwrite.c> 262 #undef LFS_READWRITE 263 264 /* 265 * Synch an open file. 266 */ 267 /* ARGSUSED */ 268 int 269 lfs_fsync(void *v) 270 { 271 struct vop_fsync_args /* { 272 struct vnode *a_vp; 273 kauth_cred_t a_cred; 274 int a_flags; 275 off_t offlo; 276 off_t offhi; 277 } */ *ap = v; 278 struct vnode *vp = ap->a_vp; 279 int error, wait; 280 struct inode *ip = VTOI(vp); 281 struct lfs *fs = ip->i_lfs; 282 283 /* If we're mounted read-only, don't try to sync. */ 284 if (fs->lfs_ronly) 285 return 0; 286 287 /* 288 * Trickle sync simply adds this vnode to the pager list, as if 289 * the pagedaemon had requested a pageout. 290 */ 291 if (ap->a_flags & FSYNC_LAZY) { 292 if (lfs_ignore_lazy_sync == 0) { 293 mutex_enter(&lfs_lock); 294 if (!(ip->i_flags & IN_PAGING)) { 295 ip->i_flags |= IN_PAGING; 296 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, 297 i_lfs_pchain); 298 } 299 wakeup(&lfs_writer_daemon); 300 mutex_exit(&lfs_lock); 301 } 302 return 0; 303 } 304 305 /* 306 * If a vnode is being cleaned, flush it out before we try to 307 * reuse it. This prevents the cleaner from writing files twice 308 * in the same partial segment, causing an accounting underflow. 309 */ 310 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) { 311 lfs_vflush(vp); 312 } 313 314 wait = (ap->a_flags & FSYNC_WAIT); 315 do { 316 mutex_enter(&vp->v_interlock); 317 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 318 round_page(ap->a_offhi), 319 PGO_CLEANIT | (wait ?
PGO_SYNCIO : 0)); 320 if (error == EAGAIN) { 321 mutex_enter(&lfs_lock); 322 mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", 323 hz / 100 + 1, &lfs_lock); 324 mutex_exit(&lfs_lock); 325 } 326 } while (error == EAGAIN); 327 if (error) 328 return error; 329 330 if ((ap->a_flags & FSYNC_DATAONLY) == 0) 331 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0); 332 333 if (error == 0 && ap->a_flags & FSYNC_CACHE) { 334 int l = 0; 335 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE, 336 curlwp->l_cred); 337 } 338 if (wait && !VPISEMPTY(vp)) 339 LFS_SET_UINO(ip, IN_MODIFIED); 340 341 return error; 342 } 343 344 /* 345 * Take IN_ADIROP off, then call ufs_inactive. 346 */ 347 int 348 lfs_inactive(void *v) 349 { 350 struct vop_inactive_args /* { 351 struct vnode *a_vp; 352 } */ *ap = v; 353 354 KASSERT(VTOI(ap->a_vp)->i_nlink == VTOI(ap->a_vp)->i_ffs_effnlink); 355 356 lfs_unmark_vnode(ap->a_vp); 357 358 /* 359 * The Ifile is only ever inactivated on unmount. 360 * Streamline this process by not giving it more dirty blocks. 361 */ 362 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { 363 mutex_enter(&lfs_lock); 364 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); 365 mutex_exit(&lfs_lock); 366 VOP_UNLOCK(ap->a_vp, 0); 367 return 0; 368 } 369 370 return ufs_inactive(v); 371 } 372 373 /* 374 * These macros are used to bracket UFS directory ops, so that we can 375 * identify all the pages touched during directory ops which need to 376 * be ordered and flushed atomically, so that they may be recovered. 377 * 378 * Because we have to mark nodes VU_DIROP in order to prevent 379 * the cache from reclaiming them while a dirop is in progress, we must 380 * also manage the number of nodes so marked (otherwise we can run out). 381 * We do this by setting lfs_dirvcount to the number of marked vnodes; it 382 * is decremented during segment write, when VU_DIROP is taken off. 383 */ 384 #define MARK_VNODE(vp) lfs_mark_vnode(vp) 385 #define UNMARK_VNODE(vp) lfs_unmark_vnode(vp) 386 #define SET_DIROP_CREATE(dvp, vpp) lfs_set_dirop_create((dvp), (vpp)) 387 #define SET_DIROP_REMOVE(dvp, vp) lfs_set_dirop((dvp), (vp)) 388 static int lfs_set_dirop_create(struct vnode *, struct vnode **); 389 static int lfs_set_dirop(struct vnode *, struct vnode *); 390 391 static int 392 lfs_set_dirop(struct vnode *dvp, struct vnode *vp) 393 { 394 struct lfs *fs; 395 int error; 396 397 KASSERT(VOP_ISLOCKED(dvp)); 398 KASSERT(vp == NULL || VOP_ISLOCKED(vp)); 399 400 fs = VTOI(dvp)->i_lfs; 401 402 ASSERT_NO_SEGLOCK(fs); 403 /* 404 * LFS_NRESERVE calculates direct and indirect blocks as well 405 * as an inode block; an overestimate in most cases. 
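 *
 * Once the blocks are reserved, the code below waits for any active
 * writer to drain (fs->lfs_writer), throttles if too many vnodes are
 * already marked for dirops (lfs_dirvcount vs. LFS_MAX_DIROP), and only
 * then bumps fs->lfs_dirops, takes references on dvp and vp, and marks
 * them with MARK_VNODE().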
406 */ 407 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) 408 return (error); 409 410 restart: 411 mutex_enter(&lfs_lock); 412 if (fs->lfs_dirops == 0) { 413 mutex_exit(&lfs_lock); 414 lfs_check(dvp, LFS_UNUSED_LBN, 0); 415 mutex_enter(&lfs_lock); 416 } 417 while (fs->lfs_writer) { 418 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, 419 "lfs_sdirop", 0, &lfs_lock); 420 if (error == EINTR) { 421 mutex_exit(&lfs_lock); 422 goto unreserve; 423 } 424 } 425 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { 426 wakeup(&lfs_writer_daemon); 427 mutex_exit(&lfs_lock); 428 preempt(); 429 goto restart; 430 } 431 432 if (lfs_dirvcount > LFS_MAX_DIROP) { 433 mutex_exit(&lfs_lock); 434 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " 435 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); 436 if ((error = mtsleep(&lfs_dirvcount, 437 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, 438 &lfs_lock)) != 0) { 439 goto unreserve; 440 } 441 goto restart; 442 } 443 444 ++fs->lfs_dirops; 445 fs->lfs_doifile = 1; 446 mutex_exit(&lfs_lock); 447 448 /* Hold a reference so SET_ENDOP will be happy */ 449 vref(dvp); 450 if (vp) { 451 vref(vp); 452 MARK_VNODE(vp); 453 } 454 455 MARK_VNODE(dvp); 456 return 0; 457 458 unreserve: 459 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs)); 460 return error; 461 } 462 463 /* 464 * Get a new vnode *before* adjusting the dirop count, to avoid a deadlock 465 * in getnewvnode(), if we have a stacked filesystem mounted on top 466 * of us. 467 * 468 * NB: this means we have to clear the new vnodes on error. Fortunately 469 * SET_ENDOP is there to do that for us. 470 */ 471 static int 472 lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) 473 { 474 int error; 475 struct lfs *fs; 476 477 fs = VFSTOUFS(dvp->v_mount)->um_lfs; 478 ASSERT_NO_SEGLOCK(fs); 479 if (fs->lfs_ronly) 480 return EROFS; 481 if (vpp && (error = getnewvnode(VT_LFS, dvp->v_mount, lfs_vnodeop_p, vpp))) { 482 DLOG((DLOG_ALLOC, "lfs_set_dirop_create: dvp %p error %d\n", 483 dvp, error)); 484 return error; 485 } 486 if ((error = lfs_set_dirop(dvp, NULL)) != 0) { 487 if (vpp) { 488 ungetnewvnode(*vpp); 489 *vpp = NULL; 490 } 491 return error; 492 } 493 return 0; 494 } 495 496 #define SET_ENDOP_BASE(fs, dvp, str) \ 497 do { \ 498 mutex_enter(&lfs_lock); \ 499 --(fs)->lfs_dirops; \ 500 if (!(fs)->lfs_dirops) { \ 501 if ((fs)->lfs_nadirop) { \ 502 panic("SET_ENDOP: %s: no dirops but " \ 503 " nadirop=%d", (str), \ 504 (fs)->lfs_nadirop); \ 505 } \ 506 wakeup(&(fs)->lfs_writer); \ 507 mutex_exit(&lfs_lock); \ 508 lfs_check((dvp), LFS_UNUSED_LBN, 0); \ 509 } else \ 510 mutex_exit(&lfs_lock); \ 511 } while(0) 512 #define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \ 513 do { \ 514 UNMARK_VNODE(dvp); \ 515 if (nvpp && *nvpp) \ 516 UNMARK_VNODE(*nvpp); \ 517 /* Check for error return to stem vnode leakage */ \ 518 if (nvpp && *nvpp && !((*nvpp)->v_uflag & VU_DIROP)) \ 519 ungetnewvnode(*(nvpp)); \ 520 SET_ENDOP_BASE((fs), (dvp), (str)); \ 521 lfs_reserve((fs), (dvp), NULL, -LFS_NRESERVE(fs)); \ 522 vrele(dvp); \ 523 } while(0) 524 #define SET_ENDOP_CREATE_AP(ap, str) \ 525 SET_ENDOP_CREATE(VTOI((ap)->a_dvp)->i_lfs, (ap)->a_dvp, \ 526 (ap)->a_vpp, (str)) 527 #define SET_ENDOP_REMOVE(fs, dvp, ovp, str) \ 528 do { \ 529 UNMARK_VNODE(dvp); \ 530 if (ovp) \ 531 UNMARK_VNODE(ovp); \ 532 SET_ENDOP_BASE((fs), (dvp), (str)); \ 533 lfs_reserve((fs), (dvp), (ovp), -LFS_NRESERVE(fs)); \ 534 vrele(dvp); \ 535 if (ovp) \ 536 vrele(ovp); \ 537 } while(0) 538 539 void 540 lfs_mark_vnode(struct vnode *vp) 541 
{ 542 struct inode *ip = VTOI(vp); 543 struct lfs *fs = ip->i_lfs; 544 545 mutex_enter(&lfs_lock); 546 if (!(ip->i_flag & IN_ADIROP)) { 547 if (!(vp->v_uflag & VU_DIROP)) { 548 mutex_enter(&vp->v_interlock); 549 (void)lfs_vref(vp); 550 ++lfs_dirvcount; 551 ++fs->lfs_dirvcount; 552 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); 553 vp->v_uflag |= VU_DIROP; 554 } 555 ++fs->lfs_nadirop; 556 ip->i_flag |= IN_ADIROP; 557 } else 558 KASSERT(vp->v_uflag & VU_DIROP); 559 mutex_exit(&lfs_lock); 560 } 561 562 void 563 lfs_unmark_vnode(struct vnode *vp) 564 { 565 struct inode *ip = VTOI(vp); 566 567 if (ip && (ip->i_flag & IN_ADIROP)) { 568 KASSERT(vp->v_uflag & VU_DIROP); 569 mutex_enter(&lfs_lock); 570 --ip->i_lfs->lfs_nadirop; 571 mutex_exit(&lfs_lock); 572 ip->i_flag &= ~IN_ADIROP; 573 } 574 } 575 576 int 577 lfs_symlink(void *v) 578 { 579 struct vop_symlink_args /* { 580 struct vnode *a_dvp; 581 struct vnode **a_vpp; 582 struct componentname *a_cnp; 583 struct vattr *a_vap; 584 char *a_target; 585 } */ *ap = v; 586 int error; 587 588 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 589 vput(ap->a_dvp); 590 return error; 591 } 592 error = ufs_symlink(ap); 593 SET_ENDOP_CREATE_AP(ap, "symlink"); 594 return (error); 595 } 596 597 int 598 lfs_mknod(void *v) 599 { 600 struct vop_mknod_args /* { 601 struct vnode *a_dvp; 602 struct vnode **a_vpp; 603 struct componentname *a_cnp; 604 struct vattr *a_vap; 605 } */ *ap = v; 606 struct vattr *vap = ap->a_vap; 607 struct vnode **vpp = ap->a_vpp; 608 struct inode *ip; 609 int error; 610 struct mount *mp; 611 ino_t ino; 612 613 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 614 vput(ap->a_dvp); 615 return error; 616 } 617 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 618 ap->a_dvp, vpp, ap->a_cnp); 619 620 /* Either way we're done with the dirop at this point */ 621 SET_ENDOP_CREATE_AP(ap, "mknod"); 622 623 if (error) 624 return (error); 625 626 ip = VTOI(*vpp); 627 mp = (*vpp)->v_mount; 628 ino = ip->i_number; 629 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 630 if (vap->va_rdev != VNOVAL) { 631 /* 632 * Want to be able to use this to make badblock 633 * inodes, so don't truncate the dev number. 634 */ 635 #if 0 636 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 637 UFS_MPNEEDSWAP((*vpp)->v_mount)); 638 #else 639 ip->i_ffs1_rdev = vap->va_rdev; 640 #endif 641 } 642 643 /* 644 * Call fsync to write the vnode so that we don't have to deal with 645 * flushing it when it's marked VU_DIROP|VI_XLOCK. 646 * 647 * XXX KS - If we can't flush we also can't call vgone(), so must 648 * return. But, that leaves this vnode in limbo, also not good. 649 * Can this ever happen (barring hardware failure)? 650 */ 651 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) { 652 panic("lfs_mknod: couldn't fsync (ino %llu)", 653 (unsigned long long)ino); 654 /* return (error); */ 655 } 656 /* 657 * Remove vnode so that it will be reloaded by VFS_VGET and 658 * checked to see if it is an alias of an existing entry in 659 * the inode cache. 660 */ 661 /* Used to be vput, but that causes us to call VOP_INACTIVE twice. 
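 * Instead we unlock the vnode, mark it VNON, vgone() it, and fetch a
 * fresh copy of the inode with VFS_VGET() below.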
*/ 662 663 VOP_UNLOCK(*vpp, 0); 664 (*vpp)->v_type = VNON; 665 vgone(*vpp); 666 error = VFS_VGET(mp, ino, vpp); 667 668 if (error != 0) { 669 *vpp = NULL; 670 return (error); 671 } 672 return (0); 673 } 674 675 int 676 lfs_create(void *v) 677 { 678 struct vop_create_args /* { 679 struct vnode *a_dvp; 680 struct vnode **a_vpp; 681 struct componentname *a_cnp; 682 struct vattr *a_vap; 683 } */ *ap = v; 684 int error; 685 686 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 687 vput(ap->a_dvp); 688 return error; 689 } 690 error = ufs_create(ap); 691 SET_ENDOP_CREATE_AP(ap, "create"); 692 return (error); 693 } 694 695 int 696 lfs_mkdir(void *v) 697 { 698 struct vop_mkdir_args /* { 699 struct vnode *a_dvp; 700 struct vnode **a_vpp; 701 struct componentname *a_cnp; 702 struct vattr *a_vap; 703 } */ *ap = v; 704 int error; 705 706 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 707 vput(ap->a_dvp); 708 return error; 709 } 710 error = ufs_mkdir(ap); 711 SET_ENDOP_CREATE_AP(ap, "mkdir"); 712 return (error); 713 } 714 715 int 716 lfs_remove(void *v) 717 { 718 struct vop_remove_args /* { 719 struct vnode *a_dvp; 720 struct vnode *a_vp; 721 struct componentname *a_cnp; 722 } */ *ap = v; 723 struct vnode *dvp, *vp; 724 struct inode *ip; 725 int error; 726 727 dvp = ap->a_dvp; 728 vp = ap->a_vp; 729 ip = VTOI(vp); 730 if ((error = SET_DIROP_REMOVE(dvp, vp)) != 0) { 731 if (dvp == vp) 732 vrele(vp); 733 else 734 vput(vp); 735 vput(dvp); 736 return error; 737 } 738 error = ufs_remove(ap); 739 if (ip->i_nlink == 0) 740 lfs_orphan(ip->i_lfs, ip->i_number); 741 SET_ENDOP_REMOVE(ip->i_lfs, dvp, ap->a_vp, "remove"); 742 return (error); 743 } 744 745 int 746 lfs_rmdir(void *v) 747 { 748 struct vop_rmdir_args /* { 749 struct vnodeop_desc *a_desc; 750 struct vnode *a_dvp; 751 struct vnode *a_vp; 752 struct componentname *a_cnp; 753 } */ *ap = v; 754 struct vnode *vp; 755 struct inode *ip; 756 int error; 757 758 vp = ap->a_vp; 759 ip = VTOI(vp); 760 if ((error = SET_DIROP_REMOVE(ap->a_dvp, ap->a_vp)) != 0) { 761 if (ap->a_dvp == vp) 762 vrele(ap->a_dvp); 763 else 764 vput(ap->a_dvp); 765 vput(vp); 766 return error; 767 } 768 error = ufs_rmdir(ap); 769 if (ip->i_nlink == 0) 770 lfs_orphan(ip->i_lfs, ip->i_number); 771 SET_ENDOP_REMOVE(ip->i_lfs, ap->a_dvp, ap->a_vp, "rmdir"); 772 return (error); 773 } 774 775 int 776 lfs_link(void *v) 777 { 778 struct vop_link_args /* { 779 struct vnode *a_dvp; 780 struct vnode *a_vp; 781 struct componentname *a_cnp; 782 } */ *ap = v; 783 int error; 784 struct vnode **vpp = NULL; 785 786 if ((error = SET_DIROP_CREATE(ap->a_dvp, vpp)) != 0) { 787 vput(ap->a_dvp); 788 return error; 789 } 790 error = ufs_link(ap); 791 SET_ENDOP_CREATE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vpp, "link"); 792 return (error); 793 } 794 795 int 796 lfs_rename(void *v) 797 { 798 struct vop_rename_args /* { 799 struct vnode *a_fdvp; 800 struct vnode *a_fvp; 801 struct componentname *a_fcnp; 802 struct vnode *a_tdvp; 803 struct vnode *a_tvp; 804 struct componentname *a_tcnp; 805 } */ *ap = v; 806 struct vnode *tvp, *fvp, *tdvp, *fdvp; 807 struct componentname *tcnp, *fcnp; 808 int error; 809 struct lfs *fs; 810 811 fs = VTOI(ap->a_fdvp)->i_lfs; 812 tvp = ap->a_tvp; 813 tdvp = ap->a_tdvp; 814 tcnp = ap->a_tcnp; 815 fvp = ap->a_fvp; 816 fdvp = ap->a_fdvp; 817 fcnp = ap->a_fcnp; 818 819 /* 820 * Check for cross-device rename. 821 * If it is, we don't want to set dirops, just error out. 822 * (In particular note that MARK_VNODE(tdvp) will DTWT on 823 * a cross-device rename.) 
824 * 825 * Copied from ufs_rename. 826 */ 827 if ((fvp->v_mount != tdvp->v_mount) || 828 (tvp && (fvp->v_mount != tvp->v_mount))) { 829 error = EXDEV; 830 goto errout; 831 } 832 833 /* 834 * Check to make sure we're not renaming a vnode onto itself 835 * (deleting a hard link by renaming one name onto another); 836 * if we are we can't recursively call VOP_REMOVE since that 837 * would leave us with an unaccounted-for number of live dirops. 838 * 839 * Inline the relevant section of ufs_rename here, *before* 840 * calling SET_DIROP_REMOVE. 841 */ 842 if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || 843 (VTOI(tdvp)->i_flags & APPEND))) { 844 error = EPERM; 845 goto errout; 846 } 847 if (fvp == tvp) { 848 if (fvp->v_type == VDIR) { 849 error = EINVAL; 850 goto errout; 851 } 852 853 /* Release destination completely. */ 854 VOP_ABORTOP(tdvp, tcnp); 855 vput(tdvp); 856 vput(tvp); 857 858 /* Delete source. */ 859 vrele(fvp); 860 fcnp->cn_flags &= ~(MODMASK | SAVESTART); 861 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 862 fcnp->cn_nameiop = DELETE; 863 vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); 864 if ((error = relookup(fdvp, &fvp, fcnp))) { 865 vput(fdvp); 866 return (error); 867 } 868 return (VOP_REMOVE(fdvp, fvp, fcnp)); 869 } 870 871 if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0) 872 goto errout; 873 MARK_VNODE(fdvp); 874 MARK_VNODE(fvp); 875 876 error = ufs_rename(ap); 877 UNMARK_VNODE(fdvp); 878 UNMARK_VNODE(fvp); 879 SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename"); 880 return (error); 881 882 errout: 883 VOP_ABORTOP(tdvp, ap->a_tcnp); /* XXX, why not in NFS? */ 884 if (tdvp == tvp) 885 vrele(tdvp); 886 else 887 vput(tdvp); 888 if (tvp) 889 vput(tvp); 890 VOP_ABORTOP(fdvp, ap->a_fcnp); /* XXX, why not in NFS? */ 891 vrele(fdvp); 892 vrele(fvp); 893 return (error); 894 } 895 896 /* XXX hack to avoid calling ITIMES in getattr */ 897 int 898 lfs_getattr(void *v) 899 { 900 struct vop_getattr_args /* { 901 struct vnode *a_vp; 902 struct vattr *a_vap; 903 kauth_cred_t a_cred; 904 } */ *ap = v; 905 struct vnode *vp = ap->a_vp; 906 struct inode *ip = VTOI(vp); 907 struct vattr *vap = ap->a_vap; 908 struct lfs *fs = ip->i_lfs; 909 /* 910 * Copy from inode table 911 */ 912 vap->va_fsid = ip->i_dev; 913 vap->va_fileid = ip->i_number; 914 vap->va_mode = ip->i_mode & ~IFMT; 915 vap->va_nlink = ip->i_nlink; 916 vap->va_uid = ip->i_uid; 917 vap->va_gid = ip->i_gid; 918 vap->va_rdev = (dev_t)ip->i_ffs1_rdev; 919 vap->va_size = vp->v_size; 920 vap->va_atime.tv_sec = ip->i_ffs1_atime; 921 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 922 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 923 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 924 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 925 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 926 vap->va_flags = ip->i_flags; 927 vap->va_gen = ip->i_gen; 928 /* this doesn't belong here */ 929 if (vp->v_type == VBLK) 930 vap->va_blocksize = BLKDEV_IOSIZE; 931 else if (vp->v_type == VCHR) 932 vap->va_blocksize = MAXBSIZE; 933 else 934 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 935 vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks); 936 vap->va_type = vp->v_type; 937 vap->va_filerev = ip->i_modrev; 938 return (0); 939 } 940 941 /* 942 * Check to make sure the inode blocks won't choke the buffer 943 * cache, then call ufs_setattr as usual. 
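 * (The check is the same lfs_check() throttle that lfs_set_dirop()
 * uses before starting a directory operation.)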
944 */ 945 int 946 lfs_setattr(void *v) 947 { 948 struct vop_setattr_args /* { 949 struct vnode *a_vp; 950 struct vattr *a_vap; 951 kauth_cred_t a_cred; 952 } */ *ap = v; 953 struct vnode *vp = ap->a_vp; 954 955 lfs_check(vp, LFS_UNUSED_LBN, 0); 956 return ufs_setattr(v); 957 } 958 959 /* 960 * Release the block we hold on lfs_newseg wrapping. Called on file close, 961 * or explicitly from LFCNWRAPGO. Called with the interlock held. 962 */ 963 static int 964 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) 965 { 966 if (fs->lfs_stoplwp != curlwp) 967 return EBUSY; 968 969 fs->lfs_stoplwp = NULL; 970 cv_signal(&fs->lfs_stopcv); 971 972 KASSERT(fs->lfs_nowrap > 0); 973 if (fs->lfs_nowrap <= 0) { 974 return 0; 975 } 976 977 if (--fs->lfs_nowrap == 0) { 978 log(LOG_NOTICE, "%s: re-enabled log wrap\n", fs->lfs_fsmnt); 979 wakeup(&fs->lfs_wrappass); 980 lfs_wakeup_cleaner(fs); 981 } 982 if (waitfor) { 983 mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment", 984 0, &lfs_lock); 985 } 986 987 return 0; 988 } 989 990 /* 991 * Close called 992 */ 993 /* ARGSUSED */ 994 int 995 lfs_close(void *v) 996 { 997 struct vop_close_args /* { 998 struct vnode *a_vp; 999 int a_fflag; 1000 kauth_cred_t a_cred; 1001 } */ *ap = v; 1002 struct vnode *vp = ap->a_vp; 1003 struct inode *ip = VTOI(vp); 1004 struct lfs *fs = ip->i_lfs; 1005 1006 if ((ip->i_number == ROOTINO || ip->i_number == LFS_IFILE_INUM) && 1007 fs->lfs_stoplwp == curlwp) { 1008 mutex_enter(&lfs_lock); 1009 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); 1010 lfs_wrapgo(fs, ip, 0); 1011 mutex_exit(&lfs_lock); 1012 } 1013 1014 if (vp == ip->i_lfs->lfs_ivnode && 1015 vp->v_mount->mnt_iflag & IMNT_UNMOUNT) 1016 return 0; 1017 1018 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) { 1019 LFS_ITIMES(ip, NULL, NULL, NULL); 1020 } 1021 return (0); 1022 } 1023 1024 /* 1025 * Close wrapper for special devices. 1026 * 1027 * Update the times on the inode then do device close. 1028 */ 1029 int 1030 lfsspec_close(void *v) 1031 { 1032 struct vop_close_args /* { 1033 struct vnode *a_vp; 1034 int a_fflag; 1035 kauth_cred_t a_cred; 1036 } */ *ap = v; 1037 struct vnode *vp; 1038 struct inode *ip; 1039 1040 vp = ap->a_vp; 1041 ip = VTOI(vp); 1042 if (vp->v_usecount > 1) { 1043 LFS_ITIMES(ip, NULL, NULL, NULL); 1044 } 1045 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 1046 } 1047 1048 /* 1049 * Close wrapper for fifos. 1050 * 1051 * Update the times on the inode then do device close. 1052 */ 1053 int 1054 lfsfifo_close(void *v) 1055 { 1056 struct vop_close_args /* { 1057 struct vnode *a_vp; 1058 int a_fflag; 1059 kauth_cred_t a_cred; 1060 } */ *ap = v; 1061 struct vnode *vp; 1062 struct inode *ip; 1063 1064 vp = ap->a_vp; 1065 ip = VTOI(vp); 1066 if (ap->a_vp->v_usecount > 1) { 1067 LFS_ITIMES(ip, NULL, NULL, NULL); 1068 } 1069 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 1070 } 1071 1072 /* 1073 * Reclaim an inode so that it can be used for other purposes. 1074 */ 1075 1076 int 1077 lfs_reclaim(void *v) 1078 { 1079 struct vop_reclaim_args /* { 1080 struct vnode *a_vp; 1081 } */ *ap = v; 1082 struct vnode *vp = ap->a_vp; 1083 struct inode *ip = VTOI(vp); 1084 struct lfs *fs = ip->i_lfs; 1085 int error; 1086 1087 KASSERT(ip->i_nlink == ip->i_ffs_effnlink); 1088 1089 mutex_enter(&lfs_lock); 1090 LFS_CLR_UINO(ip, IN_ALLMOD); 1091 mutex_exit(&lfs_lock); 1092 if ((error = ufs_reclaim(vp))) 1093 return (error); 1094 1095 /* 1096 * Take us off the paging and/or dirop queues if we were on them. 1097 * We shouldn't be on them.
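 * Being left on the paging queue only rates a warning below; a
 * reclaimed vnode that is still marked VU_DIROP is a panic, since
 * dirops hold an extra reference that should have kept the vnode
 * from being reclaimed in the first place.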
1098 */ 1099 mutex_enter(&lfs_lock); 1100 if (ip->i_flags & IN_PAGING) { 1101 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1102 fs->lfs_fsmnt); 1103 ip->i_flags &= ~IN_PAGING; 1104 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1105 } 1106 if (vp->v_uflag & VU_DIROP) { 1107 panic("reclaimed vnode is VU_DIROP"); 1108 vp->v_uflag &= ~VU_DIROP; 1109 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1110 } 1111 mutex_exit(&lfs_lock); 1112 1113 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); 1114 lfs_deregister_all(vp); 1115 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1116 ip->inode_ext.lfs = NULL; 1117 genfs_node_destroy(vp); 1118 pool_put(&lfs_inode_pool, vp->v_data); 1119 vp->v_data = NULL; 1120 return (0); 1121 } 1122 1123 /* 1124 * Read a block from a storage device. 1125 * In order to avoid reading blocks that are in the process of being 1126 * written by the cleaner---and hence are not mutexed by the normal 1127 * buffer cache / page cache mechanisms---check for collisions before 1128 * reading. 1129 * 1130 * We inline ufs_strategy to make sure that the VOP_BMAP occurs *before* 1131 * the active cleaner test. 1132 * 1133 * XXX This code assumes that lfs_markv makes synchronous checkpoints. 1134 */ 1135 int 1136 lfs_strategy(void *v) 1137 { 1138 struct vop_strategy_args /* { 1139 struct vnode *a_vp; 1140 struct buf *a_bp; 1141 } */ *ap = v; 1142 struct buf *bp; 1143 struct lfs *fs; 1144 struct vnode *vp; 1145 struct inode *ip; 1146 daddr_t tbn; 1147 int i, sn, error, slept; 1148 1149 bp = ap->a_bp; 1150 vp = ap->a_vp; 1151 ip = VTOI(vp); 1152 fs = ip->i_lfs; 1153 1154 /* lfs uses its strategy routine only for read */ 1155 KASSERT(bp->b_flags & B_READ); 1156 1157 if (vp->v_type == VBLK || vp->v_type == VCHR) 1158 panic("lfs_strategy: spec"); 1159 KASSERT(bp->b_bcount != 0); 1160 if (bp->b_blkno == bp->b_lblkno) { 1161 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1162 NULL); 1163 if (error) { 1164 bp->b_error = error; 1165 bp->b_resid = bp->b_bcount; 1166 biodone(bp); 1167 return (error); 1168 } 1169 if ((long)bp->b_blkno == -1) /* no valid data */ 1170 clrbuf(bp); 1171 } 1172 if ((long)bp->b_blkno < 0) { /* block is not on disk */ 1173 bp->b_resid = bp->b_bcount; 1174 biodone(bp); 1175 return (0); 1176 } 1177 1178 slept = 1; 1179 mutex_enter(&lfs_lock); 1180 while (slept && fs->lfs_seglock) { 1181 mutex_exit(&lfs_lock); 1182 /* 1183 * Look through list of intervals. 1184 * There will only be intervals to look through 1185 * if the cleaner holds the seglock. 1186 * Since the cleaner is synchronous, we can trust 1187 * the list of intervals to be current. 
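 * If the block being read lies in an interval the cleaner is
 * rewriting, the loop below sleeps on lfs_iocount or the seglock
 * and then rescans the interval list.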
1188 */ 1189 tbn = dbtofsb(fs, bp->b_blkno); 1190 sn = dtosn(fs, tbn); 1191 slept = 0; 1192 for (i = 0; i < fs->lfs_cleanind; i++) { 1193 if (sn == dtosn(fs, fs->lfs_cleanint[i]) && 1194 tbn >= fs->lfs_cleanint[i]) { 1195 DLOG((DLOG_CLEAN, 1196 "lfs_strategy: ino %d lbn %" PRId64 1197 " ind %d sn %d fsb %" PRIx32 1198 " given sn %d fsb %" PRIx64 "\n", 1199 ip->i_number, bp->b_lblkno, i, 1200 dtosn(fs, fs->lfs_cleanint[i]), 1201 fs->lfs_cleanint[i], sn, tbn)); 1202 DLOG((DLOG_CLEAN, 1203 "lfs_strategy: sleeping on ino %d lbn %" 1204 PRId64 "\n", ip->i_number, bp->b_lblkno)); 1205 mutex_enter(&lfs_lock); 1206 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { 1207 /* Cleaner can't wait for itself */ 1208 mtsleep(&fs->lfs_iocount, 1209 (PRIBIO + 1) | PNORELOCK, 1210 "clean2", 0, 1211 &lfs_lock); 1212 slept = 1; 1213 break; 1214 } else if (fs->lfs_seglock) { 1215 mtsleep(&fs->lfs_seglock, 1216 (PRIBIO + 1) | PNORELOCK, 1217 "clean1", 0, 1218 &lfs_lock); 1219 slept = 1; 1220 break; 1221 } 1222 mutex_exit(&lfs_lock); 1223 } 1224 } 1225 mutex_enter(&lfs_lock); 1226 } 1227 mutex_exit(&lfs_lock); 1228 1229 vp = ip->i_devvp; 1230 VOP_STRATEGY(vp, bp); 1231 return (0); 1232 } 1233 1234 void 1235 lfs_flush_dirops(struct lfs *fs) 1236 { 1237 struct inode *ip, *nip; 1238 struct vnode *vp; 1239 extern int lfs_dostats; 1240 struct segment *sp; 1241 int waslocked; 1242 1243 ASSERT_MAYBE_SEGLOCK(fs); 1244 KASSERT(fs->lfs_nadirop == 0); 1245 1246 if (fs->lfs_ronly) 1247 return; 1248 1249 mutex_enter(&lfs_lock); 1250 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { 1251 mutex_exit(&lfs_lock); 1252 return; 1253 } else 1254 mutex_exit(&lfs_lock); 1255 1256 if (lfs_dostats) 1257 ++lfs_stats.flush_invoked; 1258 1259 /* 1260 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops. 1261 * Technically this is a checkpoint (the on-disk state is valid) 1262 * even though we are leaving out all the file data. 1263 */ 1264 lfs_imtime(fs); 1265 lfs_seglock(fs, SEGM_CKP); 1266 sp = fs->lfs_sp; 1267 1268 /* 1269 * lfs_writevnodes, optimized to get dirops out of the way. 1270 * Only write dirops, and don't flush files' pages, only 1271 * blocks from the directories. 1272 * 1273 * We don't need to vref these files because they are 1274 * dirops and so hold an extra reference until the 1275 * segunlock clears them of that status. 1276 * 1277 * We don't need to check for IN_ADIROP because we know that 1278 * no dirops are active. 1279 * 1280 */ 1281 mutex_enter(&lfs_lock); 1282 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 1283 nip = TAILQ_NEXT(ip, i_lfs_dchain); 1284 mutex_exit(&lfs_lock); 1285 vp = ITOV(ip); 1286 1287 KASSERT((ip->i_flag & IN_ADIROP) == 0); 1288 1289 /* 1290 * All writes to directories come from dirops; all 1291 * writes to files' direct blocks go through the page 1292 * cache, which we're not touching. Reads to files 1293 * and/or directories will not be affected by writing 1294 * directory blocks inodes and file inodes. So we don't 1295 * really need to lock. If we don't lock, though, 1296 * make sure that we don't clear IN_MODIFIED 1297 * unnecessarily. 
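 * That is what the LK_EXCLOTHER check after lfs_writeinode() below is
 * for: if another thread held the vnode locked while we wrote it,
 * re-set IN_MODIFIED rather than risk losing a dirty flag.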
1298 */ 1299 if (vp->v_iflag & VI_XLOCK) { 1300 mutex_enter(&lfs_lock); 1301 continue; 1302 } 1303 waslocked = VOP_ISLOCKED(vp); 1304 if (vp->v_type != VREG && 1305 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) { 1306 lfs_writefile(fs, sp, vp); 1307 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1308 !(ip->i_flag & IN_ALLMOD)) { 1309 mutex_enter(&lfs_lock); 1310 LFS_SET_UINO(ip, IN_MODIFIED); 1311 mutex_exit(&lfs_lock); 1312 } 1313 } 1314 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1315 (void) lfs_writeinode(fs, sp, ip); 1316 mutex_enter(&lfs_lock); 1317 if (waslocked == LK_EXCLOTHER) 1318 LFS_SET_UINO(ip, IN_MODIFIED); 1319 } 1320 mutex_exit(&lfs_lock); 1321 /* We've written all the dirops there are */ 1322 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); 1323 lfs_finalize_fs_seguse(fs); 1324 (void) lfs_writeseg(fs, sp); 1325 lfs_segunlock(fs); 1326 } 1327 1328 /* 1329 * Flush all vnodes for which the pagedaemon has requested pageouts. 1330 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop() 1331 * has just run, this would be an error). If we have to skip a vnode 1332 * for any reason, just skip it; if we have to wait for the cleaner, 1333 * abort. The writer daemon will call us again later. 1334 */ 1335 void 1336 lfs_flush_pchain(struct lfs *fs) 1337 { 1338 struct inode *ip, *nip; 1339 struct vnode *vp; 1340 extern int lfs_dostats; 1341 struct segment *sp; 1342 int error; 1343 1344 ASSERT_NO_SEGLOCK(fs); 1345 1346 if (fs->lfs_ronly) 1347 return; 1348 1349 mutex_enter(&lfs_lock); 1350 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { 1351 mutex_exit(&lfs_lock); 1352 return; 1353 } else 1354 mutex_exit(&lfs_lock); 1355 1356 /* Get dirops out of the way */ 1357 lfs_flush_dirops(fs); 1358 1359 if (lfs_dostats) 1360 ++lfs_stats.flush_invoked; 1361 1362 /* 1363 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts. 1364 */ 1365 lfs_imtime(fs); 1366 lfs_seglock(fs, 0); 1367 sp = fs->lfs_sp; 1368 1369 /* 1370 * lfs_writevnodes, optimized to clear pageout requests. 1371 * Only write non-dirop files that are in the pageout queue. 1372 * We're very conservative about what we write; we want to be 1373 * fast and async. 1374 */ 1375 mutex_enter(&lfs_lock); 1376 top: 1377 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { 1378 nip = TAILQ_NEXT(ip, i_lfs_pchain); 1379 vp = ITOV(ip); 1380 1381 if (!(ip->i_flags & IN_PAGING)) 1382 goto top; 1383 1384 mutex_enter(&vp->v_interlock); 1385 if ((vp->v_iflag & VI_XLOCK) || (vp->v_uflag & VU_DIROP) != 0) { 1386 mutex_exit(&vp->v_interlock); 1387 continue; 1388 } 1389 if (vp->v_type != VREG) { 1390 mutex_exit(&vp->v_interlock); 1391 continue; 1392 } 1393 if (lfs_vref(vp)) 1394 continue; 1395 mutex_exit(&lfs_lock); 1396 1397 if (VOP_ISLOCKED(vp)) { 1398 lfs_vunref(vp); 1399 mutex_enter(&lfs_lock); 1400 continue; 1401 } 1402 1403 error = lfs_writefile(fs, sp, vp); 1404 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1405 !(ip->i_flag & IN_ALLMOD)) { 1406 mutex_enter(&lfs_lock); 1407 LFS_SET_UINO(ip, IN_MODIFIED); 1408 mutex_exit(&lfs_lock); 1409 } 1410 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1411 (void) lfs_writeinode(fs, sp, ip); 1412 1413 lfs_vunref(vp); 1414 1415 if (error == EAGAIN) { 1416 lfs_writeseg(fs, sp); 1417 mutex_enter(&lfs_lock); 1418 break; 1419 } 1420 mutex_enter(&lfs_lock); 1421 } 1422 mutex_exit(&lfs_lock); 1423 (void) lfs_writeseg(fs, sp); 1424 lfs_segunlock(fs); 1425 } 1426 1427 /* 1428 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}. 
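 *
 * As a rough, illustrative sketch (not part of this file's code), a
 * userland cleaner or test harness would drive these through fcntl(2)
 * on a descriptor for the filesystem root or the Ifile; "ifilefd" here
 * is a hypothetical descriptor name:
 *
 *	struct timeval tv = { 10, 0 };
 *	struct lfs_fcntl_markv blkvp;
 *
 *	fcntl(ifilefd, LFCNSEGWAIT, &tv);	wait for a segment write
 *	fcntl(ifilefd, LFCNBMAPV, &blkvp);	look up current block addresses
 *	fcntl(ifilefd, LFCNMARKV, &blkvp);	rewrite the still-live blocks
 *
 *	(blkvp.blkiov and blkvp.blkcnt would describe a BLOCK_INFO array.)
 *
 * Note that the 'L' fcntls below are restricted to root.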
1429 */ 1430 int 1431 lfs_fcntl(void *v) 1432 { 1433 struct vop_fcntl_args /* { 1434 struct vnode *a_vp; 1435 u_long a_command; 1436 void * a_data; 1437 int a_fflag; 1438 kauth_cred_t a_cred; 1439 } */ *ap = v; 1440 struct timeval *tvp; 1441 BLOCK_INFO *blkiov; 1442 CLEANERINFO *cip; 1443 SEGUSE *sup; 1444 int blkcnt, error, oclean; 1445 size_t fh_size; 1446 struct lfs_fcntl_markv blkvp; 1447 struct lwp *l; 1448 fsid_t *fsidp; 1449 struct lfs *fs; 1450 struct buf *bp; 1451 fhandle_t *fhp; 1452 daddr_t off; 1453 1454 /* Only respect LFS fcntls on fs root or Ifile */ 1455 if (VTOI(ap->a_vp)->i_number != ROOTINO && 1456 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) { 1457 return ufs_fcntl(v); 1458 } 1459 1460 /* Avoid locking a draining lock */ 1461 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) { 1462 return ESHUTDOWN; 1463 } 1464 1465 /* LFS control and monitoring fcntls are available only to root */ 1466 l = curlwp; 1467 if (((ap->a_command & 0xff00) >> 8) == 'L' && 1468 (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 1469 NULL)) != 0) 1470 return (error); 1471 1472 fs = VTOI(ap->a_vp)->i_lfs; 1473 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx; 1474 1475 error = 0; 1476 switch (ap->a_command) { 1477 case LFCNSEGWAITALL: 1478 case LFCNSEGWAITALL_COMPAT: 1479 fsidp = NULL; 1480 /* FALLSTHROUGH */ 1481 case LFCNSEGWAIT: 1482 case LFCNSEGWAIT_COMPAT: 1483 tvp = (struct timeval *)ap->a_data; 1484 mutex_enter(&lfs_lock); 1485 ++fs->lfs_sleepers; 1486 mutex_exit(&lfs_lock); 1487 1488 error = lfs_segwait(fsidp, tvp); 1489 1490 mutex_enter(&lfs_lock); 1491 if (--fs->lfs_sleepers == 0) 1492 wakeup(&fs->lfs_sleepers); 1493 mutex_exit(&lfs_lock); 1494 return error; 1495 1496 case LFCNBMAPV: 1497 case LFCNMARKV: 1498 blkvp = *(struct lfs_fcntl_markv *)ap->a_data; 1499 1500 blkcnt = blkvp.blkcnt; 1501 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1502 return (EINVAL); 1503 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1504 if ((error = copyin(blkvp.blkiov, blkiov, 1505 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1506 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1507 return error; 1508 } 1509 1510 mutex_enter(&lfs_lock); 1511 ++fs->lfs_sleepers; 1512 mutex_exit(&lfs_lock); 1513 if (ap->a_command == LFCNBMAPV) 1514 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); 1515 else /* LFCNMARKV */ 1516 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); 1517 if (error == 0) 1518 error = copyout(blkiov, blkvp.blkiov, 1519 blkcnt * sizeof(BLOCK_INFO)); 1520 mutex_enter(&lfs_lock); 1521 if (--fs->lfs_sleepers == 0) 1522 wakeup(&fs->lfs_sleepers); 1523 mutex_exit(&lfs_lock); 1524 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1525 return error; 1526 1527 case LFCNRECLAIM: 1528 /* 1529 * Flush dirops and write Ifile, allowing empty segments 1530 * to be immediately reclaimed. 
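 * This runs with the writer lock held and forces a checkpoint
 * (SEGM_FORCE_CKP), so the on-disk state remains consistent while the
 * newly emptied segments become reclaimable.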
1531 */ 1532 lfs_writer_enter(fs, "pndirop"); 1533 off = fs->lfs_offset; 1534 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 1535 lfs_flush_dirops(fs); 1536 LFS_CLEANERINFO(cip, fs, bp); 1537 oclean = cip->clean; 1538 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1539 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 1540 fs->lfs_sp->seg_flags |= SEGM_PROT; 1541 lfs_segunlock(fs); 1542 lfs_writer_leave(fs); 1543 1544 #ifdef DEBUG 1545 LFS_CLEANERINFO(cip, fs, bp); 1546 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 1547 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 1548 fs->lfs_offset - off, cip->clean - oclean, 1549 fs->lfs_activesb)); 1550 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 1551 #endif 1552 1553 return 0; 1554 1555 #ifdef COMPAT_30 1556 case LFCNIFILEFH_COMPAT: 1557 /* Return the filehandle of the Ifile */ 1558 if ((error = kauth_authorize_generic(l->l_cred, 1559 KAUTH_GENERIC_ISSUSER, NULL)) != 0) 1560 return (error); 1561 fhp = (struct fhandle *)ap->a_data; 1562 fhp->fh_fsid = *fsidp; 1563 fh_size = 16; /* former VFS_MAXFIDSIZ */ 1564 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1565 #endif 1566 1567 case LFCNIFILEFH_COMPAT2: 1568 case LFCNIFILEFH: 1569 /* Return the filehandle of the Ifile */ 1570 fhp = (struct fhandle *)ap->a_data; 1571 fhp->fh_fsid = *fsidp; 1572 fh_size = sizeof(struct lfs_fhandle) - 1573 offsetof(fhandle_t, fh_fid); 1574 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1575 1576 case LFCNREWIND: 1577 /* Move lfs_offset to the lowest-numbered segment */ 1578 return lfs_rewind(fs, *(int *)ap->a_data); 1579 1580 case LFCNINVAL: 1581 /* Mark a segment SEGUSE_INVAL */ 1582 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); 1583 if (sup->su_nbytes > 0) { 1584 brelse(bp, 0); 1585 lfs_unset_inval_all(fs); 1586 return EBUSY; 1587 } 1588 sup->su_flags |= SEGUSE_INVAL; 1589 VOP_BWRITE(bp); 1590 return 0; 1591 1592 case LFCNRESIZE: 1593 /* Resize the filesystem */ 1594 return lfs_resize_fs(fs, *(int *)ap->a_data); 1595 1596 case LFCNWRAPSTOP: 1597 case LFCNWRAPSTOP_COMPAT: 1598 /* 1599 * Hold lfs_newseg at segment 0; if requested, sleep until 1600 * the filesystem wraps around. To support external agents 1601 * (dump, fsck-based regression test) that need to look at 1602 * a snapshot of the filesystem, without necessarily 1603 * requiring that all fs activity stops. 1604 */ 1605 if (fs->lfs_stoplwp == curlwp) 1606 return EALREADY; 1607 1608 mutex_enter(&lfs_lock); 1609 while (fs->lfs_stoplwp != NULL) 1610 cv_wait(&fs->lfs_stopcv, &lfs_lock); 1611 fs->lfs_stoplwp = curlwp; 1612 if (fs->lfs_nowrap == 0) 1613 log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); 1614 ++fs->lfs_nowrap; 1615 if (*(int *)ap->a_data == 1 || 1616 ap->a_command == LFCNWRAPSTOP_COMPAT) { 1617 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); 1618 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1619 "segwrap", 0, &lfs_lock); 1620 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); 1621 if (error) { 1622 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 1623 } 1624 } 1625 mutex_exit(&lfs_lock); 1626 return 0; 1627 1628 case LFCNWRAPGO: 1629 case LFCNWRAPGO_COMPAT: 1630 /* 1631 * Having done its work, the agent wakes up the writer. 1632 * If the argument is 1, it sleeps until a new segment 1633 * is selected. 1634 */ 1635 mutex_enter(&lfs_lock); 1636 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 1637 (ap->a_command == LFCNWRAPGO_COMPAT ? 
1 : 1638 *((int *)ap->a_data))); 1639 mutex_exit(&lfs_lock); 1640 return error; 1641 1642 case LFCNWRAPPASS: 1643 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 1644 return EALREADY; 1645 mutex_enter(&lfs_lock); 1646 if (fs->lfs_stoplwp != curlwp) { 1647 mutex_exit(&lfs_lock); 1648 return EALREADY; 1649 } 1650 if (fs->lfs_nowrap == 0) { 1651 mutex_exit(&lfs_lock); 1652 return EBUSY; 1653 } 1654 fs->lfs_wrappass = 1; 1655 wakeup(&fs->lfs_wrappass); 1656 /* Wait for the log to wrap, if asked */ 1657 if (*(int *)ap->a_data) { 1658 mutex_enter(&ap->a_vp->v_interlock); 1659 lfs_vref(ap->a_vp); 1660 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 1661 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 1662 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1663 "segwrap", 0, &lfs_lock); 1664 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 1665 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 1666 lfs_vunref(ap->a_vp); 1667 } 1668 mutex_exit(&lfs_lock); 1669 return error; 1670 1671 case LFCNWRAPSTATUS: 1672 mutex_enter(&lfs_lock); 1673 *(int *)ap->a_data = fs->lfs_wrapstatus; 1674 mutex_exit(&lfs_lock); 1675 return 0; 1676 1677 default: 1678 return ufs_fcntl(v); 1679 } 1680 return 0; 1681 } 1682 1683 int 1684 lfs_getpages(void *v) 1685 { 1686 struct vop_getpages_args /* { 1687 struct vnode *a_vp; 1688 voff_t a_offset; 1689 struct vm_page **a_m; 1690 int *a_count; 1691 int a_centeridx; 1692 vm_prot_t a_access_type; 1693 int a_advice; 1694 int a_flags; 1695 } */ *ap = v; 1696 1697 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && 1698 (ap->a_access_type & VM_PROT_WRITE) != 0) { 1699 return EPERM; 1700 } 1701 if ((ap->a_access_type & VM_PROT_WRITE) != 0) { 1702 mutex_enter(&lfs_lock); 1703 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); 1704 mutex_exit(&lfs_lock); 1705 } 1706 1707 /* 1708 * we're relying on the fact that genfs_getpages() always read in 1709 * entire filesystem blocks. 1710 */ 1711 return genfs_getpages(v); 1712 } 1713 1714 /* 1715 * Wait for a page to become unbusy, possibly printing diagnostic messages 1716 * as well. 1717 * 1718 * Called with vp->v_interlock held; return with it held. 1719 */ 1720 static void 1721 wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) 1722 { 1723 if ((pg->flags & PG_BUSY) == 0) 1724 return; /* Nothing to wait for! */ 1725 1726 #if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) 1727 static struct vm_page *lastpg; 1728 1729 if (label != NULL && pg != lastpg) { 1730 if (pg->owner_tag) { 1731 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", 1732 curproc->p_pid, curlwp->l_lid, label, 1733 pg, pg->owner, pg->lowner, pg->owner_tag); 1734 } else { 1735 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", 1736 curproc->p_pid, curlwp->l_lid, label, pg); 1737 } 1738 } 1739 lastpg = pg; 1740 #endif 1741 1742 pg->flags |= PG_WANTED; 1743 UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput", 0); 1744 mutex_enter(&vp->v_interlock); 1745 } 1746 1747 /* 1748 * This routine is called by lfs_putpages() when it can't complete the 1749 * write because a page is busy. This means that either (1) someone, 1750 * possibly the pagedaemon, is looking at this page, and will give it up 1751 * presently; or (2) we ourselves are holding the page busy in the 1752 * process of being written (either gathered or actually on its way to 1753 * disk). We don't need to give up the segment lock, but we might need 1754 * to call lfs_writeseg() to expedite the page's journey to disk. 1755 * 1756 * Called with vp->v_interlock held; return with it held. 
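 * Concretely, the loop below writes out any blocks already gathered
 * (lfs_updatemeta()/lfs_writeseg()), re-acquires the finfo for this
 * inode, and then waits for the page with wait_for_page().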
1757 */ 1758 /* #define BUSYWAIT */ 1759 static void 1760 write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, 1761 int seglocked, const char *label) 1762 { 1763 #ifndef BUSYWAIT 1764 struct inode *ip = VTOI(vp); 1765 struct segment *sp = fs->lfs_sp; 1766 int count = 0; 1767 1768 if (pg == NULL) 1769 return; 1770 1771 while (pg->flags & PG_BUSY) { 1772 mutex_exit(&vp->v_interlock); 1773 if (sp->cbpp - sp->bpp > 1) { 1774 /* Write gathered pages */ 1775 lfs_updatemeta(sp); 1776 lfs_release_finfo(fs); 1777 (void) lfs_writeseg(fs, sp); 1778 1779 /* 1780 * Reinitialize FIP 1781 */ 1782 KASSERT(sp->vp == vp); 1783 lfs_acquire_finfo(fs, ip->i_number, 1784 ip->i_gen); 1785 } 1786 ++count; 1787 mutex_enter(&vp->v_interlock); 1788 wait_for_page(vp, pg, label); 1789 } 1790 if (label != NULL && count > 1) 1791 printf("lfs_putpages[%d]: %s: %sn = %d\n", curproc->p_pid, 1792 label, (count > 0 ? "looping, " : ""), count); 1793 #else 1794 preempt(1); 1795 #endif 1796 } 1797 1798 /* 1799 * Make sure that for all pages in every block in the given range, 1800 * either all are dirty or all are clean. If any of the pages 1801 * we've seen so far are dirty, put the vnode on the paging chain, 1802 * and mark it IN_PAGING. 1803 * 1804 * If checkfirst != 0, don't check all the pages but return at the 1805 * first dirty page. 1806 */ 1807 static int 1808 check_dirty(struct lfs *fs, struct vnode *vp, 1809 off_t startoffset, off_t endoffset, off_t blkeof, 1810 int flags, int checkfirst, struct vm_page **pgp) 1811 { 1812 int by_list; 1813 struct vm_page *curpg = NULL; /* XXX: gcc */ 1814 struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg; 1815 off_t soff = 0; /* XXX: gcc */ 1816 voff_t off; 1817 int i; 1818 int nonexistent; 1819 int any_dirty; /* number of dirty pages */ 1820 int dirty; /* number of dirty pages in a block */ 1821 int tdirty; 1822 int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT; 1823 int pagedaemon = (curlwp == uvm.pagedaemon_lwp); 1824 1825 ASSERT_MAYBE_SEGLOCK(fs); 1826 top: 1827 by_list = (vp->v_uobj.uo_npages <= 1828 ((endoffset - startoffset) >> PAGE_SHIFT) * 1829 UVM_PAGE_HASH_PENALTY); 1830 any_dirty = 0; 1831 1832 if (by_list) { 1833 curpg = TAILQ_FIRST(&vp->v_uobj.memq); 1834 } else { 1835 soff = startoffset; 1836 } 1837 while (by_list || soff < MIN(blkeof, endoffset)) { 1838 if (by_list) { 1839 /* 1840 * Find the first page in a block. Skip 1841 * blocks outside our area of interest or beyond 1842 * the end of file. 1843 */ 1844 if (pages_per_block > 1) { 1845 while (curpg && 1846 ((curpg->offset & fs->lfs_bmask) || 1847 curpg->offset >= vp->v_size || 1848 curpg->offset >= endoffset)) 1849 curpg = TAILQ_NEXT(curpg, listq); 1850 } 1851 if (curpg == NULL) 1852 break; 1853 soff = curpg->offset; 1854 } 1855 1856 /* 1857 * Mark all pages in extended range busy; find out if any 1858 * of them are dirty. 1859 */ 1860 nonexistent = dirty = 0; 1861 for (i = 0; i == 0 || i < pages_per_block; i++) { 1862 if (by_list && pages_per_block <= 1) { 1863 pgs[i] = pg = curpg; 1864 } else { 1865 off = soff + (i << PAGE_SHIFT); 1866 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); 1867 if (pg == NULL) { 1868 ++nonexistent; 1869 continue; 1870 } 1871 } 1872 KASSERT(pg != NULL); 1873 1874 /* 1875 * If we're holding the segment lock, we can deadlock 1876 * against a process that has our page and is waiting 1877 * for the cleaner, while the cleaner waits for the 1878 * segment lock. Just bail in that case. 
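 * (check_dirty() returns -1 on this path; lfs_putpages() turns that
 * into EDEADLK.)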
1879 */ 1880 if ((pg->flags & PG_BUSY) && 1881 (pagedaemon || LFS_SEGLOCK_HELD(fs))) { 1882 if (i > 0) 1883 uvm_page_unbusy(pgs, i); 1884 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); 1885 if (pgp) 1886 *pgp = pg; 1887 return -1; 1888 } 1889 1890 while (pg->flags & PG_BUSY) { 1891 wait_for_page(vp, pg, NULL); 1892 if (i > 0) 1893 uvm_page_unbusy(pgs, i); 1894 goto top; 1895 } 1896 pg->flags |= PG_BUSY; 1897 UVM_PAGE_OWN(pg, "lfs_putpages"); 1898 1899 pmap_page_protect(pg, VM_PROT_NONE); 1900 tdirty = (pmap_clear_modify(pg) || 1901 (pg->flags & PG_CLEAN) == 0); 1902 dirty += tdirty; 1903 } 1904 if (pages_per_block > 0 && nonexistent >= pages_per_block) { 1905 if (by_list) { 1906 curpg = TAILQ_NEXT(curpg, listq); 1907 } else { 1908 soff += fs->lfs_bsize; 1909 } 1910 continue; 1911 } 1912 1913 any_dirty += dirty; 1914 KASSERT(nonexistent == 0); 1915 1916 /* 1917 * If any are dirty make all dirty; unbusy them, 1918 * but if we were asked to clean, wire them so that 1919 * the pagedaemon doesn't bother us about them while 1920 * they're on their way to disk. 1921 */ 1922 for (i = 0; i == 0 || i < pages_per_block; i++) { 1923 pg = pgs[i]; 1924 KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); 1925 if (dirty) { 1926 pg->flags &= ~PG_CLEAN; 1927 if (flags & PGO_FREE) { 1928 /* 1929 * Wire the page so that 1930 * pdaemon doesn't see it again. 1931 */ 1932 mutex_enter(&uvm_pageqlock); 1933 uvm_pagewire(pg); 1934 mutex_exit(&uvm_pageqlock); 1935 1936 /* Suspended write flag */ 1937 pg->flags |= PG_DELWRI; 1938 } 1939 } 1940 if (pg->flags & PG_WANTED) 1941 wakeup(pg); 1942 pg->flags &= ~(PG_WANTED|PG_BUSY); 1943 UVM_PAGE_OWN(pg, NULL); 1944 } 1945 1946 if (checkfirst && any_dirty) 1947 break; 1948 1949 if (by_list) { 1950 curpg = TAILQ_NEXT(curpg, listq); 1951 } else { 1952 soff += MAX(PAGE_SIZE, fs->lfs_bsize); 1953 } 1954 } 1955 1956 return any_dirty; 1957 } 1958 1959 /* 1960 * lfs_putpages functions like genfs_putpages except that 1961 * 1962 * (1) It needs to bounds-check the incoming requests to ensure that 1963 * they are block-aligned; if they are not, expand the range and 1964 * do the right thing in case, e.g., the requested range is clean 1965 * but the expanded range is dirty. 1966 * 1967 * (2) It needs to explicitly send blocks to be written when it is done. 1968 * If VOP_PUTPAGES is called without the seglock held, we simply take 1969 * the seglock and let lfs_segunlock wait for us. 1970 * XXX There might be a bad situation if we have to flush a vnode while 1971 * XXX lfs_markv is in operation. As of this writing we panic in this 1972 * XXX case. 1973 * 1974 * Assumptions: 1975 * 1976 * (1) The caller does not hold any pages in this vnode busy. If it does, 1977 * there is a danger that when we expand the page range and busy the 1978 * pages we will deadlock. 1979 * 1980 * (2) We are called with vp->v_interlock held; we must return with it 1981 * released. 1982 * 1983 * (3) We don't absolutely have to free pages right away, provided that 1984 * the request does not have PGO_SYNCIO. When the pagedaemon gives 1985 * us a request with PGO_FREE, we take the pages out of the paging 1986 * queue and wake up the writer, which will handle freeing them for us. 1987 * 1988 * We ensure that for any filesystem block, all pages for that 1989 * block are either resident or not, even if those pages are higher 1990 * than EOF; that means that we will be getting requests to free 1991 * "unused" pages above EOF all the time, and should ignore them. 
		if (pages_per_block > 0 && nonexistent >= pages_per_block) {
			if (by_list) {
				curpg = TAILQ_NEXT(curpg, listq);
			} else {
				soff += fs->lfs_bsize;
			}
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, wire them so that
		 * the pagedaemon doesn't bother us about them while
		 * they're on their way to disk.
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			pg = pgs[i];
			KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
			if (dirty) {
				pg->flags &= ~PG_CLEAN;
				if (flags & PGO_FREE) {
					/*
					 * Wire the page so that
					 * pdaemon doesn't see it again.
					 */
					mutex_enter(&uvm_pageqlock);
					uvm_pagewire(pg);
					mutex_exit(&uvm_pageqlock);

					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
				}
			}
			if (pg->flags & PG_WANTED)
				wakeup(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (checkfirst && any_dirty)
			break;

		if (by_list) {
			curpg = TAILQ_NEXT(curpg, listq);
		} else {
			soff += MAX(PAGE_SIZE, fs->lfs_bsize);
		}
	}

	return any_dirty;
}

/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 *
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     If VOP_PUTPAGES is called without the seglock held, we simply take
 *     the seglock and let lfs_segunlock wait for us.
 *     XXX There might be a bad situation if we have to flush a vnode while
 *     XXX lfs_markv is in operation.  As of this writing we panic in this
 *     XXX case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 *
 * (2) We are called with vp->v_interlock held; we must return with it
 *     released.
 *
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
 *     into has been set up for us by lfs_writefile.  If not, we will
 *     have to handle allocating and/or freeing an finfo entry.
 *
 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
 */

/* How many times to loop before we should start to worry */
#define TOOMANY 4

int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t off, max_endoffset;
	bool seglocked, sync, pagedaemon;
	struct vm_page *pg, *busypg;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
#ifdef DEBUG
	int debug_n_again, debug_n_dirtyclean;
#endif

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO) != 0;
	pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		mutex_exit(&vp->v_interlock);
		return 0;
	}

	/*
	 * If there are no pages, don't do anything.
	 */
	if (vp->v_uobj.uo_npages == 0) {
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
		mutex_exit(&vp->v_interlock);

		/* Remove us from paging queue, if we were on it */
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
		return 0;
	}

	blkeof = blkroundup(fs, ip->i_size);

	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the request is synchronous.  (If the request is
	 * sync, it comes from lfs_truncate.)
	 * XXXUBC Make these pages look "active" so the pagedaemon won't
	 * XXXUBC bother us with them again.
	 */
	if (!sync && ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
		origoffset = ap->a_offlo;
		for (off = origoffset; off < blkeof; off += fs->lfs_bsize) {
			pg = uvm_pagelookup(&vp->v_uobj, off);
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0,
						    "lfsput2", 0);
				mutex_enter(&vp->v_interlock);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
		}
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			mutex_exit(&vp->v_interlock);
			return 0;
		}
	}

	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(fs->lfs_bmask);
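	/*
	 * max_endoffset is the largest block-aligned offset that still
	 * fits in an off_t: LLONG_MAX truncated to a page boundary and
	 * then to a block boundary.  It is used as the end offset when
	 * the caller asks for all pages or passes no end offset.
	 */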
	max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift)
		<< fs->lfs_bshift;

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		mutex_exit(&vp->v_interlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		      PRId64 "\n", startoffset));
		return 0;
	}
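	/*
	 * Rewrite the caller's range in place, so that genfs_putpages,
	 * which may be handed the same argument block just below, also
	 * sees the block-aligned range.
	 */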
	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT))
		return genfs_putpages(v);

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
				ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			mutex_exit(&vp->v_interlock);
			return EDEADLK;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
				      ap->a_flags, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK)
			return r;

		/* One of the pages was busy.  Start over. */
		mutex_enter(&vp->v_interlock);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while(1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		printf("lfs_putpages: dirtyclean: looping, n = %d\n",
		       debug_n_dirtyclean);
#endif

	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 */
	if (pagedaemon) {
		mutex_enter(&lfs_lock);
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		mutex_exit(&vp->v_interlock);
		preempt();
		return EWOULDBLOCK;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		int locked;

		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));
		locked = (VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
		mutex_exit(&vp->v_interlock);
		lfs_writer_enter(fs, "ppdirop");
		if (locked)
			VOP_UNLOCK(vp, 0);	/* XXX why? */

		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);

		mutex_enter(&vp->v_interlock);
		if (locked) {
			VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
			mutex_enter(&vp->v_interlock);
		}
		lfs_writer_leave(fs);

		/* XXX the flush should have taken care of this one too! */
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
 get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		mutex_exit(&vp->v_interlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
		if (error != 0)
			return error;
		mutex_enter(&vp->v_interlock);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
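	/*
	 * sp->vp records which vnode's blocks are currently being gathered
	 * into the partial segment; the KASSERTs below and in
	 * write_and_wait() check that it stays consistent while we work.
	 */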
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
				ap->a_flags, 0, &busypg) < 0) {
			mutex_exit(&vp->v_interlock);

			mutex_enter(&vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		error = genfs_do_putpages(vp, startoffset, endoffset,
					  ap->a_flags, &busypg);

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %x (seg %d)\n", error,
			      ip->i_number, fs->lfs_offset,
			      dtosn(fs, fs->lfs_offset)));

			mutex_enter(&vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, "again");
		}
#ifdef DEBUG
		++debug_n_again;
#endif
	} while (error == EDEADLK);
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		printf("lfs_putpages: again: looping, n = %d\n", debug_n_again);
#endif
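	/*
	 * If we took the seglock ourselves, we are also responsible for
	 * gathering this file's indirect blocks before the metadata is
	 * updated; when called with PGO_LOCKED, lfs_writefile is expected
	 * to have taken care of that.
	 */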
	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(sp);
	KASSERT(sp->vp == vp);
	sp->vp = NULL;

	/*
	 * If we were called from lfs_writefile, we don't need to clean up
	 * the FIP or unlock the segment lock.  We're done.
	 */
	if (seglocked)
		return error;

	/* Clean up FIP and send it to disk. */
	lfs_release_finfo(fs);
	lfs_writeseg(fs, fs->lfs_sp);

	/*
	 * Remove us from paging queue if we wrote all our pages.
	 */
	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		mutex_enter(&vp->v_interlock);
		while (vp->v_numoutput > 0) {
			DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
			      " num %d\n", ip->i_number, vp->v_numoutput));
			cv_wait(&vp->v_cv, &vp->v_interlock);
		}
		mutex_exit(&vp->v_interlock);
	}
	return error;
}

/*
 * Return the last logical file offset that should be written for this file
 * if we're doing a write that ends at "size".  If writing, we need to know
 * about sizes on disk, i.e. fragments if there are any; if reading, we need
 * to know about entire blocks.
 */
void
lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	daddr_t olbn, nlbn;

	olbn = lblkno(fs, ip->i_size);
	nlbn = lblkno(fs, size);
	if (!(flags & GOP_SIZE_MEM) && nlbn < NDADDR && olbn <= nlbn) {
		*eobp = fragroundup(fs, size);
	} else {
		*eobp = blkroundup(fs, size);
	}
}

#ifdef DEBUG
void lfs_dump_vop(void *);

void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;

#ifdef DDB
	vfs_vnode_print(ap->a_vp, 0, printf);
#endif
	lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din);
}
#endif

int
lfs_mmap(void *v)
{
	struct vop_mmap_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;

	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
		return EOPNOTSUPP;
	return ufs_mmap(v);
}