1 /* $NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
58 * 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $"); 64 65 #ifdef _KERNEL_OPT 66 #include "opt_compat_netbsd.h" 67 #include "opt_uvm_page_trkown.h" 68 #endif 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/namei.h> 73 #include <sys/resourcevar.h> 74 #include <sys/kernel.h> 75 #include <sys/file.h> 76 #include <sys/stat.h> 77 #include <sys/buf.h> 78 #include <sys/proc.h> 79 #include <sys/mount.h> 80 #include <sys/vnode.h> 81 #include <sys/pool.h> 82 #include <sys/signalvar.h> 83 #include <sys/kauth.h> 84 #include <sys/syslog.h> 85 #include <sys/fstrans.h> 86 87 #include <miscfs/fifofs/fifo.h> 88 #include <miscfs/genfs/genfs.h> 89 #include <miscfs/specfs/specdev.h> 90 91 #include <ufs/ufs/inode.h> 92 #include <ufs/ufs/dir.h> 93 #include <ufs/ufs/ufsmount.h> 94 #include <ufs/ufs/ufs_extern.h> 95 96 #include <uvm/uvm.h> 97 #include <uvm/uvm_pmap.h> 98 #include <uvm/uvm_stat.h> 99 #include <uvm/uvm_pager.h> 100 101 #include <ufs/lfs/lfs.h> 102 #include <ufs/lfs/lfs_extern.h> 103 104 extern pid_t lfs_writer_daemon; 105 int lfs_ignore_lazy_sync = 1; 106 107 /* Global vfs data structures for lfs. */ 108 int (**lfs_vnodeop_p)(void *); 109 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { 110 { &vop_default_desc, vn_default_error }, 111 { &vop_lookup_desc, ufs_lookup }, /* lookup */ 112 { &vop_create_desc, lfs_create }, /* create */ 113 { &vop_whiteout_desc, ufs_whiteout }, /* whiteout */ 114 { &vop_mknod_desc, lfs_mknod }, /* mknod */ 115 { &vop_open_desc, ufs_open }, /* open */ 116 { &vop_close_desc, lfs_close }, /* close */ 117 { &vop_access_desc, ufs_access }, /* access */ 118 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 119 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 120 { &vop_read_desc, lfs_read }, /* read */ 121 { &vop_write_desc, lfs_write }, /* write */ 122 { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ 123 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */ 124 { &vop_poll_desc, ufs_poll }, /* poll */ 125 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 126 { &vop_revoke_desc, ufs_revoke }, /* revoke */ 127 { &vop_mmap_desc, lfs_mmap }, /* mmap */ 128 { &vop_fsync_desc, lfs_fsync }, /* fsync */ 129 { &vop_seek_desc, ufs_seek }, /* seek */ 130 { &vop_remove_desc, lfs_remove }, /* remove */ 131 { &vop_link_desc, lfs_link }, /* link */ 132 { &vop_rename_desc, lfs_rename }, /* rename */ 133 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */ 134 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */ 135 { &vop_symlink_desc, lfs_symlink }, /* symlink */ 136 { &vop_readdir_desc, ufs_readdir }, /* readdir */ 137 { &vop_readlink_desc, ufs_readlink }, /* readlink */ 138 { &vop_abortop_desc, ufs_abortop }, /* abortop */ 139 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 140 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 141 { &vop_lock_desc, ufs_lock }, /* lock */ 142 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 143 { &vop_bmap_desc, ufs_bmap }, /* bmap */ 144 { &vop_strategy_desc, lfs_strategy }, /* strategy */ 145 { &vop_print_desc, ufs_print }, /* print */ 146 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 147 { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ 148 { &vop_advlock_desc, ufs_advlock }, /* advlock */ 149 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 150 { &vop_getpages_desc, lfs_getpages }, /* getpages */ 151 { &vop_putpages_desc, lfs_putpages }, /* putpages */ 152 { NULL, NULL } 153 }; 154 const struct vnodeopv_desc 
lfs_vnodeop_opv_desc = 155 { &lfs_vnodeop_p, lfs_vnodeop_entries }; 156 157 int (**lfs_specop_p)(void *); 158 const struct vnodeopv_entry_desc lfs_specop_entries[] = { 159 { &vop_default_desc, vn_default_error }, 160 { &vop_lookup_desc, spec_lookup }, /* lookup */ 161 { &vop_create_desc, spec_create }, /* create */ 162 { &vop_mknod_desc, spec_mknod }, /* mknod */ 163 { &vop_open_desc, spec_open }, /* open */ 164 { &vop_close_desc, lfsspec_close }, /* close */ 165 { &vop_access_desc, ufs_access }, /* access */ 166 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 167 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 168 { &vop_read_desc, ufsspec_read }, /* read */ 169 { &vop_write_desc, ufsspec_write }, /* write */ 170 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 171 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 172 { &vop_poll_desc, spec_poll }, /* poll */ 173 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 174 { &vop_revoke_desc, spec_revoke }, /* revoke */ 175 { &vop_mmap_desc, spec_mmap }, /* mmap */ 176 { &vop_fsync_desc, spec_fsync }, /* fsync */ 177 { &vop_seek_desc, spec_seek }, /* seek */ 178 { &vop_remove_desc, spec_remove }, /* remove */ 179 { &vop_link_desc, spec_link }, /* link */ 180 { &vop_rename_desc, spec_rename }, /* rename */ 181 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 182 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 183 { &vop_symlink_desc, spec_symlink }, /* symlink */ 184 { &vop_readdir_desc, spec_readdir }, /* readdir */ 185 { &vop_readlink_desc, spec_readlink }, /* readlink */ 186 { &vop_abortop_desc, spec_abortop }, /* abortop */ 187 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 188 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 189 { &vop_lock_desc, ufs_lock }, /* lock */ 190 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 191 { &vop_bmap_desc, spec_bmap }, /* bmap */ 192 { &vop_strategy_desc, spec_strategy }, /* strategy */ 193 { &vop_print_desc, ufs_print }, /* print */ 194 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 195 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 196 { &vop_advlock_desc, spec_advlock }, /* advlock */ 197 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 198 { &vop_getpages_desc, spec_getpages }, /* getpages */ 199 { &vop_putpages_desc, spec_putpages }, /* putpages */ 200 { NULL, NULL } 201 }; 202 const struct vnodeopv_desc lfs_specop_opv_desc = 203 { &lfs_specop_p, lfs_specop_entries }; 204 205 int (**lfs_fifoop_p)(void *); 206 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = { 207 { &vop_default_desc, vn_default_error }, 208 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */ 209 { &vop_create_desc, vn_fifo_bypass }, /* create */ 210 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */ 211 { &vop_open_desc, vn_fifo_bypass }, /* open */ 212 { &vop_close_desc, lfsfifo_close }, /* close */ 213 { &vop_access_desc, ufs_access }, /* access */ 214 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 215 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 216 { &vop_read_desc, ufsfifo_read }, /* read */ 217 { &vop_write_desc, ufsfifo_write }, /* write */ 218 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */ 219 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 220 { &vop_poll_desc, vn_fifo_bypass }, /* poll */ 221 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */ 222 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */ 223 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */ 224 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */ 225 { &vop_seek_desc, vn_fifo_bypass }, /* seek */ 226 { &vop_remove_desc, vn_fifo_bypass }, /* remove */ 
	{ &vop_link_desc, vn_fifo_bypass },		/* link */
	{ &vop_rename_desc, vn_fifo_bypass },		/* rename */
	{ &vop_mkdir_desc, vn_fifo_bypass },		/* mkdir */
	{ &vop_rmdir_desc, vn_fifo_bypass },		/* rmdir */
	{ &vop_symlink_desc, vn_fifo_bypass },		/* symlink */
	{ &vop_readdir_desc, vn_fifo_bypass },		/* readdir */
	{ &vop_readlink_desc, vn_fifo_bypass },		/* readlink */
	{ &vop_abortop_desc, vn_fifo_bypass },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_bmap_desc, vn_fifo_bypass },		/* bmap */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* strategy */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ &vop_pathconf_desc, vn_fifo_bypass },		/* pathconf */
	{ &vop_advlock_desc, vn_fifo_bypass },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_putpages_desc, vn_fifo_bypass },		/* putpages */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_fifoop_opv_desc =
	{ &lfs_fifoop_p, lfs_fifoop_entries };

static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **);

#define	LFS_READWRITE
#include <ufs/ufs/ufs_readwrite.c>
#undef	LFS_READWRITE

/*
 * Synch an open file.
 */
/* ARGSUSED */
int
lfs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t offlo;
		off_t offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error, wait;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	/* If we're mounted read-only, don't try to sync. */
	if (fs->lfs_ronly)
		return 0;

	/* If a removed vnode is being cleaned, no need to sync here. */
	if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
		return 0;

	/*
	 * Trickle sync simply adds this vnode to the pager list, as if
	 * the pagedaemon had requested a pageout.
	 */
	if (ap->a_flags & FSYNC_LAZY) {
		if (lfs_ignore_lazy_sync == 0) {
			mutex_enter(&lfs_lock);
			if (!(ip->i_flags & IN_PAGING)) {
				ip->i_flags |= IN_PAGING;
				TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
				    i_lfs_pchain);
			}
			wakeup(&lfs_writer_daemon);
			mutex_exit(&lfs_lock);
		}
		return 0;
	}

	/*
	 * If a vnode is being cleaned, flush it out before we try to
	 * reuse it.  This prevents the cleaner from writing files twice
	 * in the same partial segment, causing an accounting underflow.
	 */
	if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
		lfs_vflush(vp);
	}

	wait = (ap->a_flags & FSYNC_WAIT);
	do {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
		    round_page(ap->a_offhi),
		    PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
		if (error == EAGAIN) {
			mutex_enter(&lfs_lock);
			mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync",
			    hz / 100 + 1, &lfs_lock);
			mutex_exit(&lfs_lock);
		}
	} while (error == EAGAIN);
	if (error)
		return error;

	if ((ap->a_flags & FSYNC_DATAONLY) == 0)
		error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
		    curlwp->l_cred);
	}
	if (wait && !VPISEMPTY(vp))
		LFS_SET_UINO(ip, IN_MODIFIED);

	return error;
}

/*
 * Take IN_ADIROP off, then call ufs_inactive.
 */
int
lfs_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	lfs_unmark_vnode(ap->a_vp);

	/*
	 * The Ifile is only ever inactivated on unmount.
	 * Streamline this process by not giving it more dirty blocks.
	 */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
		mutex_enter(&lfs_lock);
		LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
		mutex_exit(&lfs_lock);
		VOP_UNLOCK(ap->a_vp);
		return 0;
	}

	return ufs_inactive(v);
}

/*
 * These macros are used to bracket UFS directory ops, so that we can
 * identify all the pages touched during directory ops which need to
 * be ordered and flushed atomically, so that they may be recovered.
 *
 * Because we have to mark nodes VU_DIROP in order to prevent
 * the cache from reclaiming them while a dirop is in progress, we must
 * also manage the number of nodes so marked (otherwise we can run out).
 * We do this by setting lfs_dirvcount to the number of marked vnodes; it
 * is decremented during segment write, when VU_DIROP is taken off.
 */
#define	MARK_VNODE(vp)			lfs_mark_vnode(vp)
#define	UNMARK_VNODE(vp)		lfs_unmark_vnode(vp)
#define	SET_DIROP_CREATE(dvp, vpp)	lfs_set_dirop_create((dvp), (vpp))
#define	SET_DIROP_REMOVE(dvp, vp)	lfs_set_dirop((dvp), (vp))
static int lfs_set_dirop_create(struct vnode *, struct vnode **);
static int lfs_set_dirop(struct vnode *, struct vnode *);

static int
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
	struct lfs *fs;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(vp == NULL || VOP_ISLOCKED(vp));

	fs = VTOI(dvp)->i_lfs;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * LFS_NRESERVE calculates direct and indirect blocks as well
	 * as an inode block; an overestimate in most cases.
	 */
	if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
		return (error);

    restart:
	mutex_enter(&lfs_lock);
	if (fs->lfs_dirops == 0) {
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
		mutex_enter(&lfs_lock);
	}
	while (fs->lfs_writer) {
		error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
		    "lfs_sdirop", 0, &lfs_lock);
		if (error == EINTR) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
	}
	if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		goto restart;
	}

	if (lfs_dirvcount > LFS_MAX_DIROP) {
		mutex_exit(&lfs_lock);
		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
		      "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
		if ((error = mtsleep(&lfs_dirvcount,
		    PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
		    &lfs_lock)) != 0) {
			goto unreserve;
		}
		goto restart;
	}

	++fs->lfs_dirops;
	fs->lfs_doifile = 1;
	mutex_exit(&lfs_lock);

	/* Hold a reference so SET_ENDOP will be happy */
	vref(dvp);
	if (vp) {
		vref(vp);
		MARK_VNODE(vp);
	}

	MARK_VNODE(dvp);
	return 0;

    unreserve:
	lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
	return error;
}

/*
 * Get a new vnode *before* adjusting the dirop count, to avoid a deadlock
 * in getnewvnode(), if we have a stacked filesystem mounted on top
 * of us.
 *
 * NB: this means we have to clear the new vnodes on error.  Fortunately
 * SET_ENDOP is there to do that for us.
 */
static int
lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp)
{
	int error;
	struct lfs *fs;

	fs = VFSTOUFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly)
		return EROFS;
	if (vpp == NULL) {
		return lfs_set_dirop(dvp, NULL);
	}
	error = getnewvnode(VT_LFS, dvp->v_mount, lfs_vnodeop_p, NULL, vpp);
	if (error) {
		DLOG((DLOG_ALLOC, "lfs_set_dirop_create: dvp %p error %d\n",
		      dvp, error));
		return error;
	}
	if ((error = lfs_set_dirop(dvp, NULL)) != 0) {
		ungetnewvnode(*vpp);
		*vpp = NULL;
		return error;
	}
	return 0;
}

#define	SET_ENDOP_BASE(fs, dvp, str) \
	do { \
		mutex_enter(&lfs_lock); \
		--(fs)->lfs_dirops; \
		if (!(fs)->lfs_dirops) { \
			if ((fs)->lfs_nadirop) { \
				panic("SET_ENDOP: %s: no dirops but " \
				      " nadirop=%d", (str), \
				      (fs)->lfs_nadirop); \
			} \
			wakeup(&(fs)->lfs_writer); \
			mutex_exit(&lfs_lock); \
			lfs_check((dvp), LFS_UNUSED_LBN, 0); \
		} else \
			mutex_exit(&lfs_lock); \
	} while(0)
#define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \
	do { \
		UNMARK_VNODE(dvp); \
		if (nvpp && *nvpp) \
			UNMARK_VNODE(*nvpp); \
		/* Check for error return to stem vnode leakage */ \
		if (nvpp && *nvpp && !((*nvpp)->v_uflag & VU_DIROP)) \
			ungetnewvnode(*(nvpp)); \
		SET_ENDOP_BASE((fs), (dvp), (str)); \
		lfs_reserve((fs), (dvp), NULL, -LFS_NRESERVE(fs)); \
		vrele(dvp); \
	} while(0)
#define SET_ENDOP_CREATE_AP(ap, str) \
	SET_ENDOP_CREATE(VTOI((ap)->a_dvp)->i_lfs, (ap)->a_dvp, \
			 (ap)->a_vpp, (str))
#define SET_ENDOP_REMOVE(fs, dvp, ovp, str) \
	do { \
		UNMARK_VNODE(dvp); \
		if (ovp) \
			UNMARK_VNODE(ovp); \
		SET_ENDOP_BASE((fs), (dvp), (str)); \
		lfs_reserve((fs), (dvp), (ovp), -LFS_NRESERVE(fs)); \
		vrele(dvp); \
		if (ovp) \
			vrele(ovp); \
	} while(0)
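
/*
 * Usage sketch: each directory-modifying vnode op below brackets its
 * underlying UFS call with the SET_DIROP/SET_ENDOP macros above; for a
 * create-type op the pattern is
 *
 *	if ((error = SET_DIROP_CREATE(dvp, vpp)) != 0) {
 *		vput(dvp);
 *		return error;
 *	}
 *	error = ufs_create(ap);
 *	SET_ENDOP_CREATE_AP(ap, "create");
 *	return (error);
 */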
536 537 void 538 lfs_mark_vnode(struct vnode *vp) 539 { 540 struct inode *ip = VTOI(vp); 541 struct lfs *fs = ip->i_lfs; 542 543 mutex_enter(&lfs_lock); 544 if (!(ip->i_flag & IN_ADIROP)) { 545 if (!(vp->v_uflag & VU_DIROP)) { 546 mutex_enter(vp->v_interlock); 547 (void)lfs_vref(vp); 548 ++lfs_dirvcount; 549 ++fs->lfs_dirvcount; 550 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); 551 vp->v_uflag |= VU_DIROP; 552 } 553 ++fs->lfs_nadirop; 554 ip->i_flag |= IN_ADIROP; 555 } else 556 KASSERT(vp->v_uflag & VU_DIROP); 557 mutex_exit(&lfs_lock); 558 } 559 560 void 561 lfs_unmark_vnode(struct vnode *vp) 562 { 563 struct inode *ip = VTOI(vp); 564 565 if (ip && (ip->i_flag & IN_ADIROP)) { 566 KASSERT(vp->v_uflag & VU_DIROP); 567 mutex_enter(&lfs_lock); 568 --ip->i_lfs->lfs_nadirop; 569 mutex_exit(&lfs_lock); 570 ip->i_flag &= ~IN_ADIROP; 571 } 572 } 573 574 int 575 lfs_symlink(void *v) 576 { 577 struct vop_symlink_args /* { 578 struct vnode *a_dvp; 579 struct vnode **a_vpp; 580 struct componentname *a_cnp; 581 struct vattr *a_vap; 582 char *a_target; 583 } */ *ap = v; 584 int error; 585 586 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 587 vput(ap->a_dvp); 588 return error; 589 } 590 error = ufs_symlink(ap); 591 SET_ENDOP_CREATE_AP(ap, "symlink"); 592 return (error); 593 } 594 595 int 596 lfs_mknod(void *v) 597 { 598 struct vop_mknod_args /* { 599 struct vnode *a_dvp; 600 struct vnode **a_vpp; 601 struct componentname *a_cnp; 602 struct vattr *a_vap; 603 } */ *ap = v; 604 struct vattr *vap = ap->a_vap; 605 struct vnode **vpp = ap->a_vpp; 606 struct inode *ip; 607 int error; 608 struct mount *mp; 609 ino_t ino; 610 struct ufs_lookup_results *ulr; 611 612 /* XXX should handle this material another way */ 613 ulr = &VTOI(ap->a_dvp)->i_crap; 614 UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 615 616 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 617 vput(ap->a_dvp); 618 return error; 619 } 620 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 621 ap->a_dvp, ulr, vpp, ap->a_cnp); 622 623 /* Either way we're done with the dirop at this point */ 624 SET_ENDOP_CREATE_AP(ap, "mknod"); 625 626 if (error) 627 return (error); 628 629 ip = VTOI(*vpp); 630 mp = (*vpp)->v_mount; 631 ino = ip->i_number; 632 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 633 if (vap->va_rdev != VNOVAL) { 634 /* 635 * Want to be able to use this to make badblock 636 * inodes, so don't truncate the dev number. 637 */ 638 #if 0 639 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 640 UFS_MPNEEDSWAP((*vpp)->v_mount)); 641 #else 642 ip->i_ffs1_rdev = vap->va_rdev; 643 #endif 644 } 645 646 /* 647 * Call fsync to write the vnode so that we don't have to deal with 648 * flushing it when it's marked VU_DIROP|VI_XLOCK. 649 * 650 * XXX KS - If we can't flush we also can't call vgone(), so must 651 * return. But, that leaves this vnode in limbo, also not good. 652 * Can this ever happen (barring hardware failure)? 653 */ 654 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) { 655 panic("lfs_mknod: couldn't fsync (ino %llu)", 656 (unsigned long long)ino); 657 /* return (error); */ 658 } 659 /* 660 * Remove vnode so that it will be reloaded by VFS_VGET and 661 * checked to see if it is an alias of an existing entry in 662 * the inode cache. 663 */ 664 /* Used to be vput, but that causes us to call VOP_INACTIVE twice. 
*/ 665 666 VOP_UNLOCK(*vpp); 667 (*vpp)->v_type = VNON; 668 vgone(*vpp); 669 error = VFS_VGET(mp, ino, vpp); 670 671 if (error != 0) { 672 *vpp = NULL; 673 return (error); 674 } 675 return (0); 676 } 677 678 int 679 lfs_create(void *v) 680 { 681 struct vop_create_args /* { 682 struct vnode *a_dvp; 683 struct vnode **a_vpp; 684 struct componentname *a_cnp; 685 struct vattr *a_vap; 686 } */ *ap = v; 687 int error; 688 689 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 690 vput(ap->a_dvp); 691 return error; 692 } 693 error = ufs_create(ap); 694 SET_ENDOP_CREATE_AP(ap, "create"); 695 return (error); 696 } 697 698 int 699 lfs_mkdir(void *v) 700 { 701 struct vop_mkdir_args /* { 702 struct vnode *a_dvp; 703 struct vnode **a_vpp; 704 struct componentname *a_cnp; 705 struct vattr *a_vap; 706 } */ *ap = v; 707 int error; 708 709 if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) { 710 vput(ap->a_dvp); 711 return error; 712 } 713 error = ufs_mkdir(ap); 714 SET_ENDOP_CREATE_AP(ap, "mkdir"); 715 return (error); 716 } 717 718 int 719 lfs_remove(void *v) 720 { 721 struct vop_remove_args /* { 722 struct vnode *a_dvp; 723 struct vnode *a_vp; 724 struct componentname *a_cnp; 725 } */ *ap = v; 726 struct vnode *dvp, *vp; 727 struct inode *ip; 728 int error; 729 730 dvp = ap->a_dvp; 731 vp = ap->a_vp; 732 ip = VTOI(vp); 733 if ((error = SET_DIROP_REMOVE(dvp, vp)) != 0) { 734 if (dvp == vp) 735 vrele(vp); 736 else 737 vput(vp); 738 vput(dvp); 739 return error; 740 } 741 error = ufs_remove(ap); 742 if (ip->i_nlink == 0) 743 lfs_orphan(ip->i_lfs, ip->i_number); 744 SET_ENDOP_REMOVE(ip->i_lfs, dvp, ap->a_vp, "remove"); 745 return (error); 746 } 747 748 int 749 lfs_rmdir(void *v) 750 { 751 struct vop_rmdir_args /* { 752 struct vnodeop_desc *a_desc; 753 struct vnode *a_dvp; 754 struct vnode *a_vp; 755 struct componentname *a_cnp; 756 } */ *ap = v; 757 struct vnode *vp; 758 struct inode *ip; 759 int error; 760 761 vp = ap->a_vp; 762 ip = VTOI(vp); 763 if ((error = SET_DIROP_REMOVE(ap->a_dvp, ap->a_vp)) != 0) { 764 if (ap->a_dvp == vp) 765 vrele(ap->a_dvp); 766 else 767 vput(ap->a_dvp); 768 vput(vp); 769 return error; 770 } 771 error = ufs_rmdir(ap); 772 if (ip->i_nlink == 0) 773 lfs_orphan(ip->i_lfs, ip->i_number); 774 SET_ENDOP_REMOVE(ip->i_lfs, ap->a_dvp, ap->a_vp, "rmdir"); 775 return (error); 776 } 777 778 int 779 lfs_link(void *v) 780 { 781 struct vop_link_args /* { 782 struct vnode *a_dvp; 783 struct vnode *a_vp; 784 struct componentname *a_cnp; 785 } */ *ap = v; 786 int error; 787 struct vnode **vpp = NULL; 788 789 if ((error = SET_DIROP_CREATE(ap->a_dvp, vpp)) != 0) { 790 vput(ap->a_dvp); 791 return error; 792 } 793 error = ufs_link(ap); 794 SET_ENDOP_CREATE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vpp, "link"); 795 return (error); 796 } 797 798 int 799 lfs_rename(void *v) 800 { 801 struct vop_rename_args /* { 802 struct vnode *a_fdvp; 803 struct vnode *a_fvp; 804 struct componentname *a_fcnp; 805 struct vnode *a_tdvp; 806 struct vnode *a_tvp; 807 struct componentname *a_tcnp; 808 } */ *ap = v; 809 struct vnode *tvp, *fvp, *tdvp, *fdvp; 810 struct componentname *tcnp, *fcnp; 811 int error; 812 struct lfs *fs; 813 814 fs = VTOI(ap->a_fdvp)->i_lfs; 815 tvp = ap->a_tvp; 816 tdvp = ap->a_tdvp; 817 tcnp = ap->a_tcnp; 818 fvp = ap->a_fvp; 819 fdvp = ap->a_fdvp; 820 fcnp = ap->a_fcnp; 821 822 /* 823 * Check for cross-device rename. 824 * If it is, we don't want to set dirops, just error out. 825 * (In particular note that MARK_VNODE(tdvp) will DTWT on 826 * a cross-device rename.) 
827 * 828 * Copied from ufs_rename. 829 */ 830 if ((fvp->v_mount != tdvp->v_mount) || 831 (tvp && (fvp->v_mount != tvp->v_mount))) { 832 error = EXDEV; 833 goto errout; 834 } 835 836 /* 837 * Check to make sure we're not renaming a vnode onto itself 838 * (deleting a hard link by renaming one name onto another); 839 * if we are we can't recursively call VOP_REMOVE since that 840 * would leave us with an unaccounted-for number of live dirops. 841 * 842 * Inline the relevant section of ufs_rename here, *before* 843 * calling SET_DIROP_REMOVE. 844 */ 845 if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || 846 (VTOI(tdvp)->i_flags & APPEND))) { 847 error = EPERM; 848 goto errout; 849 } 850 if (fvp == tvp) { 851 if (fvp->v_type == VDIR) { 852 error = EINVAL; 853 goto errout; 854 } 855 856 /* Release destination completely. */ 857 VOP_ABORTOP(tdvp, tcnp); 858 vput(tdvp); 859 vput(tvp); 860 861 /* Delete source. */ 862 vrele(fvp); 863 fcnp->cn_flags &= ~(MODMASK); 864 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 865 fcnp->cn_nameiop = DELETE; 866 vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); 867 if ((error = relookup(fdvp, &fvp, fcnp, 0))) { 868 vput(fdvp); 869 return (error); 870 } 871 return (VOP_REMOVE(fdvp, fvp, fcnp)); 872 } 873 874 if ((error = SET_DIROP_REMOVE(tdvp, tvp)) != 0) 875 goto errout; 876 MARK_VNODE(fdvp); 877 MARK_VNODE(fvp); 878 879 error = ufs_rename(ap); 880 UNMARK_VNODE(fdvp); 881 UNMARK_VNODE(fvp); 882 SET_ENDOP_REMOVE(fs, tdvp, tvp, "rename"); 883 return (error); 884 885 errout: 886 VOP_ABORTOP(tdvp, ap->a_tcnp); /* XXX, why not in NFS? */ 887 if (tdvp == tvp) 888 vrele(tdvp); 889 else 890 vput(tdvp); 891 if (tvp) 892 vput(tvp); 893 VOP_ABORTOP(fdvp, ap->a_fcnp); /* XXX, why not in NFS? */ 894 vrele(fdvp); 895 vrele(fvp); 896 return (error); 897 } 898 899 /* XXX hack to avoid calling ITIMES in getattr */ 900 int 901 lfs_getattr(void *v) 902 { 903 struct vop_getattr_args /* { 904 struct vnode *a_vp; 905 struct vattr *a_vap; 906 kauth_cred_t a_cred; 907 } */ *ap = v; 908 struct vnode *vp = ap->a_vp; 909 struct inode *ip = VTOI(vp); 910 struct vattr *vap = ap->a_vap; 911 struct lfs *fs = ip->i_lfs; 912 /* 913 * Copy from inode table 914 */ 915 vap->va_fsid = ip->i_dev; 916 vap->va_fileid = ip->i_number; 917 vap->va_mode = ip->i_mode & ~IFMT; 918 vap->va_nlink = ip->i_nlink; 919 vap->va_uid = ip->i_uid; 920 vap->va_gid = ip->i_gid; 921 vap->va_rdev = (dev_t)ip->i_ffs1_rdev; 922 vap->va_size = vp->v_size; 923 vap->va_atime.tv_sec = ip->i_ffs1_atime; 924 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 925 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 926 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 927 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 928 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 929 vap->va_flags = ip->i_flags; 930 vap->va_gen = ip->i_gen; 931 /* this doesn't belong here */ 932 if (vp->v_type == VBLK) 933 vap->va_blocksize = BLKDEV_IOSIZE; 934 else if (vp->v_type == VCHR) 935 vap->va_blocksize = MAXBSIZE; 936 else 937 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 938 vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks); 939 vap->va_type = vp->v_type; 940 vap->va_filerev = ip->i_modrev; 941 return (0); 942 } 943 944 /* 945 * Check to make sure the inode blocks won't choke the buffer 946 * cache, then call ufs_setattr as usual. 
 */
int
lfs_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	lfs_check(vp, LFS_UNUSED_LBN, 0);
	return ufs_setattr(v);
}

/*
 * Release the block we hold on lfs_newseg wrapping.  Called on file close,
 * or explicitly from LFCNWRAPGO.  Called with the interlock held.
 */
static int
lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
{
	if (fs->lfs_stoplwp != curlwp)
		return EBUSY;

	fs->lfs_stoplwp = NULL;
	cv_signal(&fs->lfs_stopcv);

	KASSERT(fs->lfs_nowrap > 0);
	if (fs->lfs_nowrap <= 0) {
		return 0;
	}

	if (--fs->lfs_nowrap == 0) {
		log(LOG_NOTICE, "%s: re-enabled log wrap\n", fs->lfs_fsmnt);
		wakeup(&fs->lfs_wrappass);
		lfs_wakeup_cleaner(fs);
	}
	if (waitfor) {
		mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment",
		    0, &lfs_lock);
	}

	return 0;
}

/*
 * Close called
 */
/* ARGSUSED */
int
lfs_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	if ((ip->i_number == ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
	    fs->lfs_stoplwp == curlwp) {
		mutex_enter(&lfs_lock);
		log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
		lfs_wrapgo(fs, ip, 0);
		mutex_exit(&lfs_lock);
	}

	if (vp == ip->i_lfs->lfs_ivnode &&
	    vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
		return 0;

	if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (0);
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
lfsspec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (vp->v_usecount > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Close wrapper for fifo's.
 *
 * Update the times on the inode then do device close.
 */
int
lfsfifo_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ap->a_vp->v_usecount > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */

int
lfs_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	int error;

	/*
	 * The inode must be freed and updated before being removed
	 * from its hash chain.  Other threads trying to gain a hold
	 * on the inode will be stalled because it is locked (VI_XLOCK).
1094 */ 1095 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1096 lfs_vfree(vp, ip->i_number, ip->i_omode); 1097 1098 mutex_enter(&lfs_lock); 1099 LFS_CLR_UINO(ip, IN_ALLMOD); 1100 mutex_exit(&lfs_lock); 1101 if ((error = ufs_reclaim(vp))) 1102 return (error); 1103 1104 /* 1105 * Take us off the paging and/or dirop queues if we were on them. 1106 * We shouldn't be on them. 1107 */ 1108 mutex_enter(&lfs_lock); 1109 if (ip->i_flags & IN_PAGING) { 1110 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1111 fs->lfs_fsmnt); 1112 ip->i_flags &= ~IN_PAGING; 1113 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1114 } 1115 if (vp->v_uflag & VU_DIROP) { 1116 panic("reclaimed vnode is VU_DIROP"); 1117 vp->v_uflag &= ~VU_DIROP; 1118 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1119 } 1120 mutex_exit(&lfs_lock); 1121 1122 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); 1123 lfs_deregister_all(vp); 1124 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1125 ip->inode_ext.lfs = NULL; 1126 genfs_node_destroy(vp); 1127 pool_put(&lfs_inode_pool, vp->v_data); 1128 vp->v_data = NULL; 1129 return (0); 1130 } 1131 1132 /* 1133 * Read a block from a storage device. 1134 * In order to avoid reading blocks that are in the process of being 1135 * written by the cleaner---and hence are not mutexed by the normal 1136 * buffer cache / page cache mechanisms---check for collisions before 1137 * reading. 1138 * 1139 * We inline ufs_strategy to make sure that the VOP_BMAP occurs *before* 1140 * the active cleaner test. 1141 * 1142 * XXX This code assumes that lfs_markv makes synchronous checkpoints. 1143 */ 1144 int 1145 lfs_strategy(void *v) 1146 { 1147 struct vop_strategy_args /* { 1148 struct vnode *a_vp; 1149 struct buf *a_bp; 1150 } */ *ap = v; 1151 struct buf *bp; 1152 struct lfs *fs; 1153 struct vnode *vp; 1154 struct inode *ip; 1155 daddr_t tbn; 1156 int i, sn, error, slept; 1157 1158 bp = ap->a_bp; 1159 vp = ap->a_vp; 1160 ip = VTOI(vp); 1161 fs = ip->i_lfs; 1162 1163 /* lfs uses its strategy routine only for read */ 1164 KASSERT(bp->b_flags & B_READ); 1165 1166 if (vp->v_type == VBLK || vp->v_type == VCHR) 1167 panic("lfs_strategy: spec"); 1168 KASSERT(bp->b_bcount != 0); 1169 if (bp->b_blkno == bp->b_lblkno) { 1170 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1171 NULL); 1172 if (error) { 1173 bp->b_error = error; 1174 bp->b_resid = bp->b_bcount; 1175 biodone(bp); 1176 return (error); 1177 } 1178 if ((long)bp->b_blkno == -1) /* no valid data */ 1179 clrbuf(bp); 1180 } 1181 if ((long)bp->b_blkno < 0) { /* block is not on disk */ 1182 bp->b_resid = bp->b_bcount; 1183 biodone(bp); 1184 return (0); 1185 } 1186 1187 slept = 1; 1188 mutex_enter(&lfs_lock); 1189 while (slept && fs->lfs_seglock) { 1190 mutex_exit(&lfs_lock); 1191 /* 1192 * Look through list of intervals. 1193 * There will only be intervals to look through 1194 * if the cleaner holds the seglock. 1195 * Since the cleaner is synchronous, we can trust 1196 * the list of intervals to be current. 
1197 */ 1198 tbn = dbtofsb(fs, bp->b_blkno); 1199 sn = dtosn(fs, tbn); 1200 slept = 0; 1201 for (i = 0; i < fs->lfs_cleanind; i++) { 1202 if (sn == dtosn(fs, fs->lfs_cleanint[i]) && 1203 tbn >= fs->lfs_cleanint[i]) { 1204 DLOG((DLOG_CLEAN, 1205 "lfs_strategy: ino %d lbn %" PRId64 1206 " ind %d sn %d fsb %" PRIx32 1207 " given sn %d fsb %" PRIx64 "\n", 1208 ip->i_number, bp->b_lblkno, i, 1209 dtosn(fs, fs->lfs_cleanint[i]), 1210 fs->lfs_cleanint[i], sn, tbn)); 1211 DLOG((DLOG_CLEAN, 1212 "lfs_strategy: sleeping on ino %d lbn %" 1213 PRId64 "\n", ip->i_number, bp->b_lblkno)); 1214 mutex_enter(&lfs_lock); 1215 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { 1216 /* Cleaner can't wait for itself */ 1217 mtsleep(&fs->lfs_iocount, 1218 (PRIBIO + 1) | PNORELOCK, 1219 "clean2", 0, 1220 &lfs_lock); 1221 slept = 1; 1222 break; 1223 } else if (fs->lfs_seglock) { 1224 mtsleep(&fs->lfs_seglock, 1225 (PRIBIO + 1) | PNORELOCK, 1226 "clean1", 0, 1227 &lfs_lock); 1228 slept = 1; 1229 break; 1230 } 1231 mutex_exit(&lfs_lock); 1232 } 1233 } 1234 mutex_enter(&lfs_lock); 1235 } 1236 mutex_exit(&lfs_lock); 1237 1238 vp = ip->i_devvp; 1239 VOP_STRATEGY(vp, bp); 1240 return (0); 1241 } 1242 1243 void 1244 lfs_flush_dirops(struct lfs *fs) 1245 { 1246 struct inode *ip, *nip; 1247 struct vnode *vp; 1248 extern int lfs_dostats; 1249 struct segment *sp; 1250 1251 ASSERT_MAYBE_SEGLOCK(fs); 1252 KASSERT(fs->lfs_nadirop == 0); 1253 1254 if (fs->lfs_ronly) 1255 return; 1256 1257 mutex_enter(&lfs_lock); 1258 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { 1259 mutex_exit(&lfs_lock); 1260 return; 1261 } else 1262 mutex_exit(&lfs_lock); 1263 1264 if (lfs_dostats) 1265 ++lfs_stats.flush_invoked; 1266 1267 /* 1268 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops. 1269 * Technically this is a checkpoint (the on-disk state is valid) 1270 * even though we are leaving out all the file data. 1271 */ 1272 lfs_imtime(fs); 1273 lfs_seglock(fs, SEGM_CKP); 1274 sp = fs->lfs_sp; 1275 1276 /* 1277 * lfs_writevnodes, optimized to get dirops out of the way. 1278 * Only write dirops, and don't flush files' pages, only 1279 * blocks from the directories. 1280 * 1281 * We don't need to vref these files because they are 1282 * dirops and so hold an extra reference until the 1283 * segunlock clears them of that status. 1284 * 1285 * We don't need to check for IN_ADIROP because we know that 1286 * no dirops are active. 1287 * 1288 */ 1289 mutex_enter(&lfs_lock); 1290 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 1291 nip = TAILQ_NEXT(ip, i_lfs_dchain); 1292 mutex_exit(&lfs_lock); 1293 vp = ITOV(ip); 1294 1295 KASSERT((ip->i_flag & IN_ADIROP) == 0); 1296 1297 /* 1298 * All writes to directories come from dirops; all 1299 * writes to files' direct blocks go through the page 1300 * cache, which we're not touching. Reads to files 1301 * and/or directories will not be affected by writing 1302 * directory blocks inodes and file inodes. So we don't 1303 * really need to lock. If we don't lock, though, 1304 * make sure that we don't clear IN_MODIFIED 1305 * unnecessarily. 
1306 */ 1307 if (vp->v_iflag & VI_XLOCK) { 1308 mutex_enter(&lfs_lock); 1309 continue; 1310 } 1311 /* XXX see below 1312 * waslocked = VOP_ISLOCKED(vp); 1313 */ 1314 if (vp->v_type != VREG && 1315 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) { 1316 lfs_writefile(fs, sp, vp); 1317 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1318 !(ip->i_flag & IN_ALLMOD)) { 1319 mutex_enter(&lfs_lock); 1320 LFS_SET_UINO(ip, IN_MODIFIED); 1321 mutex_exit(&lfs_lock); 1322 } 1323 } 1324 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1325 (void) lfs_writeinode(fs, sp, ip); 1326 mutex_enter(&lfs_lock); 1327 /* 1328 * XXX 1329 * LK_EXCLOTHER is dead -- what is intended here? 1330 * if (waslocked == LK_EXCLOTHER) 1331 * LFS_SET_UINO(ip, IN_MODIFIED); 1332 */ 1333 } 1334 mutex_exit(&lfs_lock); 1335 /* We've written all the dirops there are */ 1336 ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); 1337 lfs_finalize_fs_seguse(fs); 1338 (void) lfs_writeseg(fs, sp); 1339 lfs_segunlock(fs); 1340 } 1341 1342 /* 1343 * Flush all vnodes for which the pagedaemon has requested pageouts. 1344 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop() 1345 * has just run, this would be an error). If we have to skip a vnode 1346 * for any reason, just skip it; if we have to wait for the cleaner, 1347 * abort. The writer daemon will call us again later. 1348 */ 1349 void 1350 lfs_flush_pchain(struct lfs *fs) 1351 { 1352 struct inode *ip, *nip; 1353 struct vnode *vp; 1354 extern int lfs_dostats; 1355 struct segment *sp; 1356 int error; 1357 1358 ASSERT_NO_SEGLOCK(fs); 1359 1360 if (fs->lfs_ronly) 1361 return; 1362 1363 mutex_enter(&lfs_lock); 1364 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { 1365 mutex_exit(&lfs_lock); 1366 return; 1367 } else 1368 mutex_exit(&lfs_lock); 1369 1370 /* Get dirops out of the way */ 1371 lfs_flush_dirops(fs); 1372 1373 if (lfs_dostats) 1374 ++lfs_stats.flush_invoked; 1375 1376 /* 1377 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts. 1378 */ 1379 lfs_imtime(fs); 1380 lfs_seglock(fs, 0); 1381 sp = fs->lfs_sp; 1382 1383 /* 1384 * lfs_writevnodes, optimized to clear pageout requests. 1385 * Only write non-dirop files that are in the pageout queue. 1386 * We're very conservative about what we write; we want to be 1387 * fast and async. 
 */
	mutex_enter(&lfs_lock);
    top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_pchain);
		vp = ITOV(ip);

		if (!(ip->i_flags & IN_PAGING))
			goto top;

		mutex_enter(vp->v_interlock);
		if ((vp->v_iflag & VI_XLOCK) || (vp->v_uflag & VU_DIROP) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		if (vp->v_type != VREG) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		if (lfs_vref(vp))
			continue;
		mutex_exit(&lfs_lock);

		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_RETRY) != 0) {
			lfs_vunref(vp);
			mutex_enter(&lfs_lock);
			continue;
		}

		error = lfs_writefile(fs, sp, vp);
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_flag & IN_ALLMOD)) {
			mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
			mutex_exit(&lfs_lock);
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		(void) lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		lfs_vunref(vp);

		if (error == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
}

/*
 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
 */
int
lfs_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void * a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct timeval tv;
	struct timeval *tvp;
	BLOCK_INFO *blkiov;
	CLEANERINFO *cip;
	SEGUSE *sup;
	int blkcnt, error, oclean;
	size_t fh_size;
	struct lfs_fcntl_markv blkvp;
	struct lwp *l;
	fsid_t *fsidp;
	struct lfs *fs;
	struct buf *bp;
	fhandle_t *fhp;
	daddr_t off;

	/* Only respect LFS fcntls on fs root or Ifile */
	if (VTOI(ap->a_vp)->i_number != ROOTINO &&
	    VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
		return ufs_fcntl(v);
	}

	/* Avoid locking a draining lock */
	if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
		return ESHUTDOWN;
	}

	/* LFS control and monitoring fcntls are available only to root */
	l = curlwp;
	if (((ap->a_command & 0xff00) >> 8) == 'L' &&
	    (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	fs = VTOI(ap->a_vp)->i_lfs;
	fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;

	error = 0;
	switch ((int)ap->a_command) {
	case LFCNSEGWAITALL_COMPAT_50:
	case LFCNSEGWAITALL_COMPAT:
		fsidp = NULL;
		/* FALLTHROUGH */
	case LFCNSEGWAIT_COMPAT_50:
	case LFCNSEGWAIT_COMPAT:
	    {
		struct timeval50 *tvp50
		    = (struct timeval50 *)ap->a_data;
		timeval50_to_timeval(tvp50, &tv);
		tvp = &tv;
	    }
	    goto segwait_common;
	case LFCNSEGWAITALL:
		fsidp = NULL;
		/* FALLTHROUGH */
	case LFCNSEGWAIT:
		tvp = (struct timeval *)ap->a_data;
	    segwait_common:
		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);

		error = lfs_segwait(fsidp, tvp);

		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			wakeup(&fs->lfs_sleepers);
		mutex_exit(&lfs_lock);
		return error;

	case LFCNBMAPV:
	case LFCNMARKV:
		blkvp = *(struct lfs_fcntl_markv *)ap->a_data;

		blkcnt = blkvp.blkcnt;
		if ((u_int) blkcnt
> LFS_MARKV_MAXBLKCNT) 1530 return (EINVAL); 1531 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1532 if ((error = copyin(blkvp.blkiov, blkiov, 1533 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1534 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1535 return error; 1536 } 1537 1538 mutex_enter(&lfs_lock); 1539 ++fs->lfs_sleepers; 1540 mutex_exit(&lfs_lock); 1541 if (ap->a_command == LFCNBMAPV) 1542 error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); 1543 else /* LFCNMARKV */ 1544 error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); 1545 if (error == 0) 1546 error = copyout(blkiov, blkvp.blkiov, 1547 blkcnt * sizeof(BLOCK_INFO)); 1548 mutex_enter(&lfs_lock); 1549 if (--fs->lfs_sleepers == 0) 1550 wakeup(&fs->lfs_sleepers); 1551 mutex_exit(&lfs_lock); 1552 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1553 return error; 1554 1555 case LFCNRECLAIM: 1556 /* 1557 * Flush dirops and write Ifile, allowing empty segments 1558 * to be immediately reclaimed. 1559 */ 1560 lfs_writer_enter(fs, "pndirop"); 1561 off = fs->lfs_offset; 1562 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 1563 lfs_flush_dirops(fs); 1564 LFS_CLEANERINFO(cip, fs, bp); 1565 oclean = cip->clean; 1566 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1567 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 1568 fs->lfs_sp->seg_flags |= SEGM_PROT; 1569 lfs_segunlock(fs); 1570 lfs_writer_leave(fs); 1571 1572 #ifdef DEBUG 1573 LFS_CLEANERINFO(cip, fs, bp); 1574 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 1575 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 1576 fs->lfs_offset - off, cip->clean - oclean, 1577 fs->lfs_activesb)); 1578 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 1579 #endif 1580 1581 return 0; 1582 1583 case LFCNIFILEFH_COMPAT: 1584 /* Return the filehandle of the Ifile */ 1585 if ((error = kauth_authorize_system(l->l_cred, 1586 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0) 1587 return (error); 1588 fhp = (struct fhandle *)ap->a_data; 1589 fhp->fh_fsid = *fsidp; 1590 fh_size = 16; /* former VFS_MAXFIDSIZ */ 1591 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1592 1593 case LFCNIFILEFH_COMPAT2: 1594 case LFCNIFILEFH: 1595 /* Return the filehandle of the Ifile */ 1596 fhp = (struct fhandle *)ap->a_data; 1597 fhp->fh_fsid = *fsidp; 1598 fh_size = sizeof(struct lfs_fhandle) - 1599 offsetof(fhandle_t, fh_fid); 1600 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1601 1602 case LFCNREWIND: 1603 /* Move lfs_offset to the lowest-numbered segment */ 1604 return lfs_rewind(fs, *(int *)ap->a_data); 1605 1606 case LFCNINVAL: 1607 /* Mark a segment SEGUSE_INVAL */ 1608 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); 1609 if (sup->su_nbytes > 0) { 1610 brelse(bp, 0); 1611 lfs_unset_inval_all(fs); 1612 return EBUSY; 1613 } 1614 sup->su_flags |= SEGUSE_INVAL; 1615 VOP_BWRITE(bp->b_vp, bp); 1616 return 0; 1617 1618 case LFCNRESIZE: 1619 /* Resize the filesystem */ 1620 return lfs_resize_fs(fs, *(int *)ap->a_data); 1621 1622 case LFCNWRAPSTOP: 1623 case LFCNWRAPSTOP_COMPAT: 1624 /* 1625 * Hold lfs_newseg at segment 0; if requested, sleep until 1626 * the filesystem wraps around. To support external agents 1627 * (dump, fsck-based regression test) that need to look at 1628 * a snapshot of the filesystem, without necessarily 1629 * requiring that all fs activity stops. 
1630 */ 1631 if (fs->lfs_stoplwp == curlwp) 1632 return EALREADY; 1633 1634 mutex_enter(&lfs_lock); 1635 while (fs->lfs_stoplwp != NULL) 1636 cv_wait(&fs->lfs_stopcv, &lfs_lock); 1637 fs->lfs_stoplwp = curlwp; 1638 if (fs->lfs_nowrap == 0) 1639 log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); 1640 ++fs->lfs_nowrap; 1641 if (*(int *)ap->a_data == 1 1642 || ap->a_command == LFCNWRAPSTOP_COMPAT) { 1643 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); 1644 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1645 "segwrap", 0, &lfs_lock); 1646 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); 1647 if (error) { 1648 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 1649 } 1650 } 1651 mutex_exit(&lfs_lock); 1652 return 0; 1653 1654 case LFCNWRAPGO: 1655 case LFCNWRAPGO_COMPAT: 1656 /* 1657 * Having done its work, the agent wakes up the writer. 1658 * If the argument is 1, it sleeps until a new segment 1659 * is selected. 1660 */ 1661 mutex_enter(&lfs_lock); 1662 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 1663 ap->a_command == LFCNWRAPGO_COMPAT ? 1 : 1664 *((int *)ap->a_data)); 1665 mutex_exit(&lfs_lock); 1666 return error; 1667 1668 case LFCNWRAPPASS: 1669 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 1670 return EALREADY; 1671 mutex_enter(&lfs_lock); 1672 if (fs->lfs_stoplwp != curlwp) { 1673 mutex_exit(&lfs_lock); 1674 return EALREADY; 1675 } 1676 if (fs->lfs_nowrap == 0) { 1677 mutex_exit(&lfs_lock); 1678 return EBUSY; 1679 } 1680 fs->lfs_wrappass = 1; 1681 wakeup(&fs->lfs_wrappass); 1682 /* Wait for the log to wrap, if asked */ 1683 if (*(int *)ap->a_data) { 1684 mutex_enter(ap->a_vp->v_interlock); 1685 lfs_vref(ap->a_vp); 1686 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 1687 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 1688 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 1689 "segwrap", 0, &lfs_lock); 1690 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 1691 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 1692 lfs_vunref(ap->a_vp); 1693 } 1694 mutex_exit(&lfs_lock); 1695 return error; 1696 1697 case LFCNWRAPSTATUS: 1698 mutex_enter(&lfs_lock); 1699 *(int *)ap->a_data = fs->lfs_wrapstatus; 1700 mutex_exit(&lfs_lock); 1701 return 0; 1702 1703 default: 1704 return ufs_fcntl(v); 1705 } 1706 return 0; 1707 } 1708 1709 int 1710 lfs_getpages(void *v) 1711 { 1712 struct vop_getpages_args /* { 1713 struct vnode *a_vp; 1714 voff_t a_offset; 1715 struct vm_page **a_m; 1716 int *a_count; 1717 int a_centeridx; 1718 vm_prot_t a_access_type; 1719 int a_advice; 1720 int a_flags; 1721 } */ *ap = v; 1722 1723 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && 1724 (ap->a_access_type & VM_PROT_WRITE) != 0) { 1725 return EPERM; 1726 } 1727 if ((ap->a_access_type & VM_PROT_WRITE) != 0) { 1728 mutex_enter(&lfs_lock); 1729 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); 1730 mutex_exit(&lfs_lock); 1731 } 1732 1733 /* 1734 * we're relying on the fact that genfs_getpages() always read in 1735 * entire filesystem blocks. 1736 */ 1737 return genfs_getpages(v); 1738 } 1739 1740 /* 1741 * Wait for a page to become unbusy, possibly printing diagnostic messages 1742 * as well. 1743 * 1744 * Called with vp->v_interlock held; return with it held. 1745 */ 1746 static void 1747 wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) 1748 { 1749 if ((pg->flags & PG_BUSY) == 0) 1750 return; /* Nothing to wait for! 
*/ 1751 1752 #if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) 1753 static struct vm_page *lastpg; 1754 1755 if (label != NULL && pg != lastpg) { 1756 if (pg->owner_tag) { 1757 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", 1758 curproc->p_pid, curlwp->l_lid, label, 1759 pg, pg->owner, pg->lowner, pg->owner_tag); 1760 } else { 1761 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", 1762 curproc->p_pid, curlwp->l_lid, label, pg); 1763 } 1764 } 1765 lastpg = pg; 1766 #endif 1767 1768 pg->flags |= PG_WANTED; 1769 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0); 1770 mutex_enter(vp->v_interlock); 1771 } 1772 1773 /* 1774 * This routine is called by lfs_putpages() when it can't complete the 1775 * write because a page is busy. This means that either (1) someone, 1776 * possibly the pagedaemon, is looking at this page, and will give it up 1777 * presently; or (2) we ourselves are holding the page busy in the 1778 * process of being written (either gathered or actually on its way to 1779 * disk). We don't need to give up the segment lock, but we might need 1780 * to call lfs_writeseg() to expedite the page's journey to disk. 1781 * 1782 * Called with vp->v_interlock held; return with it held. 1783 */ 1784 /* #define BUSYWAIT */ 1785 static void 1786 write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, 1787 int seglocked, const char *label) 1788 { 1789 #ifndef BUSYWAIT 1790 struct inode *ip = VTOI(vp); 1791 struct segment *sp = fs->lfs_sp; 1792 int count = 0; 1793 1794 if (pg == NULL) 1795 return; 1796 1797 while (pg->flags & PG_BUSY && 1798 pg->uobject == &vp->v_uobj) { 1799 mutex_exit(vp->v_interlock); 1800 if (sp->cbpp - sp->bpp > 1) { 1801 /* Write gathered pages */ 1802 lfs_updatemeta(sp); 1803 lfs_release_finfo(fs); 1804 (void) lfs_writeseg(fs, sp); 1805 1806 /* 1807 * Reinitialize FIP 1808 */ 1809 KASSERT(sp->vp == vp); 1810 lfs_acquire_finfo(fs, ip->i_number, 1811 ip->i_gen); 1812 } 1813 ++count; 1814 mutex_enter(vp->v_interlock); 1815 wait_for_page(vp, pg, label); 1816 } 1817 if (label != NULL && count > 1) 1818 printf("lfs_putpages[%d]: %s: %sn = %d\n", curproc->p_pid, 1819 label, (count > 0 ? "looping, " : ""), count); 1820 #else 1821 preempt(1); 1822 #endif 1823 } 1824 1825 /* 1826 * Make sure that for all pages in every block in the given range, 1827 * either all are dirty or all are clean. If any of the pages 1828 * we've seen so far are dirty, put the vnode on the paging chain, 1829 * and mark it IN_PAGING. 1830 * 1831 * If checkfirst != 0, don't check all the pages but return at the 1832 * first dirty page. 
1833 */ 1834 static int 1835 check_dirty(struct lfs *fs, struct vnode *vp, 1836 off_t startoffset, off_t endoffset, off_t blkeof, 1837 int flags, int checkfirst, struct vm_page **pgp) 1838 { 1839 int by_list; 1840 struct vm_page *curpg = NULL; /* XXX: gcc */ 1841 struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg; 1842 off_t soff = 0; /* XXX: gcc */ 1843 voff_t off; 1844 int i; 1845 int nonexistent; 1846 int any_dirty; /* number of dirty pages */ 1847 int dirty; /* number of dirty pages in a block */ 1848 int tdirty; 1849 int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT; 1850 int pagedaemon = (curlwp == uvm.pagedaemon_lwp); 1851 1852 ASSERT_MAYBE_SEGLOCK(fs); 1853 top: 1854 by_list = (vp->v_uobj.uo_npages <= 1855 ((endoffset - startoffset) >> PAGE_SHIFT) * 1856 UVM_PAGE_TREE_PENALTY); 1857 any_dirty = 0; 1858 1859 if (by_list) { 1860 curpg = TAILQ_FIRST(&vp->v_uobj.memq); 1861 } else { 1862 soff = startoffset; 1863 } 1864 while (by_list || soff < MIN(blkeof, endoffset)) { 1865 if (by_list) { 1866 /* 1867 * Find the first page in a block. Skip 1868 * blocks outside our area of interest or beyond 1869 * the end of file. 1870 */ 1871 KASSERT(curpg == NULL 1872 || (curpg->flags & PG_MARKER) == 0); 1873 if (pages_per_block > 1) { 1874 while (curpg && 1875 ((curpg->offset & fs->lfs_bmask) || 1876 curpg->offset >= vp->v_size || 1877 curpg->offset >= endoffset)) { 1878 curpg = TAILQ_NEXT(curpg, listq.queue); 1879 KASSERT(curpg == NULL || 1880 (curpg->flags & PG_MARKER) == 0); 1881 } 1882 } 1883 if (curpg == NULL) 1884 break; 1885 soff = curpg->offset; 1886 } 1887 1888 /* 1889 * Mark all pages in extended range busy; find out if any 1890 * of them are dirty. 1891 */ 1892 nonexistent = dirty = 0; 1893 for (i = 0; i == 0 || i < pages_per_block; i++) { 1894 if (by_list && pages_per_block <= 1) { 1895 pgs[i] = pg = curpg; 1896 } else { 1897 off = soff + (i << PAGE_SHIFT); 1898 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); 1899 if (pg == NULL) { 1900 ++nonexistent; 1901 continue; 1902 } 1903 } 1904 KASSERT(pg != NULL); 1905 1906 /* 1907 * If we're holding the segment lock, we can deadlock 1908 * against a process that has our page and is waiting 1909 * for the cleaner, while the cleaner waits for the 1910 * segment lock. Just bail in that case. 1911 */ 1912 if ((pg->flags & PG_BUSY) && 1913 (pagedaemon || LFS_SEGLOCK_HELD(fs))) { 1914 if (i > 0) 1915 uvm_page_unbusy(pgs, i); 1916 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); 1917 if (pgp) 1918 *pgp = pg; 1919 return -1; 1920 } 1921 1922 while (pg->flags & PG_BUSY) { 1923 wait_for_page(vp, pg, NULL); 1924 if (i > 0) 1925 uvm_page_unbusy(pgs, i); 1926 goto top; 1927 } 1928 pg->flags |= PG_BUSY; 1929 UVM_PAGE_OWN(pg, "lfs_putpages"); 1930 1931 pmap_page_protect(pg, VM_PROT_NONE); 1932 tdirty = (pmap_clear_modify(pg) || 1933 (pg->flags & PG_CLEAN) == 0); 1934 dirty += tdirty; 1935 } 1936 if (pages_per_block > 0 && nonexistent >= pages_per_block) { 1937 if (by_list) { 1938 curpg = TAILQ_NEXT(curpg, listq.queue); 1939 } else { 1940 soff += fs->lfs_bsize; 1941 } 1942 continue; 1943 } 1944 1945 any_dirty += dirty; 1946 KASSERT(nonexistent == 0); 1947 1948 /* 1949 * If any are dirty make all dirty; unbusy them, 1950 * but if we were asked to clean, wire them so that 1951 * the pagedaemon doesn't bother us about them while 1952 * they're on their way to disk. 
1953 */ 1954 for (i = 0; i == 0 || i < pages_per_block; i++) { 1955 pg = pgs[i]; 1956 KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); 1957 if (dirty) { 1958 pg->flags &= ~PG_CLEAN; 1959 if (flags & PGO_FREE) { 1960 /* 1961 * Wire the page so that 1962 * pdaemon doesn't see it again. 1963 */ 1964 mutex_enter(&uvm_pageqlock); 1965 uvm_pagewire(pg); 1966 mutex_exit(&uvm_pageqlock); 1967 1968 /* Suspended write flag */ 1969 pg->flags |= PG_DELWRI; 1970 } 1971 } 1972 if (pg->flags & PG_WANTED) 1973 wakeup(pg); 1974 pg->flags &= ~(PG_WANTED|PG_BUSY); 1975 UVM_PAGE_OWN(pg, NULL); 1976 } 1977 1978 if (checkfirst && any_dirty) 1979 break; 1980 1981 if (by_list) { 1982 curpg = TAILQ_NEXT(curpg, listq.queue); 1983 } else { 1984 soff += MAX(PAGE_SIZE, fs->lfs_bsize); 1985 } 1986 } 1987 1988 return any_dirty; 1989 } 1990 1991 /* 1992 * lfs_putpages functions like genfs_putpages except that 1993 * 1994 * (1) It needs to bounds-check the incoming requests to ensure that 1995 * they are block-aligned; if they are not, expand the range and 1996 * do the right thing in case, e.g., the requested range is clean 1997 * but the expanded range is dirty. 1998 * 1999 * (2) It needs to explicitly send blocks to be written when it is done. 2000 * If VOP_PUTPAGES is called without the seglock held, we simply take 2001 * the seglock and let lfs_segunlock wait for us. 2002 * XXX There might be a bad situation if we have to flush a vnode while 2003 * XXX lfs_markv is in operation. As of this writing we panic in this 2004 * XXX case. 2005 * 2006 * Assumptions: 2007 * 2008 * (1) The caller does not hold any pages in this vnode busy. If it does, 2009 * there is a danger that when we expand the page range and busy the 2010 * pages we will deadlock. 2011 * 2012 * (2) We are called with vp->v_interlock held; we must return with it 2013 * released. 2014 * 2015 * (3) We don't absolutely have to free pages right away, provided that 2016 * the request does not have PGO_SYNCIO. When the pagedaemon gives 2017 * us a request with PGO_FREE, we take the pages out of the paging 2018 * queue and wake up the writer, which will handle freeing them for us. 2019 * 2020 * We ensure that for any filesystem block, all pages for that 2021 * block are either resident or not, even if those pages are higher 2022 * than EOF; that means that we will be getting requests to free 2023 * "unused" pages above EOF all the time, and should ignore them. 2024 * 2025 * (4) If we are called with PGO_LOCKED, the finfo array we are to write 2026 * into has been set up for us by lfs_writefile. If not, we will 2027 * have to handle allocating and/or freeing an finfo entry. 2028 * 2029 * XXX note that we're (ab)using PGO_LOCKED as "seglock held". 
2030 */ 2031 2032 /* How many times to loop before we should start to worry */ 2033 #define TOOMANY 4 2034 2035 int 2036 lfs_putpages(void *v) 2037 { 2038 int error; 2039 struct vop_putpages_args /* { 2040 struct vnode *a_vp; 2041 voff_t a_offlo; 2042 voff_t a_offhi; 2043 int a_flags; 2044 } */ *ap = v; 2045 struct vnode *vp; 2046 struct inode *ip; 2047 struct lfs *fs; 2048 struct segment *sp; 2049 off_t origoffset, startoffset, endoffset, origendoffset, blkeof; 2050 off_t off, max_endoffset; 2051 bool seglocked, sync, pagedaemon; 2052 struct vm_page *pg, *busypg; 2053 UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); 2054 #ifdef DEBUG 2055 int debug_n_again, debug_n_dirtyclean; 2056 #endif 2057 2058 vp = ap->a_vp; 2059 ip = VTOI(vp); 2060 fs = ip->i_lfs; 2061 sync = (ap->a_flags & PGO_SYNCIO) != 0; 2062 pagedaemon = (curlwp == uvm.pagedaemon_lwp); 2063 2064 /* Putpages does nothing for metadata. */ 2065 if (vp == fs->lfs_ivnode || vp->v_type != VREG) { 2066 mutex_exit(vp->v_interlock); 2067 return 0; 2068 } 2069 2070 /* 2071 * If there are no pages, don't do anything. 2072 */ 2073 if (vp->v_uobj.uo_npages == 0) { 2074 if (TAILQ_EMPTY(&vp->v_uobj.memq) && 2075 (vp->v_iflag & VI_ONWORKLST) && 2076 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 2077 vp->v_iflag &= ~VI_WRMAPDIRTY; 2078 vn_syncer_remove_from_worklist(vp); 2079 } 2080 mutex_exit(vp->v_interlock); 2081 2082 /* Remove us from paging queue, if we were on it */ 2083 mutex_enter(&lfs_lock); 2084 if (ip->i_flags & IN_PAGING) { 2085 ip->i_flags &= ~IN_PAGING; 2086 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 2087 } 2088 mutex_exit(&lfs_lock); 2089 return 0; 2090 } 2091 2092 blkeof = blkroundup(fs, ip->i_size); 2093 2094 /* 2095 * Ignore requests to free pages past EOF but in the same block 2096 * as EOF, unless the request is synchronous. (If the request is 2097 * sync, it comes from lfs_truncate.) 2098 * XXXUBC Make these pages look "active" so the pagedaemon won't 2099 * XXXUBC bother us with them again. 2100 */ 2101 if (!sync && ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { 2102 origoffset = ap->a_offlo; 2103 for (off = origoffset; off < blkeof; off += fs->lfs_bsize) { 2104 pg = uvm_pagelookup(&vp->v_uobj, off); 2105 KASSERT(pg != NULL); 2106 while (pg->flags & PG_BUSY) { 2107 pg->flags |= PG_WANTED; 2108 UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, 2109 "lfsput2", 0); 2110 mutex_enter(vp->v_interlock); 2111 } 2112 mutex_enter(&uvm_pageqlock); 2113 uvm_pageactivate(pg); 2114 mutex_exit(&uvm_pageqlock); 2115 } 2116 ap->a_offlo = blkeof; 2117 if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { 2118 mutex_exit(vp->v_interlock); 2119 return 0; 2120 } 2121 } 2122 2123 /* 2124 * Extend page range to start and end at block boundaries. 2125 * (For the purposes of VOP_PUTPAGES, fragments don't exist.) 2126 */ 2127 origoffset = ap->a_offlo; 2128 origendoffset = ap->a_offhi; 2129 startoffset = origoffset & ~(fs->lfs_bmask); 2130 max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift) 2131 << fs->lfs_bshift; 2132 2133 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 2134 endoffset = max_endoffset; 2135 origendoffset = endoffset; 2136 } else { 2137 origendoffset = round_page(ap->a_offhi); 2138 endoffset = round_page(blkroundup(fs, origendoffset)); 2139 } 2140 2141 KASSERT(startoffset > 0 || endoffset >= startoffset); 2142 if (startoffset == endoffset) { 2143 /* Nothing to do, why were we called? 
		 */
		mutex_exit(vp->v_interlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		    PRId64 "\n", startoffset));
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT))
		return genfs_putpages(v);

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			mutex_exit(vp->v_interlock);
			return EDEADLK;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags & ~PGO_SYNCIO, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK)
			return r;

		/* One of the pages was busy.  Start over. */
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while (1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		printf("lfs_putpages: dirtyclean: looping, n = %d\n",
		    debug_n_dirtyclean);
#endif

	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * XXX We must drop vp->v_interlock before taking the lfs_lock or we
	 * get a nasty deadlock with lfs_flush_pchain().
	 */
	if (pagedaemon) {
		mutex_exit(vp->v_interlock);
		mutex_enter(&lfs_lock);
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		return EWOULDBLOCK;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		int locked;

		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));
		/* XXX VOP_ISLOCKED() may not be used for lock decisions. */
		locked = (VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
		mutex_exit(vp->v_interlock);
		lfs_writer_enter(fs, "ppdirop");
		if (locked)
			VOP_UNLOCK(vp); /* XXX why? */

		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);

		if (locked)
			VOP_LOCK(vp, LK_EXCLUSIVE);
		mutex_enter(vp->v_interlock);
		lfs_writer_leave(fs);

		/*
		 * XXX the flush should have taken care of this one too!
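		 * (lfs_flush_fs() writes out the whole filesystem, so by
		 * this point the pages we were asked to clean have
		 * normally been written already.)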
		 */
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
 get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		mutex_exit(vp->v_interlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
		if (error != 0)
			return error;
		mutex_enter(vp->v_interlock);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 0, &busypg) < 0) {
			mutex_exit(vp->v_interlock);

			mutex_enter(vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				mutex_exit(vp->v_interlock);
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
				mutex_enter(vp->v_interlock);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		error = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags, &busypg);

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			    " %d ino %d off %x (seg %d)\n", error,
			    ip->i_number, fs->lfs_offset,
			    dtosn(fs, fs->lfs_offset)));

			mutex_enter(vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, "again");
		}
#ifdef DEBUG
		++debug_n_again;
#endif
	} while (error == EDEADLK);
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		printf("lfs_putpages: again: looping, n = %d\n", debug_n_again);
#endif

	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
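	 * (lfs_updatemeta() points the inode and any indirect blocks at
	 * the new on-disk addresses assigned to the gathered blocks.)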
2370 */ 2371 lfs_updatemeta(sp); 2372 KASSERT(sp->vp == vp); 2373 sp->vp = NULL; 2374 2375 /* 2376 * If we were called from lfs_writefile, we don't need to clean up 2377 * the FIP or unlock the segment lock. We're done. 2378 */ 2379 if (seglocked) 2380 return error; 2381 2382 /* Clean up FIP and send it to disk. */ 2383 lfs_release_finfo(fs); 2384 lfs_writeseg(fs, fs->lfs_sp); 2385 2386 /* 2387 * Remove us from paging queue if we wrote all our pages. 2388 */ 2389 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 2390 mutex_enter(&lfs_lock); 2391 if (ip->i_flags & IN_PAGING) { 2392 ip->i_flags &= ~IN_PAGING; 2393 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 2394 } 2395 mutex_exit(&lfs_lock); 2396 } 2397 2398 /* 2399 * XXX - with the malloc/copy writeseg, the pages are freed by now 2400 * even if we don't wait (e.g. if we hold a nested lock). This 2401 * will not be true if we stop using malloc/copy. 2402 */ 2403 KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); 2404 lfs_segunlock(fs); 2405 2406 /* 2407 * Wait for v_numoutput to drop to zero. The seglock should 2408 * take care of this, but there is a slight possibility that 2409 * aiodoned might not have got around to our buffers yet. 2410 */ 2411 if (sync) { 2412 mutex_enter(vp->v_interlock); 2413 while (vp->v_numoutput > 0) { 2414 DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" 2415 " num %d\n", ip->i_number, vp->v_numoutput)); 2416 cv_wait(&vp->v_cv, vp->v_interlock); 2417 } 2418 mutex_exit(vp->v_interlock); 2419 } 2420 return error; 2421 } 2422 2423 /* 2424 * Return the last logical file offset that should be written for this file 2425 * if we're doing a write that ends at "size". If writing, we need to know 2426 * about sizes on disk, i.e. fragments if there are any; if reading, we need 2427 * to know about entire blocks. 2428 */ 2429 void 2430 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) 2431 { 2432 struct inode *ip = VTOI(vp); 2433 struct lfs *fs = ip->i_lfs; 2434 daddr_t olbn, nlbn; 2435 2436 olbn = lblkno(fs, ip->i_size); 2437 nlbn = lblkno(fs, size); 2438 if (!(flags & GOP_SIZE_MEM) && nlbn < NDADDR && olbn <= nlbn) { 2439 *eobp = fragroundup(fs, size); 2440 } else { 2441 *eobp = blkroundup(fs, size); 2442 } 2443 } 2444 2445 #ifdef DEBUG 2446 void lfs_dump_vop(void *); 2447 2448 void 2449 lfs_dump_vop(void *v) 2450 { 2451 struct vop_putpages_args /* { 2452 struct vnode *a_vp; 2453 voff_t a_offlo; 2454 voff_t a_offhi; 2455 int a_flags; 2456 } */ *ap = v; 2457 2458 #ifdef DDB 2459 vfs_vnode_print(ap->a_vp, 0, printf); 2460 #endif 2461 lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din); 2462 } 2463 #endif 2464 2465 int 2466 lfs_mmap(void *v) 2467 { 2468 struct vop_mmap_args /* { 2469 const struct vnodeop_desc *a_desc; 2470 struct vnode *a_vp; 2471 vm_prot_t a_prot; 2472 kauth_cred_t a_cred; 2473 } */ *ap = v; 2474 2475 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) 2476 return EOPNOTSUPP; 2477 return ufs_mmap(v); 2478 } 2479