/*	$NetBSD: lfs_vnops.c,v 1.262 2014/03/24 13:42:40 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_vnops.c	8.13 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.262 2014/03/24 13:42:40 hannken Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_bswap.h>
#include <ufs/lfs/ulfs_extern.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

extern pid_t lfs_writer_daemon;
int lfs_ignore_lazy_sync = 1;

static int lfs_openextattr(void *v);
static int lfs_closeextattr(void *v);
static int lfs_getextattr(void *v);
static int lfs_setextattr(void *v);
static int lfs_listextattr(void *v);
static int lfs_deleteextattr(void *v);

/* Global vfs data structures for lfs. */
int (**lfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, ulfs_lookup },		/* lookup */
	{ &vop_create_desc, lfs_create },		/* create */
	{ &vop_whiteout_desc, ulfs_whiteout },		/* whiteout */
	{ &vop_mknod_desc, lfs_mknod },			/* mknod */
	{ &vop_open_desc, ulfs_open },			/* open */
	{ &vop_close_desc, lfs_close },			/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, lfs_read },			/* read */
	{ &vop_write_desc, lfs_write },			/* write */
	{ &vop_ioctl_desc, ulfs_ioctl },		/* ioctl */
	{ &vop_fcntl_desc, lfs_fcntl },			/* fcntl */
	{ &vop_poll_desc, ulfs_poll },			/* poll */
	{ &vop_kqfilter_desc, genfs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, ulfs_revoke },		/* revoke */
	{ &vop_mmap_desc, lfs_mmap },			/* mmap */
	{ &vop_fsync_desc, lfs_fsync },			/* fsync */
	{ &vop_seek_desc, ulfs_seek },			/* seek */
	{ &vop_remove_desc, lfs_remove },		/* remove */
	{ &vop_link_desc, lfs_link },			/* link */
	{ &vop_rename_desc, lfs_rename },		/* rename */
	{ &vop_mkdir_desc, lfs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, lfs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, lfs_symlink },		/* symlink */
	{ &vop_readdir_desc, ulfs_readdir },		/* readdir */
	{ &vop_readlink_desc, ulfs_readlink },		/* readlink */
	{ &vop_abortop_desc, ulfs_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, ulfs_bmap },			/* bmap */
	{ &vop_strategy_desc, lfs_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{
 &vop_pathconf_desc, ulfs_pathconf },			/* pathconf */
	{ &vop_advlock_desc, ulfs_advlock },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_getpages_desc, lfs_getpages },		/* getpages */
	{ &vop_putpages_desc, lfs_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_vnodeop_opv_desc =
	{ &lfs_vnodeop_p, lfs_vnodeop_entries };

int (**lfs_specop_p)(void *);
const struct vnodeopv_entry_desc lfs_specop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup */
	{ &vop_create_desc, spec_create },		/* create */
	{ &vop_mknod_desc, spec_mknod },		/* mknod */
	{ &vop_open_desc, spec_open },			/* open */
	{ &vop_close_desc, lfsspec_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsspec_read },		/* read */
	{ &vop_write_desc, ulfsspec_write },		/* write */
	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
	{ &vop_fcntl_desc, ulfs_fcntl },		/* fcntl */
	{ &vop_poll_desc, spec_poll },			/* poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* revoke */
	{ &vop_mmap_desc, spec_mmap },			/* mmap */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_seek_desc, spec_seek },			/* seek */
	{ &vop_remove_desc, spec_remove },		/* remove */
	{ &vop_link_desc, spec_link },			/* link */
	{ &vop_rename_desc, spec_rename },		/* rename */
	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
	{ &vop_symlink_desc, spec_symlink },		/* symlink */
	{ &vop_readdir_desc, spec_readdir },		/* readdir */
	{ &vop_readlink_desc, spec_readlink },		/* readlink */
	{ &vop_abortop_desc, spec_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, spec_bmap },			/* bmap */
	{ &vop_strategy_desc, spec_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* advlock */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* getpages */
	{ &vop_putpages_desc, spec_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_specop_opv_desc =
	{ &lfs_specop_p, lfs_specop_entries };

int
 (**lfs_fifoop_p)(void *);
const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, vn_fifo_bypass },		/* lookup */
	{ &vop_create_desc, vn_fifo_bypass },		/* create */
	{ &vop_mknod_desc, vn_fifo_bypass },		/* mknod */
	{ &vop_open_desc, vn_fifo_bypass },		/* open */
	{ &vop_close_desc, lfsfifo_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsfifo_read },		/* read */
	{ &vop_write_desc, ulfsfifo_write },		/* write */
	{ &vop_ioctl_desc, vn_fifo_bypass },		/* ioctl */
	{ &vop_fcntl_desc, ulfs_fcntl },		/* fcntl */
	{ &vop_poll_desc, vn_fifo_bypass },		/* poll */
	{ &vop_kqfilter_desc, vn_fifo_bypass },		/* kqfilter */
	{ &vop_revoke_desc, vn_fifo_bypass },		/* revoke */
	{ &vop_mmap_desc, vn_fifo_bypass },		/* mmap */
	{ &vop_fsync_desc, vn_fifo_bypass },		/* fsync */
	{ &vop_seek_desc, vn_fifo_bypass },		/* seek */
	{ &vop_remove_desc, vn_fifo_bypass },		/* remove */
	{ &vop_link_desc, vn_fifo_bypass },		/* link */
	{ &vop_rename_desc, vn_fifo_bypass },		/* rename */
	{ &vop_mkdir_desc, vn_fifo_bypass },		/* mkdir */
	{ &vop_rmdir_desc, vn_fifo_bypass },		/* rmdir */
	{ &vop_symlink_desc, vn_fifo_bypass },		/* symlink */
	{ &vop_readdir_desc, vn_fifo_bypass },		/* readdir */
	{ &vop_readlink_desc, vn_fifo_bypass },		/* readlink */
	{ &vop_abortop_desc, vn_fifo_bypass },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ulfs_lock },			/* lock */
	{ &vop_unlock_desc, ulfs_unlock },		/* unlock */
	{ &vop_bmap_desc, vn_fifo_bypass },		/* bmap */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, ulfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, vn_fifo_bypass },		/* pathconf */
	{ &vop_advlock_desc, vn_fifo_bypass },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_putpages_desc, vn_fifo_bypass },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_fifoop_opv_desc =
	{ &lfs_fifoop_p, lfs_fifoop_entries };

static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **);

#define	LFS_READWRITE
#include <ufs/lfs/ulfs_readwrite.c>
#undef	LFS_READWRITE

/*
 * Synch an open file.
 */
/* ARGSUSED */
int
lfs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error, wait;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	/* If we're mounted read-only, don't try to sync. */
	if (fs->lfs_ronly)
		return 0;

	/* If a removed vnode is being cleaned, no need to sync here.
	 */
	if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
		return 0;

	/*
	 * Trickle sync simply adds this vnode to the pager list, as if
	 * the pagedaemon had requested a pageout.
	 */
	if (ap->a_flags & FSYNC_LAZY) {
		if (lfs_ignore_lazy_sync == 0) {
			mutex_enter(&lfs_lock);
			if (!(ip->i_flags & IN_PAGING)) {
				ip->i_flags |= IN_PAGING;
				TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
						  i_lfs_pchain);
			}
			wakeup(&lfs_writer_daemon);
			mutex_exit(&lfs_lock);
		}
		return 0;
	}

	/*
	 * If a vnode is being cleaned, flush it out before we try to
	 * reuse it.  This prevents the cleaner from writing files twice
	 * in the same partial segment, causing an accounting underflow.
	 */
	if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) {
		lfs_vflush(vp);
	}

	wait = (ap->a_flags & FSYNC_WAIT);
	do {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
				     round_page(ap->a_offhi),
				     PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
		if (error == EAGAIN) {
			mutex_enter(&lfs_lock);
			mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync",
				hz / 100 + 1, &lfs_lock);
			mutex_exit(&lfs_lock);
		}
	} while (error == EAGAIN);
	if (error)
		return error;

	if ((ap->a_flags & FSYNC_DATAONLY) == 0)
		error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
				  curlwp->l_cred);
	}
	if (wait && !VPISEMPTY(vp))
		LFS_SET_UINO(ip, IN_MODIFIED);

	return error;
}

/*
 * Take IN_ADIROP off, then call ulfs_inactive.
 */
int
lfs_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	lfs_unmark_vnode(ap->a_vp);

	/*
	 * The Ifile is only ever inactivated on unmount.
	 * Streamline this process by not giving it more dirty blocks.
	 */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
		mutex_enter(&lfs_lock);
		LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
		mutex_exit(&lfs_lock);
		VOP_UNLOCK(ap->a_vp);
		return 0;
	}

#ifdef DEBUG
	/*
	 * This might happen on unmount.
	 * XXX If it happens at any other time, it should be a panic.
	 */
	if (ap->a_vp->v_uflag & VU_DIROP) {
		struct inode *ip = VTOI(ap->a_vp);
		printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number);
	}
#endif /* DEBUG */

	return ulfs_inactive(v);
}

int
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
	struct lfs *fs;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(vp == NULL || VOP_ISLOCKED(vp));

	fs = VTOI(dvp)->i_lfs;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * LFS_NRESERVE calculates direct and indirect blocks as well
	 * as an inode block; an overestimate in most cases.
	 */
	if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
		return (error);

    restart:
	mutex_enter(&lfs_lock);
	if (fs->lfs_dirops == 0) {
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
		mutex_enter(&lfs_lock);
	}
	while (fs->lfs_writer) {
		error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH,
		    "lfs_sdirop", 0, &lfs_lock);
		if (error == EINTR) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
	}
	if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		goto restart;
	}

	if (lfs_dirvcount > LFS_MAX_DIROP) {
		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
		      "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
		if ((error = mtsleep(&lfs_dirvcount,
		    PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
		    &lfs_lock)) != 0) {
			goto unreserve;
		}
		goto restart;
	}

	++fs->lfs_dirops;
	/* fs->lfs_doifile = 1; */			/* XXX why? --ks */
	mutex_exit(&lfs_lock);

	/* Hold a reference so SET_ENDOP will be happy */
	vref(dvp);
	if (vp) {
		vref(vp);
		MARK_VNODE(vp);
	}

	MARK_VNODE(dvp);
	return 0;

  unreserve:
	lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
	return error;
}

/*
 * Get a new vnode *before* adjusting the dirop count, to avoid a deadlock
 * in getnewvnode(), if we have a stacked filesystem mounted on top
 * of us.
 *
 * NB: this means we have to clear the new vnodes on error.  Fortunately
 * SET_ENDOP is there to do that for us.
 */
int
lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp)
{
	int error;
	struct lfs *fs;

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly)
		return EROFS;
	if (vpp == NULL) {
		return lfs_set_dirop(dvp, NULL);
	}
	error = getnewvnode(VT_LFS, dvp->v_mount, lfs_vnodeop_p, NULL, vpp);
	if (error) {
		DLOG((DLOG_ALLOC, "lfs_set_dirop_create: dvp %p error %d\n",
		      dvp, error));
		return error;
	}
	if ((error = lfs_set_dirop(dvp, NULL)) != 0) {
		ungetnewvnode(*vpp);
		*vpp = NULL;
		return error;
	}
	return 0;
}

void
lfs_mark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	mutex_enter(&lfs_lock);
	if (!(ip->i_flag & IN_ADIROP)) {
		if (!(vp->v_uflag & VU_DIROP)) {
			mutex_exit(&lfs_lock);
			mutex_enter(vp->v_interlock);
			if (lfs_vref(vp) != 0)
				panic("lfs_mark_vnode: could not vref");
			mutex_enter(&lfs_lock);
			++lfs_dirvcount;
			++fs->lfs_dirvcount;
			TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			vp->v_uflag |= VU_DIROP;
		}
		++fs->lfs_nadirop;
		ip->i_flag &= ~IN_CDIROP;
		ip->i_flag |= IN_ADIROP;
	} else
		KASSERT(vp->v_uflag & VU_DIROP);
	mutex_exit(&lfs_lock);
}

void
lfs_unmark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);

	mutex_enter(&lfs_lock);
	if (ip && (ip->i_flag & IN_ADIROP)) {
		KASSERT(vp->v_uflag & VU_DIROP);
		--ip->i_lfs->lfs_nadirop;
		ip->i_flag &= ~IN_ADIROP;
	}
	mutex_exit(&lfs_lock);
}

int
lfs_symlink(void *v)
{
	struct vop_symlink_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap = v;
	int error;

	if ((error = SET_DIROP_CREATE(ap->a_dvp,
	    ap->a_vpp)) != 0) {
		return error;
	}
	error = ulfs_symlink(ap);
	SET_ENDOP_CREATE_AP(ap, "symlink");
	return (error);
}

int
lfs_mknod(void *v)
{
	struct vop_mknod_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct vattr *vap;
	struct vnode **vpp;
	struct inode *ip;
	int error;
	struct mount *mp;
	ino_t ino;
	struct ulfs_lookup_results *ulr;

	vap = ap->a_vap;
	vpp = ap->a_vpp;

	/* XXX should handle this material another way */
	ulr = &VTOI(ap->a_dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));

	if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
		return error;
	}

	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
	error = ulfs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
	    ap->a_dvp, ulr, vpp, ap->a_cnp);

	/* Either way we're done with the dirop at this point */
	SET_ENDOP_CREATE_AP(ap, "mknod");

	if (error) {
		fstrans_done(ap->a_dvp->v_mount);
		*vpp = NULL;
		return (error);
	}

	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
	ip = VTOI(*vpp);
	mp = (*vpp)->v_mount;
	ino = ip->i_number;
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	if (vap->va_rdev != VNOVAL) {
		struct ulfsmount *ump = ip->i_ump;
		struct lfs *fs = ip->i_lfs;
		/*
		 * Want to be able to use this to make badblock
		 * inodes, so don't truncate the dev number.
		 */
		if (ump->um_fstype == ULFS1)
			ip->i_ffs1_rdev = ulfs_rw32(vap->va_rdev,
			    ULFS_MPNEEDSWAP(fs));
		else
			ip->i_ffs2_rdev = ulfs_rw64(vap->va_rdev,
			    ULFS_MPNEEDSWAP(fs));
	}

	/*
	 * Call fsync to write the vnode so that we don't have to deal with
	 * flushing it when it's marked VU_DIROP or reclaiming.
	 *
	 * XXX KS - If we can't flush we also can't call vgone(), so must
	 * return.  But, that leaves this vnode in limbo, also not good.
	 * Can this ever happen (barring hardware failure)?
	 */
	if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
		panic("lfs_mknod: couldn't fsync (ino %llu)",
		      (unsigned long long)ino);
		/* return (error); */
	}
	/*
	 * Remove vnode so that it will be reloaded by VFS_VGET and
	 * checked to see if it is an alias of an existing entry in
	 * the inode cache.
	 */
	/* Used to be vput, but that causes us to call VOP_INACTIVE twice.
	 */

	(*vpp)->v_type = VNON;
	VOP_UNLOCK(*vpp);
	vgone(*vpp);
	error = VFS_VGET(mp, ino, vpp);

	fstrans_done(ap->a_dvp->v_mount);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}
	VOP_UNLOCK(*vpp);
	return (0);
}

int
lfs_create(void *v)
{
	struct vop_create_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	int error;

	if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
		return error;
	}
	error = ulfs_create(ap);
	SET_ENDOP_CREATE_AP(ap, "create");
	return (error);
}

int
lfs_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	int error;

	if ((error = SET_DIROP_CREATE(ap->a_dvp, ap->a_vpp)) != 0) {
		return error;
	}
	error = ulfs_mkdir(ap);
	SET_ENDOP_CREATE_AP(ap, "mkdir");
	return (error);
}

int
lfs_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *dvp, *vp;
	struct inode *ip;
	int error;

	dvp = ap->a_dvp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	if ((error = SET_DIROP_REMOVE(dvp, vp)) != 0) {
		if (dvp == vp)
			vrele(vp);
		else
			vput(vp);
		vput(dvp);
		return error;
	}
	error = ulfs_remove(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, ip->i_number);
	SET_ENDOP_REMOVE(ip->i_lfs, dvp, ap->a_vp, "remove");
	return (error);
}

int
lfs_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if ((error = SET_DIROP_REMOVE(ap->a_dvp, ap->a_vp)) != 0) {
		if (ap->a_dvp == vp)
			vrele(ap->a_dvp);
		else
			vput(ap->a_dvp);
		vput(vp);
		return error;
	}
	error = ulfs_rmdir(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, ip->i_number);
	SET_ENDOP_REMOVE(ip->i_lfs, ap->a_dvp, ap->a_vp, "rmdir");
	return (error);
}

int
lfs_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct vnode **vpp = NULL;

	if ((error = SET_DIROP_CREATE(ap->a_dvp, vpp)) != 0) {
		vput(ap->a_dvp);
		return error;
	}
	error = ulfs_link(ap);
	SET_ENDOP_CREATE(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vpp, "link");
	return (error);
}

/* XXX hack to avoid calling ITIMES in getattr */
int
lfs_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct vattr *vap = ap->a_vap;
	struct lfs *fs = ip->i_lfs;

	fstrans_start(vp->v_mount, FSTRANS_SHARED);
	/*
	 * Copy from inode table
	 */
	vap->va_fsid = ip->i_dev;
	vap->va_fileid = ip->i_number;
	vap->va_mode = ip->i_mode & ~LFS_IFMT;
	vap->va_nlink = ip->i_nlink;
	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	vap->va_rdev = (dev_t)ip->i_ffs1_rdev;
	vap->va_size = vp->v_size;
	vap->va_atime.tv_sec = ip->i_ffs1_atime;
	vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
	vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
	vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
	vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
	vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
	vap->va_flags = ip->i_flags;
	vap->va_gen = ip->i_gen;
	/* this doesn't belong here */
	if (vp->v_type == VBLK)
		vap->va_blocksize = BLKDEV_IOSIZE;
	else if (vp->v_type == VCHR)
		vap->va_blocksize = MAXBSIZE;
	else
		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
	vap->va_bytes = lfs_fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks);
	vap->va_type = vp->v_type;
	vap->va_filerev = ip->i_modrev;
	fstrans_done(vp->v_mount);
	return (0);
}

/*
 * Check to make sure the inode blocks won't choke the buffer
 * cache, then call ulfs_setattr as usual.
 */
int
lfs_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	lfs_check(vp, LFS_UNUSED_LBN, 0);
	return ulfs_setattr(v);
}

/*
 * Release the block we hold on lfs_newseg wrapping.  Called on file close,
 * or explicitly from LFCNWRAPGO.  Called with the interlock held.
 */
static int
lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
{
	if (fs->lfs_stoplwp != curlwp)
		return EBUSY;

	fs->lfs_stoplwp = NULL;
	cv_signal(&fs->lfs_stopcv);

	KASSERT(fs->lfs_nowrap > 0);
	if (fs->lfs_nowrap <= 0) {
		return 0;
	}

	if (--fs->lfs_nowrap == 0) {
		log(LOG_NOTICE, "%s: re-enabled log wrap\n", fs->lfs_fsmnt);
		wakeup(&fs->lfs_wrappass);
		lfs_wakeup_cleaner(fs);
	}
	if (waitfor) {
		mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment",
			0, &lfs_lock);
	}

	return 0;
}

/*
 * Close called.
 *
 * Update the times on the inode.
 */
/* ARGSUSED */
int
lfs_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
	    fs->lfs_stoplwp == curlwp) {
		mutex_enter(&lfs_lock);
		log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
		lfs_wrapgo(fs, ip, 0);
		mutex_exit(&lfs_lock);
	}

	if (vp == ip->i_lfs->lfs_ivnode &&
	    vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
		return 0;

	fstrans_start(vp->v_mount, FSTRANS_SHARED);
	if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	fstrans_done(vp->v_mount);
	return (0);
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
lfsspec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (vp->v_usecount > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the inode then do device close.
949 */ 950 int 951 lfsfifo_close(void *v) 952 { 953 struct vop_close_args /* { 954 struct vnode *a_vp; 955 int a_fflag; 956 kauth_cred_ a_cred; 957 } */ *ap = v; 958 struct vnode *vp; 959 struct inode *ip; 960 961 vp = ap->a_vp; 962 ip = VTOI(vp); 963 if (ap->a_vp->v_usecount > 1) { 964 LFS_ITIMES(ip, NULL, NULL, NULL); 965 } 966 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 967 } 968 969 /* 970 * Reclaim an inode so that it can be used for other purposes. 971 */ 972 973 int 974 lfs_reclaim(void *v) 975 { 976 struct vop_reclaim_args /* { 977 struct vnode *a_vp; 978 } */ *ap = v; 979 struct vnode *vp = ap->a_vp; 980 struct inode *ip = VTOI(vp); 981 struct lfs *fs = ip->i_lfs; 982 int error; 983 984 /* 985 * The inode must be freed and updated before being removed 986 * from its hash chain. Other threads trying to gain a hold 987 * or lock on the inode will be stalled. 988 */ 989 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 990 lfs_vfree(vp, ip->i_number, ip->i_omode); 991 992 mutex_enter(&lfs_lock); 993 LFS_CLR_UINO(ip, IN_ALLMOD); 994 mutex_exit(&lfs_lock); 995 if ((error = ulfs_reclaim(vp))) 996 return (error); 997 998 /* 999 * Take us off the paging and/or dirop queues if we were on them. 1000 * We shouldn't be on them. 1001 */ 1002 mutex_enter(&lfs_lock); 1003 if (ip->i_flags & IN_PAGING) { 1004 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1005 fs->lfs_fsmnt); 1006 ip->i_flags &= ~IN_PAGING; 1007 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1008 } 1009 if (vp->v_uflag & VU_DIROP) { 1010 panic("reclaimed vnode is VU_DIROP"); 1011 vp->v_uflag &= ~VU_DIROP; 1012 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1013 } 1014 mutex_exit(&lfs_lock); 1015 1016 pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); 1017 lfs_deregister_all(vp); 1018 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1019 ip->inode_ext.lfs = NULL; 1020 genfs_node_destroy(vp); 1021 pool_put(&lfs_inode_pool, vp->v_data); 1022 vp->v_data = NULL; 1023 return (0); 1024 } 1025 1026 /* 1027 * Read a block from a storage device. 1028 * 1029 * Calculate the logical to physical mapping if not done already, 1030 * then call the device strategy routine. 1031 * 1032 * In order to avoid reading blocks that are in the process of being 1033 * written by the cleaner---and hence are not mutexed by the normal 1034 * buffer cache / page cache mechanisms---check for collisions before 1035 * reading. 1036 * 1037 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before* 1038 * the active cleaner test. 1039 * 1040 * XXX This code assumes that lfs_markv makes synchronous checkpoints. 
 */
int
lfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct inode *ip;
	daddr_t tbn;
#define MAXLOOP 25
	int i, sn, error, slept, loopcount;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* lfs uses its strategy routine only for read */
	KASSERT(bp->b_flags & B_READ);

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("lfs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
				 NULL);
		if (error) {
			bp->b_error = error;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if ((long)bp->b_blkno < 0) { /* block is not on disk */
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	slept = 1;
	loopcount = 0;
	mutex_enter(&lfs_lock);
	while (slept && fs->lfs_seglock) {
		mutex_exit(&lfs_lock);
		/*
		 * Look through list of intervals.
		 * There will only be intervals to look through
		 * if the cleaner holds the seglock.
		 * Since the cleaner is synchronous, we can trust
		 * the list of intervals to be current.
		 */
		tbn = LFS_DBTOFSB(fs, bp->b_blkno);
		sn = lfs_dtosn(fs, tbn);
		slept = 0;
		for (i = 0; i < fs->lfs_cleanind; i++) {
			if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
			    tbn >= fs->lfs_cleanint[i]) {
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: ino %d lbn %" PRId64
				      " ind %d sn %d fsb %" PRIx32
				      " given sn %d fsb %" PRIx64 "\n",
				      ip->i_number, bp->b_lblkno, i,
				      lfs_dtosn(fs, fs->lfs_cleanint[i]),
				      fs->lfs_cleanint[i], sn, tbn));
				DLOG((DLOG_CLEAN,
				      "lfs_strategy: sleeping on ino %d lbn %"
				      PRId64 "\n", ip->i_number, bp->b_lblkno));
				mutex_enter(&lfs_lock);
				if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
					/*
					 * Cleaner can't wait for itself.
					 * Instead, wait for the blocks
					 * to be written to disk.
					 * XXX we need pribio in the test
					 * XXX here.
					 */
					mtsleep(&fs->lfs_iocount,
						(PRIBIO + 1) | PNORELOCK,
						"clean2", hz/10 + 1,
						&lfs_lock);
					slept = 1;
					++loopcount;
					break;
				} else if (fs->lfs_seglock) {
					mtsleep(&fs->lfs_seglock,
						(PRIBIO + 1) | PNORELOCK,
						"clean1", 0,
						&lfs_lock);
					slept = 1;
					break;
				}
				mutex_exit(&lfs_lock);
			}
		}
		mutex_enter(&lfs_lock);
		if (loopcount > MAXLOOP) {
			printf("lfs_strategy: breaking out of clean2 loop\n");
			break;
		}
	}
	mutex_exit(&lfs_lock);

	vp = ip->i_devvp;
	return VOP_STRATEGY(vp, bp);
}

/*
 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
 * Technically this is a checkpoint (the on-disk state is valid)
 * even though we are leaving out all the file data.
 */
int
lfs_flush_dirops(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int flags = 0;
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(fs->lfs_nadirop == 0);

	if (fs->lfs_ronly)
		return EROFS;

	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	lfs_imtime(fs);
	lfs_seglock(fs, flags);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to get dirops out of the way.
	 * Only write dirops, and don't flush files' pages, only
	 * blocks from the directories.
	 *
	 * We don't need to vref these files because they are
	 * dirops and so hold an extra reference until the
	 * segunlock clears them of that status.
	 *
	 * We don't need to check for IN_ADIROP because we know that
	 * no dirops are active.
	 *
	 */
	mutex_enter(&lfs_lock);
	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		mutex_exit(&lfs_lock);
		vp = ITOV(ip);
		mutex_enter(vp->v_interlock);

		KASSERT((ip->i_flag & IN_ADIROP) == 0);
		KASSERT(vp->v_uflag & VU_DIROP);
		KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0);

		/*
		 * All writes to directories come from dirops; all
		 * writes to files' direct blocks go through the page
		 * cache, which we're not touching.  Reads to files
		 * and/or directories will not be affected by writing
		 * directory blocks inodes and file inodes.  So we don't
		 * really need to lock.
		 */
		if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
			mutex_exit(vp->v_interlock);
			mutex_enter(&lfs_lock);
			continue;
		}
		mutex_exit(vp->v_interlock);
		/* XXX see below
		 * waslocked = VOP_ISLOCKED(vp);
		 */
		if (vp->v_type != VREG &&
		    ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) {
			error = lfs_writefile(fs, sp, vp);
			if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
			    !(ip->i_flag & IN_ALLMOD)) {
				mutex_enter(&lfs_lock);
				LFS_SET_UINO(ip, IN_MODIFIED);
				mutex_exit(&lfs_lock);
			}
			if (error && (sp->seg_flags & SEGM_SINGLE)) {
				mutex_enter(&lfs_lock);
				error = EAGAIN;
				break;
			}
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		error = lfs_writeinode(fs, sp, ip);
		mutex_enter(&lfs_lock);
		if (error && (sp->seg_flags & SEGM_SINGLE)) {
			error = EAGAIN;
			break;
		}

		/*
		 * We might need to update these inodes again,
		 * for example, if they have data blocks to write.
		 * Make sure that after this flush, they are still
		 * marked IN_MODIFIED so that we don't forget to
		 * write them.
		 */
		/* XXX only for non-directories? --KS */
		LFS_SET_UINO(ip, IN_MODIFIED);
	}
	mutex_exit(&lfs_lock);
	/* We've written all the dirops there are */
	((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
	lfs_finalize_fs_seguse(fs);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return error;
}

/*
 * Flush all vnodes for which the pagedaemon has requested pageouts.
 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
 * has just run, this would be an error).
 * If we have to skip a vnode
 * for any reason, just skip it; if we have to wait for the cleaner,
 * abort.  The writer daemon will call us again later.
 */
int
lfs_flush_pchain(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int error, error2;

	ASSERT_NO_SEGLOCK(fs);

	if (fs->lfs_ronly)
		return EROFS;

	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	/* Get dirops out of the way */
	if ((error = lfs_flush_dirops(fs)) != 0)
		return error;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
	 */
	lfs_imtime(fs);
	lfs_seglock(fs, 0);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to clear pageout requests.
	 * Only write non-dirop files that are in the pageout queue.
	 * We're very conservative about what we write; we want to be
	 * fast and async.
	 */
	mutex_enter(&lfs_lock);
    top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_pchain);
		vp = ITOV(ip);

		if (!(ip->i_flags & IN_PAGING))
			goto top;

		mutex_enter(vp->v_interlock);
		if (vdead_check(vp, VDEAD_NOWAIT) != 0 ||
		    (vp->v_uflag & VU_DIROP) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		if (vp->v_type != VREG) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		if (lfs_vref(vp))
			continue;
		mutex_exit(&lfs_lock);

		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_RETRY) != 0) {
			lfs_vunref(vp);
			mutex_enter(&lfs_lock);
			continue;
		}

		error = lfs_writefile(fs, sp, vp);
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_flag & IN_ALLMOD)) {
			mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
			mutex_exit(&lfs_lock);
		}
		KDASSERT(ip->i_number != LFS_IFILE_INUM);
		error2 = lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		lfs_vunref(vp);

		if (error == EAGAIN || error2 == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return 0;
}

/*
 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
 */
int
lfs_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct timeval tv;
	struct timeval *tvp;
	BLOCK_INFO *blkiov;
	CLEANERINFO *cip;
	SEGUSE *sup;
	int blkcnt, error;
	size_t fh_size;
	struct lfs_fcntl_markv blkvp;
	struct lwp *l;
	fsid_t *fsidp;
	struct lfs *fs;
	struct buf *bp;
	fhandle_t *fhp;
	daddr_t off;
	int oclean;

	/* Only respect LFS fcntls on fs root or Ifile */
	if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
	    VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
		return ulfs_fcntl(v);
	}

	/* Avoid locking a draining lock */
	if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
		return ESHUTDOWN;
	}

	/* LFS control and monitoring fcntls are available only to root */
	l = curlwp;
	if (((ap->a_command & 0xff00) >> 8) == 'L' &&
	    (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
		return (error);

	fs = VTOI(ap->a_vp)->i_lfs;
	fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;

	error = 0;
	switch ((int)ap->a_command) {
	    case LFCNSEGWAITALL_COMPAT_50:
	    case LFCNSEGWAITALL_COMPAT:
		fsidp = NULL;
		/* FALLTHROUGH */
	    case LFCNSEGWAIT_COMPAT_50:
	    case LFCNSEGWAIT_COMPAT:
	    {
		struct timeval50 *tvp50
			= (struct timeval50 *)ap->a_data;
		timeval50_to_timeval(tvp50, &tv);
		tvp = &tv;
	    }
		goto segwait_common;
	    case LFCNSEGWAITALL:
		fsidp = NULL;
		/* FALLTHROUGH */
	    case LFCNSEGWAIT:
		tvp = (struct timeval *)ap->a_data;
	    segwait_common:
		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);

		error = lfs_segwait(fsidp, tvp);

		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			wakeup(&fs->lfs_sleepers);
		mutex_exit(&lfs_lock);
		return error;

	    case LFCNBMAPV:
	    case LFCNMARKV:
		blkvp = *(struct lfs_fcntl_markv *)ap->a_data;

		blkcnt = blkvp.blkcnt;
		if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
			return (EINVAL);
		blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
		if ((error = copyin(blkvp.blkiov, blkiov,
		     blkcnt * sizeof(BLOCK_INFO))) != 0) {
			lfs_free(fs, blkiov, LFS_NB_BLKIOV);
			return error;
		}

		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);
		if (ap->a_command == LFCNBMAPV)
			error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt);
		else /* LFCNMARKV */
			error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt);
		if (error == 0)
			error = copyout(blkiov, blkvp.blkiov,
					blkcnt * sizeof(BLOCK_INFO));
		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			wakeup(&fs->lfs_sleepers);
		mutex_exit(&lfs_lock);
		lfs_free(fs, blkiov, LFS_NB_BLKIOV);
		return error;

	    case LFCNRECLAIM:
		/*
		 * Flush dirops and write Ifile, allowing empty segments
		 * to be immediately reclaimed.
		 */
		lfs_writer_enter(fs, "pndirop");
		off = fs->lfs_offset;
		lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
		lfs_flush_dirops(fs);
		LFS_CLEANERINFO(cip, fs, bp);
		oclean = cip->clean;
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
		lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
		fs->lfs_sp->seg_flags |= SEGM_PROT;
		lfs_segunlock(fs);
		lfs_writer_leave(fs);

#ifdef DEBUG
		LFS_CLEANERINFO(cip, fs, bp);
		DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
		      " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
		      fs->lfs_offset - off, cip->clean - oclean,
		      fs->lfs_activesb));
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
#else
		__USE(oclean);
		__USE(off);
#endif

		return 0;

	    case LFCNIFILEFH_COMPAT:
		/* Return the filehandle of the Ifile */
		if ((error = kauth_authorize_system(l->l_cred,
		    KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0)
			return (error);
		fhp = (struct fhandle *)ap->a_data;
		fhp->fh_fsid = *fsidp;
		fh_size = 16;	/* former VFS_MAXFIDSIZ */
		return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);

	    case LFCNIFILEFH_COMPAT2:
	    case LFCNIFILEFH:
		/* Return the filehandle of the Ifile */
		fhp = (struct fhandle *)ap->a_data;
		fhp->fh_fsid = *fsidp;
		fh_size = sizeof(struct lfs_fhandle) -
			offsetof(fhandle_t, fh_fid);
		return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);

	    case LFCNREWIND:
		/* Move lfs_offset to the lowest-numbered segment */
		return lfs_rewind(fs, *(int *)ap->a_data);

	    case LFCNINVAL:
		/* Mark a segment SEGUSE_INVAL */
		LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp);
		if (sup->su_nbytes > 0) {
			brelse(bp, 0);
			lfs_unset_inval_all(fs);
			return EBUSY;
		}
		sup->su_flags |= SEGUSE_INVAL;
		VOP_BWRITE(bp->b_vp, bp);
		return 0;

	    case LFCNRESIZE:
		/* Resize the filesystem */
		return lfs_resize_fs(fs, *(int *)ap->a_data);

	    case LFCNWRAPSTOP:
	    case LFCNWRAPSTOP_COMPAT:
		/*
		 * Hold lfs_newseg at segment 0; if requested, sleep until
		 * the filesystem wraps around.  To support external agents
		 * (dump, fsck-based regression test) that need to look at
		 * a snapshot of the filesystem, without necessarily
		 * requiring that all fs activity stops.
		 */
		if (fs->lfs_stoplwp == curlwp)
			return EALREADY;

		mutex_enter(&lfs_lock);
		while (fs->lfs_stoplwp != NULL)
			cv_wait(&fs->lfs_stopcv, &lfs_lock);
		fs->lfs_stoplwp = curlwp;
		if (fs->lfs_nowrap == 0)
			log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt);
		++fs->lfs_nowrap;
		if (*(int *)ap->a_data == 1
		    || ap->a_command == LFCNWRAPSTOP_COMPAT) {
			log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n");
			error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
				"segwrap", 0, &lfs_lock);
			log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n");
			if (error) {
				lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
			}
		}
		mutex_exit(&lfs_lock);
		return 0;

	    case LFCNWRAPGO:
	    case LFCNWRAPGO_COMPAT:
		/*
		 * Having done its work, the agent wakes up the writer.
		 * If the argument is 1, it sleeps until a new segment
		 * is selected.
		 */
		mutex_enter(&lfs_lock);
		error = lfs_wrapgo(fs, VTOI(ap->a_vp),
				   ap->a_command == LFCNWRAPGO_COMPAT ?
				   1 :
				   *((int *)ap->a_data));
		mutex_exit(&lfs_lock);
		return error;

	    case LFCNWRAPPASS:
		if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
			return EALREADY;
		mutex_enter(&lfs_lock);
		if (fs->lfs_stoplwp != curlwp) {
			mutex_exit(&lfs_lock);
			return EALREADY;
		}
		if (fs->lfs_nowrap == 0) {
			mutex_exit(&lfs_lock);
			return EBUSY;
		}
		fs->lfs_wrappass = 1;
		wakeup(&fs->lfs_wrappass);
		/* Wait for the log to wrap, if asked */
		if (*(int *)ap->a_data) {
			mutex_enter(ap->a_vp->v_interlock);
			if (lfs_vref(ap->a_vp) != 0)
				panic("LFCNWRAPPASS: lfs_vref failed");
			VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
			log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n");
			error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
				"segwrap", 0, &lfs_lock);
			log(LOG_NOTICE, "LFCNPASS done waiting\n");
			VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
			lfs_vunref(ap->a_vp);
		}
		mutex_exit(&lfs_lock);
		return error;

	    case LFCNWRAPSTATUS:
		mutex_enter(&lfs_lock);
		*(int *)ap->a_data = fs->lfs_wrapstatus;
		mutex_exit(&lfs_lock);
		return 0;

	    default:
		return ulfs_fcntl(v);
	}
	return 0;
}

int
lfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
	    (ap->a_access_type & VM_PROT_WRITE) != 0) {
		return EPERM;
	}
	if ((ap->a_access_type & VM_PROT_WRITE) != 0) {
		mutex_enter(&lfs_lock);
		LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
		mutex_exit(&lfs_lock);
	}

	/*
	 * we're relying on the fact that genfs_getpages() always reads in
	 * entire filesystem blocks.
	 */
	return genfs_getpages(v);
}

/*
 * Wait for a page to become unbusy, possibly printing diagnostic messages
 * as well.
 *
 * Called with vp->v_interlock held; return with it held.
 */
static void
wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
	if ((pg->flags & PG_BUSY) == 0)
		return;		/* Nothing to wait for! */

#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
	static struct vm_page *lastpg;

	if (label != NULL && pg != lastpg) {
		if (pg->owner_tag) {
			printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
			       curproc->p_pid, curlwp->l_lid, label,
			       pg, pg->owner, pg->lowner, pg->owner_tag);
		} else {
			printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
			       curproc->p_pid, curlwp->l_lid, label, pg);
		}
	}
	lastpg = pg;
#endif

	pg->flags |= PG_WANTED;
	UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0);
	mutex_enter(vp->v_interlock);
}

/*
 * This routine is called by lfs_putpages() when it can't complete the
 * write because a page is busy.  This means that either (1) someone,
 * possibly the pagedaemon, is looking at this page, and will give it up
 * presently; or (2) we ourselves are holding the page busy in the
 * process of being written (either gathered or actually on its way to
 * disk).
 * We don't need to give up the segment lock, but we might need
 * to call lfs_writeseg() to expedite the page's journey to disk.
 *
 * Called with vp->v_interlock held; return with it held.
 */
/* #define BUSYWAIT */
static void
write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg,
	       int seglocked, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
#ifndef BUSYWAIT
	struct inode *ip = VTOI(vp);
	struct segment *sp = fs->lfs_sp;
	int count = 0;

	if (pg == NULL)
		return;

	while (pg->flags & PG_BUSY &&
	    pg->uobject == &vp->v_uobj) {
		mutex_exit(vp->v_interlock);
		if (sp->cbpp - sp->bpp > 1) {
			/* Write gathered pages */
			lfs_updatemeta(sp);
			lfs_release_finfo(fs);
			(void) lfs_writeseg(fs, sp);

			/*
			 * Reinitialize FIP
			 */
			KASSERT(sp->vp == vp);
			lfs_acquire_finfo(fs, ip->i_number,
					  ip->i_gen);
		}
		++count;
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, pg, label);
	}
	if (label != NULL && count > 1) {
		DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
		      curproc->p_pid, label, (count > 0 ? "looping, " : ""),
		      count));
	}
#else
	preempt(1);
#endif
	KASSERT(mutex_owned(vp->v_interlock));
}

/*
 * Make sure that for all pages in every block in the given range,
 * either all are dirty or all are clean.  If any of the pages
 * we've seen so far are dirty, put the vnode on the paging chain,
 * and mark it IN_PAGING.
 *
 * If checkfirst != 0, don't check all the pages but return at the
 * first dirty page.
 */
static int
check_dirty(struct lfs *fs, struct vnode *vp,
	    off_t startoffset, off_t endoffset, off_t blkeof,
	    int flags, int checkfirst, struct vm_page **pgp)
{
	int by_list;
	struct vm_page *curpg = NULL; /* XXX: gcc */
	struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg;
	off_t soff = 0; /* XXX: gcc */
	voff_t off;
	int i;
	int nonexistent;
	int any_dirty;	/* number of dirty pages */
	int dirty;	/* number of dirty pages in a block */
	int tdirty;
	int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT;
	int pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));
	ASSERT_MAYBE_SEGLOCK(fs);
  top:
	by_list = (vp->v_uobj.uo_npages <=
		   ((endoffset - startoffset) >> PAGE_SHIFT) *
		   UVM_PAGE_TREE_PENALTY);
	any_dirty = 0;

	if (by_list) {
		curpg = TAILQ_FIRST(&vp->v_uobj.memq);
	} else {
		soff = startoffset;
	}
	while (by_list || soff < MIN(blkeof, endoffset)) {
		if (by_list) {
			/*
			 * Find the first page in a block.  Skip
			 * blocks outside our area of interest or beyond
			 * the end of file.
			 */
			KASSERT(curpg == NULL
			    || (curpg->flags & PG_MARKER) == 0);
			if (pages_per_block > 1) {
				while (curpg &&
				    ((curpg->offset & fs->lfs_bmask) ||
				     curpg->offset >= vp->v_size ||
				     curpg->offset >= endoffset)) {
					curpg = TAILQ_NEXT(curpg, listq.queue);
					KASSERT(curpg == NULL ||
					    (curpg->flags & PG_MARKER) == 0);
				}
			}
			if (curpg == NULL)
				break;
			soff = curpg->offset;
		}

		/*
		 * Mark all pages in extended range busy; find out if any
		 * of them are dirty.
		 */
		nonexistent = dirty = 0;
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			if (by_list && pages_per_block <= 1) {
				pgs[i] = pg = curpg;
			} else {
				off = soff + (i << PAGE_SHIFT);
				pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
				if (pg == NULL) {
					++nonexistent;
					continue;
				}
			}
			KASSERT(pg != NULL);

			/*
			 * If we're holding the segment lock, we can deadlock
			 * against a process that has our page and is waiting
			 * for the cleaner, while the cleaner waits for the
			 * segment lock.  Just bail in that case.
			 */
			if ((pg->flags & PG_BUSY) &&
			    (pagedaemon || LFS_SEGLOCK_HELD(fs))) {
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
				if (pgp)
					*pgp = pg;
				KASSERT(mutex_owned(vp->v_interlock));
				return -1;
			}

			while (pg->flags & PG_BUSY) {
				wait_for_page(vp, pg, NULL);
				KASSERT(mutex_owned(vp->v_interlock));
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				KASSERT(mutex_owned(vp->v_interlock));
				goto top;
			}
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "lfs_putpages");

			pmap_page_protect(pg, VM_PROT_NONE);
			tdirty = (pmap_clear_modify(pg) ||
				  (pg->flags & PG_CLEAN) == 0);
			dirty += tdirty;
		}
		if (pages_per_block > 0 && nonexistent >= pages_per_block) {
			if (by_list) {
				curpg = TAILQ_NEXT(curpg, listq.queue);
			} else {
				soff += fs->lfs_bsize;
			}
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);
		KASSERT(mutex_owned(vp->v_interlock));

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, wire them so that
		 * the pagedaemon doesn't bother us about them while
		 * they're on their way to disk.
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			pg = pgs[i];
			KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
			KASSERT(pg->flags & PG_BUSY);
			if (dirty) {
				pg->flags &= ~PG_CLEAN;
				if (flags & PGO_FREE) {
					/*
					 * Wire the page so that
					 * pdaemon doesn't see it again.
					 */
					mutex_enter(&uvm_pageqlock);
					uvm_pagewire(pg);
					mutex_exit(&uvm_pageqlock);

					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
				}
			}
			if (pg->flags & PG_WANTED)
				wakeup(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (checkfirst && any_dirty)
			break;

		if (by_list) {
			curpg = TAILQ_NEXT(curpg, listq.queue);
		} else {
			soff += MAX(PAGE_SIZE, fs->lfs_bsize);
		}
	}

	KASSERT(mutex_owned(vp->v_interlock));
	return any_dirty;
}

/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 *
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     If VOP_PUTPAGES is called without the seglock held, we simply take
 *     the seglock and let lfs_segunlock wait for us.
 *     XXX There might be a bad situation if we have to flush a vnode while
 *     XXX lfs_markv is in operation.
 *     XXX As of this writing we panic in this case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 *
 * (2) We are called with vp->v_interlock held; we must return with it
 *     released.
 *
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
 *     into has been set up for us by lfs_writefile.  If not, we will
 *     have to handle allocating and/or freeing an finfo entry.
 *
 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
 */

/* How many times to loop before we should start to worry */
#define TOOMANY 4

int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t off, max_endoffset;
	bool seglocked, sync, pagedaemon, reclaim;
	struct vm_page *pg, *busypg;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
	int oreclaim = 0;
	int donewriting = 0;
#ifdef DEBUG
	int debug_n_again, debug_n_dirtyclean;
#endif

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO) != 0;
	reclaim = (ap->a_flags & PGO_RECLAIM) != 0;
	pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));

	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/*
	 * If there are no pages, don't do anything.
	 */
	if (vp->v_uobj.uo_npages == 0) {
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
		mutex_exit(vp->v_interlock);

		/* Remove us from paging queue, if we were on it */
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);

		KASSERT(!mutex_owned(vp->v_interlock));
		return 0;
	}

	blkeof = lfs_blkroundup(fs, ip->i_size);

	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the vnode is being reclaimed or the request
	 * is synchronous.  (If the request is sync, it comes from
	 * lfs_truncate.)
	 *
	 * To avoid being flooded with this request, make these pages
	 * look "active".
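	 * (For example, with 8K blocks and 4K pages, a 2000-byte file
	 * still keeps the page at offset 4096 resident even though it
	 * lies entirely beyond EOF; freeing just that page would leave
	 * the final block only partially resident, so the request is
	 * ignored and the page is marked active instead.)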
	 */
	if (!sync && !reclaim &&
	    ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
		origoffset = ap->a_offlo;
		for (off = origoffset; off < blkeof; off += fs->lfs_bsize) {
			pg = uvm_pagelookup(&vp->v_uobj, off);
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0,
				    "lfsput2", 0);
				mutex_enter(vp->v_interlock);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
		}
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			mutex_exit(vp->v_interlock);
			return 0;
		}
	}

	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(fs->lfs_bmask);
	max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift)
	    << fs->lfs_bshift;

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(lfs_blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		mutex_exit(vp->v_interlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		    PRId64 "\n", startoffset));
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT)) {
		DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
		    vp, (int)ip->i_number, ap->a_flags));
		int r = genfs_putpages(v);
		KASSERT(!mutex_owned(vp->v_interlock));
		return r;
	}

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;
		KASSERT(mutex_owned(vp->v_interlock));

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			mutex_exit(vp->v_interlock);
			return EDEADLK;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags & ~PGO_SYNCIO, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return r;
		}

		/* One of the pages was busy.  Start over.
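		 * genfs_do_putpages reported the offending page through
		 * busypg; wait for it to come unbusy before rechecking
		 * for dirty pages.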
		 */
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while(1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
		    debug_n_dirtyclean));
#endif

	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * XXX We must drop the vp->interlock before taking the lfs_lock or we
	 * get a nasty deadlock with lfs_flush_pchain().
	 */
	if (pagedaemon) {
		mutex_exit(vp->v_interlock);
		mutex_enter(&lfs_lock);
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		KASSERT(!mutex_owned(vp->v_interlock));
		return EWOULDBLOCK;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));

		lfs_writer_enter(fs, "ppdirop");

		/* Note if we hold the vnode locked */
		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
		{
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n"));
		} else {
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n"));
		}
		mutex_exit(vp->v_interlock);

		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);

		mutex_enter(vp->v_interlock);
		lfs_writer_leave(fs);

		/* The flush will have cleaned out this vnode as well,
		   no need to do more to it. */
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
  get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		mutex_exit(vp->v_interlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ?
		    SEGM_SYNC : 0));
		if (error != 0) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return error;
		}
		mutex_enter(vp->v_interlock);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/* Note segments written by reclaim; only for debugging */
	if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
		sp->seg_flags |= SEGM_RECLAIM;
		fs->lfs_reclino = ip->i_number;
	}

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		KASSERT(mutex_owned(vp->v_interlock));
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 0, &busypg) < 0) {
			mutex_exit(vp->v_interlock);
			/* XXX why? --ks */
			mutex_enter(vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				mutex_exit(vp->v_interlock);
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
				mutex_enter(vp->v_interlock);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		KASSERT(!mutex_owned(&uvm_pageqlock));
		oreclaim = (ap->a_flags & PGO_RECLAIM);
		ap->a_flags &= ~PGO_RECLAIM;
		error = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags, &busypg);
		ap->a_flags |= oreclaim;

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			    " %d ino %d off %x (seg %d)\n", error,
			    ip->i_number, fs->lfs_offset,
			    lfs_dtosn(fs, fs->lfs_offset)));

			if (oreclaim) {
				mutex_enter(vp->v_interlock);
				write_and_wait(fs, vp, busypg, seglocked, "again");
				mutex_exit(vp->v_interlock);
			} else {
				if ((sp->seg_flags & SEGM_SINGLE) &&
				    fs->lfs_curseg != fs->lfs_startseg)
					donewriting = 1;
			}
		} else if (error) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			    " %d ino %d off %x (seg %d)\n", error,
			    (int)ip->i_number, fs->lfs_offset,
			    lfs_dtosn(fs, fs->lfs_offset)));
		}
		/* genfs_do_putpages loses the interlock */
#ifdef DEBUG
		++debug_n_again;
#endif
		if (oreclaim && error == EAGAIN) {
			DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
			    vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
			mutex_enter(vp->v_interlock);
		}
		if (error == EDEADLK)
			mutex_enter(vp->v_interlock);
	} while (error == EDEADLK || (oreclaim && error == EAGAIN));
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
#endif

	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked && !donewriting) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(sp);
	KASSERT(sp->vp == vp);
	sp->vp = NULL;

	/*
	 * If we were called from lfs_writefile, we don't need to clean up
	 * the FIP or unlock the segment lock.  We're done.
	 */
	if (seglocked) {
		KASSERT(!mutex_owned(vp->v_interlock));
		return error;
	}

	/* Clean up FIP and send it to disk. */
	lfs_release_finfo(fs);
	lfs_writeseg(fs, fs->lfs_sp);

	/*
	 * Remove us from paging queue if we wrote all our pages.
	 */
	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0) {
			DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
			    " num %d\n", ip->i_number, vp->v_numoutput));
			cv_wait(&vp->v_cv, vp->v_interlock);
		}
		mutex_exit(vp->v_interlock);
	}
	KASSERT(!mutex_owned(vp->v_interlock));
	return error;
}

/*
 * Return the last logical file offset that should be written for this file
 * if we're doing a write that ends at "size".  If writing, we need to know
 * about sizes on disk, i.e. fragments if there are any; if reading, we need
 * to know about entire blocks.
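 * (For example, with 8K blocks and 1K fragments, a write ending at
 * byte 5000 of a short file yields an on-disk end of 5120, the next
 * fragment boundary, but 8192 when GOP_SIZE_MEM asks about whole
 * blocks in memory.)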
 */
void
lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	daddr_t olbn, nlbn;

	olbn = lfs_lblkno(fs, ip->i_size);
	nlbn = lfs_lblkno(fs, size);
	if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
		*eobp = lfs_fragroundup(fs, size);
	} else {
		*eobp = lfs_blkroundup(fs, size);
	}
}

#ifdef DEBUG
void lfs_dump_vop(void *);

void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;

#ifdef DDB
	vfs_vnode_print(ap->a_vp, 0, printf);
#endif
	lfs_dump_dinode(VTOI(ap->a_vp)->i_din.ffs1_din);
}
#endif

int
lfs_mmap(void *v)
{
	struct vop_mmap_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;

	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
		return EOPNOTSUPP;
	return ulfs_mmap(v);
}

static int
lfs_openextattr(void *v)
{
	struct vop_openextattr_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct inode *ip = VTOI(ap->a_vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_closeextattr(void *v)
{
	struct vop_closeextattr_args /* {
		struct vnode *a_vp;
		int a_commit;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct inode *ip = VTOI(ap->a_vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_getextattr(void *v)
{
	struct vop_getextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;
	int error;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		fstrans_start(vp->v_mount, FSTRANS_SHARED);
		error = ulfs_getextattr(ap);
		fstrans_done(vp->v_mount);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems.
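	 * (As with lfs_setextattr, lfs_listextattr and lfs_deleteextattr
	 * below, ULFS1 requests are handed to the ulfs extattr code under
	 * fstrans when the kernel is built with LFS_EXTATTR; nothing is
	 * implemented for ULFS2 yet.)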
	 */
	return (EOPNOTSUPP);
}

static int
lfs_setextattr(void *v)
{
	struct vop_setextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;
	int error;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		fstrans_start(vp->v_mount, FSTRANS_SHARED);
		error = ulfs_setextattr(ap);
		fstrans_done(vp->v_mount);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_listextattr(void *v)
{
	struct vop_listextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;
	int error;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		fstrans_start(vp->v_mount, FSTRANS_SHARED);
		error = ulfs_listextattr(ap);
		fstrans_done(vp->v_mount);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_deleteextattr(void *v)
{
	struct vop_deleteextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct ulfsmount *ump = ip->i_ump;
	//struct lfs *fs = ip->i_lfs;
	int error;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		fstrans_start(vp->v_mount, FSTRANS_SHARED);
		error = ulfs_deleteextattr(ap);
		fstrans_done(vp->v_mount);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}