1 /* $NetBSD: lfs_vnops.c,v 1.293 2015/09/21 01:24:23 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 60 */ 61 62 /* from NetBSD: ufs_vnops.c,v 1.213 2013/06/08 05:47:02 kardel Exp */ 63 /*- 64 * Copyright (c) 2008 The NetBSD Foundation, Inc. 65 * All rights reserved. 66 * 67 * This code is derived from software contributed to The NetBSD Foundation 68 * by Wasabi Systems, Inc. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 89 * POSSIBILITY OF SUCH DAMAGE. 90 */ 91 /* 92 * Copyright (c) 1982, 1986, 1989, 1993, 1995 93 * The Regents of the University of California. All rights reserved. 94 * (c) UNIX System Laboratories, Inc. 95 * All or some portions of this file are derived from material licensed 96 * to the University of California by American Telephone and Telegraph 97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 98 * the permission of UNIX System Laboratories, Inc. 99 * 100 * Redistribution and use in source and binary forms, with or without 101 * modification, are permitted provided that the following conditions 102 * are met: 103 * 1. Redistributions of source code must retain the above copyright 104 * notice, this list of conditions and the following disclaimer. 105 * 2. Redistributions in binary form must reproduce the above copyright 106 * notice, this list of conditions and the following disclaimer in the 107 * documentation and/or other materials provided with the distribution. 108 * 3. Neither the name of the University nor the names of its contributors 109 * may be used to endorse or promote products derived from this software 110 * without specific prior written permission. 111 * 112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 122 * SUCH DAMAGE. 123 * 124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95 125 */ 126 127 #include <sys/cdefs.h> 128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.293 2015/09/21 01:24:23 dholland Exp $"); 129 130 #ifdef _KERNEL_OPT 131 #include "opt_compat_netbsd.h" 132 #include "opt_uvm_page_trkown.h" 133 #endif 134 135 #include <sys/param.h> 136 #include <sys/systm.h> 137 #include <sys/namei.h> 138 #include <sys/resourcevar.h> 139 #include <sys/kernel.h> 140 #include <sys/file.h> 141 #include <sys/stat.h> 142 #include <sys/buf.h> 143 #include <sys/proc.h> 144 #include <sys/mount.h> 145 #include <sys/vnode.h> 146 #include <sys/pool.h> 147 #include <sys/signalvar.h> 148 #include <sys/kauth.h> 149 #include <sys/syslog.h> 150 #include <sys/fstrans.h> 151 152 #include <miscfs/fifofs/fifo.h> 153 #include <miscfs/genfs/genfs.h> 154 #include <miscfs/specfs/specdev.h> 155 156 #include <ufs/lfs/ulfs_inode.h> 157 #include <ufs/lfs/ulfsmount.h> 158 #include <ufs/lfs/ulfs_bswap.h> 159 #include <ufs/lfs/ulfs_extern.h> 160 161 #include <uvm/uvm.h> 162 #include <uvm/uvm_pmap.h> 163 #include <uvm/uvm_stat.h> 164 #include <uvm/uvm_pager.h> 165 166 #include <ufs/lfs/lfs.h> 167 #include <ufs/lfs/lfs_accessors.h> 168 #include <ufs/lfs/lfs_kernel.h> 169 #include <ufs/lfs/lfs_extern.h> 170 171 extern pid_t lfs_writer_daemon; 172 int lfs_ignore_lazy_sync = 1; 173 174 static int lfs_openextattr(void *v); 175 static int lfs_closeextattr(void *v); 176 static int lfs_getextattr(void *v); 177 static int lfs_setextattr(void *v); 178 static int lfs_listextattr(void *v); 179 static int lfs_deleteextattr(void *v); 180 181 /* Global vfs data structures for lfs. */ 182 int (**lfs_vnodeop_p)(void *); 183 const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { 184 { &vop_default_desc, vn_default_error }, 185 { &vop_lookup_desc, ulfs_lookup }, /* lookup */ 186 { &vop_create_desc, lfs_create }, /* create */ 187 { &vop_whiteout_desc, ulfs_whiteout }, /* whiteout */ 188 { &vop_mknod_desc, lfs_mknod }, /* mknod */ 189 { &vop_open_desc, ulfs_open }, /* open */ 190 { &vop_close_desc, lfs_close }, /* close */ 191 { &vop_access_desc, ulfs_access }, /* access */ 192 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 193 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 194 { &vop_read_desc, lfs_read }, /* read */ 195 { &vop_write_desc, lfs_write }, /* write */ 196 { &vop_fallocate_desc, genfs_eopnotsupp }, /* fallocate */ 197 { &vop_fdiscard_desc, genfs_eopnotsupp }, /* fdiscard */ 198 { &vop_ioctl_desc, ulfs_ioctl }, /* ioctl */ 199 { &vop_fcntl_desc, lfs_fcntl }, /* fcntl */ 200 { &vop_poll_desc, ulfs_poll }, /* poll */ 201 { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ 202 { &vop_revoke_desc, ulfs_revoke }, /* revoke */ 203 { &vop_mmap_desc, lfs_mmap }, /* mmap */ 204 { &vop_fsync_desc, lfs_fsync }, /* fsync */ 205 { &vop_seek_desc, ulfs_seek }, /* seek */ 206 { &vop_remove_desc, lfs_remove }, /* remove */ 207 { &vop_link_desc, lfs_link }, /* link */ 208 { &vop_rename_desc, lfs_rename }, /* rename */ 209 { &vop_mkdir_desc, lfs_mkdir }, /* mkdir */ 210 { &vop_rmdir_desc, lfs_rmdir }, /* rmdir */ 211 { &vop_symlink_desc, lfs_symlink }, /* symlink */ 212 { &vop_readdir_desc, ulfs_readdir }, /* readdir */ 213 { &vop_readlink_desc, ulfs_readlink }, /* readlink */ 214 { &vop_abortop_desc, ulfs_abortop }, /* abortop */ 215 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 216 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 217 { &vop_lock_desc, ulfs_lock }, /* lock */ 218 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 219 { &vop_bmap_desc, ulfs_bmap }, /* bmap */ 220 { &vop_strategy_desc, lfs_strategy }, /* strategy */ 221 { &vop_print_desc, ulfs_print }, /* print */ 222 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 223 { &vop_pathconf_desc, ulfs_pathconf }, /* pathconf */ 224 { &vop_advlock_desc, ulfs_advlock }, /* advlock */ 225 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 226 { &vop_getpages_desc, lfs_getpages }, /* getpages */ 227 { &vop_putpages_desc, lfs_putpages }, /* putpages */ 228 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 229 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 230 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 231 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 232 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 233 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 234 { NULL, NULL } 235 }; 236 const struct vnodeopv_desc lfs_vnodeop_opv_desc = 237 { &lfs_vnodeop_p, lfs_vnodeop_entries }; 238 239 int (**lfs_specop_p)(void *); 240 const struct vnodeopv_entry_desc lfs_specop_entries[] = { 241 { &vop_default_desc, vn_default_error }, 242 { &vop_lookup_desc, spec_lookup }, /* lookup */ 243 { &vop_create_desc, spec_create }, /* create */ 244 { &vop_mknod_desc, spec_mknod }, /* mknod */ 245 { &vop_open_desc, spec_open }, /* open */ 246 { &vop_close_desc, lfsspec_close }, /* close */ 247 { &vop_access_desc, ulfs_access }, /* access */ 248 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 249 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 250 { &vop_read_desc, ulfsspec_read }, /* read */ 251 { &vop_write_desc, ulfsspec_write }, /* write */ 252 { &vop_fallocate_desc, spec_fallocate }, /* fallocate */ 253 { &vop_fdiscard_desc, spec_fdiscard }, /* fdiscard */ 254 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 255 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */ 256 { &vop_poll_desc, spec_poll }, /* poll */ 257 { &vop_kqfilter_desc, spec_kqfilter }, /* kqfilter */ 258 { &vop_revoke_desc, spec_revoke }, /* revoke */ 259 { &vop_mmap_desc, spec_mmap }, /* mmap */ 260 { &vop_fsync_desc, spec_fsync }, /* fsync */ 261 { &vop_seek_desc, spec_seek }, /* seek */ 262 { &vop_remove_desc, spec_remove }, /* remove */ 263 { &vop_link_desc, spec_link }, /* link */ 264 { &vop_rename_desc, spec_rename }, /* rename */ 265 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 266 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 267 { &vop_symlink_desc, spec_symlink }, /* symlink */ 268 { &vop_readdir_desc, spec_readdir }, /* readdir */ 269 { &vop_readlink_desc, spec_readlink }, /* readlink */ 270 { &vop_abortop_desc, spec_abortop }, /* abortop */ 271 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 272 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 273 { &vop_lock_desc, ulfs_lock }, /* lock */ 274 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 275 { &vop_bmap_desc, spec_bmap }, /* bmap */ 276 { &vop_strategy_desc, spec_strategy }, /* strategy */ 277 { &vop_print_desc, ulfs_print }, /* print */ 278 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 279 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 280 { &vop_advlock_desc, spec_advlock }, /* advlock */ 281 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 282 { &vop_getpages_desc, spec_getpages }, /* getpages */ 283 { &vop_putpages_desc, spec_putpages }, /* putpages */ 284 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 285 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 286 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 287 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 288 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 289 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 290 { NULL, NULL } 291 }; 292 const struct vnodeopv_desc lfs_specop_opv_desc = 293 { &lfs_specop_p, lfs_specop_entries }; 294 295 int (**lfs_fifoop_p)(void *); 296 const struct vnodeopv_entry_desc lfs_fifoop_entries[] = { 297 { &vop_default_desc, vn_default_error }, 298 { &vop_lookup_desc, vn_fifo_bypass }, /* lookup */ 299 { &vop_create_desc, vn_fifo_bypass }, /* create */ 300 { &vop_mknod_desc, vn_fifo_bypass }, /* mknod */ 301 { &vop_open_desc, vn_fifo_bypass }, /* open */ 302 { &vop_close_desc, lfsfifo_close }, /* close */ 303 { &vop_access_desc, ulfs_access }, /* access */ 304 { &vop_getattr_desc, lfs_getattr }, /* getattr */ 305 { &vop_setattr_desc, lfs_setattr }, /* setattr */ 306 { &vop_read_desc, ulfsfifo_read }, /* read */ 307 { &vop_write_desc, ulfsfifo_write }, /* write */ 308 { &vop_fallocate_desc, vn_fifo_bypass }, /* fallocate */ 309 { &vop_fdiscard_desc, vn_fifo_bypass }, /* fdiscard */ 310 { &vop_ioctl_desc, vn_fifo_bypass }, /* ioctl */ 311 { &vop_fcntl_desc, ulfs_fcntl }, /* fcntl */ 312 { &vop_poll_desc, vn_fifo_bypass }, /* poll */ 313 { &vop_kqfilter_desc, vn_fifo_bypass }, /* kqfilter */ 314 { &vop_revoke_desc, vn_fifo_bypass }, /* revoke */ 315 { &vop_mmap_desc, vn_fifo_bypass }, /* mmap */ 316 { &vop_fsync_desc, vn_fifo_bypass }, /* fsync */ 317 { &vop_seek_desc, vn_fifo_bypass }, /* seek */ 318 { &vop_remove_desc, vn_fifo_bypass }, /* remove */ 319 { &vop_link_desc, vn_fifo_bypass }, /* link */ 320 { &vop_rename_desc, vn_fifo_bypass }, /* rename */ 321 { &vop_mkdir_desc, vn_fifo_bypass }, /* mkdir */ 322 { &vop_rmdir_desc, vn_fifo_bypass }, /* rmdir */ 323 { &vop_symlink_desc, vn_fifo_bypass }, /* symlink */ 324 { &vop_readdir_desc, vn_fifo_bypass }, /* readdir */ 325 { &vop_readlink_desc, vn_fifo_bypass }, /* readlink */ 326 { &vop_abortop_desc, vn_fifo_bypass }, /* abortop */ 327 { &vop_inactive_desc, lfs_inactive }, /* inactive */ 328 { &vop_reclaim_desc, lfs_reclaim }, /* reclaim */ 329 { &vop_lock_desc, ulfs_lock }, /* lock */ 330 { &vop_unlock_desc, ulfs_unlock }, /* unlock */ 331 { &vop_bmap_desc, vn_fifo_bypass }, /* bmap */ 332 { &vop_strategy_desc, vn_fifo_bypass }, /* strategy */ 333 { &vop_print_desc, ulfs_print }, /* print */ 334 { &vop_islocked_desc, ulfs_islocked }, /* islocked */ 335 { &vop_pathconf_desc, vn_fifo_bypass }, /* pathconf */ 336 { &vop_advlock_desc, vn_fifo_bypass }, /* advlock */ 337 { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ 338 { &vop_putpages_desc, vn_fifo_bypass }, /* putpages */ 339 { &vop_openextattr_desc, lfs_openextattr }, /* openextattr */ 340 { &vop_closeextattr_desc, lfs_closeextattr }, /* closeextattr */ 341 { &vop_getextattr_desc, lfs_getextattr }, /* getextattr */ 342 { &vop_setextattr_desc, lfs_setextattr }, /* setextattr */ 343 { &vop_listextattr_desc, lfs_listextattr }, /* listextattr */ 344 { &vop_deleteextattr_desc, lfs_deleteextattr }, /* deleteextattr */ 345 { NULL, NULL } 346 }; 347 const struct vnodeopv_desc lfs_fifoop_opv_desc = 348 { &lfs_fifoop_p, lfs_fifoop_entries }; 349 350 #define LFS_READWRITE 351 #include <ufs/lfs/ulfs_readwrite.c> 352 #undef LFS_READWRITE 353 354 /* 355 * Synch an open file. 356 */ 357 /* ARGSUSED */ 358 int 359 lfs_fsync(void *v) 360 { 361 struct vop_fsync_args /* { 362 struct vnode *a_vp; 363 kauth_cred_t a_cred; 364 int a_flags; 365 off_t offlo; 366 off_t offhi; 367 } */ *ap = v; 368 struct vnode *vp = ap->a_vp; 369 int error, wait; 370 struct inode *ip = VTOI(vp); 371 struct lfs *fs = ip->i_lfs; 372 373 /* If we're mounted read-only, don't try to sync. */ 374 if (fs->lfs_ronly) 375 return 0; 376 377 /* If a removed vnode is being cleaned, no need to sync here. */ 378 if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0) 379 return 0; 380 381 /* 382 * Trickle sync simply adds this vnode to the pager list, as if 383 * the pagedaemon had requested a pageout. 384 */ 385 if (ap->a_flags & FSYNC_LAZY) { 386 if (lfs_ignore_lazy_sync == 0) { 387 mutex_enter(&lfs_lock); 388 if (!(ip->i_flags & IN_PAGING)) { 389 ip->i_flags |= IN_PAGING; 390 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, 391 i_lfs_pchain); 392 } 393 wakeup(&lfs_writer_daemon); 394 mutex_exit(&lfs_lock); 395 } 396 return 0; 397 } 398 399 /* 400 * If a vnode is bring cleaned, flush it out before we try to 401 * reuse it. This prevents the cleaner from writing files twice 402 * in the same partial segment, causing an accounting underflow. 403 */ 404 if (ap->a_flags & FSYNC_RECLAIM && ip->i_flags & IN_CLEANING) { 405 lfs_vflush(vp); 406 } 407 408 wait = (ap->a_flags & FSYNC_WAIT); 409 do { 410 mutex_enter(vp->v_interlock); 411 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 412 round_page(ap->a_offhi), 413 PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); 414 if (error == EAGAIN) { 415 mutex_enter(&lfs_lock); 416 mtsleep(&fs->lfs_availsleep, PCATCH | PUSER, 417 "lfs_fsync", hz / 100 + 1, &lfs_lock); 418 mutex_exit(&lfs_lock); 419 } 420 } while (error == EAGAIN); 421 if (error) 422 return error; 423 424 if ((ap->a_flags & FSYNC_DATAONLY) == 0) 425 error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0); 426 427 if (error == 0 && ap->a_flags & FSYNC_CACHE) { 428 int l = 0; 429 error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE, 430 curlwp->l_cred); 431 } 432 if (wait && !VPISEMPTY(vp)) 433 LFS_SET_UINO(ip, IN_MODIFIED); 434 435 return error; 436 } 437 438 /* 439 * Take IN_ADIROP off, then call ulfs_inactive. 440 */ 441 int 442 lfs_inactive(void *v) 443 { 444 struct vop_inactive_args /* { 445 struct vnode *a_vp; 446 } */ *ap = v; 447 448 lfs_unmark_vnode(ap->a_vp); 449 450 /* 451 * The Ifile is only ever inactivated on unmount. 452 * Streamline this process by not giving it more dirty blocks. 453 */ 454 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { 455 mutex_enter(&lfs_lock); 456 LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); 457 mutex_exit(&lfs_lock); 458 VOP_UNLOCK(ap->a_vp); 459 return 0; 460 } 461 462 #ifdef DEBUG 463 /* 464 * This might happen on unmount. 465 * XXX If it happens at any other time, it should be a panic. 466 */ 467 if (ap->a_vp->v_uflag & VU_DIROP) { 468 struct inode *ip = VTOI(ap->a_vp); 469 printf("lfs_inactive: inactivating VU_DIROP? ino = %d\n", (int)ip->i_number); 470 } 471 #endif /* DIAGNOSTIC */ 472 473 return ulfs_inactive(v); 474 } 475 476 int 477 lfs_set_dirop(struct vnode *dvp, struct vnode *vp) 478 { 479 struct lfs *fs; 480 int error; 481 482 KASSERT(VOP_ISLOCKED(dvp)); 483 KASSERT(vp == NULL || VOP_ISLOCKED(vp)); 484 485 fs = VTOI(dvp)->i_lfs; 486 487 ASSERT_NO_SEGLOCK(fs); 488 /* 489 * LFS_NRESERVE calculates direct and indirect blocks as well 490 * as an inode block; an overestimate in most cases. 491 */ 492 if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) 493 return (error); 494 495 restart: 496 mutex_enter(&lfs_lock); 497 if (fs->lfs_dirops == 0) { 498 mutex_exit(&lfs_lock); 499 lfs_check(dvp, LFS_UNUSED_LBN, 0); 500 mutex_enter(&lfs_lock); 501 } 502 while (fs->lfs_writer) { 503 error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, 504 "lfs_sdirop", 0, &lfs_lock); 505 if (error == EINTR) { 506 mutex_exit(&lfs_lock); 507 goto unreserve; 508 } 509 } 510 if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { 511 wakeup(&lfs_writer_daemon); 512 mutex_exit(&lfs_lock); 513 preempt(); 514 goto restart; 515 } 516 517 if (lfs_dirvcount > LFS_MAX_DIROP) { 518 DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " 519 "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); 520 if ((error = mtsleep(&lfs_dirvcount, 521 PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, 522 &lfs_lock)) != 0) { 523 goto unreserve; 524 } 525 goto restart; 526 } 527 528 ++fs->lfs_dirops; 529 /* fs->lfs_doifile = 1; */ /* XXX why? --ks */ 530 mutex_exit(&lfs_lock); 531 532 /* Hold a reference so SET_ENDOP will be happy */ 533 vref(dvp); 534 if (vp) { 535 vref(vp); 536 MARK_VNODE(vp); 537 } 538 539 MARK_VNODE(dvp); 540 return 0; 541 542 unreserve: 543 lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs)); 544 return error; 545 } 546 547 /* 548 * Opposite of lfs_set_dirop... mostly. For now at least must call 549 * UNMARK_VNODE(dvp) explicitly first. (XXX: clean that up) 550 */ 551 void 552 lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str) 553 { 554 mutex_enter(&lfs_lock); 555 --fs->lfs_dirops; 556 if (!fs->lfs_dirops) { 557 if (fs->lfs_nadirop) { 558 panic("lfs_unset_dirop: %s: no dirops but " 559 " nadirop=%d", str, 560 fs->lfs_nadirop); 561 } 562 wakeup(&fs->lfs_writer); 563 mutex_exit(&lfs_lock); 564 lfs_check(dvp, LFS_UNUSED_LBN, 0); 565 } else { 566 mutex_exit(&lfs_lock); 567 } 568 lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs)); 569 } 570 571 void 572 lfs_mark_vnode(struct vnode *vp) 573 { 574 struct inode *ip = VTOI(vp); 575 struct lfs *fs = ip->i_lfs; 576 577 mutex_enter(&lfs_lock); 578 if (!(ip->i_flag & IN_ADIROP)) { 579 if (!(vp->v_uflag & VU_DIROP)) { 580 mutex_exit(&lfs_lock); 581 vref(vp); 582 mutex_enter(&lfs_lock); 583 ++lfs_dirvcount; 584 ++fs->lfs_dirvcount; 585 TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); 586 vp->v_uflag |= VU_DIROP; 587 } 588 ++fs->lfs_nadirop; 589 ip->i_flag &= ~IN_CDIROP; 590 ip->i_flag |= IN_ADIROP; 591 } else 592 KASSERT(vp->v_uflag & VU_DIROP); 593 mutex_exit(&lfs_lock); 594 } 595 596 void 597 lfs_unmark_vnode(struct vnode *vp) 598 { 599 struct inode *ip = VTOI(vp); 600 601 mutex_enter(&lfs_lock); 602 if (ip && (ip->i_flag & IN_ADIROP)) { 603 KASSERT(vp->v_uflag & VU_DIROP); 604 --ip->i_lfs->lfs_nadirop; 605 ip->i_flag &= ~IN_ADIROP; 606 } 607 mutex_exit(&lfs_lock); 608 } 609 610 int 611 lfs_symlink(void *v) 612 { 613 struct vop_symlink_v3_args /* { 614 struct vnode *a_dvp; 615 struct vnode **a_vpp; 616 struct componentname *a_cnp; 617 struct vattr *a_vap; 618 char *a_target; 619 } */ *ap = v; 620 struct lfs *fs; 621 struct vnode *dvp, **vpp; 622 struct inode *ip; 623 struct ulfs_lookup_results *ulr; 624 ssize_t len; /* XXX should be size_t */ 625 int error; 626 627 dvp = ap->a_dvp; 628 vpp = ap->a_vpp; 629 630 KASSERT(vpp != NULL); 631 KASSERT(*vpp == NULL); 632 KASSERT(ap->a_vap->va_type == VLNK); 633 634 /* XXX should handle this material another way */ 635 ulr = &VTOI(ap->a_dvp)->i_crap; 636 ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 637 638 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 639 ASSERT_NO_SEGLOCK(fs); 640 if (fs->lfs_ronly) { 641 return EROFS; 642 } 643 644 error = lfs_set_dirop(dvp, NULL); 645 if (error) 646 return error; 647 648 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 649 error = ulfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp); 650 if (error) { 651 goto out; 652 } 653 654 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 655 ip = VTOI(*vpp); 656 657 len = strlen(ap->a_target); 658 if (len < ip->i_lfs->um_maxsymlinklen) { 659 memcpy((char *)SHORTLINK(ip), ap->a_target, len); 660 ip->i_size = len; 661 DIP_ASSIGN(ip, size, len); 662 uvm_vnp_setsize(*vpp, ip->i_size); 663 ip->i_flag |= IN_CHANGE | IN_UPDATE; 664 if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME) 665 ip->i_flag |= IN_ACCESS; 666 } else { 667 error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0, 668 IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL, 669 NULL); 670 } 671 672 VOP_UNLOCK(*vpp); 673 if (error) 674 vrele(*vpp); 675 676 out: 677 fstrans_done(dvp->v_mount); 678 679 UNMARK_VNODE(dvp); 680 /* XXX: is it even possible for the symlink to get MARK'd? */ 681 UNMARK_VNODE(*vpp); 682 if (error) { 683 *vpp = NULL; 684 } 685 lfs_unset_dirop(fs, dvp, "symlink"); 686 687 vrele(dvp); 688 return (error); 689 } 690 691 int 692 lfs_mknod(void *v) 693 { 694 struct vop_mknod_v3_args /* { 695 struct vnode *a_dvp; 696 struct vnode **a_vpp; 697 struct componentname *a_cnp; 698 struct vattr *a_vap; 699 } */ *ap = v; 700 struct lfs *fs; 701 struct vnode *dvp, **vpp; 702 struct vattr *vap; 703 struct inode *ip; 704 int error; 705 ino_t ino; 706 struct ulfs_lookup_results *ulr; 707 708 dvp = ap->a_dvp; 709 vpp = ap->a_vpp; 710 vap = ap->a_vap; 711 712 KASSERT(vpp != NULL); 713 KASSERT(*vpp == NULL); 714 715 /* XXX should handle this material another way */ 716 ulr = &VTOI(dvp)->i_crap; 717 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 718 719 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 720 ASSERT_NO_SEGLOCK(fs); 721 if (fs->lfs_ronly) { 722 return EROFS; 723 } 724 725 error = lfs_set_dirop(dvp, NULL); 726 if (error) 727 return error; 728 729 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); 730 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp); 731 732 /* Either way we're done with the dirop at this point */ 733 UNMARK_VNODE(dvp); 734 UNMARK_VNODE(*vpp); 735 lfs_unset_dirop(fs, dvp, "mknod"); 736 /* 737 * XXX this is where this used to be (though inside some evil 738 * macros) but it clearly should be moved further down. 739 * - dholland 20140515 740 */ 741 vrele(dvp); 742 743 if (error) { 744 fstrans_done(ap->a_dvp->v_mount); 745 *vpp = NULL; 746 return (error); 747 } 748 749 VN_KNOTE(dvp, NOTE_WRITE); 750 ip = VTOI(*vpp); 751 ino = ip->i_number; 752 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 753 754 /* 755 * Call fsync to write the vnode so that we don't have to deal with 756 * flushing it when it's marked VU_DIROP or reclaiming. 757 * 758 * XXX KS - If we can't flush we also can't call vgone(), so must 759 * return. But, that leaves this vnode in limbo, also not good. 760 * Can this ever happen (barring hardware failure)? 761 */ 762 if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) { 763 panic("lfs_mknod: couldn't fsync (ino %llu)", 764 (unsigned long long)ino); 765 /* return (error); */ 766 } 767 768 fstrans_done(ap->a_dvp->v_mount); 769 KASSERT(error == 0); 770 VOP_UNLOCK(*vpp); 771 return (0); 772 } 773 774 /* 775 * Create a regular file 776 */ 777 int 778 lfs_create(void *v) 779 { 780 struct vop_create_v3_args /* { 781 struct vnode *a_dvp; 782 struct vnode **a_vpp; 783 struct componentname *a_cnp; 784 struct vattr *a_vap; 785 } */ *ap = v; 786 struct lfs *fs; 787 struct vnode *dvp, **vpp; 788 struct vattr *vap; 789 struct ulfs_lookup_results *ulr; 790 int error; 791 792 dvp = ap->a_dvp; 793 vpp = ap->a_vpp; 794 vap = ap->a_vap; 795 796 KASSERT(vpp != NULL); 797 KASSERT(*vpp == NULL); 798 799 /* XXX should handle this material another way */ 800 ulr = &VTOI(dvp)->i_crap; 801 ULFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 802 803 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 804 ASSERT_NO_SEGLOCK(fs); 805 if (fs->lfs_ronly) { 806 return EROFS; 807 } 808 809 error = lfs_set_dirop(dvp, NULL); 810 if (error) 811 return error; 812 813 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 814 error = ulfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp); 815 if (error) { 816 fstrans_done(dvp->v_mount); 817 goto out; 818 } 819 fstrans_done(dvp->v_mount); 820 VN_KNOTE(dvp, NOTE_WRITE); 821 VOP_UNLOCK(*vpp); 822 823 out: 824 825 UNMARK_VNODE(dvp); 826 UNMARK_VNODE(*vpp); 827 if (error) { 828 *vpp = NULL; 829 } 830 lfs_unset_dirop(fs, dvp, "create"); 831 832 vrele(dvp); 833 return (error); 834 } 835 836 int 837 lfs_mkdir(void *v) 838 { 839 struct vop_mkdir_v3_args /* { 840 struct vnode *a_dvp; 841 struct vnode **a_vpp; 842 struct componentname *a_cnp; 843 struct vattr *a_vap; 844 } */ *ap = v; 845 struct lfs *fs; 846 struct vnode *dvp, *tvp, **vpp; 847 struct inode *dp, *ip; 848 struct componentname *cnp; 849 struct vattr *vap; 850 struct ulfs_lookup_results *ulr; 851 struct buf *bp; 852 LFS_DIRHEADER *dirp; 853 int dirblksiz; 854 int error; 855 856 dvp = ap->a_dvp; 857 tvp = NULL; 858 vpp = ap->a_vpp; 859 cnp = ap->a_cnp; 860 vap = ap->a_vap; 861 862 dp = VTOI(dvp); 863 ip = NULL; 864 865 KASSERT(vap->va_type == VDIR); 866 KASSERT(vpp != NULL); 867 KASSERT(*vpp == NULL); 868 869 /* XXX should handle this material another way */ 870 ulr = &dp->i_crap; 871 ULFS_CHECK_CRAPCOUNTER(dp); 872 873 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 874 ASSERT_NO_SEGLOCK(fs); 875 if (fs->lfs_ronly) { 876 return EROFS; 877 } 878 dirblksiz = fs->um_dirblksiz; 879 /* XXX dholland 20150911 I believe this to be true, but... */ 880 //KASSERT(dirblksiz == LFS_DIRBLKSIZ); 881 882 error = lfs_set_dirop(dvp, NULL); 883 if (error) 884 return error; 885 886 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 887 888 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 889 error = EMLINK; 890 goto out; 891 } 892 893 /* 894 * Must simulate part of ulfs_makeinode here to acquire the inode, 895 * but not have it entered in the parent directory. The entry is 896 * made later after writing "." and ".." entries. 897 */ 898 error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, ap->a_vpp); 899 if (error) 900 goto out; 901 902 error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE); 903 if (error) { 904 vrele(*ap->a_vpp); 905 *ap->a_vpp = NULL; 906 goto out; 907 } 908 909 tvp = *ap->a_vpp; 910 lfs_mark_vnode(tvp); 911 ip = VTOI(tvp); 912 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 913 ip->i_nlink = 2; 914 DIP_ASSIGN(ip, nlink, 2); 915 if (cnp->cn_flags & ISWHITEOUT) { 916 ip->i_flags |= UF_OPAQUE; 917 DIP_ASSIGN(ip, flags, ip->i_flags); 918 } 919 920 /* 921 * Bump link count in parent directory to reflect work done below. 922 */ 923 dp->i_nlink++; 924 DIP_ASSIGN(dp, nlink, dp->i_nlink); 925 dp->i_flag |= IN_CHANGE; 926 if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0) 927 goto bad; 928 929 /* 930 * Initialize directory with "." and "..". This used to use a 931 * static template but that adds moving parts for very little 932 * benefit. 933 */ 934 if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred, 935 B_CLRBUF, &bp)) != 0) 936 goto bad; 937 ip->i_size = dirblksiz; 938 DIP_ASSIGN(ip, size, dirblksiz); 939 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 940 uvm_vnp_setsize(tvp, ip->i_size); 941 dirp = bp->b_data; 942 943 /* . */ 944 lfs_dir_setino(fs, dirp, ip->i_number); 945 lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1)); 946 lfs_dir_settype(fs, dirp, LFS_DT_DIR); 947 lfs_dir_setnamlen(fs, dirp, 1); 948 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1, 949 LFS_DIRECTSIZ(fs, 1)); 950 dirp = LFS_NEXTDIR(fs, dirp); 951 /* .. */ 952 lfs_dir_setino(fs, dirp, dp->i_number); 953 lfs_dir_setreclen(fs, dirp, dirblksiz - LFS_DIRECTSIZ(fs, 1)); 954 lfs_dir_settype(fs, dirp, LFS_DT_DIR); 955 lfs_dir_setnamlen(fs, dirp, 2); 956 lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2, 957 dirblksiz - LFS_DIRECTSIZ(fs, 1)); 958 959 /* 960 * Directory set up; now install its entry in the parent directory. 961 */ 962 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) 963 goto bad; 964 if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) { 965 goto bad; 966 } 967 error = ulfs_direnter(dvp, ulr, tvp, 968 cnp, ip->i_number, LFS_IFTODT(ip->i_mode), bp); 969 bad: 970 if (error == 0) { 971 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 972 VOP_UNLOCK(tvp); 973 } else { 974 dp->i_nlink--; 975 DIP_ASSIGN(dp, nlink, dp->i_nlink); 976 dp->i_flag |= IN_CHANGE; 977 /* 978 * No need to do an explicit lfs_truncate here, vrele will 979 * do this for us because we set the link count to 0. 980 */ 981 ip->i_nlink = 0; 982 DIP_ASSIGN(ip, nlink, 0); 983 ip->i_flag |= IN_CHANGE; 984 /* If IN_ADIROP, account for it */ 985 lfs_unmark_vnode(tvp); 986 vput(tvp); 987 } 988 989 out: 990 fstrans_done(dvp->v_mount); 991 992 UNMARK_VNODE(dvp); 993 UNMARK_VNODE(*vpp); 994 if (error) { 995 *vpp = NULL; 996 } 997 lfs_unset_dirop(fs, dvp, "mkdir"); 998 999 vrele(dvp); 1000 return (error); 1001 } 1002 1003 int 1004 lfs_remove(void *v) 1005 { 1006 struct vop_remove_args /* { 1007 struct vnode *a_dvp; 1008 struct vnode *a_vp; 1009 struct componentname *a_cnp; 1010 } */ *ap = v; 1011 struct vnode *dvp, *vp; 1012 struct inode *ip; 1013 int error; 1014 1015 dvp = ap->a_dvp; 1016 vp = ap->a_vp; 1017 ip = VTOI(vp); 1018 if ((error = lfs_set_dirop(dvp, vp)) != 0) { 1019 if (dvp == vp) 1020 vrele(vp); 1021 else 1022 vput(vp); 1023 vput(dvp); 1024 return error; 1025 } 1026 error = ulfs_remove(ap); 1027 if (ip->i_nlink == 0) 1028 lfs_orphan(ip->i_lfs, ip->i_number); 1029 1030 UNMARK_VNODE(dvp); 1031 if (ap->a_vp) { 1032 UNMARK_VNODE(ap->a_vp); 1033 } 1034 lfs_unset_dirop(ip->i_lfs, dvp, "remove"); 1035 vrele(dvp); 1036 if (ap->a_vp) { 1037 vrele(ap->a_vp); 1038 } 1039 1040 return (error); 1041 } 1042 1043 int 1044 lfs_rmdir(void *v) 1045 { 1046 struct vop_rmdir_args /* { 1047 struct vnodeop_desc *a_desc; 1048 struct vnode *a_dvp; 1049 struct vnode *a_vp; 1050 struct componentname *a_cnp; 1051 } */ *ap = v; 1052 struct vnode *vp; 1053 struct inode *ip; 1054 int error; 1055 1056 vp = ap->a_vp; 1057 ip = VTOI(vp); 1058 if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) { 1059 if (ap->a_dvp == vp) 1060 vrele(ap->a_dvp); 1061 else 1062 vput(ap->a_dvp); 1063 vput(vp); 1064 return error; 1065 } 1066 error = ulfs_rmdir(ap); 1067 if (ip->i_nlink == 0) 1068 lfs_orphan(ip->i_lfs, ip->i_number); 1069 1070 UNMARK_VNODE(ap->a_dvp); 1071 if (ap->a_vp) { 1072 UNMARK_VNODE(ap->a_vp); 1073 } 1074 lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir"); 1075 vrele(ap->a_dvp); 1076 if (ap->a_vp) { 1077 vrele(ap->a_vp); 1078 } 1079 1080 return (error); 1081 } 1082 1083 int 1084 lfs_link(void *v) 1085 { 1086 struct vop_link_v2_args /* { 1087 struct vnode *a_dvp; 1088 struct vnode *a_vp; 1089 struct componentname *a_cnp; 1090 } */ *ap = v; 1091 struct lfs *fs; 1092 struct vnode *dvp; 1093 int error; 1094 1095 dvp = ap->a_dvp; 1096 1097 fs = VFSTOULFS(dvp->v_mount)->um_lfs; 1098 ASSERT_NO_SEGLOCK(fs); 1099 if (fs->lfs_ronly) { 1100 return EROFS; 1101 } 1102 1103 error = lfs_set_dirop(dvp, NULL); 1104 if (error) { 1105 return error; 1106 } 1107 1108 error = ulfs_link(ap); 1109 1110 UNMARK_VNODE(dvp); 1111 lfs_unset_dirop(fs, dvp, "link"); 1112 vrele(dvp); 1113 1114 return (error); 1115 } 1116 1117 /* XXX hack to avoid calling ITIMES in getattr */ 1118 int 1119 lfs_getattr(void *v) 1120 { 1121 struct vop_getattr_args /* { 1122 struct vnode *a_vp; 1123 struct vattr *a_vap; 1124 kauth_cred_t a_cred; 1125 } */ *ap = v; 1126 struct vnode *vp = ap->a_vp; 1127 struct inode *ip = VTOI(vp); 1128 struct vattr *vap = ap->a_vap; 1129 struct lfs *fs = ip->i_lfs; 1130 1131 fstrans_start(vp->v_mount, FSTRANS_SHARED); 1132 /* 1133 * Copy from inode table 1134 */ 1135 vap->va_fsid = ip->i_dev; 1136 vap->va_fileid = ip->i_number; 1137 vap->va_mode = ip->i_mode & ~LFS_IFMT; 1138 vap->va_nlink = ip->i_nlink; 1139 vap->va_uid = ip->i_uid; 1140 vap->va_gid = ip->i_gid; 1141 switch (vp->v_type) { 1142 case VBLK: 1143 case VCHR: 1144 vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din); 1145 break; 1146 default: 1147 vap->va_rdev = NODEV; 1148 break; 1149 } 1150 vap->va_size = vp->v_size; 1151 vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din); 1152 vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din); 1153 vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din); 1154 vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din); 1155 vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din); 1156 vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din); 1157 vap->va_flags = ip->i_flags; 1158 vap->va_gen = ip->i_gen; 1159 /* this doesn't belong here */ 1160 if (vp->v_type == VBLK) 1161 vap->va_blocksize = BLKDEV_IOSIZE; 1162 else if (vp->v_type == VCHR) 1163 vap->va_blocksize = MAXBSIZE; 1164 else 1165 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 1166 vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks); 1167 vap->va_type = vp->v_type; 1168 vap->va_filerev = ip->i_modrev; 1169 fstrans_done(vp->v_mount); 1170 return (0); 1171 } 1172 1173 /* 1174 * Check to make sure the inode blocks won't choke the buffer 1175 * cache, then call ulfs_setattr as usual. 1176 */ 1177 int 1178 lfs_setattr(void *v) 1179 { 1180 struct vop_setattr_args /* { 1181 struct vnode *a_vp; 1182 struct vattr *a_vap; 1183 kauth_cred_t a_cred; 1184 } */ *ap = v; 1185 struct vnode *vp = ap->a_vp; 1186 1187 lfs_check(vp, LFS_UNUSED_LBN, 0); 1188 return ulfs_setattr(v); 1189 } 1190 1191 /* 1192 * Release the block we hold on lfs_newseg wrapping. Called on file close, 1193 * or explicitly from LFCNWRAPGO. Called with the interlock held. 1194 */ 1195 static int 1196 lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) 1197 { 1198 if (fs->lfs_stoplwp != curlwp) 1199 return EBUSY; 1200 1201 fs->lfs_stoplwp = NULL; 1202 cv_signal(&fs->lfs_stopcv); 1203 1204 KASSERT(fs->lfs_nowrap > 0); 1205 if (fs->lfs_nowrap <= 0) { 1206 return 0; 1207 } 1208 1209 if (--fs->lfs_nowrap == 0) { 1210 log(LOG_NOTICE, "%s: re-enabled log wrap\n", 1211 lfs_sb_getfsmnt(fs)); 1212 wakeup(&fs->lfs_wrappass); 1213 lfs_wakeup_cleaner(fs); 1214 } 1215 if (waitfor) { 1216 mtsleep(&fs->lfs_nextsegsleep, PCATCH | PUSER, "segment", 1217 0, &lfs_lock); 1218 } 1219 1220 return 0; 1221 } 1222 1223 /* 1224 * Close called. 1225 * 1226 * Update the times on the inode. 1227 */ 1228 /* ARGSUSED */ 1229 int 1230 lfs_close(void *v) 1231 { 1232 struct vop_close_args /* { 1233 struct vnode *a_vp; 1234 int a_fflag; 1235 kauth_cred_t a_cred; 1236 } */ *ap = v; 1237 struct vnode *vp = ap->a_vp; 1238 struct inode *ip = VTOI(vp); 1239 struct lfs *fs = ip->i_lfs; 1240 1241 if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) && 1242 fs->lfs_stoplwp == curlwp) { 1243 mutex_enter(&lfs_lock); 1244 log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); 1245 lfs_wrapgo(fs, ip, 0); 1246 mutex_exit(&lfs_lock); 1247 } 1248 1249 if (vp == ip->i_lfs->lfs_ivnode && 1250 vp->v_mount->mnt_iflag & IMNT_UNMOUNT) 1251 return 0; 1252 1253 fstrans_start(vp->v_mount, FSTRANS_SHARED); 1254 if (vp->v_usecount > 1 && vp != ip->i_lfs->lfs_ivnode) { 1255 LFS_ITIMES(ip, NULL, NULL, NULL); 1256 } 1257 fstrans_done(vp->v_mount); 1258 return (0); 1259 } 1260 1261 /* 1262 * Close wrapper for special devices. 1263 * 1264 * Update the times on the inode then do device close. 1265 */ 1266 int 1267 lfsspec_close(void *v) 1268 { 1269 struct vop_close_args /* { 1270 struct vnode *a_vp; 1271 int a_fflag; 1272 kauth_cred_t a_cred; 1273 } */ *ap = v; 1274 struct vnode *vp; 1275 struct inode *ip; 1276 1277 vp = ap->a_vp; 1278 ip = VTOI(vp); 1279 if (vp->v_usecount > 1) { 1280 LFS_ITIMES(ip, NULL, NULL, NULL); 1281 } 1282 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 1283 } 1284 1285 /* 1286 * Close wrapper for fifo's. 1287 * 1288 * Update the times on the inode then do device close. 1289 */ 1290 int 1291 lfsfifo_close(void *v) 1292 { 1293 struct vop_close_args /* { 1294 struct vnode *a_vp; 1295 int a_fflag; 1296 kauth_cred_ a_cred; 1297 } */ *ap = v; 1298 struct vnode *vp; 1299 struct inode *ip; 1300 1301 vp = ap->a_vp; 1302 ip = VTOI(vp); 1303 if (ap->a_vp->v_usecount > 1) { 1304 LFS_ITIMES(ip, NULL, NULL, NULL); 1305 } 1306 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 1307 } 1308 1309 /* 1310 * Reclaim an inode so that it can be used for other purposes. 1311 */ 1312 1313 int 1314 lfs_reclaim(void *v) 1315 { 1316 struct vop_reclaim_args /* { 1317 struct vnode *a_vp; 1318 } */ *ap = v; 1319 struct vnode *vp = ap->a_vp; 1320 struct inode *ip = VTOI(vp); 1321 struct lfs *fs = ip->i_lfs; 1322 int error; 1323 1324 /* 1325 * The inode must be freed and updated before being removed 1326 * from its hash chain. Other threads trying to gain a hold 1327 * or lock on the inode will be stalled. 1328 */ 1329 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1330 lfs_vfree(vp, ip->i_number, ip->i_omode); 1331 1332 mutex_enter(&lfs_lock); 1333 LFS_CLR_UINO(ip, IN_ALLMOD); 1334 mutex_exit(&lfs_lock); 1335 if ((error = ulfs_reclaim(vp))) 1336 return (error); 1337 1338 /* 1339 * Take us off the paging and/or dirop queues if we were on them. 1340 * We shouldn't be on them. 1341 */ 1342 mutex_enter(&lfs_lock); 1343 if (ip->i_flags & IN_PAGING) { 1344 log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", 1345 lfs_sb_getfsmnt(fs)); 1346 ip->i_flags &= ~IN_PAGING; 1347 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 1348 } 1349 if (vp->v_uflag & VU_DIROP) { 1350 panic("reclaimed vnode is VU_DIROP"); 1351 vp->v_uflag &= ~VU_DIROP; 1352 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); 1353 } 1354 mutex_exit(&lfs_lock); 1355 1356 pool_put(&lfs_dinode_pool, ip->i_din); 1357 lfs_deregister_all(vp); 1358 pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); 1359 ip->inode_ext.lfs = NULL; 1360 genfs_node_destroy(vp); 1361 pool_put(&lfs_inode_pool, vp->v_data); 1362 vp->v_data = NULL; 1363 return (0); 1364 } 1365 1366 /* 1367 * Read a block from a storage device. 1368 * 1369 * Calculate the logical to physical mapping if not done already, 1370 * then call the device strategy routine. 1371 * 1372 * In order to avoid reading blocks that are in the process of being 1373 * written by the cleaner---and hence are not mutexed by the normal 1374 * buffer cache / page cache mechanisms---check for collisions before 1375 * reading. 1376 * 1377 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before* 1378 * the active cleaner test. 1379 * 1380 * XXX This code assumes that lfs_markv makes synchronous checkpoints. 1381 */ 1382 int 1383 lfs_strategy(void *v) 1384 { 1385 struct vop_strategy_args /* { 1386 struct vnode *a_vp; 1387 struct buf *a_bp; 1388 } */ *ap = v; 1389 struct buf *bp; 1390 struct lfs *fs; 1391 struct vnode *vp; 1392 struct inode *ip; 1393 daddr_t tbn; 1394 #define MAXLOOP 25 1395 int i, sn, error, slept, loopcount; 1396 1397 bp = ap->a_bp; 1398 vp = ap->a_vp; 1399 ip = VTOI(vp); 1400 fs = ip->i_lfs; 1401 1402 /* lfs uses its strategy routine only for read */ 1403 KASSERT(bp->b_flags & B_READ); 1404 1405 if (vp->v_type == VBLK || vp->v_type == VCHR) 1406 panic("lfs_strategy: spec"); 1407 KASSERT(bp->b_bcount != 0); 1408 if (bp->b_blkno == bp->b_lblkno) { 1409 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1410 NULL); 1411 if (error) { 1412 bp->b_error = error; 1413 bp->b_resid = bp->b_bcount; 1414 biodone(bp); 1415 return (error); 1416 } 1417 if ((long)bp->b_blkno == -1) /* no valid data */ 1418 clrbuf(bp); 1419 } 1420 if ((long)bp->b_blkno < 0) { /* block is not on disk */ 1421 bp->b_resid = bp->b_bcount; 1422 biodone(bp); 1423 return (0); 1424 } 1425 1426 slept = 1; 1427 loopcount = 0; 1428 mutex_enter(&lfs_lock); 1429 while (slept && fs->lfs_seglock) { 1430 mutex_exit(&lfs_lock); 1431 /* 1432 * Look through list of intervals. 1433 * There will only be intervals to look through 1434 * if the cleaner holds the seglock. 1435 * Since the cleaner is synchronous, we can trust 1436 * the list of intervals to be current. 1437 */ 1438 tbn = LFS_DBTOFSB(fs, bp->b_blkno); 1439 sn = lfs_dtosn(fs, tbn); 1440 slept = 0; 1441 for (i = 0; i < fs->lfs_cleanind; i++) { 1442 if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) && 1443 tbn >= fs->lfs_cleanint[i]) { 1444 DLOG((DLOG_CLEAN, 1445 "lfs_strategy: ino %d lbn %" PRId64 1446 " ind %d sn %d fsb %" PRIx64 1447 " given sn %d fsb %" PRIx64 "\n", 1448 ip->i_number, bp->b_lblkno, i, 1449 lfs_dtosn(fs, fs->lfs_cleanint[i]), 1450 fs->lfs_cleanint[i], sn, tbn)); 1451 DLOG((DLOG_CLEAN, 1452 "lfs_strategy: sleeping on ino %d lbn %" 1453 PRId64 "\n", ip->i_number, bp->b_lblkno)); 1454 mutex_enter(&lfs_lock); 1455 if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { 1456 /* 1457 * Cleaner can't wait for itself. 1458 * Instead, wait for the blocks 1459 * to be written to disk. 1460 * XXX we need pribio in the test 1461 * XXX here. 1462 */ 1463 mtsleep(&fs->lfs_iocount, 1464 (PRIBIO + 1) | PNORELOCK, 1465 "clean2", hz/10 + 1, 1466 &lfs_lock); 1467 slept = 1; 1468 ++loopcount; 1469 break; 1470 } else if (fs->lfs_seglock) { 1471 mtsleep(&fs->lfs_seglock, 1472 (PRIBIO + 1) | PNORELOCK, 1473 "clean1", 0, 1474 &lfs_lock); 1475 slept = 1; 1476 break; 1477 } 1478 mutex_exit(&lfs_lock); 1479 } 1480 } 1481 mutex_enter(&lfs_lock); 1482 if (loopcount > MAXLOOP) { 1483 printf("lfs_strategy: breaking out of clean2 loop\n"); 1484 break; 1485 } 1486 } 1487 mutex_exit(&lfs_lock); 1488 1489 vp = ip->i_devvp; 1490 return VOP_STRATEGY(vp, bp); 1491 } 1492 1493 /* 1494 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops. 1495 * Technically this is a checkpoint (the on-disk state is valid) 1496 * even though we are leaving out all the file data. 1497 */ 1498 int 1499 lfs_flush_dirops(struct lfs *fs) 1500 { 1501 struct inode *ip, *nip; 1502 struct vnode *vp; 1503 extern int lfs_dostats; /* XXX this does not belong here */ 1504 struct segment *sp; 1505 SEGSUM *ssp; 1506 int flags = 0; 1507 int error = 0; 1508 1509 ASSERT_MAYBE_SEGLOCK(fs); 1510 KASSERT(fs->lfs_nadirop == 0); 1511 1512 if (fs->lfs_ronly) 1513 return EROFS; 1514 1515 mutex_enter(&lfs_lock); 1516 if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { 1517 mutex_exit(&lfs_lock); 1518 return 0; 1519 } else 1520 mutex_exit(&lfs_lock); 1521 1522 if (lfs_dostats) 1523 ++lfs_stats.flush_invoked; 1524 1525 lfs_imtime(fs); 1526 lfs_seglock(fs, flags); 1527 sp = fs->lfs_sp; 1528 1529 /* 1530 * lfs_writevnodes, optimized to get dirops out of the way. 1531 * Only write dirops, and don't flush files' pages, only 1532 * blocks from the directories. 1533 * 1534 * We don't need to vref these files because they are 1535 * dirops and so hold an extra reference until the 1536 * segunlock clears them of that status. 1537 * 1538 * We don't need to check for IN_ADIROP because we know that 1539 * no dirops are active. 1540 * 1541 */ 1542 mutex_enter(&lfs_lock); 1543 for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { 1544 nip = TAILQ_NEXT(ip, i_lfs_dchain); 1545 mutex_exit(&lfs_lock); 1546 vp = ITOV(ip); 1547 mutex_enter(vp->v_interlock); 1548 1549 KASSERT((ip->i_flag & IN_ADIROP) == 0); 1550 KASSERT(vp->v_uflag & VU_DIROP); 1551 KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0); 1552 1553 /* 1554 * All writes to directories come from dirops; all 1555 * writes to files' direct blocks go through the page 1556 * cache, which we're not touching. Reads to files 1557 * and/or directories will not be affected by writing 1558 * directory blocks inodes and file inodes. So we don't 1559 * really need to lock. 1560 */ 1561 if (vdead_check(vp, VDEAD_NOWAIT) != 0) { 1562 mutex_exit(vp->v_interlock); 1563 mutex_enter(&lfs_lock); 1564 continue; 1565 } 1566 mutex_exit(vp->v_interlock); 1567 /* XXX see below 1568 * waslocked = VOP_ISLOCKED(vp); 1569 */ 1570 if (vp->v_type != VREG && 1571 ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp))) { 1572 error = lfs_writefile(fs, sp, vp); 1573 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1574 !(ip->i_flag & IN_ALLMOD)) { 1575 mutex_enter(&lfs_lock); 1576 LFS_SET_UINO(ip, IN_MODIFIED); 1577 mutex_exit(&lfs_lock); 1578 } 1579 if (error && (sp->seg_flags & SEGM_SINGLE)) { 1580 mutex_enter(&lfs_lock); 1581 error = EAGAIN; 1582 break; 1583 } 1584 } 1585 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1586 error = lfs_writeinode(fs, sp, ip); 1587 mutex_enter(&lfs_lock); 1588 if (error && (sp->seg_flags & SEGM_SINGLE)) { 1589 error = EAGAIN; 1590 break; 1591 } 1592 1593 /* 1594 * We might need to update these inodes again, 1595 * for example, if they have data blocks to write. 1596 * Make sure that after this flush, they are still 1597 * marked IN_MODIFIED so that we don't forget to 1598 * write them. 1599 */ 1600 /* XXX only for non-directories? --KS */ 1601 LFS_SET_UINO(ip, IN_MODIFIED); 1602 } 1603 mutex_exit(&lfs_lock); 1604 /* We've written all the dirops there are */ 1605 ssp = (SEGSUM *)sp->segsum; 1606 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT)); 1607 lfs_finalize_fs_seguse(fs); 1608 (void) lfs_writeseg(fs, sp); 1609 lfs_segunlock(fs); 1610 1611 return error; 1612 } 1613 1614 /* 1615 * Flush all vnodes for which the pagedaemon has requested pageouts. 1616 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop() 1617 * has just run, this would be an error). If we have to skip a vnode 1618 * for any reason, just skip it; if we have to wait for the cleaner, 1619 * abort. The writer daemon will call us again later. 1620 */ 1621 int 1622 lfs_flush_pchain(struct lfs *fs) 1623 { 1624 struct inode *ip, *nip; 1625 struct vnode *vp; 1626 extern int lfs_dostats; 1627 struct segment *sp; 1628 int error, error2; 1629 1630 ASSERT_NO_SEGLOCK(fs); 1631 1632 if (fs->lfs_ronly) 1633 return EROFS; 1634 1635 mutex_enter(&lfs_lock); 1636 if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { 1637 mutex_exit(&lfs_lock); 1638 return 0; 1639 } else 1640 mutex_exit(&lfs_lock); 1641 1642 /* Get dirops out of the way */ 1643 if ((error = lfs_flush_dirops(fs)) != 0) 1644 return error; 1645 1646 if (lfs_dostats) 1647 ++lfs_stats.flush_invoked; 1648 1649 /* 1650 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts. 1651 */ 1652 lfs_imtime(fs); 1653 lfs_seglock(fs, 0); 1654 sp = fs->lfs_sp; 1655 1656 /* 1657 * lfs_writevnodes, optimized to clear pageout requests. 1658 * Only write non-dirop files that are in the pageout queue. 1659 * We're very conservative about what we write; we want to be 1660 * fast and async. 1661 */ 1662 mutex_enter(&lfs_lock); 1663 top: 1664 for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { 1665 struct mount *mp = ITOV(ip)->v_mount; 1666 ino_t ino = ip->i_number; 1667 1668 nip = TAILQ_NEXT(ip, i_lfs_pchain); 1669 1670 if (!(ip->i_flags & IN_PAGING)) 1671 goto top; 1672 1673 mutex_exit(&lfs_lock); 1674 if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) { 1675 mutex_enter(&lfs_lock); 1676 continue; 1677 }; 1678 if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1679 vrele(vp); 1680 mutex_enter(&lfs_lock); 1681 continue; 1682 } 1683 ip = VTOI(vp); 1684 mutex_enter(&lfs_lock); 1685 if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG || 1686 !(ip->i_flags & IN_PAGING)) { 1687 mutex_exit(&lfs_lock); 1688 vput(vp); 1689 mutex_enter(&lfs_lock); 1690 goto top; 1691 } 1692 mutex_exit(&lfs_lock); 1693 1694 error = lfs_writefile(fs, sp, vp); 1695 if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && 1696 !(ip->i_flag & IN_ALLMOD)) { 1697 mutex_enter(&lfs_lock); 1698 LFS_SET_UINO(ip, IN_MODIFIED); 1699 mutex_exit(&lfs_lock); 1700 } 1701 KDASSERT(ip->i_number != LFS_IFILE_INUM); 1702 error2 = lfs_writeinode(fs, sp, ip); 1703 1704 VOP_UNLOCK(vp); 1705 vrele(vp); 1706 1707 if (error == EAGAIN || error2 == EAGAIN) { 1708 lfs_writeseg(fs, sp); 1709 mutex_enter(&lfs_lock); 1710 break; 1711 } 1712 mutex_enter(&lfs_lock); 1713 } 1714 mutex_exit(&lfs_lock); 1715 (void) lfs_writeseg(fs, sp); 1716 lfs_segunlock(fs); 1717 1718 return 0; 1719 } 1720 1721 /* 1722 * Conversion for compat. 1723 */ 1724 static void 1725 block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70) 1726 { 1727 bi->bi_inode = bi70->bi_inode; 1728 bi->bi_lbn = bi70->bi_lbn; 1729 bi->bi_daddr = bi70->bi_daddr; 1730 bi->bi_segcreate = bi70->bi_segcreate; 1731 bi->bi_version = bi70->bi_version; 1732 bi->bi_bp = bi70->bi_bp; 1733 bi->bi_size = bi70->bi_size; 1734 } 1735 1736 static void 1737 block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi) 1738 { 1739 bi70->bi_inode = bi->bi_inode; 1740 bi70->bi_lbn = bi->bi_lbn; 1741 bi70->bi_daddr = bi->bi_daddr; 1742 bi70->bi_segcreate = bi->bi_segcreate; 1743 bi70->bi_version = bi->bi_version; 1744 bi70->bi_bp = bi->bi_bp; 1745 bi70->bi_size = bi->bi_size; 1746 } 1747 1748 /* 1749 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}. 1750 */ 1751 int 1752 lfs_fcntl(void *v) 1753 { 1754 struct vop_fcntl_args /* { 1755 struct vnode *a_vp; 1756 u_int a_command; 1757 void * a_data; 1758 int a_fflag; 1759 kauth_cred_t a_cred; 1760 } */ *ap = v; 1761 struct timeval tv; 1762 struct timeval *tvp; 1763 BLOCK_INFO *blkiov; 1764 BLOCK_INFO_70 *blkiov70; 1765 CLEANERINFO *cip; 1766 SEGUSE *sup; 1767 int blkcnt, i, error; 1768 size_t fh_size; 1769 struct lfs_fcntl_markv blkvp; 1770 struct lfs_fcntl_markv_70 blkvp70; 1771 struct lwp *l; 1772 fsid_t *fsidp; 1773 struct lfs *fs; 1774 struct buf *bp; 1775 fhandle_t *fhp; 1776 daddr_t off; 1777 int oclean; 1778 1779 /* Only respect LFS fcntls on fs root or Ifile */ 1780 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO && 1781 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) { 1782 return ulfs_fcntl(v); 1783 } 1784 1785 /* Avoid locking a draining lock */ 1786 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) { 1787 return ESHUTDOWN; 1788 } 1789 1790 /* LFS control and monitoring fcntls are available only to root */ 1791 l = curlwp; 1792 if (((ap->a_command & 0xff00) >> 8) == 'L' && 1793 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, 1794 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0) 1795 return (error); 1796 1797 fs = VTOI(ap->a_vp)->i_lfs; 1798 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx; 1799 1800 error = 0; 1801 switch ((int)ap->a_command) { 1802 case LFCNSEGWAITALL_COMPAT_50: 1803 case LFCNSEGWAITALL_COMPAT: 1804 fsidp = NULL; 1805 /* FALLTHROUGH */ 1806 case LFCNSEGWAIT_COMPAT_50: 1807 case LFCNSEGWAIT_COMPAT: 1808 { 1809 struct timeval50 *tvp50 1810 = (struct timeval50 *)ap->a_data; 1811 timeval50_to_timeval(tvp50, &tv); 1812 tvp = &tv; 1813 } 1814 goto segwait_common; 1815 case LFCNSEGWAITALL: 1816 fsidp = NULL; 1817 /* FALLTHROUGH */ 1818 case LFCNSEGWAIT: 1819 tvp = (struct timeval *)ap->a_data; 1820 segwait_common: 1821 mutex_enter(&lfs_lock); 1822 ++fs->lfs_sleepers; 1823 mutex_exit(&lfs_lock); 1824 1825 error = lfs_segwait(fsidp, tvp); 1826 1827 mutex_enter(&lfs_lock); 1828 if (--fs->lfs_sleepers == 0) 1829 wakeup(&fs->lfs_sleepers); 1830 mutex_exit(&lfs_lock); 1831 return error; 1832 1833 case LFCNBMAPV_COMPAT_70: 1834 case LFCNMARKV_COMPAT_70: 1835 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data; 1836 1837 blkcnt = blkvp70.blkcnt; 1838 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1839 return (EINVAL); 1840 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1841 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV); 1842 for (i = 0; i < blkcnt; i++) { 1843 error = copyin(&blkvp70.blkiov[i], blkiov70, 1844 sizeof(*blkiov70)); 1845 if (error) { 1846 lfs_free(fs, blkiov70, LFS_NB_BLKIOV); 1847 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1848 return error; 1849 } 1850 block_info_from_70(&blkiov[i], blkiov70); 1851 } 1852 1853 mutex_enter(&lfs_lock); 1854 ++fs->lfs_sleepers; 1855 mutex_exit(&lfs_lock); 1856 if (ap->a_command == LFCNBMAPV) 1857 error = lfs_bmapv(l, fsidp, blkiov, blkcnt); 1858 else /* LFCNMARKV */ 1859 error = lfs_markv(l, fsidp, blkiov, blkcnt); 1860 if (error == 0) { 1861 for (i = 0; i < blkcnt; i++) { 1862 block_info_to_70(blkiov70, &blkiov[i]); 1863 error = copyout(blkiov70, &blkvp70.blkiov[i], 1864 sizeof(*blkiov70)); 1865 if (error) { 1866 break; 1867 } 1868 } 1869 } 1870 mutex_enter(&lfs_lock); 1871 if (--fs->lfs_sleepers == 0) 1872 wakeup(&fs->lfs_sleepers); 1873 mutex_exit(&lfs_lock); 1874 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1875 return error; 1876 1877 case LFCNBMAPV: 1878 case LFCNMARKV: 1879 blkvp = *(struct lfs_fcntl_markv *)ap->a_data; 1880 1881 blkcnt = blkvp.blkcnt; 1882 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1883 return (EINVAL); 1884 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1885 if ((error = copyin(blkvp.blkiov, blkiov, 1886 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1887 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1888 return error; 1889 } 1890 1891 mutex_enter(&lfs_lock); 1892 ++fs->lfs_sleepers; 1893 mutex_exit(&lfs_lock); 1894 if (ap->a_command == LFCNBMAPV) 1895 error = lfs_bmapv(l, fsidp, blkiov, blkcnt); 1896 else /* LFCNMARKV */ 1897 error = lfs_markv(l, fsidp, blkiov, blkcnt); 1898 if (error == 0) 1899 error = copyout(blkiov, blkvp.blkiov, 1900 blkcnt * sizeof(BLOCK_INFO)); 1901 mutex_enter(&lfs_lock); 1902 if (--fs->lfs_sleepers == 0) 1903 wakeup(&fs->lfs_sleepers); 1904 mutex_exit(&lfs_lock); 1905 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1906 return error; 1907 1908 case LFCNRECLAIM: 1909 /* 1910 * Flush dirops and write Ifile, allowing empty segments 1911 * to be immediately reclaimed. 1912 */ 1913 lfs_writer_enter(fs, "pndirop"); 1914 off = lfs_sb_getoffset(fs); 1915 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 1916 lfs_flush_dirops(fs); 1917 LFS_CLEANERINFO(cip, fs, bp); 1918 oclean = lfs_ci_getclean(fs, cip); 1919 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 1920 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 1921 fs->lfs_sp->seg_flags |= SEGM_PROT; 1922 lfs_segunlock(fs); 1923 lfs_writer_leave(fs); 1924 1925 #ifdef DEBUG 1926 LFS_CLEANERINFO(cip, fs, bp); 1927 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 1928 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 1929 lfs_sb_getoffset(fs) - off, 1930 lfs_ci_getclean(fs, cip) - oclean, 1931 fs->lfs_activesb)); 1932 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 1933 #else 1934 __USE(oclean); 1935 __USE(off); 1936 #endif 1937 1938 return 0; 1939 1940 case LFCNIFILEFH_COMPAT: 1941 /* Return the filehandle of the Ifile */ 1942 if ((error = kauth_authorize_system(l->l_cred, 1943 KAUTH_SYSTEM_FILEHANDLE, 0, NULL, NULL, NULL)) != 0) 1944 return (error); 1945 fhp = (struct fhandle *)ap->a_data; 1946 fhp->fh_fsid = *fsidp; 1947 fh_size = 16; /* former VFS_MAXFIDSIZ */ 1948 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1949 1950 case LFCNIFILEFH_COMPAT2: 1951 case LFCNIFILEFH: 1952 /* Return the filehandle of the Ifile */ 1953 fhp = (struct fhandle *)ap->a_data; 1954 fhp->fh_fsid = *fsidp; 1955 fh_size = sizeof(struct lfs_fhandle) - 1956 offsetof(fhandle_t, fh_fid); 1957 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 1958 1959 case LFCNREWIND: 1960 /* Move lfs_offset to the lowest-numbered segment */ 1961 return lfs_rewind(fs, *(int *)ap->a_data); 1962 1963 case LFCNINVAL: 1964 /* Mark a segment SEGUSE_INVAL */ 1965 LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); 1966 if (sup->su_nbytes > 0) { 1967 brelse(bp, 0); 1968 lfs_unset_inval_all(fs); 1969 return EBUSY; 1970 } 1971 sup->su_flags |= SEGUSE_INVAL; 1972 VOP_BWRITE(bp->b_vp, bp); 1973 return 0; 1974 1975 case LFCNRESIZE: 1976 /* Resize the filesystem */ 1977 return lfs_resize_fs(fs, *(int *)ap->a_data); 1978 1979 case LFCNWRAPSTOP: 1980 case LFCNWRAPSTOP_COMPAT: 1981 /* 1982 * Hold lfs_newseg at segment 0; if requested, sleep until 1983 * the filesystem wraps around. To support external agents 1984 * (dump, fsck-based regression test) that need to look at 1985 * a snapshot of the filesystem, without necessarily 1986 * requiring that all fs activity stops. 1987 */ 1988 if (fs->lfs_stoplwp == curlwp) 1989 return EALREADY; 1990 1991 mutex_enter(&lfs_lock); 1992 while (fs->lfs_stoplwp != NULL) 1993 cv_wait(&fs->lfs_stopcv, &lfs_lock); 1994 fs->lfs_stoplwp = curlwp; 1995 if (fs->lfs_nowrap == 0) 1996 log(LOG_NOTICE, "%s: disabled log wrap\n", 1997 lfs_sb_getfsmnt(fs)); 1998 ++fs->lfs_nowrap; 1999 if (*(int *)ap->a_data == 1 2000 || ap->a_command == LFCNWRAPSTOP_COMPAT) { 2001 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); 2002 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 2003 "segwrap", 0, &lfs_lock); 2004 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); 2005 if (error) { 2006 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 2007 } 2008 } 2009 mutex_exit(&lfs_lock); 2010 return 0; 2011 2012 case LFCNWRAPGO: 2013 case LFCNWRAPGO_COMPAT: 2014 /* 2015 * Having done its work, the agent wakes up the writer. 2016 * If the argument is 1, it sleeps until a new segment 2017 * is selected. 2018 */ 2019 mutex_enter(&lfs_lock); 2020 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 2021 ap->a_command == LFCNWRAPGO_COMPAT ? 1 : 2022 *((int *)ap->a_data)); 2023 mutex_exit(&lfs_lock); 2024 return error; 2025 2026 case LFCNWRAPPASS: 2027 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 2028 return EALREADY; 2029 mutex_enter(&lfs_lock); 2030 if (fs->lfs_stoplwp != curlwp) { 2031 mutex_exit(&lfs_lock); 2032 return EALREADY; 2033 } 2034 if (fs->lfs_nowrap == 0) { 2035 mutex_exit(&lfs_lock); 2036 return EBUSY; 2037 } 2038 fs->lfs_wrappass = 1; 2039 wakeup(&fs->lfs_wrappass); 2040 /* Wait for the log to wrap, if asked */ 2041 if (*(int *)ap->a_data) { 2042 vref(ap->a_vp); 2043 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 2044 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 2045 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 2046 "segwrap", 0, &lfs_lock); 2047 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 2048 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 2049 vrele(ap->a_vp); 2050 } 2051 mutex_exit(&lfs_lock); 2052 return error; 2053 2054 case LFCNWRAPSTATUS: 2055 mutex_enter(&lfs_lock); 2056 *(int *)ap->a_data = fs->lfs_wrapstatus; 2057 mutex_exit(&lfs_lock); 2058 return 0; 2059 2060 default: 2061 return ulfs_fcntl(v); 2062 } 2063 return 0; 2064 } 2065 2066 /* 2067 * Return the last logical file offset that should be written for this file 2068 * if we're doing a write that ends at "size". If writing, we need to know 2069 * about sizes on disk, i.e. fragments if there are any; if reading, we need 2070 * to know about entire blocks. 2071 */ 2072 void 2073 lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) 2074 { 2075 struct inode *ip = VTOI(vp); 2076 struct lfs *fs = ip->i_lfs; 2077 daddr_t olbn, nlbn; 2078 2079 olbn = lfs_lblkno(fs, ip->i_size); 2080 nlbn = lfs_lblkno(fs, size); 2081 if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) { 2082 *eobp = lfs_fragroundup(fs, size); 2083 } else { 2084 *eobp = lfs_blkroundup(fs, size); 2085 } 2086 } 2087 2088 #ifdef DEBUG 2089 void lfs_dump_vop(void *); 2090 2091 void 2092 lfs_dump_vop(void *v) 2093 { 2094 struct vop_putpages_args /* { 2095 struct vnode *a_vp; 2096 voff_t a_offlo; 2097 voff_t a_offhi; 2098 int a_flags; 2099 } */ *ap = v; 2100 2101 struct inode *ip = VTOI(ap->a_vp); 2102 struct lfs *fs = ip->i_lfs; 2103 2104 #ifdef DDB 2105 vfs_vnode_print(ap->a_vp, 0, printf); 2106 #endif 2107 lfs_dump_dinode(fs, ip->i_din); 2108 } 2109 #endif 2110 2111 int 2112 lfs_mmap(void *v) 2113 { 2114 struct vop_mmap_args /* { 2115 const struct vnodeop_desc *a_desc; 2116 struct vnode *a_vp; 2117 vm_prot_t a_prot; 2118 kauth_cred_t a_cred; 2119 } */ *ap = v; 2120 2121 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) 2122 return EOPNOTSUPP; 2123 return ulfs_mmap(v); 2124 } 2125 2126 static int 2127 lfs_openextattr(void *v) 2128 { 2129 struct vop_openextattr_args /* { 2130 struct vnode *a_vp; 2131 kauth_cred_t a_cred; 2132 struct proc *a_p; 2133 } */ *ap = v; 2134 struct inode *ip = VTOI(ap->a_vp); 2135 struct ulfsmount *ump = ip->i_ump; 2136 //struct lfs *fs = ip->i_lfs; 2137 2138 /* Not supported for ULFS1 file systems. */ 2139 if (ump->um_fstype == ULFS1) 2140 return (EOPNOTSUPP); 2141 2142 /* XXX Not implemented for ULFS2 file systems. */ 2143 return (EOPNOTSUPP); 2144 } 2145 2146 static int 2147 lfs_closeextattr(void *v) 2148 { 2149 struct vop_closeextattr_args /* { 2150 struct vnode *a_vp; 2151 int a_commit; 2152 kauth_cred_t a_cred; 2153 struct proc *a_p; 2154 } */ *ap = v; 2155 struct inode *ip = VTOI(ap->a_vp); 2156 struct ulfsmount *ump = ip->i_ump; 2157 //struct lfs *fs = ip->i_lfs; 2158 2159 /* Not supported for ULFS1 file systems. */ 2160 if (ump->um_fstype == ULFS1) 2161 return (EOPNOTSUPP); 2162 2163 /* XXX Not implemented for ULFS2 file systems. */ 2164 return (EOPNOTSUPP); 2165 } 2166 2167 static int 2168 lfs_getextattr(void *v) 2169 { 2170 struct vop_getextattr_args /* { 2171 struct vnode *a_vp; 2172 int a_attrnamespace; 2173 const char *a_name; 2174 struct uio *a_uio; 2175 size_t *a_size; 2176 kauth_cred_t a_cred; 2177 struct proc *a_p; 2178 } */ *ap = v; 2179 struct vnode *vp = ap->a_vp; 2180 struct inode *ip = VTOI(vp); 2181 struct ulfsmount *ump = ip->i_ump; 2182 //struct lfs *fs = ip->i_lfs; 2183 int error; 2184 2185 if (ump->um_fstype == ULFS1) { 2186 #ifdef LFS_EXTATTR 2187 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2188 error = ulfs_getextattr(ap); 2189 fstrans_done(vp->v_mount); 2190 #else 2191 error = EOPNOTSUPP; 2192 #endif 2193 return error; 2194 } 2195 2196 /* XXX Not implemented for ULFS2 file systems. */ 2197 return (EOPNOTSUPP); 2198 } 2199 2200 static int 2201 lfs_setextattr(void *v) 2202 { 2203 struct vop_setextattr_args /* { 2204 struct vnode *a_vp; 2205 int a_attrnamespace; 2206 const char *a_name; 2207 struct uio *a_uio; 2208 kauth_cred_t a_cred; 2209 struct proc *a_p; 2210 } */ *ap = v; 2211 struct vnode *vp = ap->a_vp; 2212 struct inode *ip = VTOI(vp); 2213 struct ulfsmount *ump = ip->i_ump; 2214 //struct lfs *fs = ip->i_lfs; 2215 int error; 2216 2217 if (ump->um_fstype == ULFS1) { 2218 #ifdef LFS_EXTATTR 2219 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2220 error = ulfs_setextattr(ap); 2221 fstrans_done(vp->v_mount); 2222 #else 2223 error = EOPNOTSUPP; 2224 #endif 2225 return error; 2226 } 2227 2228 /* XXX Not implemented for ULFS2 file systems. */ 2229 return (EOPNOTSUPP); 2230 } 2231 2232 static int 2233 lfs_listextattr(void *v) 2234 { 2235 struct vop_listextattr_args /* { 2236 struct vnode *a_vp; 2237 int a_attrnamespace; 2238 struct uio *a_uio; 2239 size_t *a_size; 2240 kauth_cred_t a_cred; 2241 struct proc *a_p; 2242 } */ *ap = v; 2243 struct vnode *vp = ap->a_vp; 2244 struct inode *ip = VTOI(vp); 2245 struct ulfsmount *ump = ip->i_ump; 2246 //struct lfs *fs = ip->i_lfs; 2247 int error; 2248 2249 if (ump->um_fstype == ULFS1) { 2250 #ifdef LFS_EXTATTR 2251 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2252 error = ulfs_listextattr(ap); 2253 fstrans_done(vp->v_mount); 2254 #else 2255 error = EOPNOTSUPP; 2256 #endif 2257 return error; 2258 } 2259 2260 /* XXX Not implemented for ULFS2 file systems. */ 2261 return (EOPNOTSUPP); 2262 } 2263 2264 static int 2265 lfs_deleteextattr(void *v) 2266 { 2267 struct vop_deleteextattr_args /* { 2268 struct vnode *a_vp; 2269 int a_attrnamespace; 2270 kauth_cred_t a_cred; 2271 struct proc *a_p; 2272 } */ *ap = v; 2273 struct vnode *vp = ap->a_vp; 2274 struct inode *ip = VTOI(vp); 2275 struct ulfsmount *ump = ip->i_ump; 2276 //struct fs *fs = ip->i_lfs; 2277 int error; 2278 2279 if (ump->um_fstype == ULFS1) { 2280 #ifdef LFS_EXTATTR 2281 fstrans_start(vp->v_mount, FSTRANS_SHARED); 2282 error = ulfs_deleteextattr(ap); 2283 fstrans_done(vp->v_mount); 2284 #else 2285 error = EOPNOTSUPP; 2286 #endif 2287 return error; 2288 } 2289 2290 /* XXX Not implemented for ULFS2 file systems. */ 2291 return (EOPNOTSUPP); 2292 } 2293