1 /* $NetBSD: ffs_vnops.c,v 1.48 2001/12/31 21:37:22 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.48 2001/12/31 21:37:22 thorpej Exp $"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/resourcevar.h> 44 #include <sys/kernel.h> 45 #include <sys/file.h> 46 #include <sys/stat.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/mount.h> 50 #include <sys/vnode.h> 51 #include <sys/pool.h> 52 #include <sys/signalvar.h> 53 54 #include <miscfs/fifofs/fifo.h> 55 #include <miscfs/genfs/genfs.h> 56 #include <miscfs/specfs/specdev.h> 57 58 #include <ufs/ufs/inode.h> 59 #include <ufs/ufs/dir.h> 60 #include <ufs/ufs/ufs_extern.h> 61 #include <ufs/ufs/ufsmount.h> 62 63 #include <ufs/ffs/fs.h> 64 #include <ufs/ffs/ffs_extern.h> 65 66 static int ffs_full_fsync __P((void *)); 67 68 /* Global vfs data structures for ufs. */ 69 int (**ffs_vnodeop_p) __P((void *)); 70 const struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { 71 { &vop_default_desc, vn_default_error }, 72 { &vop_lookup_desc, ufs_lookup }, /* lookup */ 73 { &vop_create_desc, ufs_create }, /* create */ 74 { &vop_whiteout_desc, ufs_whiteout }, /* whiteout */ 75 { &vop_mknod_desc, ufs_mknod }, /* mknod */ 76 { &vop_open_desc, ufs_open }, /* open */ 77 { &vop_close_desc, ufs_close }, /* close */ 78 { &vop_access_desc, ufs_access }, /* access */ 79 { &vop_getattr_desc, ufs_getattr }, /* getattr */ 80 { &vop_setattr_desc, ufs_setattr }, /* setattr */ 81 { &vop_read_desc, ffs_read }, /* read */ 82 { &vop_write_desc, ffs_write }, /* write */ 83 { &vop_lease_desc, ufs_lease_check }, /* lease */ 84 { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ 85 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 86 { &vop_poll_desc, ufs_poll }, /* poll */ 87 { &vop_revoke_desc, ufs_revoke }, /* revoke */ 88 { &vop_mmap_desc, ufs_mmap }, /* mmap */ 89 { &vop_fsync_desc, ffs_fsync }, /* fsync */ 90 { &vop_seek_desc, ufs_seek }, /* seek */ 91 { &vop_remove_desc, ufs_remove }, /* remove */ 92 { &vop_link_desc, ufs_link }, /* link */ 93 { &vop_rename_desc, ufs_rename }, /* rename */ 94 { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ 95 { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ 96 { &vop_symlink_desc, ufs_symlink }, /* symlink */ 97 { &vop_readdir_desc, ufs_readdir }, /* readdir */ 98 { &vop_readlink_desc, ufs_readlink }, /* readlink */ 99 { &vop_abortop_desc, ufs_abortop }, /* abortop */ 100 { &vop_inactive_desc, ufs_inactive }, /* inactive */ 101 { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ 102 { &vop_lock_desc, ufs_lock }, /* lock */ 103 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 104 { &vop_bmap_desc, ufs_bmap }, /* bmap */ 105 { &vop_strategy_desc, ufs_strategy }, /* strategy */ 106 { &vop_print_desc, ufs_print }, /* print */ 107 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 108 { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ 109 { &vop_advlock_desc, ufs_advlock }, /* advlock */ 110 { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ 111 { &vop_valloc_desc, ffs_valloc }, /* valloc */ 112 { &vop_balloc_desc, ffs_balloc }, /* balloc */ 113 { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ 114 { &vop_vfree_desc, ffs_vfree }, /* vfree */ 115 { &vop_truncate_desc, ffs_truncate }, /* truncate */ 116 { &vop_update_desc, ffs_update }, /* update */ 117 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 118 { &vop_getpages_desc, ffs_getpages }, /* getpages */ 119 { &vop_putpages_desc, genfs_putpages }, /* putpages */ 120 { NULL, NULL } 121 }; 122 const struct vnodeopv_desc ffs_vnodeop_opv_desc = 123 { &ffs_vnodeop_p, ffs_vnodeop_entries }; 124 125 int (**ffs_specop_p) __P((void *)); 126 const struct vnodeopv_entry_desc ffs_specop_entries[] = { 127 { &vop_default_desc, vn_default_error }, 128 { &vop_lookup_desc, spec_lookup }, /* lookup */ 129 { &vop_create_desc, spec_create }, /* create */ 130 { &vop_mknod_desc, spec_mknod }, /* mknod */ 131 { &vop_open_desc, spec_open }, /* open */ 132 { &vop_close_desc, ufsspec_close }, /* close */ 133 { &vop_access_desc, ufs_access }, /* access */ 134 { &vop_getattr_desc, ufs_getattr }, /* getattr */ 135 { &vop_setattr_desc, ufs_setattr }, /* setattr */ 136 { &vop_read_desc, ufsspec_read }, /* read */ 137 { &vop_write_desc, ufsspec_write }, /* write */ 138 { &vop_lease_desc, spec_lease_check }, /* lease */ 139 { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ 140 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 141 { &vop_poll_desc, spec_poll }, /* poll */ 142 { &vop_revoke_desc, spec_revoke }, /* revoke */ 143 { &vop_mmap_desc, spec_mmap }, /* mmap */ 144 { &vop_fsync_desc, ffs_fsync }, /* fsync */ 145 { &vop_seek_desc, spec_seek }, /* seek */ 146 { &vop_remove_desc, spec_remove }, /* remove */ 147 { &vop_link_desc, spec_link }, /* link */ 148 { &vop_rename_desc, spec_rename }, /* rename */ 149 { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ 150 { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ 151 { &vop_symlink_desc, spec_symlink }, /* symlink */ 152 { &vop_readdir_desc, spec_readdir }, /* readdir */ 153 { &vop_readlink_desc, spec_readlink }, /* readlink */ 154 { &vop_abortop_desc, spec_abortop }, /* abortop */ 155 { &vop_inactive_desc, ufs_inactive }, /* inactive */ 156 { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ 157 { &vop_lock_desc, ufs_lock }, /* lock */ 158 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 159 { &vop_bmap_desc, spec_bmap }, /* bmap */ 160 { &vop_strategy_desc, spec_strategy }, /* strategy */ 161 { &vop_print_desc, ufs_print }, /* print */ 162 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 163 { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ 164 { &vop_advlock_desc, spec_advlock }, /* advlock */ 165 { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ 166 { &vop_valloc_desc, spec_valloc }, /* valloc */ 167 { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ 168 { &vop_vfree_desc, ffs_vfree }, /* vfree */ 169 { &vop_truncate_desc, spec_truncate }, /* truncate */ 170 { &vop_update_desc, ffs_update }, /* update */ 171 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 172 { &vop_getpages_desc, spec_getpages }, /* getpages */ 173 { &vop_putpages_desc, spec_putpages }, /* putpages */ 174 { NULL, NULL } 175 }; 176 const struct vnodeopv_desc ffs_specop_opv_desc = 177 { &ffs_specop_p, ffs_specop_entries }; 178 179 int (**ffs_fifoop_p) __P((void *)); 180 const struct vnodeopv_entry_desc ffs_fifoop_entries[] = { 181 { &vop_default_desc, vn_default_error }, 182 { &vop_lookup_desc, fifo_lookup }, /* lookup */ 183 { &vop_create_desc, fifo_create }, /* create */ 184 { &vop_mknod_desc, fifo_mknod }, /* mknod */ 185 { &vop_open_desc, fifo_open }, /* open */ 186 { &vop_close_desc, ufsfifo_close }, /* close */ 187 { &vop_access_desc, ufs_access }, /* access */ 188 { &vop_getattr_desc, ufs_getattr }, /* getattr */ 189 { &vop_setattr_desc, ufs_setattr }, /* setattr */ 190 { &vop_read_desc, ufsfifo_read }, /* read */ 191 { &vop_write_desc, ufsfifo_write }, /* write */ 192 { &vop_lease_desc, fifo_lease_check }, /* lease */ 193 { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ 194 { &vop_fcntl_desc, ufs_fcntl }, /* fcntl */ 195 { &vop_poll_desc, fifo_poll }, /* poll */ 196 { &vop_revoke_desc, fifo_revoke }, /* revoke */ 197 { &vop_mmap_desc, fifo_mmap }, /* mmap */ 198 { &vop_fsync_desc, ffs_fsync }, /* fsync */ 199 { &vop_seek_desc, fifo_seek }, /* seek */ 200 { &vop_remove_desc, fifo_remove }, /* remove */ 201 { &vop_link_desc, fifo_link }, /* link */ 202 { &vop_rename_desc, fifo_rename }, /* rename */ 203 { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ 204 { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ 205 { &vop_symlink_desc, fifo_symlink }, /* symlink */ 206 { &vop_readdir_desc, fifo_readdir }, /* readdir */ 207 { &vop_readlink_desc, fifo_readlink }, /* readlink */ 208 { &vop_abortop_desc, fifo_abortop }, /* abortop */ 209 { &vop_inactive_desc, ufs_inactive }, /* inactive */ 210 { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ 211 { &vop_lock_desc, ufs_lock }, /* lock */ 212 { &vop_unlock_desc, ufs_unlock }, /* unlock */ 213 { &vop_bmap_desc, fifo_bmap }, /* bmap */ 214 { &vop_strategy_desc, fifo_strategy }, /* strategy */ 215 { &vop_print_desc, ufs_print }, /* print */ 216 { &vop_islocked_desc, ufs_islocked }, /* islocked */ 217 { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ 218 { &vop_advlock_desc, fifo_advlock }, /* advlock */ 219 { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ 220 { &vop_valloc_desc, fifo_valloc }, /* valloc */ 221 { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ 222 { &vop_vfree_desc, ffs_vfree }, /* vfree */ 223 { &vop_truncate_desc, fifo_truncate }, /* truncate */ 224 { &vop_update_desc, ffs_update }, /* update */ 225 { &vop_bwrite_desc, vn_bwrite }, /* bwrite */ 226 { &vop_putpages_desc, fifo_putpages }, /* putpages */ 227 { NULL, NULL } 228 }; 229 const struct vnodeopv_desc ffs_fifoop_opv_desc = 230 { &ffs_fifoop_p, ffs_fifoop_entries }; 231 232 int doclusterread = 1; 233 int doclusterwrite = 1; 234 235 #include <ufs/ufs/ufs_readwrite.c> 236 237 int 238 ffs_fsync(v) 239 void *v; 240 { 241 struct vop_fsync_args /* { 242 struct vnode *a_vp; 243 struct ucred *a_cred; 244 int a_flags; 245 off_t a_offlo; 246 off_t a_offhi; 247 struct proc *a_p; 248 } */ *ap = v; 249 struct buf *bp; 250 int s, num, error, i; 251 struct indir ia[NIADDR + 1]; 252 int bsize; 253 daddr_t blk_high; 254 struct vnode *vp; 255 256 /* 257 * XXX no easy way to sync a range in a file with softdep. 258 */ 259 if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(ap->a_vp)) 260 return ffs_full_fsync(v); 261 262 vp = ap->a_vp; 263 264 bsize = ap->a_vp->v_mount->mnt_stat.f_iosize; 265 blk_high = ap->a_offhi / bsize; 266 if (ap->a_offhi % bsize != 0) 267 blk_high++; 268 269 /* 270 * First, flush all pages in range. 271 */ 272 273 if (vp->v_type == VREG) { 274 simple_lock(&vp->v_interlock); 275 error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), 276 round_page(ap->a_offhi), PGO_CLEANIT|PGO_SYNCIO); 277 if (error) { 278 return error; 279 } 280 } 281 282 /* 283 * Then, flush indirect blocks. 284 */ 285 286 s = splbio(); 287 if (!(ap->a_flags & FSYNC_DATAONLY) && blk_high >= NDADDR) { 288 error = ufs_getlbns(vp, blk_high, ia, &num); 289 if (error) { 290 splx(s); 291 return error; 292 } 293 for (i = 0; i < num; i++) { 294 bp = incore(vp, ia[i].in_lbn); 295 if (bp != NULL && !(bp->b_flags & B_BUSY) && 296 (bp->b_flags & B_DELWRI)) { 297 bp->b_flags |= B_BUSY | B_VFLUSH; 298 splx(s); 299 bawrite(bp); 300 s = splbio(); 301 } 302 } 303 } 304 305 if (ap->a_flags & FSYNC_WAIT) { 306 while (vp->v_numoutput > 0) { 307 vp->v_flag |= VBWAIT; 308 tsleep(&vp->v_numoutput, PRIBIO + 1, "fsync_range", 0); 309 } 310 } 311 splx(s); 312 313 return (VOP_UPDATE(vp, NULL, NULL, 314 (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0)); 315 } 316 317 /* 318 * Synch an open file. 319 */ 320 /* ARGSUSED */ 321 static int 322 ffs_full_fsync(v) 323 void *v; 324 { 325 struct vop_fsync_args /* { 326 struct vnode *a_vp; 327 struct ucred *a_cred; 328 int a_flags; 329 off_t a_offlo; 330 off_t a_offhi; 331 struct proc *a_p; 332 } */ *ap = v; 333 struct vnode *vp = ap->a_vp; 334 struct buf *bp, *nbp; 335 int s, error, passes, skipmeta, inodedeps_only, waitfor; 336 337 if (vp->v_type == VBLK && 338 vp->v_specmountpoint != NULL && 339 (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) 340 softdep_fsync_mountdev(vp); 341 342 inodedeps_only = DOINGSOFTDEP(vp) && (ap->a_flags & FSYNC_RECLAIM) 343 && vp->v_uobj.uo_npages == 0 && LIST_EMPTY(&vp->v_dirtyblkhd); 344 345 /* 346 * Flush all dirty data associated with a vnode. 347 */ 348 349 if (vp->v_type == VREG) { 350 simple_lock(&vp->v_interlock); 351 error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | 352 ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); 353 if (error) { 354 return error; 355 } 356 } 357 358 passes = NIADDR + 1; 359 skipmeta = 0; 360 if (ap->a_flags & (FSYNC_DATAONLY|FSYNC_WAIT)) 361 skipmeta = 1; 362 s = splbio(); 363 364 loop: 365 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) 366 bp->b_flags &= ~B_SCANNED; 367 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 368 nbp = LIST_NEXT(bp, b_vnbufs); 369 if (bp->b_flags & (B_BUSY | B_SCANNED)) 370 continue; 371 if ((bp->b_flags & B_DELWRI) == 0) 372 panic("ffs_fsync: not dirty"); 373 if (skipmeta && bp->b_lblkno < 0) 374 continue; 375 bp->b_flags |= B_BUSY | B_VFLUSH | B_SCANNED; 376 splx(s); 377 /* 378 * On our final pass through, do all I/O synchronously 379 * so that we can find out if our flush is failing 380 * because of write errors. 381 */ 382 if (passes > 0 || !(ap->a_flags & FSYNC_WAIT)) 383 (void) bawrite(bp); 384 else if ((error = bwrite(bp)) != 0) 385 return (error); 386 s = splbio(); 387 /* 388 * Since we may have slept during the I/O, we need 389 * to start from a known point. 390 */ 391 nbp = LIST_FIRST(&vp->v_dirtyblkhd); 392 } 393 if (skipmeta && !(ap->a_flags & FSYNC_DATAONLY)) { 394 skipmeta = 0; 395 goto loop; 396 } 397 if (ap->a_flags & FSYNC_WAIT) { 398 while (vp->v_numoutput) { 399 vp->v_flag |= VBWAIT; 400 (void) tsleep(&vp->v_numoutput, PRIBIO + 1, 401 "ffsfsync", 0); 402 } 403 splx(s); 404 405 if (ap->a_flags & FSYNC_DATAONLY) 406 return (0); 407 408 /* 409 * Ensure that any filesystem metadata associated 410 * with the vnode has been written. 411 */ 412 if ((error = softdep_sync_metadata(ap)) != 0) 413 return (error); 414 415 s = splbio(); 416 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 417 /* 418 * Block devices associated with filesystems may 419 * have new I/O requests posted for them even if 420 * the vnode is locked, so no amount of trying will 421 * get them clean. Thus we give block devices a 422 * good effort, then just give up. For all other file 423 * types, go around and try again until it is clean. 424 */ 425 if (passes > 0) { 426 passes--; 427 goto loop; 428 } 429 #ifdef DIAGNOSTIC 430 if (vp->v_type != VBLK) 431 vprint("ffs_fsync: dirty", vp); 432 #endif 433 } 434 } 435 splx(s); 436 437 if (inodedeps_only) 438 waitfor = 0; 439 else 440 waitfor = (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; 441 return (VOP_UPDATE(vp, NULL, NULL, waitfor)); 442 } 443 444 /* 445 * Reclaim an inode so that it can be used for other purposes. 446 */ 447 int 448 ffs_reclaim(v) 449 void *v; 450 { 451 struct vop_reclaim_args /* { 452 struct vnode *a_vp; 453 struct proc *a_p; 454 } */ *ap = v; 455 struct vnode *vp = ap->a_vp; 456 int error; 457 458 if ((error = ufs_reclaim(vp, ap->a_p)) != 0) 459 return (error); 460 /* 461 * XXX MFS ends up here, too, to free an inode. Should we create 462 * XXX a separate pool for MFS inodes? 463 */ 464 pool_put(&ffs_inode_pool, vp->v_data); 465 vp->v_data = NULL; 466 return (0); 467 } 468 469 int 470 ffs_getpages(void *v) 471 { 472 struct vop_getpages_args /* { 473 struct vnode *a_vp; 474 voff_t a_offset; 475 struct vm_page **a_m; 476 int *a_count; 477 int a_centeridx; 478 vm_prot_t a_access_type; 479 int a_advice; 480 int a_flags; 481 } */ *ap = v; 482 struct vnode *vp = ap->a_vp; 483 struct inode *ip = VTOI(vp); 484 struct fs *fs = ip->i_fs; 485 486 /* 487 * don't allow a softdep write to create pages for only part of a block. 488 * the dependency tracking requires that all pages be in memory for 489 * a block involved in a dependency. 490 */ 491 492 if (ap->a_flags & PGO_OVERWRITE && 493 (blkoff(fs, ap->a_offset) != 0 || 494 blkoff(fs, *ap->a_count << PAGE_SHIFT) != 0) && 495 DOINGSOFTDEP(ap->a_vp)) { 496 if ((ap->a_flags & PGO_LOCKED) == 0) { 497 simple_unlock(&vp->v_interlock); 498 } 499 return EINVAL; 500 } 501 return genfs_getpages(v); 502 } 503 504 /* 505 * Return the last logical file offset that should be written for this file 506 * if we're doing a write that ends at "size". 507 */ 508 509 void 510 ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp) 511 { 512 struct inode *ip = VTOI(vp); 513 struct fs *fs = ip->i_fs; 514 ufs_lbn_t olbn, nlbn; 515 516 olbn = lblkno(fs, ip->i_ffs_size); 517 nlbn = lblkno(fs, size); 518 if (nlbn < NDADDR && olbn <= nlbn) { 519 *eobp = fragroundup(fs, size); 520 } else { 521 *eobp = blkroundup(fs, size); 522 } 523 } 524