1 /* $NetBSD: vfs_vnops.c,v 1.56 2002/10/14 04:18:57 gmcgarry Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.56 2002/10/14 04:18:57 gmcgarry Exp $"); 45 46 #include "fs_union.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/file.h> 52 #include <sys/stat.h> 53 #include <sys/buf.h> 54 #include <sys/proc.h> 55 #include <sys/mount.h> 56 #include <sys/namei.h> 57 #include <sys/vnode.h> 58 #include <sys/ioctl.h> 59 #include <sys/tty.h> 60 #include <sys/poll.h> 61 62 #include <uvm/uvm_extern.h> 63 64 #ifdef UNION 65 #include <miscfs/union/union.h> 66 #endif 67 68 static int vn_read(struct file *fp, off_t *offset, struct uio *uio, 69 struct ucred *cred, int flags); 70 static int vn_write(struct file *fp, off_t *offset, struct uio *uio, 71 struct ucred *cred, int flags); 72 static int vn_closefile(struct file *fp, struct proc *p); 73 static int vn_poll(struct file *fp, int events, struct proc *p); 74 static int vn_fcntl(struct file *fp, u_int com, caddr_t data, struct proc *p); 75 static int vn_statfile(struct file *fp, struct stat *sb, struct proc *p); 76 static int vn_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p); 77 78 struct fileops vnops = { 79 vn_read, vn_write, vn_ioctl, vn_fcntl, vn_poll, 80 vn_statfile, vn_closefile 81 }; 82 83 /* 84 * Common code for vnode open operations. 85 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 86 */ 87 int 88 vn_open(ndp, fmode, cmode) 89 struct nameidata *ndp; 90 int fmode, cmode; 91 { 92 struct vnode *vp; 93 struct proc *p = ndp->ni_cnd.cn_proc; 94 struct ucred *cred = p->p_ucred; 95 struct vattr va; 96 int error; 97 98 if (fmode & O_CREAT) { 99 ndp->ni_cnd.cn_nameiop = CREATE; 100 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; 101 if ((fmode & O_EXCL) == 0 && 102 ((fmode & FNOSYMLINK) == 0)) 103 ndp->ni_cnd.cn_flags |= FOLLOW; 104 if ((error = namei(ndp)) != 0) 105 return (error); 106 if (ndp->ni_vp == NULL) { 107 VATTR_NULL(&va); 108 va.va_type = VREG; 109 va.va_mode = cmode; 110 if (fmode & O_EXCL) 111 va.va_vaflags |= VA_EXCLUSIVE; 112 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); 113 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 114 &ndp->ni_cnd, &va); 115 if (error) 116 return (error); 117 fmode &= ~O_TRUNC; 118 vp = ndp->ni_vp; 119 } else { 120 VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); 121 if (ndp->ni_dvp == ndp->ni_vp) 122 vrele(ndp->ni_dvp); 123 else 124 vput(ndp->ni_dvp); 125 ndp->ni_dvp = NULL; 126 vp = ndp->ni_vp; 127 if (fmode & O_EXCL) { 128 error = EEXIST; 129 goto bad; 130 } 131 if (ndp->ni_vp->v_type == VLNK) { 132 error = EFTYPE; 133 goto bad; 134 } 135 fmode &= ~O_CREAT; 136 } 137 } else { 138 ndp->ni_cnd.cn_nameiop = LOOKUP; 139 ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF; 140 if ((error = namei(ndp)) != 0) 141 return (error); 142 vp = ndp->ni_vp; 143 } 144 if (vp->v_type == VSOCK) { 145 error = EOPNOTSUPP; 146 goto bad; 147 } 148 if ((fmode & O_CREAT) == 0) { 149 if (fmode & FREAD) { 150 if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0) 151 goto bad; 152 } 153 if (fmode & (FWRITE | O_TRUNC)) { 154 if (vp->v_type == VDIR) { 155 error = EISDIR; 156 goto bad; 157 } 158 if ((error = vn_writechk(vp)) != 0 || 159 (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0) 160 goto bad; 161 } 162 } 163 if (fmode & O_TRUNC) { 164 VOP_UNLOCK(vp, 0); /* XXX */ 165 VOP_LEASE(vp, p, cred, LEASE_WRITE); 166 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 167 VATTR_NULL(&va); 168 va.va_size = 0; 169 if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0) 170 goto bad; 171 } 172 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) 173 goto bad; 174 if (vp->v_type == VREG && 175 uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) { 176 error = EIO; 177 goto bad; 178 } 179 if (fmode & FWRITE) 180 vp->v_writecount++; 181 182 return (0); 183 bad: 184 vput(vp); 185 return (error); 186 } 187 188 /* 189 * Check for write permissions on the specified vnode. 190 * Prototype text segments cannot be written. 191 */ 192 int 193 vn_writechk(vp) 194 struct vnode *vp; 195 { 196 197 /* 198 * If the vnode is in use as a process's text, 199 * we can't allow writing. 200 */ 201 if (vp->v_flag & VTEXT) 202 return (ETXTBSY); 203 return (0); 204 } 205 206 /* 207 * Mark a vnode as having executable mappings. 208 */ 209 void 210 vn_markexec(vp) 211 struct vnode *vp; 212 { 213 if ((vp->v_flag & VEXECMAP) == 0) { 214 uvmexp.filepages -= vp->v_uobj.uo_npages; 215 uvmexp.execpages += vp->v_uobj.uo_npages; 216 } 217 vp->v_flag |= VEXECMAP; 218 } 219 220 /* 221 * Mark a vnode as being the text of a process. 222 * Fail if the vnode is currently writable. 223 */ 224 int 225 vn_marktext(vp) 226 struct vnode *vp; 227 { 228 229 if (vp->v_writecount != 0) { 230 KASSERT((vp->v_flag & VTEXT) == 0); 231 return (ETXTBSY); 232 } 233 vp->v_flag |= VTEXT; 234 vn_markexec(vp); 235 return (0); 236 } 237 238 /* 239 * Vnode close call 240 * 241 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node. 242 */ 243 int 244 vn_close(vp, flags, cred, p) 245 struct vnode *vp; 246 int flags; 247 struct ucred *cred; 248 struct proc *p; 249 { 250 int error; 251 252 if (flags & FWRITE) 253 vp->v_writecount--; 254 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 255 error = VOP_CLOSE(vp, flags, cred, p); 256 vput(vp); 257 return (error); 258 } 259 260 /* 261 * Package up an I/O request on a vnode into a uio and do it. 262 */ 263 int 264 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) 265 enum uio_rw rw; 266 struct vnode *vp; 267 caddr_t base; 268 int len; 269 off_t offset; 270 enum uio_seg segflg; 271 int ioflg; 272 struct ucred *cred; 273 size_t *aresid; 274 struct proc *p; 275 { 276 struct uio auio; 277 struct iovec aiov; 278 int error; 279 280 if ((ioflg & IO_NODELOCKED) == 0) { 281 if (rw == UIO_READ) { 282 vn_lock(vp, LK_SHARED | LK_RETRY); 283 } else { 284 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 285 } 286 } 287 auio.uio_iov = &aiov; 288 auio.uio_iovcnt = 1; 289 aiov.iov_base = base; 290 aiov.iov_len = len; 291 auio.uio_resid = len; 292 auio.uio_offset = offset; 293 auio.uio_segflg = segflg; 294 auio.uio_rw = rw; 295 auio.uio_procp = p; 296 if (rw == UIO_READ) { 297 error = VOP_READ(vp, &auio, ioflg, cred); 298 } else { 299 error = VOP_WRITE(vp, &auio, ioflg, cred); 300 } 301 if (aresid) 302 *aresid = auio.uio_resid; 303 else 304 if (auio.uio_resid && error == 0) 305 error = EIO; 306 if ((ioflg & IO_NODELOCKED) == 0) 307 VOP_UNLOCK(vp, 0); 308 return (error); 309 } 310 311 int 312 vn_readdir(fp, buf, segflg, count, done, p, cookies, ncookies) 313 struct file *fp; 314 char *buf; 315 int segflg, *done, *ncookies; 316 u_int count; 317 struct proc *p; 318 off_t **cookies; 319 { 320 struct vnode *vp = (struct vnode *)fp->f_data; 321 struct iovec aiov; 322 struct uio auio; 323 int error, eofflag; 324 325 unionread: 326 if (vp->v_type != VDIR) 327 return (EINVAL); 328 aiov.iov_base = buf; 329 aiov.iov_len = count; 330 auio.uio_iov = &aiov; 331 auio.uio_iovcnt = 1; 332 auio.uio_rw = UIO_READ; 333 auio.uio_segflg = segflg; 334 auio.uio_procp = p; 335 auio.uio_resid = count; 336 vn_lock(vp, LK_SHARED | LK_RETRY); 337 auio.uio_offset = fp->f_offset; 338 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies, 339 ncookies); 340 fp->f_offset = auio.uio_offset; 341 VOP_UNLOCK(vp, 0); 342 if (error) 343 return (error); 344 345 #ifdef UNION 346 { 347 extern struct vnode *union_dircache __P((struct vnode *)); 348 349 if (count == auio.uio_resid && (vp->v_op == union_vnodeop_p)) { 350 struct vnode *lvp; 351 352 lvp = union_dircache(vp); 353 if (lvp != NULLVP) { 354 struct vattr va; 355 356 /* 357 * If the directory is opaque, 358 * then don't show lower entries 359 */ 360 error = VOP_GETATTR(vp, &va, fp->f_cred, p); 361 if (va.va_flags & OPAQUE) { 362 vput(lvp); 363 lvp = NULL; 364 } 365 } 366 367 if (lvp != NULLVP) { 368 error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); 369 if (error) { 370 vput(lvp); 371 return (error); 372 } 373 VOP_UNLOCK(lvp, 0); 374 fp->f_data = (caddr_t) lvp; 375 fp->f_offset = 0; 376 error = vn_close(vp, FREAD, fp->f_cred, p); 377 if (error) 378 return (error); 379 vp = lvp; 380 goto unionread; 381 } 382 } 383 } 384 #endif /* UNION */ 385 386 if (count == auio.uio_resid && (vp->v_flag & VROOT) && 387 (vp->v_mount->mnt_flag & MNT_UNION)) { 388 struct vnode *tvp = vp; 389 vp = vp->v_mount->mnt_vnodecovered; 390 VREF(vp); 391 fp->f_data = (caddr_t) vp; 392 fp->f_offset = 0; 393 vrele(tvp); 394 goto unionread; 395 } 396 *done = count - auio.uio_resid; 397 return error; 398 } 399 400 /* 401 * File table vnode read routine. 402 */ 403 static int 404 vn_read(fp, offset, uio, cred, flags) 405 struct file *fp; 406 off_t *offset; 407 struct uio *uio; 408 struct ucred *cred; 409 int flags; 410 { 411 struct vnode *vp = (struct vnode *)fp->f_data; 412 int count, error, ioflag = 0; 413 414 VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ); 415 if (fp->f_flag & FNONBLOCK) 416 ioflag |= IO_NDELAY; 417 if ((fp->f_flag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC)) 418 ioflag |= IO_SYNC; 419 if (fp->f_flag & FALTIO) 420 ioflag |= IO_ALTSEMANTICS; 421 vn_lock(vp, LK_SHARED | LK_RETRY); 422 uio->uio_offset = *offset; 423 count = uio->uio_resid; 424 error = VOP_READ(vp, uio, ioflag, cred); 425 if (flags & FOF_UPDATE_OFFSET) 426 *offset += count - uio->uio_resid; 427 VOP_UNLOCK(vp, 0); 428 return (error); 429 } 430 431 /* 432 * File table vnode write routine. 433 */ 434 static int 435 vn_write(fp, offset, uio, cred, flags) 436 struct file *fp; 437 off_t *offset; 438 struct uio *uio; 439 struct ucred *cred; 440 int flags; 441 { 442 struct vnode *vp = (struct vnode *)fp->f_data; 443 int count, error, ioflag = IO_UNIT; 444 445 if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) 446 ioflag |= IO_APPEND; 447 if (fp->f_flag & FNONBLOCK) 448 ioflag |= IO_NDELAY; 449 if (fp->f_flag & FFSYNC || 450 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 451 ioflag |= IO_SYNC; 452 else if (fp->f_flag & FDSYNC) 453 ioflag |= IO_DSYNC; 454 if (fp->f_flag & FALTIO) 455 ioflag |= IO_ALTSEMANTICS; 456 VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE); 457 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 458 uio->uio_offset = *offset; 459 count = uio->uio_resid; 460 error = VOP_WRITE(vp, uio, ioflag, cred); 461 if (flags & FOF_UPDATE_OFFSET) { 462 if (ioflag & IO_APPEND) 463 *offset = uio->uio_offset; 464 else 465 *offset += count - uio->uio_resid; 466 } 467 VOP_UNLOCK(vp, 0); 468 return (error); 469 } 470 471 /* 472 * File table vnode stat routine. 473 */ 474 static int 475 vn_statfile(fp, sb, p) 476 struct file *fp; 477 struct stat *sb; 478 struct proc *p; 479 { 480 struct vnode *vp = (struct vnode *)fp->f_data; 481 482 return vn_stat(vp, sb, p); 483 } 484 485 int 486 vn_stat(vp, sb, p) 487 struct vnode *vp; 488 struct stat *sb; 489 struct proc *p; 490 { 491 struct vattr va; 492 int error; 493 mode_t mode; 494 495 error = VOP_GETATTR(vp, &va, p->p_ucred, p); 496 if (error) 497 return (error); 498 /* 499 * Copy from vattr table 500 */ 501 sb->st_dev = va.va_fsid; 502 sb->st_ino = va.va_fileid; 503 mode = va.va_mode; 504 switch (vp->v_type) { 505 case VREG: 506 mode |= S_IFREG; 507 break; 508 case VDIR: 509 mode |= S_IFDIR; 510 break; 511 case VBLK: 512 mode |= S_IFBLK; 513 break; 514 case VCHR: 515 mode |= S_IFCHR; 516 break; 517 case VLNK: 518 mode |= S_IFLNK; 519 break; 520 case VSOCK: 521 mode |= S_IFSOCK; 522 break; 523 case VFIFO: 524 mode |= S_IFIFO; 525 break; 526 default: 527 return (EBADF); 528 }; 529 sb->st_mode = mode; 530 sb->st_nlink = va.va_nlink; 531 sb->st_uid = va.va_uid; 532 sb->st_gid = va.va_gid; 533 sb->st_rdev = va.va_rdev; 534 sb->st_size = va.va_size; 535 sb->st_atimespec = va.va_atime; 536 sb->st_mtimespec = va.va_mtime; 537 sb->st_ctimespec = va.va_ctime; 538 sb->st_blksize = va.va_blocksize; 539 sb->st_flags = va.va_flags; 540 sb->st_gen = 0; 541 sb->st_blocks = va.va_bytes / S_BLKSIZE; 542 return (0); 543 } 544 545 /* 546 * File table vnode fcntl routine. 547 */ 548 static int 549 vn_fcntl(fp, com, data, p) 550 struct file *fp; 551 u_int com; 552 caddr_t data; 553 struct proc *p; 554 { 555 struct vnode *vp = ((struct vnode *)fp->f_data); 556 int error; 557 558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 559 error = VOP_FCNTL(vp, com, data, fp->f_flag, p->p_ucred, p); 560 VOP_UNLOCK(vp, 0); 561 return (error); 562 } 563 564 /* 565 * File table vnode ioctl routine. 566 */ 567 static int 568 vn_ioctl(fp, com, data, p) 569 struct file *fp; 570 u_long com; 571 caddr_t data; 572 struct proc *p; 573 { 574 struct vnode *vp = ((struct vnode *)fp->f_data); 575 struct vattr vattr; 576 int error; 577 578 switch (vp->v_type) { 579 580 case VREG: 581 case VDIR: 582 if (com == FIONREAD) { 583 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); 584 if (error) 585 return (error); 586 *(int *)data = vattr.va_size - fp->f_offset; 587 return (0); 588 } 589 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 590 return (0); /* XXX */ 591 /* fall into ... */ 592 593 default: 594 return (EPASSTHROUGH); 595 596 case VFIFO: 597 case VCHR: 598 case VBLK: 599 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); 600 if (error == 0 && com == TIOCSCTTY) { 601 if (p->p_session->s_ttyvp) 602 vrele(p->p_session->s_ttyvp); 603 p->p_session->s_ttyvp = vp; 604 VREF(vp); 605 } 606 return (error); 607 } 608 } 609 610 /* 611 * File table vnode poll routine. 612 */ 613 static int 614 vn_poll(fp, events, p) 615 struct file *fp; 616 int events; 617 struct proc *p; 618 { 619 620 return (VOP_POLL(((struct vnode *)fp->f_data), events, p)); 621 } 622 623 /* 624 * Check that the vnode is still valid, and if so 625 * acquire requested lock. 626 */ 627 int 628 vn_lock(vp, flags) 629 struct vnode *vp; 630 int flags; 631 { 632 int error; 633 634 do { 635 if ((flags & LK_INTERLOCK) == 0) 636 simple_lock(&vp->v_interlock); 637 if (vp->v_flag & VXLOCK) { 638 if (flags & LK_NOWAIT) { 639 simple_unlock(&vp->v_interlock); 640 return EBUSY; 641 } 642 vp->v_flag |= VXWANT; 643 ltsleep(vp, PINOD | PNORELOCK, 644 "vn_lock", 0, &vp->v_interlock); 645 error = ENOENT; 646 } else { 647 error = VOP_LOCK(vp, flags | LK_INTERLOCK); 648 if (error == 0 || error == EDEADLK || error == EBUSY) 649 return (error); 650 } 651 flags &= ~LK_INTERLOCK; 652 } while (flags & LK_RETRY); 653 return (error); 654 } 655 656 /* 657 * File table vnode close routine. 658 */ 659 static int 660 vn_closefile(fp, p) 661 struct file *fp; 662 struct proc *p; 663 { 664 665 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, 666 fp->f_cred, p)); 667 } 668 669 /* 670 * Enable LK_CANRECURSE on lock. Return prior status. 671 */ 672 u_int 673 vn_setrecurse(vp) 674 struct vnode *vp; 675 { 676 struct lock *lkp = &vp->v_lock; 677 u_int retval = lkp->lk_flags & LK_CANRECURSE; 678 679 lkp->lk_flags |= LK_CANRECURSE; 680 return retval; 681 } 682 683 /* 684 * Called when done with locksetrecurse. 685 */ 686 void 687 vn_restorerecurse(vp, flags) 688 struct vnode *vp; 689 u_int flags; 690 { 691 struct lock *lkp = &vp->v_lock; 692 693 lkp->lk_flags &= ~LK_CANRECURSE; 694 lkp->lk_flags |= flags; 695 } 696