/*	$OpenBSD: vfs_vnops.c,v 1.120 2022/06/20 01:39:44 visa Exp $	*/
/*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.5 (Berkeley) 12/8/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/lock.h>
#include <sys/vnode.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/cdio.h>
#include <sys/filedesc.h>
#include <sys/specdev.h>
#include <sys/unistd.h>

int vn_read(struct file *, struct uio *, int);
int vn_write(struct file *, struct uio *, int);
int vn_kqfilter(struct file *, struct knote *);
int vn_closefile(struct file *, struct proc *);
int vn_seek(struct file *, off_t *, int, struct proc *);

const struct fileops vnops = {
	.fo_read	= vn_read,
	.fo_write	= vn_write,
	.fo_ioctl	= vn_ioctl,
	.fo_kqfilter	= vn_kqfilter,
	.fo_stat	= vn_statfile,
	.fo_close	= vn_closefile,
	.fo_seek	= vn_seek,
};
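/*
 * Example (hypothetical caller, for illustration only): a file opened
 * through the vnode layer has fp->f_ops pointing at the vnops table
 * above, so the generic file code dispatches with, e.g.,
 *
 *	error = (*fp->f_ops->fo_read)(fp, &auio, 0);
 *
 * which lands in vn_read() below, with fp->f_data holding the vnode.
 */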
/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	struct vnode *vp;
	struct proc *p = ndp->ni_cnd.cn_proc;
	struct ucred *cred = p->p_ucred;
	struct vattr va;
	struct cloneinfo *cip;
	int error;

	/*
	 * The only valid flag to pass in here from NDINIT is
	 * KERNELPATH.  This function will override the nameiop based
	 * on the fmode and cmode flags, so validate that our caller
	 * has not set other flags or operations in the nameidata
	 * structure.
	 */
	KASSERT(ndp->ni_cnd.cn_flags == 0 ||
	    ndp->ni_cnd.cn_flags == KERNELPATH);
	KASSERT(ndp->ni_cnd.cn_nameiop == 0);

	if ((fmode & (FREAD|FWRITE)) == 0)
		return (EINVAL);
	if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC)
		return (EINVAL);
	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		if ((error = namei(ndp)) != 0)
			return (error);

		if (ndp->ni_vp == NULL) {
			VATTR_NULL(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			if (fmode & O_EXCL)
				va.va_vaflags |= VA_EXCLUSIVE;
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &va);
			vput(ndp->ni_dvp);
			if (error)
				return (error);
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags |=
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
		if ((error = namei(ndp)) != 0)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (vp->v_type == VLNK) {
		error = ELOOP;
		goto bad;
	}
	if ((fmode & O_DIRECTORY) && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	if ((fmode & O_CREAT) == 0) {
		if (fmode & FREAD) {
			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
				goto bad;
		}
		if (fmode & FWRITE) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			if ((error = vn_writechk(vp)) != 0 ||
			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
				goto bad;
		}
	}
	if ((fmode & O_TRUNC) && vp->v_type == VREG) {
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
		goto bad;

	if (vp->v_flag & VCLONED) {
		cip = (struct cloneinfo *)vp->v_data;

		vp->v_flag &= ~VCLONED;

		ndp->ni_vp = cip->ci_vp;	/* return cloned vnode */
		vp->v_data = cip->ci_data;	/* restore v_data */
		VOP_UNLOCK(vp);			/* keep a reference */
		vp = ndp->ni_vp;		/* for the increment below */

		free(cip, M_TEMP, sizeof(*cip));
	}

	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	vput(vp);
	return (error);
}
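/*
 * Example (hypothetical caller, for illustration only): in-kernel opens
 * leave the nameiop and the flags zeroed so vn_open() can choose them
 * from fmode:
 *
 *	struct nameidata nd;
 *
 *	NDINIT(&nd, 0, 0, UIO_SYSSPACE, path, p);
 *	if ((error = vn_open(&nd, FREAD, 0)) != 0)
 *		return (error);
 *
 * On success nd.ni_vp is typically returned locked and referenced, and
 * the caller eventually hands it to vn_close().
 */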
/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{
	/*
	 * Disallow write attempts on read-only file systems;
	 * unless the file is a socket or a block or character
	 * device resident on the file system.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		case VNON:
		case VCHR:
		case VSOCK:
		case VFIFO:
		case VBAD:
		case VBLK:
			break;
		}
	}
	/*
	 * If there's shared text associated with
	 * the vnode, try to free it up once.  If
	 * we fail, we can't allow writing.
	 */
	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
		return (ETXTBSY);

	return (0);
}

/*
 * Check whether a write operation would exceed the file size rlimit
 * for the process, if one should be applied for this operation.
 * If a partial write should take place, the uio is adjusted and the
 * amount by which the request would have exceeded the limit is returned
 * via the 'overrun' argument.
 */
int
vn_fsizechk(struct vnode *vp, struct uio *uio, int ioflag, ssize_t *overrun)
{
	struct proc *p = uio->uio_procp;

	*overrun = 0;
	if (vp->v_type == VREG && p != NULL && !(ioflag & IO_NOLIMIT)) {
		rlim_t limit = lim_cur_proc(p, RLIMIT_FSIZE);

		/* if already at or over the limit, send the signal and fail */
		if (uio->uio_offset >= limit) {
			psignal(p, SIGXFSZ);
			return (EFBIG);
		}

		/* otherwise, clamp the write to stay under the limit */
		if (uio->uio_resid > limit - uio->uio_offset) {
			*overrun = uio->uio_resid - (limit - uio->uio_offset);
			uio->uio_resid = limit - uio->uio_offset;
		}
	}

	return (0);
}

/*
 * Mark a vnode as being the text image of a running process.
 */
void
vn_marktext(struct vnode *vp)
{
	vp->v_flag |= VTEXT;
}

/*
 * Vnode close call
 */
int
vn_close(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	int error;

	if (flags & FWRITE)
		vp->v_writecount--;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_CLOSE(vp, flags, cred, p);
	vput(vp);
	return (error);
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset,
    enum uio_seg segflg, int ioflg, struct ucred *cred, size_t *aresid,
    struct proc *p)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_procp = p;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp);

	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;
	return (error);
}
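/*
 * Example (hypothetical caller, for illustration only): reading the
 * first 512 bytes of an already-opened, referenced vnode through
 * vn_rdwr():
 *
 *	char buf[512];
 *	size_t resid;
 *
 *	error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), 0, UIO_SYSSPACE,
 *	    0, p->p_ucred, &resid, p);
 *
 * A nonzero resid reports how much of the request was left
 * untransferred; passing a NULL aresid turns a short transfer into EIO
 * instead.
 */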
/*
 * File table vnode read routine.
 */
int
vn_read(struct file *fp, struct uio *uio, int fflags)
{
	struct vnode *vp = fp->f_data;
	struct ucred *cred = fp->f_cred;
	size_t count = uio->uio_resid;
	off_t offset;
	int error;

	KERNEL_LOCK();

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	if ((fflags & FO_POSITION) == 0)
		offset = uio->uio_offset = fp->f_offset;
	else
		offset = uio->uio_offset;

	/* no wrap around of offsets except on character devices */
	if (vp->v_type != VCHR && count > LLONG_MAX - offset) {
		error = EINVAL;
		goto done;
	}

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}

	error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
	    cred);
	if ((fflags & FO_POSITION) == 0) {
		mtx_enter(&fp->f_mtx);
		fp->f_offset += count - uio->uio_resid;
		mtx_leave(&fp->f_mtx);
	}
done:
	VOP_UNLOCK(vp);
	KERNEL_UNLOCK();
	return (error);
}

/*
 * File table vnode write routine.
 */
int
vn_write(struct file *fp, struct uio *uio, int fflags)
{
	struct vnode *vp = fp->f_data;
	struct ucred *cred = fp->f_cred;
	int error, ioflag = IO_UNIT;
	size_t count;

	KERNEL_LOCK();

	/* note: pwrite/pwritev are unaffected by O_APPEND */
	if (vp->v_type == VREG && (fp->f_flag & O_APPEND) &&
	    (fflags & FO_POSITION) == 0)
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_flag & FFSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((fflags & FO_POSITION) == 0)
		uio->uio_offset = fp->f_offset;
	count = uio->uio_resid;
	error = VOP_WRITE(vp, uio, ioflag, cred);
	if ((fflags & FO_POSITION) == 0) {
		mtx_enter(&fp->f_mtx);
		if (ioflag & IO_APPEND)
			fp->f_offset = uio->uio_offset;
		else
			fp->f_offset += count - uio->uio_resid;
		mtx_leave(&fp->f_mtx);
	}
	VOP_UNLOCK(vp);

	KERNEL_UNLOCK();
	return (error);
}

/*
 * File table wrapper for vn_stat
 */
int
vn_statfile(struct file *fp, struct stat *sb, struct proc *p)
{
	struct vnode *vp = fp->f_data;

	return vn_stat(vp, sb, p);
}
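/*
 * Example (hypothetical caller, for illustration only): positional I/O
 * such as pread(2) sets FO_POSITION and supplies the offset in the uio:
 *
 *	uio.uio_offset = pos;
 *	error = (*fp->f_ops->fo_read)(fp, &uio, FO_POSITION);
 *
 * so vn_read()/vn_write() above leave fp->f_offset alone, while plain
 * read(2)/write(2) omit the flag and advance f_offset under fp->f_mtx.
 */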
/*
 * vnode stat routine.
 */
int
vn_stat(struct vnode *vp, struct stat *sb, struct proc *p)
{
	struct vattr va;
	int error;
	mode_t mode;

	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	memset(sb, 0, sizeof(*sb));
	sb->st_dev = va.va_fsid;
	sb->st_ino = va.va_fileid;
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = va.va_nlink;
	sb->st_uid = va.va_uid;
	sb->st_gid = va.va_gid;
	sb->st_rdev = va.va_rdev;
	sb->st_size = va.va_size;
	sb->st_atim.tv_sec = va.va_atime.tv_sec;
	sb->st_atim.tv_nsec = va.va_atime.tv_nsec;
	sb->st_mtim.tv_sec = va.va_mtime.tv_sec;
	sb->st_mtim.tv_nsec = va.va_mtime.tv_nsec;
	sb->st_ctim.tv_sec = va.va_ctime.tv_sec;
	sb->st_ctim.tv_nsec = va.va_ctime.tv_nsec;
	sb->st_blksize = va.va_blocksize;
	sb->st_flags = va.va_flags;
	sb->st_gen = va.va_gen;
	sb->st_blocks = va.va_bytes / S_BLKSIZE;
	return (0);
}

/*
 * File table vnode ioctl routine.
 */
int
vn_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	struct vnode *vp = fp->f_data;
	struct vattr vattr;
	int error = ENOTTY;

	KERNEL_LOCK();
	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				break;
			*(int *)data = vattr.va_size - foffset(fp);

		} else if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			error = 0;				/* XXX */
		break;

	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		if (error == 0 && com == TIOCSCTTY) {
			struct session *s = p->p_p->ps_session;
			struct vnode *ovp = s->s_ttyvp;

			s->s_ttyvp = vp;
			vref(vp);
			if (ovp)
				vrele(ovp);
		}
		break;

	default:
		break;
	}
	KERNEL_UNLOCK();

	return (error);
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags)
{
	int error, xlocked, do_wakeup;

	do {
		mtx_enter(&vnode_mtx);
		if (vp->v_lflag & VXLOCK) {
			vp->v_lflag |= VXWANT;
			msleep_nsec(vp, &vnode_mtx, PINOD, "vn_lock", INFSLP);
			mtx_leave(&vnode_mtx);
			error = ENOENT;
		} else {
			vp->v_lockcount++;
			mtx_leave(&vnode_mtx);

			error = VOP_LOCK(vp, flags);

			mtx_enter(&vnode_mtx);
			vp->v_lockcount--;
			do_wakeup = (vp->v_lockcount == 0);
			xlocked = vp->v_lflag & VXLOCK;
			mtx_leave(&vnode_mtx);

			if (error == 0) {
				if (!xlocked)
					return (0);

				/*
				 * The vnode was exclusively locked while
				 * acquiring the requested lock. Release it and
				 * try again.
				 */
				error = ENOENT;
				VOP_UNLOCK(vp);
				if (do_wakeup)
					wakeup_one(&vp->v_lockcount);
			}
		}
	} while (flags & LK_RETRY);
	return (error);
}
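/*
 * Example (hypothetical caller, for illustration only): the usual
 * pattern around a vnode operation is
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	error = VOP_GETATTR(vp, &va, cred, p);
 *	VOP_UNLOCK(vp);
 *
 * With LK_RETRY the call keeps retrying until the lock is acquired;
 * without it, ENOENT reports that the vnode was exclusively locked for
 * reclaim while we waited.
 */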
/*
 * File table vnode close routine.
 */
int
vn_closefile(struct file *fp, struct proc *p)
{
	struct vnode *vp = fp->f_data;
	struct flock lf;
	int error;

	KERNEL_LOCK();
	if ((fp->f_iflags & FIF_HASLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	}
	error = vn_close(vp, fp->f_flag, fp->f_cred, p);
	KERNEL_UNLOCK();
	return (error);
}

int
vn_kqfilter(struct file *fp, struct knote *kn)
{
	int error;

	KERNEL_LOCK();
	error = VOP_KQFILTER(fp->f_data, fp->f_flag, kn);
	KERNEL_UNLOCK();
	return (error);
}

int
vn_seek(struct file *fp, off_t *offset, int whence, struct proc *p)
{
	struct ucred *cred = p->p_ucred;
	struct vnode *vp = fp->f_data;
	struct vattr vattr;
	off_t newoff;
	int error = 0;
	int special;

	if (vp->v_type == VFIFO)
		return (ESPIPE);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	if (vp->v_type == VCHR)
		special = 1;
	else
		special = 0;

	switch (whence) {
	case SEEK_CUR:
		newoff = fp->f_offset + *offset;
		break;
	case SEEK_END:
		KERNEL_LOCK();
		error = VOP_GETATTR(vp, &vattr, cred, p);
		KERNEL_UNLOCK();
		if (error)
			goto out;
		newoff = *offset + (off_t)vattr.va_size;
		break;
	case SEEK_SET:
		newoff = *offset;
		break;
	default:
		error = EINVAL;
		goto out;
	}
	if (!special && newoff < 0) {
		error = EINVAL;
		goto out;
	}
	mtx_enter(&fp->f_mtx);
	fp->f_offset = newoff;
	mtx_leave(&fp->f_mtx);
	*offset = newoff;

out:
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * Common code for vnode access operations.
 */

/* Check if a directory can be found inside another in the hierarchy */
int
vn_isunder(struct vnode *lvp, struct vnode *rvp, struct proc *p)
{
	int error;

	error = vfs_getcwd_common(lvp, rvp, NULL, NULL, MAXPATHLEN/2, 0, p);

	if (!error)
		return (1);

	return (0);
}
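/*
 * Example (hypothetical caller, for illustration only): vn_isunder()
 * answers containment questions, such as whether a looked-up vnode lies
 * below the process's root directory:
 *
 *	if (vn_isunder(nd.ni_vp, p->p_fd->fd_rdir, p)) {
 *		... nd.ni_vp is inside the chroot ...
 *	}
 *
 * It returns 1 when lvp can be reached by walking up from rvp's subtree,
 * i.e. lvp lies beneath rvp, and 0 otherwise.
 */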