1 /* $NetBSD: vfs_vnops.c,v 1.177 2010/08/25 13:51:50 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.177 2010/08/25 13:51:50 pooka Exp $"); 70 71 #include "veriexec.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/kernel.h> 76 #include <sys/file.h> 77 #include <sys/stat.h> 78 #include <sys/buf.h> 79 #include <sys/proc.h> 80 #include <sys/mount.h> 81 #include <sys/namei.h> 82 #include <sys/vnode.h> 83 #include <sys/ioctl.h> 84 #include <sys/tty.h> 85 #include <sys/poll.h> 86 #include <sys/kauth.h> 87 #include <sys/syslog.h> 88 #include <sys/fstrans.h> 89 #include <sys/atomic.h> 90 #include <sys/filedesc.h> 91 #include <sys/wapbl.h> 92 93 #include <miscfs/specfs/specdev.h> 94 #include <miscfs/fifofs/fifo.h> 95 96 #include <uvm/uvm_extern.h> 97 #include <uvm/uvm_readahead.h> 98 99 #ifdef UNION 100 #include <fs/union/union.h> 101 #endif 102 103 int (*vn_union_readdir_hook) (struct vnode **, struct file *, struct lwp *); 104 105 #include <sys/verified_exec.h> 106 107 static int vn_read(file_t *fp, off_t *offset, struct uio *uio, 108 kauth_cred_t cred, int flags); 109 static int vn_write(file_t *fp, off_t *offset, struct uio *uio, 110 kauth_cred_t cred, int flags); 111 static int vn_closefile(file_t *fp); 112 static int vn_poll(file_t *fp, int events); 113 static int vn_fcntl(file_t *fp, u_int com, void *data); 114 static int vn_statfile(file_t *fp, struct stat *sb); 115 static int vn_ioctl(file_t *fp, u_long com, void *data); 116 117 const struct fileops vnops = { 118 .fo_read = vn_read, 119 .fo_write = vn_write, 120 .fo_ioctl = vn_ioctl, 121 .fo_fcntl = vn_fcntl, 122 .fo_poll = vn_poll, 123 .fo_stat = vn_statfile, 124 .fo_close = vn_closefile, 125 .fo_kqfilter = vn_kqfilter, 126 .fo_restart = fnullop_restart, 127 }; 128 129 /* 130 * Common code for vnode open operations. 131 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 132 */ 133 int 134 vn_open(struct nameidata *ndp, int fmode, int cmode) 135 { 136 struct vnode *vp; 137 struct lwp *l = curlwp; 138 kauth_cred_t cred = l->l_cred; 139 struct vattr va; 140 int error; 141 char *path; 142 143 ndp->ni_cnd.cn_flags &= TRYEMULROOT | NOCHROOT; 144 145 if (fmode & O_CREAT) { 146 ndp->ni_cnd.cn_nameiop = CREATE; 147 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF; 148 if ((fmode & O_EXCL) == 0 && 149 ((fmode & O_NOFOLLOW) == 0)) 150 ndp->ni_cnd.cn_flags |= FOLLOW; 151 } else { 152 ndp->ni_cnd.cn_nameiop = LOOKUP; 153 ndp->ni_cnd.cn_flags |= LOCKLEAF; 154 if ((fmode & O_NOFOLLOW) == 0) 155 ndp->ni_cnd.cn_flags |= FOLLOW; 156 } 157 158 VERIEXEC_PATH_GET(ndp->ni_dirp, ndp->ni_segflg, ndp->ni_dirp, path); 159 160 error = namei(ndp); 161 if (error) 162 goto out; 163 164 vp = ndp->ni_vp; 165 166 #if NVERIEXEC > 0 167 error = veriexec_openchk(l, ndp->ni_vp, ndp->ni_dirp, fmode); 168 if (error) 169 goto bad; 170 #endif /* NVERIEXEC > 0 */ 171 172 if (fmode & O_CREAT) { 173 if (ndp->ni_vp == NULL) { 174 vattr_null(&va); 175 va.va_type = VREG; 176 va.va_mode = cmode; 177 if (fmode & O_EXCL) 178 va.va_vaflags |= VA_EXCLUSIVE; 179 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 180 &ndp->ni_cnd, &va); 181 if (error) 182 goto out; 183 fmode &= ~O_TRUNC; 184 vp = ndp->ni_vp; 185 } else { 186 VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); 187 if (ndp->ni_dvp == ndp->ni_vp) 188 vrele(ndp->ni_dvp); 189 else 190 vput(ndp->ni_dvp); 191 ndp->ni_dvp = NULL; 192 vp = ndp->ni_vp; 193 if (fmode & O_EXCL) { 194 error = EEXIST; 195 goto bad; 196 } 197 fmode &= ~O_CREAT; 198 } 199 } else { 200 vp = ndp->ni_vp; 201 } 202 if (vp->v_type == VSOCK) { 203 error = EOPNOTSUPP; 204 goto bad; 205 } 206 if (ndp->ni_vp->v_type == VLNK) { 207 error = EFTYPE; 208 goto bad; 209 } 210 211 if ((fmode & O_CREAT) == 0) { 212 error = vn_openchk(vp, cred, fmode); 213 if (error != 0) 214 goto bad; 215 } 216 217 if (fmode & O_TRUNC) { 218 vattr_null(&va); 219 va.va_size = 0; 220 error = VOP_SETATTR(vp, &va, cred); 221 if (error != 0) 222 goto bad; 223 } 224 if ((error = VOP_OPEN(vp, fmode, cred)) != 0) 225 goto bad; 226 if (fmode & FWRITE) { 227 mutex_enter(&vp->v_interlock); 228 vp->v_writecount++; 229 mutex_exit(&vp->v_interlock); 230 } 231 232 bad: 233 if (error) 234 vput(vp); 235 out: 236 VERIEXEC_PATH_PUT(path); 237 return (error); 238 } 239 240 /* 241 * Check for write permissions on the specified vnode. 242 * Prototype text segments cannot be written. 243 */ 244 int 245 vn_writechk(struct vnode *vp) 246 { 247 248 /* 249 * If the vnode is in use as a process's text, 250 * we can't allow writing. 251 */ 252 if (vp->v_iflag & VI_TEXT) 253 return (ETXTBSY); 254 return (0); 255 } 256 257 int 258 vn_openchk(struct vnode *vp, kauth_cred_t cred, int fflags) 259 { 260 int permbits = 0; 261 int error; 262 263 if ((fflags & FREAD) != 0) { 264 permbits = VREAD; 265 } 266 if ((fflags & (FWRITE | O_TRUNC)) != 0) { 267 permbits |= VWRITE; 268 if (vp->v_type == VDIR) { 269 error = EISDIR; 270 goto bad; 271 } 272 error = vn_writechk(vp); 273 if (error != 0) 274 goto bad; 275 } 276 error = VOP_ACCESS(vp, permbits, cred); 277 bad: 278 return error; 279 } 280 281 /* 282 * Mark a vnode as having executable mappings. 283 */ 284 void 285 vn_markexec(struct vnode *vp) 286 { 287 288 if ((vp->v_iflag & VI_EXECMAP) != 0) { 289 /* Safe unlocked, as long as caller holds a reference. */ 290 return; 291 } 292 293 mutex_enter(&vp->v_interlock); 294 if ((vp->v_iflag & VI_EXECMAP) == 0) { 295 atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages); 296 atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages); 297 vp->v_iflag |= VI_EXECMAP; 298 } 299 mutex_exit(&vp->v_interlock); 300 } 301 302 /* 303 * Mark a vnode as being the text of a process. 304 * Fail if the vnode is currently writable. 305 */ 306 int 307 vn_marktext(struct vnode *vp) 308 { 309 310 if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP)) == (VI_TEXT|VI_EXECMAP)) { 311 /* Safe unlocked, as long as caller holds a reference. */ 312 return (0); 313 } 314 315 mutex_enter(&vp->v_interlock); 316 if (vp->v_writecount != 0) { 317 KASSERT((vp->v_iflag & VI_TEXT) == 0); 318 mutex_exit(&vp->v_interlock); 319 return (ETXTBSY); 320 } 321 if ((vp->v_iflag & VI_EXECMAP) == 0) { 322 atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages); 323 atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages); 324 } 325 vp->v_iflag |= (VI_TEXT | VI_EXECMAP); 326 mutex_exit(&vp->v_interlock); 327 return (0); 328 } 329 330 /* 331 * Vnode close call 332 * 333 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node. 334 */ 335 int 336 vn_close(struct vnode *vp, int flags, kauth_cred_t cred) 337 { 338 int error; 339 340 if (flags & FWRITE) { 341 mutex_enter(&vp->v_interlock); 342 vp->v_writecount--; 343 mutex_exit(&vp->v_interlock); 344 } 345 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 346 error = VOP_CLOSE(vp, flags, cred); 347 vput(vp); 348 return (error); 349 } 350 351 static int 352 enforce_rlimit_fsize(struct vnode *vp, struct uio *uio, int ioflag) 353 { 354 struct lwp *l = curlwp; 355 off_t testoff; 356 357 if (uio->uio_rw != UIO_WRITE || vp->v_type != VREG) 358 return 0; 359 360 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 361 if (ioflag & IO_APPEND) 362 testoff = vp->v_size; 363 else 364 testoff = uio->uio_offset; 365 366 if (testoff + uio->uio_resid > 367 l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 368 mutex_enter(proc_lock); 369 psignal(l->l_proc, SIGXFSZ); 370 mutex_exit(proc_lock); 371 return EFBIG; 372 } 373 374 return 0; 375 } 376 377 /* 378 * Package up an I/O request on a vnode into a uio and do it. 379 */ 380 int 381 vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, 382 enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid, 383 struct lwp *l) 384 { 385 struct uio auio; 386 struct iovec aiov; 387 int error; 388 389 if ((ioflg & IO_NODELOCKED) == 0) { 390 if (rw == UIO_READ) { 391 vn_lock(vp, LK_SHARED | LK_RETRY); 392 } else /* UIO_WRITE */ { 393 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 394 } 395 } 396 auio.uio_iov = &aiov; 397 auio.uio_iovcnt = 1; 398 aiov.iov_base = base; 399 aiov.iov_len = len; 400 auio.uio_resid = len; 401 auio.uio_offset = offset; 402 auio.uio_rw = rw; 403 if (segflg == UIO_SYSSPACE) { 404 UIO_SETUP_SYSSPACE(&auio); 405 } else { 406 auio.uio_vmspace = l->l_proc->p_vmspace; 407 } 408 409 if ((error = enforce_rlimit_fsize(vp, &auio, ioflg)) != 0) 410 goto out; 411 412 if (rw == UIO_READ) { 413 error = VOP_READ(vp, &auio, ioflg, cred); 414 } else { 415 error = VOP_WRITE(vp, &auio, ioflg, cred); 416 } 417 418 if (aresid) 419 *aresid = auio.uio_resid; 420 else 421 if (auio.uio_resid && error == 0) 422 error = EIO; 423 424 out: 425 if ((ioflg & IO_NODELOCKED) == 0) { 426 VOP_UNLOCK(vp); 427 } 428 return (error); 429 } 430 431 int 432 vn_readdir(file_t *fp, char *bf, int segflg, u_int count, int *done, 433 struct lwp *l, off_t **cookies, int *ncookies) 434 { 435 struct vnode *vp = (struct vnode *)fp->f_data; 436 struct iovec aiov; 437 struct uio auio; 438 int error, eofflag; 439 440 /* Limit the size on any kernel buffers used by VOP_READDIR */ 441 count = min(MAXBSIZE, count); 442 443 unionread: 444 if (vp->v_type != VDIR) 445 return (EINVAL); 446 aiov.iov_base = bf; 447 aiov.iov_len = count; 448 auio.uio_iov = &aiov; 449 auio.uio_iovcnt = 1; 450 auio.uio_rw = UIO_READ; 451 if (segflg == UIO_SYSSPACE) { 452 UIO_SETUP_SYSSPACE(&auio); 453 } else { 454 KASSERT(l == curlwp); 455 auio.uio_vmspace = l->l_proc->p_vmspace; 456 } 457 auio.uio_resid = count; 458 vn_lock(vp, LK_SHARED | LK_RETRY); 459 auio.uio_offset = fp->f_offset; 460 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies, 461 ncookies); 462 mutex_enter(&fp->f_lock); 463 fp->f_offset = auio.uio_offset; 464 mutex_exit(&fp->f_lock); 465 VOP_UNLOCK(vp); 466 if (error) 467 return (error); 468 469 if (count == auio.uio_resid && vn_union_readdir_hook) { 470 struct vnode *ovp = vp; 471 472 error = (*vn_union_readdir_hook)(&vp, fp, l); 473 if (error) 474 return (error); 475 if (vp != ovp) 476 goto unionread; 477 } 478 479 if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) && 480 (vp->v_mount->mnt_flag & MNT_UNION)) { 481 struct vnode *tvp = vp; 482 vp = vp->v_mount->mnt_vnodecovered; 483 vref(vp); 484 mutex_enter(&fp->f_lock); 485 fp->f_data = vp; 486 fp->f_offset = 0; 487 mutex_exit(&fp->f_lock); 488 vrele(tvp); 489 goto unionread; 490 } 491 *done = count - auio.uio_resid; 492 return error; 493 } 494 495 /* 496 * File table vnode read routine. 497 */ 498 static int 499 vn_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, 500 int flags) 501 { 502 struct vnode *vp = (struct vnode *)fp->f_data; 503 int count, error, ioflag, fflag; 504 505 ioflag = IO_ADV_ENCODE(fp->f_advice); 506 fflag = fp->f_flag; 507 if (fflag & FNONBLOCK) 508 ioflag |= IO_NDELAY; 509 if ((fflag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC)) 510 ioflag |= IO_SYNC; 511 if (fflag & FALTIO) 512 ioflag |= IO_ALTSEMANTICS; 513 if (fflag & FDIRECT) 514 ioflag |= IO_DIRECT; 515 vn_lock(vp, LK_SHARED | LK_RETRY); 516 uio->uio_offset = *offset; 517 count = uio->uio_resid; 518 error = VOP_READ(vp, uio, ioflag, cred); 519 if (flags & FOF_UPDATE_OFFSET) 520 *offset += count - uio->uio_resid; 521 VOP_UNLOCK(vp); 522 return (error); 523 } 524 525 /* 526 * File table vnode write routine. 527 */ 528 static int 529 vn_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, 530 int flags) 531 { 532 struct vnode *vp = (struct vnode *)fp->f_data; 533 int count, error, ioflag, fflag; 534 535 ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT; 536 fflag = fp->f_flag; 537 if (vp->v_type == VREG && (fflag & O_APPEND)) 538 ioflag |= IO_APPEND; 539 if (fflag & FNONBLOCK) 540 ioflag |= IO_NDELAY; 541 if (fflag & FFSYNC || 542 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 543 ioflag |= IO_SYNC; 544 else if (fflag & FDSYNC) 545 ioflag |= IO_DSYNC; 546 if (fflag & FALTIO) 547 ioflag |= IO_ALTSEMANTICS; 548 if (fflag & FDIRECT) 549 ioflag |= IO_DIRECT; 550 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 551 uio->uio_offset = *offset; 552 count = uio->uio_resid; 553 554 if ((error = enforce_rlimit_fsize(vp, uio, ioflag)) != 0) 555 goto out; 556 557 error = VOP_WRITE(vp, uio, ioflag, cred); 558 559 if (flags & FOF_UPDATE_OFFSET) { 560 if (ioflag & IO_APPEND) { 561 /* 562 * SUSv3 describes behaviour for count = 0 as following: 563 * "Before any action ... is taken, and if nbyte is zero 564 * and the file is a regular file, the write() function 565 * ... in the absence of errors ... shall return zero 566 * and have no other results." 567 */ 568 if (count) 569 *offset = uio->uio_offset; 570 } else 571 *offset += count - uio->uio_resid; 572 } 573 574 out: 575 VOP_UNLOCK(vp); 576 return (error); 577 } 578 579 /* 580 * File table vnode stat routine. 581 */ 582 static int 583 vn_statfile(file_t *fp, struct stat *sb) 584 { 585 struct vnode *vp = fp->f_data; 586 int error; 587 588 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 589 error = vn_stat(vp, sb); 590 VOP_UNLOCK(vp); 591 return error; 592 } 593 594 int 595 vn_stat(struct vnode *vp, struct stat *sb) 596 { 597 struct vattr va; 598 int error; 599 mode_t mode; 600 601 memset(&va, 0, sizeof(va)); 602 error = VOP_GETATTR(vp, &va, kauth_cred_get()); 603 if (error) 604 return (error); 605 /* 606 * Copy from vattr table 607 */ 608 sb->st_dev = va.va_fsid; 609 sb->st_ino = va.va_fileid; 610 mode = va.va_mode; 611 switch (vp->v_type) { 612 case VREG: 613 mode |= S_IFREG; 614 break; 615 case VDIR: 616 mode |= S_IFDIR; 617 break; 618 case VBLK: 619 mode |= S_IFBLK; 620 break; 621 case VCHR: 622 mode |= S_IFCHR; 623 break; 624 case VLNK: 625 mode |= S_IFLNK; 626 break; 627 case VSOCK: 628 mode |= S_IFSOCK; 629 break; 630 case VFIFO: 631 mode |= S_IFIFO; 632 break; 633 default: 634 return (EBADF); 635 }; 636 sb->st_mode = mode; 637 sb->st_nlink = va.va_nlink; 638 sb->st_uid = va.va_uid; 639 sb->st_gid = va.va_gid; 640 sb->st_rdev = va.va_rdev; 641 sb->st_size = va.va_size; 642 sb->st_atimespec = va.va_atime; 643 sb->st_mtimespec = va.va_mtime; 644 sb->st_ctimespec = va.va_ctime; 645 sb->st_birthtimespec = va.va_birthtime; 646 sb->st_blksize = va.va_blocksize; 647 sb->st_flags = va.va_flags; 648 sb->st_gen = 0; 649 sb->st_blocks = va.va_bytes / S_BLKSIZE; 650 memset(sb->st_spare, 0, sizeof(sb->st_spare)); 651 return (0); 652 } 653 654 /* 655 * File table vnode fcntl routine. 656 */ 657 static int 658 vn_fcntl(file_t *fp, u_int com, void *data) 659 { 660 struct vnode *vp = fp->f_data; 661 int error; 662 663 error = VOP_FCNTL(vp, com, data, fp->f_flag, kauth_cred_get()); 664 return (error); 665 } 666 667 /* 668 * File table vnode ioctl routine. 669 */ 670 static int 671 vn_ioctl(file_t *fp, u_long com, void *data) 672 { 673 struct vnode *vp = fp->f_data, *ovp; 674 struct vattr vattr; 675 int error; 676 677 switch (vp->v_type) { 678 679 case VREG: 680 case VDIR: 681 if (com == FIONREAD) { 682 error = VOP_GETATTR(vp, &vattr, 683 kauth_cred_get()); 684 if (error) 685 return (error); 686 *(int *)data = vattr.va_size - fp->f_offset; 687 return (0); 688 } 689 if ((com == FIONWRITE) || (com == FIONSPACE)) { 690 /* 691 * Files don't have send queues, so there never 692 * are any bytes in them, nor is there any 693 * open space in them. 694 */ 695 *(int *)data = 0; 696 return (0); 697 } 698 if (com == FIOGETBMAP) { 699 daddr_t *block; 700 701 if (*(daddr_t *)data < 0) 702 return (EINVAL); 703 block = (daddr_t *)data; 704 return (VOP_BMAP(vp, *block, NULL, block, NULL)); 705 } 706 if (com == OFIOGETBMAP) { 707 daddr_t ibn, obn; 708 709 if (*(int32_t *)data < 0) 710 return (EINVAL); 711 ibn = (daddr_t)*(int32_t *)data; 712 error = VOP_BMAP(vp, ibn, NULL, &obn, NULL); 713 *(int32_t *)data = (int32_t)obn; 714 return error; 715 } 716 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 717 return (0); /* XXX */ 718 /* fall into ... */ 719 case VFIFO: 720 case VCHR: 721 case VBLK: 722 error = VOP_IOCTL(vp, com, data, fp->f_flag, 723 kauth_cred_get()); 724 if (error == 0 && com == TIOCSCTTY) { 725 vref(vp); 726 mutex_enter(proc_lock); 727 ovp = curproc->p_session->s_ttyvp; 728 curproc->p_session->s_ttyvp = vp; 729 mutex_exit(proc_lock); 730 if (ovp != NULL) 731 vrele(ovp); 732 } 733 return (error); 734 735 default: 736 return (EPASSTHROUGH); 737 } 738 } 739 740 /* 741 * File table vnode poll routine. 742 */ 743 static int 744 vn_poll(file_t *fp, int events) 745 { 746 747 return (VOP_POLL(fp->f_data, events)); 748 } 749 750 /* 751 * File table vnode kqfilter routine. 752 */ 753 int 754 vn_kqfilter(file_t *fp, struct knote *kn) 755 { 756 757 return (VOP_KQFILTER(fp->f_data, kn)); 758 } 759 760 /* 761 * Check that the vnode is still valid, and if so 762 * acquire requested lock. 763 */ 764 int 765 vn_lock(struct vnode *vp, int flags) 766 { 767 int error; 768 769 #if 0 770 KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0); 771 #endif 772 KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0); 773 KASSERT(!mutex_owned(&vp->v_interlock)); 774 775 #ifdef DIAGNOSTIC 776 if (wapbl_vphaswapbl(vp)) 777 WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp)); 778 #endif 779 780 do { 781 /* 782 * XXX PR 37706 forced unmount of file systems is unsafe. 783 * Race between vclean() and this the remaining problem. 784 */ 785 mutex_enter(&vp->v_interlock); 786 if (vp->v_iflag & VI_XLOCK) { 787 if (flags & LK_NOWAIT) { 788 mutex_exit(&vp->v_interlock); 789 return EBUSY; 790 } 791 vwait(vp, VI_XLOCK); 792 mutex_exit(&vp->v_interlock); 793 error = ENOENT; 794 } else { 795 mutex_exit(&vp->v_interlock); 796 error = VOP_LOCK(vp, (flags & ~LK_RETRY)); 797 if (error == 0 || error == EDEADLK || error == EBUSY) 798 return (error); 799 } 800 } while (flags & LK_RETRY); 801 return (error); 802 } 803 804 /* 805 * File table vnode close routine. 806 */ 807 static int 808 vn_closefile(file_t *fp) 809 { 810 811 return vn_close(fp->f_data, fp->f_flag, fp->f_cred); 812 } 813 814 /* 815 * Simplified in-kernel wrapper calls for extended attribute access. 816 * Both calls pass in a NULL credential, authorizing a "kernel" access. 817 * Set IO_NODELOCKED in ioflg if the vnode is already locked. 818 */ 819 int 820 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, 821 const char *attrname, size_t *buflen, void *bf, struct lwp *l) 822 { 823 struct uio auio; 824 struct iovec aiov; 825 int error; 826 827 aiov.iov_len = *buflen; 828 aiov.iov_base = bf; 829 830 auio.uio_iov = &aiov; 831 auio.uio_iovcnt = 1; 832 auio.uio_rw = UIO_READ; 833 auio.uio_offset = 0; 834 auio.uio_resid = *buflen; 835 UIO_SETUP_SYSSPACE(&auio); 836 837 if ((ioflg & IO_NODELOCKED) == 0) 838 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 839 840 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL); 841 842 if ((ioflg & IO_NODELOCKED) == 0) 843 VOP_UNLOCK(vp); 844 845 if (error == 0) 846 *buflen = *buflen - auio.uio_resid; 847 848 return (error); 849 } 850 851 /* 852 * XXX Failure mode if partially written? 853 */ 854 int 855 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, 856 const char *attrname, size_t buflen, const void *bf, struct lwp *l) 857 { 858 struct uio auio; 859 struct iovec aiov; 860 int error; 861 862 aiov.iov_len = buflen; 863 aiov.iov_base = __UNCONST(bf); /* XXXUNCONST kills const */ 864 865 auio.uio_iov = &aiov; 866 auio.uio_iovcnt = 1; 867 auio.uio_rw = UIO_WRITE; 868 auio.uio_offset = 0; 869 auio.uio_resid = buflen; 870 UIO_SETUP_SYSSPACE(&auio); 871 872 if ((ioflg & IO_NODELOCKED) == 0) { 873 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 874 } 875 876 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL); 877 878 if ((ioflg & IO_NODELOCKED) == 0) { 879 VOP_UNLOCK(vp); 880 } 881 882 return (error); 883 } 884 885 int 886 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, 887 const char *attrname, struct lwp *l) 888 { 889 int error; 890 891 if ((ioflg & IO_NODELOCKED) == 0) { 892 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 893 } 894 895 error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL); 896 if (error == EOPNOTSUPP) 897 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL); 898 899 if ((ioflg & IO_NODELOCKED) == 0) { 900 VOP_UNLOCK(vp); 901 } 902 903 return (error); 904 } 905 906 void 907 vn_ra_allocctx(struct vnode *vp) 908 { 909 struct uvm_ractx *ra = NULL; 910 911 KASSERT(mutex_owned(&vp->v_interlock)); 912 913 if (vp->v_type != VREG) { 914 return; 915 } 916 if (vp->v_ractx != NULL) { 917 return; 918 } 919 if (vp->v_ractx == NULL) { 920 mutex_exit(&vp->v_interlock); 921 ra = uvm_ra_allocctx(); 922 mutex_enter(&vp->v_interlock); 923 if (ra != NULL && vp->v_ractx == NULL) { 924 vp->v_ractx = ra; 925 ra = NULL; 926 } 927 } 928 if (ra != NULL) { 929 uvm_ra_freectx(ra); 930 } 931 } 932 933 int 934 vn_fifo_bypass(void *v) 935 { 936 struct vop_generic_args *ap = v; 937 938 return VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, v); 939 } 940