1 /* $NetBSD: vfs_vnops.c,v 1.112 2006/05/27 23:46:49 simonb Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.112 2006/05/27 23:46:49 simonb Exp $"); 41 42 #include "opt_verified_exec.h" 43 44 #include "fs_union.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/file.h> 50 #include <sys/stat.h> 51 #include <sys/buf.h> 52 #include <sys/proc.h> 53 #include <sys/malloc.h> 54 #include <sys/mount.h> 55 #include <sys/namei.h> 56 #include <sys/vnode.h> 57 #include <sys/ioctl.h> 58 #include <sys/tty.h> 59 #include <sys/poll.h> 60 #include <sys/kauth.h> 61 62 #include <miscfs/specfs/specdev.h> 63 64 #include <uvm/uvm_extern.h> 65 #include <uvm/uvm_readahead.h> 66 67 #ifdef UNION 68 #include <fs/union/union.h> 69 #endif 70 71 #if defined(LKM) || defined(UNION) 72 int (*vn_union_readdir_hook) (struct vnode **, struct file *, struct lwp *); 73 #endif 74 75 #ifdef VERIFIED_EXEC 76 #include <sys/verified_exec.h> 77 #endif 78 79 static int vn_read(struct file *fp, off_t *offset, struct uio *uio, 80 kauth_cred_t cred, int flags); 81 static int vn_write(struct file *fp, off_t *offset, struct uio *uio, 82 kauth_cred_t cred, int flags); 83 static int vn_closefile(struct file *fp, struct lwp *l); 84 static int vn_poll(struct file *fp, int events, struct lwp *l); 85 static int vn_fcntl(struct file *fp, u_int com, void *data, struct lwp *l); 86 static int vn_statfile(struct file *fp, struct stat *sb, struct lwp *l); 87 static int vn_ioctl(struct file *fp, u_long com, void *data, struct lwp *l); 88 89 const struct fileops vnops = { 90 vn_read, vn_write, vn_ioctl, vn_fcntl, vn_poll, 91 vn_statfile, vn_closefile, vn_kqfilter 92 }; 93 94 /* 95 * Common code for vnode open operations. 96 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 97 */ 98 int 99 vn_open(struct nameidata *ndp, int fmode, int cmode) 100 { 101 struct vnode *vp; 102 struct mount *mp = NULL; /* XXX: GCC */ 103 struct lwp *l = ndp->ni_cnd.cn_lwp; 104 kauth_cred_t cred = l->l_proc->p_cred; 105 struct vattr va; 106 int error; 107 #ifdef VERIFIED_EXEC 108 struct veriexec_hash_entry *vhe = NULL; 109 char pathbuf[MAXPATHLEN]; 110 size_t pathlen; 111 int (*copyfun)(const void *, void *, size_t, size_t *) = 112 ndp->ni_segflg == UIO_SYSSPACE ? copystr : copyinstr; 113 #endif /* VERIFIED_EXEC */ 114 115 #ifdef VERIFIED_EXEC 116 error = (*copyfun)(ndp->ni_dirp, pathbuf, sizeof(pathbuf), &pathlen); 117 if (error) { 118 if (veriexec_verbose >= 1) 119 printf("veriexec: Can't copy path. (error=%d)\n", 120 error); 121 122 return (error); 123 } 124 #endif /* VERIFIED_EXEC */ 125 126 restart: 127 if (fmode & O_CREAT) { 128 ndp->ni_cnd.cn_nameiop = CREATE; 129 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; 130 if ((fmode & O_EXCL) == 0 && 131 ((fmode & O_NOFOLLOW) == 0)) 132 ndp->ni_cnd.cn_flags |= FOLLOW; 133 if ((error = namei(ndp)) != 0) 134 return (error); 135 if (ndp->ni_vp == NULL) { 136 #ifdef VERIFIED_EXEC 137 /* Lockdown mode: Prevent creation of new files. */ 138 if (veriexec_strict >= 3) { 139 VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); 140 141 printf("Veriexec: vn_open: Preventing " 142 "new file creation in %s.\n", 143 pathbuf); 144 145 vp = ndp->ni_dvp; 146 error = EPERM; 147 goto bad; 148 } 149 #endif /* VERIFIED_EXEC */ 150 151 VATTR_NULL(&va); 152 va.va_type = VREG; 153 va.va_mode = cmode; 154 if (fmode & O_EXCL) 155 va.va_vaflags |= VA_EXCLUSIVE; 156 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { 157 VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); 158 vput(ndp->ni_dvp); 159 if ((error = vn_start_write(NULL, &mp, 160 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 161 return (error); 162 goto restart; 163 } 164 VOP_LEASE(ndp->ni_dvp, l, cred, LEASE_WRITE); 165 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 166 &ndp->ni_cnd, &va); 167 vn_finished_write(mp, 0); 168 if (error) 169 return (error); 170 fmode &= ~O_TRUNC; 171 vp = ndp->ni_vp; 172 } else { 173 VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); 174 if (ndp->ni_dvp == ndp->ni_vp) 175 vrele(ndp->ni_dvp); 176 else 177 vput(ndp->ni_dvp); 178 ndp->ni_dvp = NULL; 179 vp = ndp->ni_vp; 180 if (fmode & O_EXCL) { 181 error = EEXIST; 182 goto bad; 183 } 184 fmode &= ~O_CREAT; 185 } 186 } else { 187 ndp->ni_cnd.cn_nameiop = LOOKUP; 188 ndp->ni_cnd.cn_flags = LOCKLEAF; 189 if ((fmode & O_NOFOLLOW) == 0) 190 ndp->ni_cnd.cn_flags |= FOLLOW; 191 if ((error = namei(ndp)) != 0) 192 return (error); 193 vp = ndp->ni_vp; 194 } 195 if (vp->v_type == VSOCK) { 196 error = EOPNOTSUPP; 197 goto bad; 198 } 199 if (ndp->ni_vp->v_type == VLNK) { 200 error = EFTYPE; 201 goto bad; 202 } 203 204 #ifdef VERIFIED_EXEC 205 if ((error = VOP_GETATTR(vp, &va, cred, l)) != 0) 206 goto bad; 207 #endif 208 209 if ((fmode & O_CREAT) == 0) { 210 #ifdef VERIFIED_EXEC 211 if ((error = veriexec_verify(l, vp, &va, pathbuf, 212 VERIEXEC_FILE, &vhe)) != 0) 213 goto bad; 214 #endif 215 216 if (fmode & FREAD) { 217 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0) 218 goto bad; 219 } 220 221 if (fmode & (FWRITE | O_TRUNC)) { 222 if (vp->v_type == VDIR) { 223 error = EISDIR; 224 goto bad; 225 } 226 if ((error = vn_writechk(vp)) != 0 || 227 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0) 228 goto bad; 229 #ifdef VERIFIED_EXEC 230 if (vhe != NULL) { 231 veriexec_report("Write access request.", 232 pathbuf, &va, l, 233 REPORT_NOVERBOSE, 234 REPORT_ALARM, 235 REPORT_NOPANIC); 236 237 /* IPS mode: Deny writing to monitored files. */ 238 if (veriexec_strict >= 2) { 239 error = EPERM; 240 goto bad; 241 } else { 242 vhe->status = FINGERPRINT_NOTEVAL; 243 } 244 } 245 #endif 246 } 247 } 248 249 if (fmode & O_TRUNC) { 250 VOP_UNLOCK(vp, 0); /* XXX */ 251 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 252 vrele(vp); 253 return (error); 254 } 255 VOP_LEASE(vp, l, cred, LEASE_WRITE); 256 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 257 VATTR_NULL(&va); 258 va.va_size = 0; 259 error = VOP_SETATTR(vp, &va, cred, l); 260 vn_finished_write(mp, 0); 261 if (error != 0) 262 goto bad; 263 } 264 if ((error = VOP_OPEN(vp, fmode, cred, l)) != 0) 265 goto bad; 266 if (vp->v_type == VREG && 267 uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) { 268 error = EIO; 269 goto bad; 270 } 271 if (fmode & FWRITE) 272 vp->v_writecount++; 273 274 return (0); 275 bad: 276 vput(vp); 277 return (error); 278 } 279 280 /* 281 * Check for write permissions on the specified vnode. 282 * Prototype text segments cannot be written. 283 */ 284 int 285 vn_writechk(struct vnode *vp) 286 { 287 288 /* 289 * If the vnode is in use as a process's text, 290 * we can't allow writing. 291 */ 292 if (vp->v_flag & VTEXT) 293 return (ETXTBSY); 294 return (0); 295 } 296 297 /* 298 * Mark a vnode as having executable mappings. 299 */ 300 void 301 vn_markexec(struct vnode *vp) 302 { 303 if ((vp->v_flag & VEXECMAP) == 0) { 304 uvmexp.filepages -= vp->v_uobj.uo_npages; 305 uvmexp.execpages += vp->v_uobj.uo_npages; 306 } 307 vp->v_flag |= VEXECMAP; 308 } 309 310 /* 311 * Mark a vnode as being the text of a process. 312 * Fail if the vnode is currently writable. 313 */ 314 int 315 vn_marktext(struct vnode *vp) 316 { 317 318 if (vp->v_writecount != 0) { 319 KASSERT((vp->v_flag & VTEXT) == 0); 320 return (ETXTBSY); 321 } 322 vp->v_flag |= VTEXT; 323 vn_markexec(vp); 324 return (0); 325 } 326 327 /* 328 * Vnode close call 329 * 330 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node. 331 */ 332 int 333 vn_close(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) 334 { 335 int error; 336 337 if (flags & FWRITE) 338 vp->v_writecount--; 339 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 340 error = VOP_CLOSE(vp, flags, cred, l); 341 vput(vp); 342 return (error); 343 } 344 345 /* 346 * Package up an I/O request on a vnode into a uio and do it. 347 */ 348 int 349 vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, 350 enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid, 351 struct lwp *l) 352 { 353 struct uio auio; 354 struct iovec aiov; 355 struct mount *mp = NULL; 356 int error; 357 358 if ((ioflg & IO_NODELOCKED) == 0) { 359 if (rw == UIO_READ) { 360 vn_lock(vp, LK_SHARED | LK_RETRY); 361 } else /* UIO_WRITE */ { 362 if (vp->v_type != VCHR && 363 (error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) 364 != 0) 365 return (error); 366 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 367 } 368 } 369 auio.uio_iov = &aiov; 370 auio.uio_iovcnt = 1; 371 aiov.iov_base = base; 372 aiov.iov_len = len; 373 auio.uio_resid = len; 374 auio.uio_offset = offset; 375 auio.uio_rw = rw; 376 if (segflg == UIO_SYSSPACE) { 377 UIO_SETUP_SYSSPACE(&auio); 378 } else { 379 auio.uio_vmspace = l->l_proc->p_vmspace; 380 } 381 if (rw == UIO_READ) { 382 error = VOP_READ(vp, &auio, ioflg, cred); 383 } else { 384 error = VOP_WRITE(vp, &auio, ioflg, cred); 385 } 386 if (aresid) 387 *aresid = auio.uio_resid; 388 else 389 if (auio.uio_resid && error == 0) 390 error = EIO; 391 if ((ioflg & IO_NODELOCKED) == 0) { 392 if (rw == UIO_WRITE) 393 vn_finished_write(mp, 0); 394 VOP_UNLOCK(vp, 0); 395 } 396 return (error); 397 } 398 399 int 400 vn_readdir(struct file *fp, char *bf, int segflg, u_int count, int *done, 401 struct lwp *l, off_t **cookies, int *ncookies) 402 { 403 struct vnode *vp = (struct vnode *)fp->f_data; 404 struct iovec aiov; 405 struct uio auio; 406 int error, eofflag; 407 408 /* Limit the size on any kernel buffers used by VOP_READDIR */ 409 count = min(MAXBSIZE, count); 410 411 unionread: 412 if (vp->v_type != VDIR) 413 return (EINVAL); 414 aiov.iov_base = bf; 415 aiov.iov_len = count; 416 auio.uio_iov = &aiov; 417 auio.uio_iovcnt = 1; 418 auio.uio_rw = UIO_READ; 419 if (segflg == UIO_SYSSPACE) { 420 UIO_SETUP_SYSSPACE(&auio); 421 } else { 422 KASSERT(l == curlwp); 423 auio.uio_vmspace = l->l_proc->p_vmspace; 424 } 425 auio.uio_resid = count; 426 vn_lock(vp, LK_SHARED | LK_RETRY); 427 auio.uio_offset = fp->f_offset; 428 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies, 429 ncookies); 430 fp->f_offset = auio.uio_offset; 431 VOP_UNLOCK(vp, 0); 432 if (error) 433 return (error); 434 435 #if defined(UNION) || defined(LKM) 436 if (count == auio.uio_resid && vn_union_readdir_hook) { 437 struct vnode *ovp = vp; 438 439 error = (*vn_union_readdir_hook)(&vp, fp, l); 440 if (error) 441 return (error); 442 if (vp != ovp) 443 goto unionread; 444 } 445 #endif /* UNION || LKM */ 446 447 if (count == auio.uio_resid && (vp->v_flag & VROOT) && 448 (vp->v_mount->mnt_flag & MNT_UNION)) { 449 struct vnode *tvp = vp; 450 vp = vp->v_mount->mnt_vnodecovered; 451 VREF(vp); 452 fp->f_data = vp; 453 fp->f_offset = 0; 454 vrele(tvp); 455 goto unionread; 456 } 457 *done = count - auio.uio_resid; 458 return error; 459 } 460 461 /* 462 * File table vnode read routine. 463 */ 464 static int 465 vn_read(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, 466 int flags) 467 { 468 struct vnode *vp = (struct vnode *)fp->f_data; 469 int count, error, ioflag; 470 struct lwp *l = curlwp; 471 472 VOP_LEASE(vp, l, cred, LEASE_READ); 473 ioflag = IO_ADV_ENCODE(fp->f_advice); 474 if (fp->f_flag & FNONBLOCK) 475 ioflag |= IO_NDELAY; 476 if ((fp->f_flag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC)) 477 ioflag |= IO_SYNC; 478 if (fp->f_flag & FALTIO) 479 ioflag |= IO_ALTSEMANTICS; 480 vn_lock(vp, LK_SHARED | LK_RETRY); 481 uio->uio_offset = *offset; 482 count = uio->uio_resid; 483 error = VOP_READ(vp, uio, ioflag, cred); 484 if (flags & FOF_UPDATE_OFFSET) 485 *offset += count - uio->uio_resid; 486 VOP_UNLOCK(vp, 0); 487 return (error); 488 } 489 490 /* 491 * File table vnode write routine. 492 */ 493 static int 494 vn_write(struct file *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, 495 int flags) 496 { 497 struct vnode *vp = (struct vnode *)fp->f_data; 498 struct mount *mp; 499 int count, error, ioflag = IO_UNIT; 500 struct lwp *l = curlwp; 501 502 if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) 503 ioflag |= IO_APPEND; 504 if (fp->f_flag & FNONBLOCK) 505 ioflag |= IO_NDELAY; 506 if (fp->f_flag & FFSYNC || 507 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 508 ioflag |= IO_SYNC; 509 else if (fp->f_flag & FDSYNC) 510 ioflag |= IO_DSYNC; 511 if (fp->f_flag & FALTIO) 512 ioflag |= IO_ALTSEMANTICS; 513 mp = NULL; 514 if (vp->v_type != VCHR && 515 (error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 516 return (error); 517 VOP_LEASE(vp, l, cred, LEASE_WRITE); 518 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 519 uio->uio_offset = *offset; 520 count = uio->uio_resid; 521 error = VOP_WRITE(vp, uio, ioflag, cred); 522 if (flags & FOF_UPDATE_OFFSET) { 523 if (ioflag & IO_APPEND) 524 *offset = uio->uio_offset; 525 else 526 *offset += count - uio->uio_resid; 527 } 528 VOP_UNLOCK(vp, 0); 529 vn_finished_write(mp, 0); 530 return (error); 531 } 532 533 /* 534 * File table vnode stat routine. 535 */ 536 static int 537 vn_statfile(struct file *fp, struct stat *sb, struct lwp *l) 538 { 539 struct vnode *vp = (struct vnode *)fp->f_data; 540 541 return vn_stat(vp, sb, l); 542 } 543 544 int 545 vn_stat(struct vnode *vp, struct stat *sb, struct lwp *l) 546 { 547 struct vattr va; 548 int error; 549 mode_t mode; 550 551 error = VOP_GETATTR(vp, &va, l->l_proc->p_cred, l); 552 if (error) 553 return (error); 554 /* 555 * Copy from vattr table 556 */ 557 sb->st_dev = va.va_fsid; 558 sb->st_ino = va.va_fileid; 559 mode = va.va_mode; 560 switch (vp->v_type) { 561 case VREG: 562 mode |= S_IFREG; 563 break; 564 case VDIR: 565 mode |= S_IFDIR; 566 break; 567 case VBLK: 568 mode |= S_IFBLK; 569 break; 570 case VCHR: 571 mode |= S_IFCHR; 572 break; 573 case VLNK: 574 mode |= S_IFLNK; 575 break; 576 case VSOCK: 577 mode |= S_IFSOCK; 578 break; 579 case VFIFO: 580 mode |= S_IFIFO; 581 break; 582 default: 583 return (EBADF); 584 }; 585 sb->st_mode = mode; 586 sb->st_nlink = va.va_nlink; 587 sb->st_uid = va.va_uid; 588 sb->st_gid = va.va_gid; 589 sb->st_rdev = va.va_rdev; 590 sb->st_size = va.va_size; 591 sb->st_atimespec = va.va_atime; 592 sb->st_mtimespec = va.va_mtime; 593 sb->st_ctimespec = va.va_ctime; 594 sb->st_birthtimespec = va.va_birthtime; 595 sb->st_blksize = va.va_blocksize; 596 sb->st_flags = va.va_flags; 597 sb->st_gen = 0; 598 sb->st_blocks = va.va_bytes / S_BLKSIZE; 599 return (0); 600 } 601 602 /* 603 * File table vnode fcntl routine. 604 */ 605 static int 606 vn_fcntl(struct file *fp, u_int com, void *data, struct lwp *l) 607 { 608 struct vnode *vp = ((struct vnode *)fp->f_data); 609 int error; 610 611 error = VOP_FCNTL(vp, com, data, fp->f_flag, l->l_proc->p_cred, l); 612 return (error); 613 } 614 615 /* 616 * File table vnode ioctl routine. 617 */ 618 static int 619 vn_ioctl(struct file *fp, u_long com, void *data, struct lwp *l) 620 { 621 struct vnode *vp = ((struct vnode *)fp->f_data); 622 struct proc *p = l->l_proc; 623 struct vattr vattr; 624 int error; 625 626 switch (vp->v_type) { 627 628 case VREG: 629 case VDIR: 630 if (com == FIONREAD) { 631 error = VOP_GETATTR(vp, &vattr, l->l_proc->p_cred, l); 632 if (error) 633 return (error); 634 *(int *)data = vattr.va_size - fp->f_offset; 635 return (0); 636 } 637 if ((com == FIONWRITE) || (com == FIONSPACE)) { 638 /* 639 * Files don't have send queues, so there never 640 * are any bytes in them, nor is there any 641 * open space in them. 642 */ 643 *(int *)data = 0; 644 return (0); 645 } 646 if (com == FIOGETBMAP) { 647 daddr_t *block; 648 649 if (*(daddr_t *)data < 0) 650 return (EINVAL); 651 block = (daddr_t *)data; 652 return (VOP_BMAP(vp, *block, NULL, block, NULL)); 653 } 654 if (com == OFIOGETBMAP) { 655 daddr_t ibn, obn; 656 657 if (*(int32_t *)data < 0) 658 return (EINVAL); 659 ibn = (daddr_t)*(int32_t *)data; 660 error = VOP_BMAP(vp, ibn, NULL, &obn, NULL); 661 *(int32_t *)data = (int32_t)obn; 662 return error; 663 } 664 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 665 return (0); /* XXX */ 666 /* fall into ... */ 667 case VFIFO: 668 case VCHR: 669 case VBLK: 670 error = VOP_IOCTL(vp, com, data, fp->f_flag, 671 l->l_proc->p_cred, l); 672 if (error == 0 && com == TIOCSCTTY) { 673 if (p->p_session->s_ttyvp) 674 vrele(p->p_session->s_ttyvp); 675 p->p_session->s_ttyvp = vp; 676 VREF(vp); 677 } 678 return (error); 679 680 default: 681 return (EPASSTHROUGH); 682 } 683 } 684 685 /* 686 * File table vnode poll routine. 687 */ 688 static int 689 vn_poll(struct file *fp, int events, struct lwp *l) 690 { 691 692 return (VOP_POLL(((struct vnode *)fp->f_data), events, l)); 693 } 694 695 /* 696 * File table vnode kqfilter routine. 697 */ 698 int 699 vn_kqfilter(struct file *fp, struct knote *kn) 700 { 701 702 return (VOP_KQFILTER((struct vnode *)fp->f_data, kn)); 703 } 704 705 /* 706 * Check that the vnode is still valid, and if so 707 * acquire requested lock. 708 */ 709 int 710 vn_lock(struct vnode *vp, int flags) 711 { 712 int error; 713 714 #if 0 715 KASSERT(vp->v_usecount > 0 || (flags & LK_INTERLOCK) != 0 716 || (vp->v_flag & VONWORKLST) != 0); 717 #endif 718 KASSERT((flags & 719 ~(LK_INTERLOCK|LK_SHARED|LK_EXCLUSIVE|LK_DRAIN|LK_NOWAIT|LK_RETRY| 720 LK_SETRECURSE|LK_CANRECURSE)) 721 == 0); 722 723 do { 724 if ((flags & LK_INTERLOCK) == 0) 725 simple_lock(&vp->v_interlock); 726 if (vp->v_flag & VXLOCK) { 727 if (flags & LK_NOWAIT) { 728 simple_unlock(&vp->v_interlock); 729 return EBUSY; 730 } 731 vp->v_flag |= VXWANT; 732 ltsleep(vp, PINOD | PNORELOCK, 733 "vn_lock", 0, &vp->v_interlock); 734 error = ENOENT; 735 } else { 736 error = VOP_LOCK(vp, 737 (flags & ~LK_RETRY) | LK_INTERLOCK); 738 if (error == 0 || error == EDEADLK || error == EBUSY) 739 return (error); 740 } 741 flags &= ~LK_INTERLOCK; 742 } while (flags & LK_RETRY); 743 return (error); 744 } 745 746 /* 747 * File table vnode close routine. 748 */ 749 static int 750 vn_closefile(struct file *fp, struct lwp *l) 751 { 752 753 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, 754 fp->f_cred, l)); 755 } 756 757 /* 758 * Enable LK_CANRECURSE on lock. Return prior status. 759 */ 760 u_int 761 vn_setrecurse(struct vnode *vp) 762 { 763 struct lock *lkp = &vp->v_lock; 764 u_int retval = lkp->lk_flags & LK_CANRECURSE; 765 766 lkp->lk_flags |= LK_CANRECURSE; 767 return retval; 768 } 769 770 /* 771 * Called when done with locksetrecurse. 772 */ 773 void 774 vn_restorerecurse(struct vnode *vp, u_int flags) 775 { 776 struct lock *lkp = &vp->v_lock; 777 778 lkp->lk_flags &= ~LK_CANRECURSE; 779 lkp->lk_flags |= flags; 780 } 781 782 int 783 vn_cow_establish(struct vnode *vp, 784 int (*func)(void *, struct buf *), void *cookie) 785 { 786 int s; 787 struct spec_cow_entry *e; 788 789 MALLOC(e, struct spec_cow_entry *, sizeof(struct spec_cow_entry), 790 M_DEVBUF, M_WAITOK); 791 e->ce_func = func; 792 e->ce_cookie = cookie; 793 794 SPEC_COW_LOCK(vp->v_specinfo, s); 795 vp->v_spec_cow_req++; 796 while (vp->v_spec_cow_count > 0) 797 ltsleep(&vp->v_spec_cow_req, PRIBIO, "cowlist", 0, 798 &vp->v_spec_cow_slock); 799 800 SLIST_INSERT_HEAD(&vp->v_spec_cow_head, e, ce_list); 801 802 vp->v_spec_cow_req--; 803 if (vp->v_spec_cow_req == 0) 804 wakeup(&vp->v_spec_cow_req); 805 SPEC_COW_UNLOCK(vp->v_specinfo, s); 806 807 return 0; 808 } 809 810 int 811 vn_cow_disestablish(struct vnode *vp, 812 int (*func)(void *, struct buf *), void *cookie) 813 { 814 int s; 815 struct spec_cow_entry *e; 816 817 SPEC_COW_LOCK(vp->v_specinfo, s); 818 vp->v_spec_cow_req++; 819 while (vp->v_spec_cow_count > 0) 820 ltsleep(&vp->v_spec_cow_req, PRIBIO, "cowlist", 0, 821 &vp->v_spec_cow_slock); 822 823 SLIST_FOREACH(e, &vp->v_spec_cow_head, ce_list) 824 if (e->ce_func == func && e->ce_cookie == cookie) { 825 SLIST_REMOVE(&vp->v_spec_cow_head, e, 826 spec_cow_entry, ce_list); 827 FREE(e, M_DEVBUF); 828 break; 829 } 830 831 vp->v_spec_cow_req--; 832 if (vp->v_spec_cow_req == 0) 833 wakeup(&vp->v_spec_cow_req); 834 SPEC_COW_UNLOCK(vp->v_specinfo, s); 835 836 return e ? 0 : EINVAL; 837 } 838 839 /* 840 * Simplified in-kernel wrapper calls for extended attribute access. 841 * Both calls pass in a NULL credential, authorizing a "kernel" access. 842 * Set IO_NODELOCKED in ioflg if the vnode is already locked. 843 */ 844 int 845 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, 846 const char *attrname, size_t *buflen, void *bf, struct lwp *l) 847 { 848 struct uio auio; 849 struct iovec aiov; 850 int error; 851 852 aiov.iov_len = *buflen; 853 aiov.iov_base = bf; 854 855 auio.uio_iov = &aiov; 856 auio.uio_iovcnt = 1; 857 auio.uio_rw = UIO_READ; 858 auio.uio_offset = 0; 859 auio.uio_resid = *buflen; 860 UIO_SETUP_SYSSPACE(&auio); 861 862 if ((ioflg & IO_NODELOCKED) == 0) 863 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 864 865 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL, 866 l); 867 868 if ((ioflg & IO_NODELOCKED) == 0) 869 VOP_UNLOCK(vp, 0); 870 871 if (error == 0) 872 *buflen = *buflen - auio.uio_resid; 873 874 return (error); 875 } 876 877 /* 878 * XXX Failure mode if partially written? 879 */ 880 int 881 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, 882 const char *attrname, size_t buflen, const void *bf, struct lwp *l) 883 { 884 struct uio auio; 885 struct iovec aiov; 886 struct mount *mp = NULL; /* XXX: GCC */ 887 int error; 888 889 aiov.iov_len = buflen; 890 aiov.iov_base = __UNCONST(bf); /* XXXUNCONST kills const */ 891 892 auio.uio_iov = &aiov; 893 auio.uio_iovcnt = 1; 894 auio.uio_rw = UIO_WRITE; 895 auio.uio_offset = 0; 896 auio.uio_resid = buflen; 897 UIO_SETUP_SYSSPACE(&auio); 898 899 if ((ioflg & IO_NODELOCKED) == 0) { 900 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) 901 return (error); 902 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 903 } 904 905 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, l); 906 907 if ((ioflg & IO_NODELOCKED) == 0) { 908 vn_finished_write(mp, 0); 909 VOP_UNLOCK(vp, 0); 910 } 911 912 return (error); 913 } 914 915 int 916 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, 917 const char *attrname, struct lwp *l) 918 { 919 struct mount *mp = NULL; /* XXX: GCC */ 920 int error; 921 922 if ((ioflg & IO_NODELOCKED) == 0) { 923 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) 924 return (error); 925 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 926 } 927 928 error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, l); 929 if (error == EOPNOTSUPP) 930 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, 931 NULL, l); 932 933 if ((ioflg & IO_NODELOCKED) == 0) { 934 vn_finished_write(mp, 0); 935 VOP_UNLOCK(vp, 0); 936 } 937 938 return (error); 939 } 940 941 /* 942 * Preparing to start a filesystem write operation. If the operation is 943 * permitted, then we bump the count of operations in progress and 944 * proceed. If a suspend request is in progress, we wait until the 945 * suspension is over, and then proceed. 946 * V_PCATCH adds PCATCH to the tsleep flags. 947 * V_WAIT waits until suspension is over. Otherwise returns EWOULDBLOCK. 948 * V_SLEEPONLY wait, but do not bump the operations count. 949 * V_LOWER this is a lower level operation. No further vnodes should be 950 * locked. Otherwise it is a upper level operation. No vnodes 951 * should be locked. 952 */ 953 int 954 vn_start_write(struct vnode *vp, struct mount **mpp, int flags) 955 { 956 struct mount *mp; 957 int error, mask, prio; 958 959 /* 960 * If a vnode is provided, get and return the mount point that 961 * to which it will write. 962 */ 963 if (vp != NULL) { 964 *mpp = vp->v_mount; 965 } 966 if ((mp = *mpp) == NULL) 967 return (0); 968 mp = mp->mnt_leaf; 969 /* 970 * Check on status of suspension. 971 */ 972 prio = PUSER - 1; 973 if (flags & V_PCATCH) 974 prio |= PCATCH; 975 976 if ((flags & V_LOWER) == 0) 977 mask = IMNT_SUSPEND; 978 else 979 mask = IMNT_SUSPENDLOW; 980 981 while ((mp->mnt_iflag & mask) != 0) { 982 if ((flags & V_WAIT) == 0) 983 return (EWOULDBLOCK); 984 error = tsleep(&mp->mnt_flag, prio, "suspfs", 0); 985 if (error) 986 return (error); 987 } 988 if (flags & V_SLEEPONLY) 989 return (0); 990 simple_lock(&mp->mnt_slock); 991 if ((flags & V_LOWER) == 0) 992 mp->mnt_writeopcountupper++; 993 else 994 mp->mnt_writeopcountlower++; 995 simple_unlock(&mp->mnt_slock); 996 return (0); 997 } 998 999 /* 1000 * Filesystem write operation has completed. If we are suspending and this 1001 * operation is the last one, notify the suspender that the suspension is 1002 * now in effect. 1003 */ 1004 void 1005 vn_finished_write(struct mount *mp, int flags) 1006 { 1007 if (mp == NULL) 1008 return; 1009 mp = mp->mnt_leaf; 1010 simple_lock(&mp->mnt_slock); 1011 if ((flags & V_LOWER) == 0) { 1012 mp->mnt_writeopcountupper--; 1013 if (mp->mnt_writeopcountupper < 0) 1014 printf("vn_finished_write: neg cnt upper=%d\n", 1015 mp->mnt_writeopcountupper); 1016 if ((mp->mnt_iflag & IMNT_SUSPEND) != 0 && 1017 mp->mnt_writeopcountupper <= 0) 1018 wakeup(&mp->mnt_writeopcountupper); 1019 } else { 1020 mp->mnt_writeopcountlower--; 1021 if (mp->mnt_writeopcountlower < 0) 1022 printf("vn_finished_write: neg cnt lower=%d\n", 1023 mp->mnt_writeopcountlower); 1024 if ((mp->mnt_iflag & IMNT_SUSPENDLOW) != 0 && 1025 mp->mnt_writeopcountupper <= 0) 1026 wakeup(&mp->mnt_writeopcountlower); 1027 } 1028 simple_unlock(&mp->mnt_slock); 1029 } 1030 1031 void 1032 vn_ra_allocctx(struct vnode *vp) 1033 { 1034 struct uvm_ractx *ra = NULL; 1035 1036 if (vp->v_type != VREG) { 1037 return; 1038 } 1039 if (vp->v_ractx != NULL) { 1040 return; 1041 } 1042 simple_lock(&vp->v_interlock); 1043 if (vp->v_ractx == NULL) { 1044 simple_unlock(&vp->v_interlock); 1045 ra = uvm_ra_allocctx(); 1046 simple_lock(&vp->v_interlock); 1047 if (ra != NULL && vp->v_ractx == NULL) { 1048 vp->v_ractx = ra; 1049 ra = NULL; 1050 } 1051 } 1052 simple_unlock(&vp->v_interlock); 1053 if (ra != NULL) { 1054 uvm_ra_freectx(ra); 1055 } 1056 } 1057