1 /* $NetBSD: vfs_syscalls.c,v 1.334 2007/12/08 19:29:50 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.334 2007/12/08 19:29:50 pooka Exp $"); 41 42 #include "opt_compat_netbsd.h" 43 #include "opt_compat_43.h" 44 #include "opt_fileassoc.h" 45 #include "fss.h" 46 #include "veriexec.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/namei.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/file.h> 54 #include <sys/stat.h> 55 #include <sys/vnode.h> 56 #include <sys/mount.h> 57 #include <sys/proc.h> 58 #include <sys/uio.h> 59 #include <sys/malloc.h> 60 #include <sys/kmem.h> 61 #include <sys/dirent.h> 62 #include <sys/sysctl.h> 63 #include <sys/syscallargs.h> 64 #include <sys/vfs_syscalls.h> 65 #include <sys/ktrace.h> 66 #ifdef FILEASSOC 67 #include <sys/fileassoc.h> 68 #endif /* FILEASSOC */ 69 #include <sys/verified_exec.h> 70 #include <sys/kauth.h> 71 72 #include <miscfs/genfs/genfs.h> 73 #include <miscfs/syncfs/syncfs.h> 74 75 #ifdef COMPAT_30 76 #include "opt_nfsserver.h" 77 #include <nfs/rpcv2.h> 78 #endif 79 #include <nfs/nfsproto.h> 80 #ifdef COMPAT_30 81 #include <nfs/nfs.h> 82 #include <nfs/nfs_var.h> 83 #endif 84 85 #if NFSS > 0 86 #include <dev/fssvar.h> 87 #endif 88 89 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 90 91 static int change_dir(struct nameidata *, struct lwp *); 92 static int change_flags(struct vnode *, u_long, struct lwp *); 93 static int change_mode(struct vnode *, int, struct lwp *l); 94 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 95 static int rename_files(const char *, const char *, struct lwp *, int); 96 97 void checkdirs(struct vnode *); 98 99 int dovfsusermount = 0; 100 101 /* 102 * Virtual File System System Calls 103 */ 104 105 /* 106 * Mount a file system. 107 */ 108 109 #if defined(COMPAT_09) || defined(COMPAT_43) 110 /* 111 * This table is used to maintain compatibility with 4.3BSD 112 * and NetBSD 0.9 mount syscalls. Note, the order is important! 113 * 114 * Do not modify this table. It should only contain filesystems 115 * supported by NetBSD 0.9 and 4.3BSD. 116 */ 117 const char * const mountcompatnames[] = { 118 NULL, /* 0 = MOUNT_NONE */ 119 MOUNT_FFS, /* 1 = MOUNT_UFS */ 120 MOUNT_NFS, /* 2 */ 121 MOUNT_MFS, /* 3 */ 122 MOUNT_MSDOS, /* 4 */ 123 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 124 MOUNT_FDESC, /* 6 */ 125 MOUNT_KERNFS, /* 7 */ 126 NULL, /* 8 = MOUNT_DEVFS */ 127 MOUNT_AFS, /* 9 */ 128 }; 129 const int nmountcompatnames = sizeof(mountcompatnames) / 130 sizeof(mountcompatnames[0]); 131 #endif /* COMPAT_09 || COMPAT_43 */ 132 133 static int 134 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 135 void *data, size_t *data_len) 136 { 137 struct mount *mp; 138 int error = 0, saved_flags; 139 140 mp = vp->v_mount; 141 saved_flags = mp->mnt_flag; 142 143 /* We can operate only on VV_ROOT nodes. */ 144 if ((vp->v_vflag & VV_ROOT) == 0) { 145 error = EINVAL; 146 goto out; 147 } 148 149 /* 150 * We only allow the filesystem to be reloaded if it 151 * is currently mounted read-only. 152 */ 153 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 154 error = EOPNOTSUPP; /* Needs translation */ 155 goto out; 156 } 157 158 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 159 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 160 if (error) 161 goto out; 162 163 if (vfs_busy(mp, LK_NOWAIT, 0)) { 164 error = EPERM; 165 goto out; 166 } 167 168 mp->mnt_flag &= ~MNT_OP_FLAGS; 169 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 170 171 /* 172 * Set the mount level flags. 173 */ 174 if (flags & MNT_RDONLY) 175 mp->mnt_flag |= MNT_RDONLY; 176 else if (mp->mnt_flag & MNT_RDONLY) 177 mp->mnt_iflag |= IMNT_WANTRDWR; 178 mp->mnt_flag &= 179 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 180 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 181 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 182 mp->mnt_flag |= flags & 183 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 184 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 185 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 186 MNT_IGNORE); 187 188 error = VFS_MOUNT(mp, path, data, data_len); 189 190 #if defined(COMPAT_30) && defined(NFSSERVER) 191 if (error && data != NULL) { 192 int error2; 193 194 /* Update failed; let's try and see if it was an 195 * export request. */ 196 error2 = nfs_update_exports_30(mp, path, data, l); 197 198 /* Only update error code if the export request was 199 * understood but some problem occurred while 200 * processing it. */ 201 if (error2 != EJUSTRETURN) 202 error = error2; 203 } 204 #endif 205 if (mp->mnt_iflag & IMNT_WANTRDWR) 206 mp->mnt_flag &= ~MNT_RDONLY; 207 if (error) 208 mp->mnt_flag = saved_flags; 209 mp->mnt_flag &= ~MNT_OP_FLAGS; 210 mp->mnt_iflag &= ~IMNT_WANTRDWR; 211 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 212 if (mp->mnt_syncer == NULL) 213 error = vfs_allocate_syncvnode(mp); 214 } else { 215 if (mp->mnt_syncer != NULL) 216 vfs_deallocate_syncvnode(mp); 217 } 218 vfs_unbusy(mp); 219 220 out: 221 return (error); 222 } 223 224 static int 225 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 226 { 227 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 228 int error; 229 230 /* Copy file-system type from userspace. */ 231 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 232 if (error) { 233 #if defined(COMPAT_09) || defined(COMPAT_43) 234 /* 235 * Historically, filesystem types were identified by numbers. 236 * If we get an integer for the filesystem type instead of a 237 * string, we check to see if it matches one of the historic 238 * filesystem types. 239 */ 240 u_long fsindex = (u_long)fstype; 241 if (fsindex >= nmountcompatnames || 242 mountcompatnames[fsindex] == NULL) 243 return ENODEV; 244 strlcpy(fstypename, mountcompatnames[fsindex], 245 sizeof(fstypename)); 246 #else 247 return error; 248 #endif 249 } 250 251 #ifdef COMPAT_10 252 /* Accept `ufs' as an alias for `ffs'. */ 253 if (strcmp(fstypename, "ufs") == 0) 254 fstypename[0] = 'f'; 255 #endif 256 257 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL) 258 return ENODEV; 259 return 0; 260 } 261 262 static int 263 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 264 const char *path, int flags, void *data, size_t *data_len) 265 { 266 struct mount *mp = NULL; 267 struct vnode *vp = *vpp; 268 struct vattr va; 269 int error; 270 271 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 272 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 273 if (error) 274 return error; 275 276 /* Can't make a non-dir a mount-point (from here anyway). */ 277 if (vp->v_type != VDIR) 278 return ENOTDIR; 279 280 /* 281 * If the user is not root, ensure that they own the directory 282 * onto which we are attempting to mount. 283 */ 284 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 285 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 286 (error = kauth_authorize_generic(l->l_cred, 287 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 288 return error; 289 } 290 291 if (flags & MNT_EXPORTED) 292 return EINVAL; 293 294 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 295 return error; 296 297 /* 298 * Check if a file-system is not already mounted on this vnode. 299 */ 300 if (vp->v_mountedhere != NULL) 301 return EBUSY; 302 303 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO); 304 305 mp->mnt_op = vfsops; 306 307 TAILQ_INIT(&mp->mnt_vnodelist); 308 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 309 simple_lock_init(&mp->mnt_slock); 310 (void)vfs_busy(mp, LK_NOWAIT, 0); 311 312 mp->mnt_vnodecovered = vp; 313 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 314 mp->mnt_unmounter = NULL; 315 mount_initspecific(mp); 316 317 /* 318 * The underlying file system may refuse the mount for 319 * various reasons. Allow the user to force it to happen. 320 * 321 * Set the mount level flags. 322 */ 323 mp->mnt_flag = flags & 324 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 325 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 326 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 327 MNT_IGNORE | MNT_RDONLY); 328 329 error = VFS_MOUNT(mp, path, data, data_len); 330 mp->mnt_flag &= ~MNT_OP_FLAGS; 331 332 /* 333 * Put the new filesystem on the mount list after root. 334 */ 335 cache_purge(vp); 336 if (error != 0) { 337 vp->v_mountedhere = NULL; 338 mp->mnt_op->vfs_refcount--; 339 vfs_unbusy(mp); 340 vfs_destroy(mp); 341 return error; 342 } 343 344 mp->mnt_iflag &= ~IMNT_WANTRDWR; 345 vp->v_mountedhere = mp; 346 mutex_enter(&mountlist_lock); 347 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 348 mutex_exit(&mountlist_lock); 349 VOP_UNLOCK(vp, 0); 350 checkdirs(vp); 351 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 352 error = vfs_allocate_syncvnode(mp); 353 vfs_unbusy(mp); 354 (void) VFS_STATVFS(mp, &mp->mnt_stat); 355 error = VFS_START(mp, 0); 356 if (error) 357 vrele(vp); 358 *vpp = NULL; 359 return error; 360 } 361 362 static int 363 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 364 void *data, size_t *data_len) 365 { 366 struct mount *mp; 367 int error; 368 369 /* If MNT_GETARGS is specified, it should be the only flag. */ 370 if (flags & ~MNT_GETARGS) 371 return EINVAL; 372 373 mp = vp->v_mount; 374 375 /* XXX: probably some notion of "can see" here if we want isolation. */ 376 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 377 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 378 if (error) 379 return error; 380 381 if ((vp->v_vflag & VV_ROOT) == 0) 382 return EINVAL; 383 384 if (vfs_busy(mp, LK_NOWAIT, 0)) 385 return EPERM; 386 387 mp->mnt_flag &= ~MNT_OP_FLAGS; 388 mp->mnt_flag |= MNT_GETARGS; 389 error = VFS_MOUNT(mp, path, data, data_len); 390 mp->mnt_flag &= ~MNT_OP_FLAGS; 391 392 vfs_unbusy(mp); 393 return (error); 394 } 395 396 #ifdef COMPAT_40 397 /* ARGSUSED */ 398 int 399 compat_40_sys_mount(struct lwp *l, void *v, register_t *retval) 400 { 401 struct compat_40_sys_mount_args /* { 402 syscallarg(const char *) type; 403 syscallarg(const char *) path; 404 syscallarg(int) flags; 405 syscallarg(void *) data; 406 } */ *uap = v; 407 register_t dummy; 408 409 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 410 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 411 } 412 #endif 413 414 int 415 sys___mount50(struct lwp *l, void *v, register_t *retval) 416 { 417 struct sys___mount50_args /* { 418 syscallarg(const char *) type; 419 syscallarg(const char *) path; 420 syscallarg(int) flags; 421 syscallarg(void *) data; 422 syscallarg(size_t) data_len; 423 } */ *uap = v; 424 425 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 426 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 427 SCARG(uap, data_len), retval); 428 } 429 430 int 431 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 432 const char *path, int flags, void *data, enum uio_seg data_seg, 433 size_t data_len, register_t *retval) 434 { 435 struct vnode *vp; 436 struct nameidata nd; 437 void *data_buf = data; 438 int error; 439 440 /* 441 * Get vnode to be covered 442 */ 443 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 444 if ((error = namei(&nd)) != 0) 445 return (error); 446 vp = nd.ni_vp; 447 448 /* 449 * A lookup in VFS_MOUNT might result in an attempt to 450 * lock this vnode again, so make the lock recursive. 451 */ 452 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE); 453 454 if (vfsops == NULL) { 455 if (flags & (MNT_GETARGS | MNT_UPDATE)) 456 vfsops = vp->v_mount->mnt_op; 457 else { 458 /* 'type' is userspace */ 459 error = mount_get_vfsops(type, &vfsops); 460 if (error != 0) 461 goto done; 462 } 463 } 464 465 if (data != NULL && data_seg == UIO_USERSPACE) { 466 if (data_len == 0) { 467 /* No length supplied, use default for filesystem */ 468 data_len = vfsops->vfs_min_mount_data; 469 if (data_len > VFS_MAX_MOUNT_DATA) { 470 /* maybe a force loaded old LKM */ 471 error = EINVAL; 472 goto done; 473 } 474 #ifdef COMPAT_30 475 /* Hopefully a longer buffer won't make copyin() fail */ 476 if (flags & MNT_UPDATE 477 && data_len < sizeof (struct mnt_export_args30)) 478 data_len = sizeof (struct mnt_export_args30); 479 #endif 480 } 481 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 482 483 /* NFS needs the buffer even for mnt_getargs .... */ 484 error = copyin(data, data_buf, data_len); 485 if (error != 0) 486 goto done; 487 } 488 489 if (flags & MNT_GETARGS) { 490 if (data_len == 0) { 491 error = EINVAL; 492 goto done; 493 } 494 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 495 if (error != 0) 496 goto done; 497 if (data_seg == UIO_USERSPACE) 498 error = copyout(data_buf, data, data_len); 499 *retval = data_len; 500 } else if (flags & MNT_UPDATE) { 501 error = mount_update(l, vp, path, flags, data_buf, &data_len); 502 } else { 503 /* Locking is handled internally in mount_domount(). */ 504 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 505 &data_len); 506 } 507 508 done: 509 if (vp) 510 vput(vp); 511 if (data_buf != data) 512 free(data_buf, M_TEMP); 513 return (error); 514 } 515 516 /* 517 * Scan all active processes to see if any of them have a current 518 * or root directory onto which the new filesystem has just been 519 * mounted. If so, replace them with the new mount point. 520 */ 521 void 522 checkdirs(struct vnode *olddp) 523 { 524 struct cwdinfo *cwdi; 525 struct vnode *newdp; 526 struct proc *p; 527 528 if (olddp->v_usecount == 1) 529 return; 530 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 531 panic("mount: lost mount"); 532 mutex_enter(&proclist_lock); 533 PROCLIST_FOREACH(p, &allproc) { 534 cwdi = p->p_cwdi; 535 if (!cwdi) 536 continue; 537 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 538 if (cwdi->cwdi_cdir == olddp) { 539 vrele(cwdi->cwdi_cdir); 540 VREF(newdp); 541 cwdi->cwdi_cdir = newdp; 542 } 543 if (cwdi->cwdi_rdir == olddp) { 544 vrele(cwdi->cwdi_rdir); 545 VREF(newdp); 546 cwdi->cwdi_rdir = newdp; 547 } 548 rw_exit(&cwdi->cwdi_lock); 549 } 550 mutex_exit(&proclist_lock); 551 if (rootvnode == olddp) { 552 vrele(rootvnode); 553 VREF(newdp); 554 rootvnode = newdp; 555 } 556 vput(newdp); 557 } 558 559 /* 560 * Unmount a file system. 561 * 562 * Note: unmount takes a path to the vnode mounted on as argument, 563 * not special file (as before). 564 */ 565 /* ARGSUSED */ 566 int 567 sys_unmount(struct lwp *l, void *v, register_t *retval) 568 { 569 struct sys_unmount_args /* { 570 syscallarg(const char *) path; 571 syscallarg(int) flags; 572 } */ *uap = v; 573 struct vnode *vp; 574 struct mount *mp; 575 int error; 576 struct nameidata nd; 577 578 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 579 SCARG(uap, path)); 580 if ((error = namei(&nd)) != 0) 581 return (error); 582 vp = nd.ni_vp; 583 mp = vp->v_mount; 584 585 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 586 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 587 if (error) { 588 vput(vp); 589 return (error); 590 } 591 592 /* 593 * Don't allow unmounting the root file system. 594 */ 595 if (mp->mnt_flag & MNT_ROOTFS) { 596 vput(vp); 597 return (EINVAL); 598 } 599 600 /* 601 * Must be the root of the filesystem 602 */ 603 if ((vp->v_vflag & VV_ROOT) == 0) { 604 vput(vp); 605 return (EINVAL); 606 } 607 vput(vp); 608 609 /* 610 * XXX Freeze syncer. Must do this before locking the 611 * mount point. See dounmount() for details. 612 */ 613 mutex_enter(&syncer_mutex); 614 615 if (vfs_busy(mp, 0, 0)) { 616 mutex_exit(&syncer_mutex); 617 return (EBUSY); 618 } 619 620 return (dounmount(mp, SCARG(uap, flags), l)); 621 } 622 623 /* 624 * Do the actual file system unmount. File system is assumed to have been 625 * marked busy by the caller. 626 */ 627 int 628 dounmount(struct mount *mp, int flags, struct lwp *l) 629 { 630 struct vnode *coveredvp; 631 int error; 632 int async; 633 int used_syncer; 634 635 #if NVERIEXEC > 0 636 error = veriexec_unmountchk(mp); 637 if (error) 638 return (error); 639 #endif /* NVERIEXEC > 0 */ 640 641 mutex_enter(&mountlist_lock); 642 vfs_unbusy(mp); 643 used_syncer = (mp->mnt_syncer != NULL); 644 645 /* 646 * XXX Syncer must be frozen when we get here. This should really 647 * be done on a per-mountpoint basis, but especially the softdep 648 * code possibly called from the syncer doesn't exactly work on a 649 * per-mountpoint basis, so the softdep code would become a maze 650 * of vfs_busy() calls. 651 * 652 * The caller of dounmount() must acquire syncer_mutex because 653 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 654 * order, and we must preserve that order to avoid deadlock. 655 * 656 * So, if the file system did not use the syncer, now is 657 * the time to release the syncer_mutex. 658 */ 659 if (used_syncer == 0) 660 mutex_exit(&syncer_mutex); 661 662 mp->mnt_iflag |= IMNT_UNMOUNT; 663 mp->mnt_unmounter = l; 664 mutex_exit(&mountlist_lock); /* XXX */ 665 lockmgr(&mp->mnt_lock, LK_DRAIN, NULL); 666 667 async = mp->mnt_flag & MNT_ASYNC; 668 mp->mnt_flag &= ~MNT_ASYNC; 669 cache_purgevfs(mp); /* remove cache entries for this file sys */ 670 if (mp->mnt_syncer != NULL) 671 vfs_deallocate_syncvnode(mp); 672 error = 0; 673 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 674 #if NFSS > 0 675 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 676 #endif 677 if (error == 0) 678 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 679 } 680 if (error == 0 || (flags & MNT_FORCE)) 681 error = VFS_UNMOUNT(mp, flags); 682 if (error) { 683 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 684 (void) vfs_allocate_syncvnode(mp); 685 mutex_enter(&mountlist_lock); 686 mp->mnt_iflag &= ~IMNT_UNMOUNT; 687 mp->mnt_unmounter = NULL; 688 mp->mnt_flag |= async; 689 mutex_exit(&mountlist_lock); /* XXX */ 690 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, 691 NULL); 692 if (used_syncer) 693 mutex_exit(&syncer_mutex); 694 simple_lock(&mp->mnt_slock); 695 while (mp->mnt_wcnt > 0) { 696 wakeup(mp); 697 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 698 0, &mp->mnt_slock); 699 } 700 simple_unlock(&mp->mnt_slock); 701 return (error); 702 } 703 mutex_enter(&mountlist_lock); 704 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 705 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 706 coveredvp->v_mountedhere = NULL; 707 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 708 panic("unmount: dangling vnode"); 709 mp->mnt_iflag |= IMNT_GONE; 710 mutex_exit(&mountlist_lock); 711 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); 712 if (coveredvp != NULLVP) 713 vrele(coveredvp); 714 if (used_syncer) 715 mutex_exit(&syncer_mutex); 716 simple_lock(&mp->mnt_slock); 717 while (mp->mnt_wcnt > 0) { 718 wakeup(mp); 719 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock); 720 } 721 simple_unlock(&mp->mnt_slock); 722 vfs_hooks_unmount(mp); 723 vfs_delref(mp->mnt_op); 724 vfs_destroy(mp); 725 return (0); 726 } 727 728 /* 729 * Sync each mounted filesystem. 730 */ 731 #ifdef DEBUG 732 int syncprt = 0; 733 struct ctldebug debug0 = { "syncprt", &syncprt }; 734 #endif 735 736 /* ARGSUSED */ 737 int 738 sys_sync(struct lwp *l, void *v, register_t *retval) 739 { 740 struct mount *mp, *nmp; 741 int asyncflag; 742 743 if (l == NULL) 744 l = &lwp0; 745 746 mutex_enter(&mountlist_lock); 747 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 748 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) { 749 nmp = mp->mnt_list.cqe_prev; 750 continue; 751 } 752 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 753 asyncflag = mp->mnt_flag & MNT_ASYNC; 754 mp->mnt_flag &= ~MNT_ASYNC; 755 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 756 if (asyncflag) 757 mp->mnt_flag |= MNT_ASYNC; 758 } 759 mutex_enter(&mountlist_lock); 760 nmp = mp->mnt_list.cqe_prev; 761 vfs_unbusy(mp); 762 763 } 764 mutex_exit(&mountlist_lock); 765 #ifdef DEBUG 766 if (syncprt) 767 vfs_bufstats(); 768 #endif /* DEBUG */ 769 return (0); 770 } 771 772 /* 773 * Change filesystem quotas. 774 */ 775 /* ARGSUSED */ 776 int 777 sys_quotactl(struct lwp *l, void *v, register_t *retval) 778 { 779 struct sys_quotactl_args /* { 780 syscallarg(const char *) path; 781 syscallarg(int) cmd; 782 syscallarg(int) uid; 783 syscallarg(void *) arg; 784 } */ *uap = v; 785 struct mount *mp; 786 int error; 787 struct nameidata nd; 788 789 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 790 SCARG(uap, path)); 791 if ((error = namei(&nd)) != 0) 792 return (error); 793 mp = nd.ni_vp->v_mount; 794 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 795 SCARG(uap, arg)); 796 vrele(nd.ni_vp); 797 return (error); 798 } 799 800 int 801 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 802 int root) 803 { 804 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 805 int error = 0; 806 807 /* 808 * If MNT_NOWAIT or MNT_LAZY is specified, do not 809 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 810 * overrides MNT_NOWAIT. 811 */ 812 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 813 (flags != MNT_WAIT && flags != 0)) { 814 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 815 goto done; 816 } 817 818 /* Get the filesystem stats now */ 819 memset(sp, 0, sizeof(*sp)); 820 if ((error = VFS_STATVFS(mp, sp)) != 0) { 821 return error; 822 } 823 824 if (cwdi->cwdi_rdir == NULL) 825 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 826 done: 827 if (cwdi->cwdi_rdir != NULL) { 828 size_t len; 829 char *bp; 830 char *path = PNBUF_GET(); 831 832 bp = path + MAXPATHLEN; 833 *--bp = '\0'; 834 rw_enter(&cwdi->cwdi_lock, RW_READER); 835 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 836 MAXPATHLEN / 2, 0, l); 837 rw_exit(&cwdi->cwdi_lock); 838 if (error) { 839 PNBUF_PUT(path); 840 return error; 841 } 842 len = strlen(bp); 843 /* 844 * for mount points that are below our root, we can see 845 * them, so we fix up the pathname and return them. The 846 * rest we cannot see, so we don't allow viewing the 847 * data. 848 */ 849 if (strncmp(bp, sp->f_mntonname, len) == 0) { 850 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 851 sizeof(sp->f_mntonname)); 852 if (sp->f_mntonname[0] == '\0') 853 (void)strlcpy(sp->f_mntonname, "/", 854 sizeof(sp->f_mntonname)); 855 } else { 856 if (root) 857 (void)strlcpy(sp->f_mntonname, "/", 858 sizeof(sp->f_mntonname)); 859 else 860 error = EPERM; 861 } 862 PNBUF_PUT(path); 863 } 864 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 865 return error; 866 } 867 868 /* 869 * Get filesystem statistics by path. 870 */ 871 int 872 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 873 { 874 struct mount *mp; 875 int error; 876 struct nameidata nd; 877 878 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 879 if ((error = namei(&nd)) != 0) 880 return error; 881 mp = nd.ni_vp->v_mount; 882 error = dostatvfs(mp, sb, l, flags, 1); 883 vrele(nd.ni_vp); 884 return error; 885 } 886 887 /* ARGSUSED */ 888 int 889 sys_statvfs1(struct lwp *l, void *v, register_t *retval) 890 { 891 struct sys_statvfs1_args /* { 892 syscallarg(const char *) path; 893 syscallarg(struct statvfs *) buf; 894 syscallarg(int) flags; 895 } */ *uap = v; 896 struct statvfs *sb; 897 int error; 898 899 sb = STATVFSBUF_GET(); 900 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 901 if (error == 0) 902 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 903 STATVFSBUF_PUT(sb); 904 return error; 905 } 906 907 /* 908 * Get filesystem statistics by fd. 909 */ 910 int 911 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 912 { 913 struct proc *p = l->l_proc; 914 struct file *fp; 915 struct mount *mp; 916 int error; 917 918 /* getvnode() will use the descriptor for us */ 919 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 920 return (error); 921 mp = ((struct vnode *)fp->f_data)->v_mount; 922 error = dostatvfs(mp, sb, l, flags, 1); 923 FILE_UNUSE(fp, l); 924 return error; 925 } 926 927 /* ARGSUSED */ 928 int 929 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval) 930 { 931 struct sys_fstatvfs1_args /* { 932 syscallarg(int) fd; 933 syscallarg(struct statvfs *) buf; 934 syscallarg(int) flags; 935 } */ *uap = v; 936 struct statvfs *sb; 937 int error; 938 939 sb = STATVFSBUF_GET(); 940 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 941 if (error == 0) 942 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 943 STATVFSBUF_PUT(sb); 944 return error; 945 } 946 947 948 /* 949 * Get statistics on all filesystems. 950 */ 951 int 952 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 953 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 954 register_t *retval) 955 { 956 int root = 0; 957 struct proc *p = l->l_proc; 958 struct mount *mp, *nmp; 959 struct statvfs *sb; 960 size_t count, maxcount; 961 int error = 0; 962 963 sb = STATVFSBUF_GET(); 964 maxcount = bufsize / entry_sz; 965 mutex_enter(&mountlist_lock); 966 count = 0; 967 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 968 mp = nmp) { 969 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) { 970 nmp = CIRCLEQ_NEXT(mp, mnt_list); 971 continue; 972 } 973 if (sfsp && count < maxcount) { 974 error = dostatvfs(mp, sb, l, flags, 0); 975 if (error) { 976 mutex_enter(&mountlist_lock); 977 nmp = CIRCLEQ_NEXT(mp, mnt_list); 978 vfs_unbusy(mp); 979 continue; 980 } 981 error = copyfn(sb, sfsp, entry_sz); 982 if (error) { 983 vfs_unbusy(mp); 984 goto out; 985 } 986 sfsp = (char *)sfsp + entry_sz; 987 root |= strcmp(sb->f_mntonname, "/") == 0; 988 } 989 count++; 990 mutex_enter(&mountlist_lock); 991 nmp = CIRCLEQ_NEXT(mp, mnt_list); 992 vfs_unbusy(mp); 993 } 994 995 mutex_exit(&mountlist_lock); 996 if (root == 0 && p->p_cwdi->cwdi_rdir) { 997 /* 998 * fake a root entry 999 */ 1000 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1001 sb, l, flags, 1); 1002 if (error != 0) 1003 goto out; 1004 if (sfsp) 1005 error = copyfn(sb, sfsp, entry_sz); 1006 count++; 1007 } 1008 if (sfsp && count > maxcount) 1009 *retval = maxcount; 1010 else 1011 *retval = count; 1012 out: 1013 STATVFSBUF_PUT(sb); 1014 return error; 1015 } 1016 1017 int 1018 sys_getvfsstat(struct lwp *l, void *v, register_t *retval) 1019 { 1020 struct sys_getvfsstat_args /* { 1021 syscallarg(struct statvfs *) buf; 1022 syscallarg(size_t) bufsize; 1023 syscallarg(int) flags; 1024 } */ *uap = v; 1025 1026 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1027 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1028 } 1029 1030 /* 1031 * Change current working directory to a given file descriptor. 1032 */ 1033 /* ARGSUSED */ 1034 int 1035 sys_fchdir(struct lwp *l, void *v, register_t *retval) 1036 { 1037 struct sys_fchdir_args /* { 1038 syscallarg(int) fd; 1039 } */ *uap = v; 1040 struct proc *p = l->l_proc; 1041 struct filedesc *fdp = p->p_fd; 1042 struct cwdinfo *cwdi; 1043 struct vnode *vp, *tdp; 1044 struct mount *mp; 1045 struct file *fp; 1046 int error; 1047 1048 /* getvnode() will use the descriptor for us */ 1049 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 1050 return (error); 1051 vp = (struct vnode *)fp->f_data; 1052 1053 VREF(vp); 1054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1055 if (vp->v_type != VDIR) 1056 error = ENOTDIR; 1057 else 1058 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1059 if (error) { 1060 vput(vp); 1061 goto out; 1062 } 1063 while ((mp = vp->v_mountedhere) != NULL) { 1064 if (vfs_busy(mp, 0, 0)) 1065 continue; 1066 1067 vput(vp); 1068 error = VFS_ROOT(mp, &tdp); 1069 vfs_unbusy(mp); 1070 if (error) 1071 goto out; 1072 vp = tdp; 1073 } 1074 VOP_UNLOCK(vp, 0); 1075 1076 /* 1077 * Disallow changing to a directory not under the process's 1078 * current root directory (if there is one). 1079 */ 1080 cwdi = p->p_cwdi; 1081 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1082 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1083 vrele(vp); 1084 error = EPERM; /* operation not permitted */ 1085 } else { 1086 vrele(cwdi->cwdi_cdir); 1087 cwdi->cwdi_cdir = vp; 1088 } 1089 rw_exit(&cwdi->cwdi_lock); 1090 1091 out: 1092 FILE_UNUSE(fp, l); 1093 return (error); 1094 } 1095 1096 /* 1097 * Change this process's notion of the root directory to a given file 1098 * descriptor. 1099 */ 1100 int 1101 sys_fchroot(struct lwp *l, void *v, register_t *retval) 1102 { 1103 struct sys_fchroot_args *uap = v; 1104 struct proc *p = l->l_proc; 1105 struct filedesc *fdp = p->p_fd; 1106 struct cwdinfo *cwdi; 1107 struct vnode *vp; 1108 struct file *fp; 1109 int error; 1110 1111 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1112 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1113 return error; 1114 /* getvnode() will use the descriptor for us */ 1115 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 1116 return error; 1117 vp = (struct vnode *) fp->f_data; 1118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1119 if (vp->v_type != VDIR) 1120 error = ENOTDIR; 1121 else 1122 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1123 VOP_UNLOCK(vp, 0); 1124 if (error) 1125 goto out; 1126 VREF(vp); 1127 1128 /* 1129 * Prevent escaping from chroot by putting the root under 1130 * the working directory. Silently chdir to / if we aren't 1131 * already there. 1132 */ 1133 cwdi = p->p_cwdi; 1134 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1135 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1136 /* 1137 * XXX would be more failsafe to change directory to a 1138 * deadfs node here instead 1139 */ 1140 vrele(cwdi->cwdi_cdir); 1141 VREF(vp); 1142 cwdi->cwdi_cdir = vp; 1143 } 1144 1145 if (cwdi->cwdi_rdir != NULL) 1146 vrele(cwdi->cwdi_rdir); 1147 cwdi->cwdi_rdir = vp; 1148 rw_exit(&cwdi->cwdi_lock); 1149 1150 out: 1151 FILE_UNUSE(fp, l); 1152 return (error); 1153 } 1154 1155 /* 1156 * Change current working directory (``.''). 1157 */ 1158 /* ARGSUSED */ 1159 int 1160 sys_chdir(struct lwp *l, void *v, register_t *retval) 1161 { 1162 struct sys_chdir_args /* { 1163 syscallarg(const char *) path; 1164 } */ *uap = v; 1165 struct proc *p = l->l_proc; 1166 struct cwdinfo *cwdi; 1167 int error; 1168 struct nameidata nd; 1169 1170 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1171 SCARG(uap, path)); 1172 if ((error = change_dir(&nd, l)) != 0) 1173 return (error); 1174 cwdi = p->p_cwdi; 1175 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1176 vrele(cwdi->cwdi_cdir); 1177 cwdi->cwdi_cdir = nd.ni_vp; 1178 rw_exit(&cwdi->cwdi_lock); 1179 return (0); 1180 } 1181 1182 /* 1183 * Change notion of root (``/'') directory. 1184 */ 1185 /* ARGSUSED */ 1186 int 1187 sys_chroot(struct lwp *l, void *v, register_t *retval) 1188 { 1189 struct sys_chroot_args /* { 1190 syscallarg(const char *) path; 1191 } */ *uap = v; 1192 struct proc *p = l->l_proc; 1193 struct cwdinfo *cwdi; 1194 struct vnode *vp; 1195 int error; 1196 struct nameidata nd; 1197 1198 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1199 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1200 return (error); 1201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1202 SCARG(uap, path)); 1203 if ((error = change_dir(&nd, l)) != 0) 1204 return (error); 1205 1206 cwdi = p->p_cwdi; 1207 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1208 if (cwdi->cwdi_rdir != NULL) 1209 vrele(cwdi->cwdi_rdir); 1210 vp = nd.ni_vp; 1211 cwdi->cwdi_rdir = vp; 1212 1213 /* 1214 * Prevent escaping from chroot by putting the root under 1215 * the working directory. Silently chdir to / if we aren't 1216 * already there. 1217 */ 1218 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1219 /* 1220 * XXX would be more failsafe to change directory to a 1221 * deadfs node here instead 1222 */ 1223 vrele(cwdi->cwdi_cdir); 1224 VREF(vp); 1225 cwdi->cwdi_cdir = vp; 1226 } 1227 rw_exit(&cwdi->cwdi_lock); 1228 1229 return (0); 1230 } 1231 1232 /* 1233 * Common routine for chroot and chdir. 1234 */ 1235 static int 1236 change_dir(struct nameidata *ndp, struct lwp *l) 1237 { 1238 struct vnode *vp; 1239 int error; 1240 1241 if ((error = namei(ndp)) != 0) 1242 return (error); 1243 vp = ndp->ni_vp; 1244 if (vp->v_type != VDIR) 1245 error = ENOTDIR; 1246 else 1247 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1248 1249 if (error) 1250 vput(vp); 1251 else 1252 VOP_UNLOCK(vp, 0); 1253 return (error); 1254 } 1255 1256 /* 1257 * Check permissions, allocate an open file structure, 1258 * and call the device open routine if any. 1259 */ 1260 int 1261 sys_open(struct lwp *l, void *v, register_t *retval) 1262 { 1263 struct sys_open_args /* { 1264 syscallarg(const char *) path; 1265 syscallarg(int) flags; 1266 syscallarg(int) mode; 1267 } */ *uap = v; 1268 struct proc *p = l->l_proc; 1269 struct cwdinfo *cwdi = p->p_cwdi; 1270 struct filedesc *fdp = p->p_fd; 1271 struct file *fp; 1272 struct vnode *vp; 1273 int flags, cmode; 1274 int type, indx, error; 1275 struct flock lf; 1276 struct nameidata nd; 1277 1278 flags = FFLAGS(SCARG(uap, flags)); 1279 if ((flags & (FREAD | FWRITE)) == 0) 1280 return (EINVAL); 1281 /* falloc() will use the file descriptor for us */ 1282 if ((error = falloc(l, &fp, &indx)) != 0) 1283 return (error); 1284 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1285 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1286 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1287 SCARG(uap, path)); 1288 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1289 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1290 rw_enter(&fdp->fd_lock, RW_WRITER); 1291 FILE_UNUSE(fp, l); 1292 fdp->fd_ofiles[indx] = NULL; 1293 rw_exit(&fdp->fd_lock); 1294 ffree(fp); 1295 if ((error == EDUPFD || error == EMOVEFD) && 1296 l->l_dupfd >= 0 && /* XXX from fdopen */ 1297 (error = 1298 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) { 1299 *retval = indx; 1300 return (0); 1301 } 1302 if (error == ERESTART) 1303 error = EINTR; 1304 fdremove(fdp, indx); 1305 return (error); 1306 } 1307 1308 l->l_dupfd = 0; 1309 vp = nd.ni_vp; 1310 fp->f_flag = flags & FMASK; 1311 fp->f_type = DTYPE_VNODE; 1312 fp->f_ops = &vnops; 1313 fp->f_data = vp; 1314 if (flags & (O_EXLOCK | O_SHLOCK)) { 1315 lf.l_whence = SEEK_SET; 1316 lf.l_start = 0; 1317 lf.l_len = 0; 1318 if (flags & O_EXLOCK) 1319 lf.l_type = F_WRLCK; 1320 else 1321 lf.l_type = F_RDLCK; 1322 type = F_FLOCK; 1323 if ((flags & FNONBLOCK) == 0) 1324 type |= F_WAIT; 1325 VOP_UNLOCK(vp, 0); 1326 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1327 if (error) { 1328 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1329 FILE_UNUSE(fp, l); 1330 ffree(fp); 1331 fdremove(fdp, indx); 1332 return (error); 1333 } 1334 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1335 fp->f_flag |= FHASLOCK; 1336 } 1337 VOP_UNLOCK(vp, 0); 1338 *retval = indx; 1339 FILE_SET_MATURE(fp); 1340 FILE_UNUSE(fp, l); 1341 return (0); 1342 } 1343 1344 static void 1345 vfs__fhfree(fhandle_t *fhp) 1346 { 1347 size_t fhsize; 1348 1349 if (fhp == NULL) { 1350 return; 1351 } 1352 fhsize = FHANDLE_SIZE(fhp); 1353 kmem_free(fhp, fhsize); 1354 } 1355 1356 /* 1357 * vfs_composefh: compose a filehandle. 1358 */ 1359 1360 int 1361 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1362 { 1363 struct mount *mp; 1364 struct fid *fidp; 1365 int error; 1366 size_t needfhsize; 1367 size_t fidsize; 1368 1369 mp = vp->v_mount; 1370 fidp = NULL; 1371 if (*fh_size < FHANDLE_SIZE_MIN) { 1372 fidsize = 0; 1373 } else { 1374 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1375 if (fhp != NULL) { 1376 memset(fhp, 0, *fh_size); 1377 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1378 fidp = &fhp->fh_fid; 1379 } 1380 } 1381 error = VFS_VPTOFH(vp, fidp, &fidsize); 1382 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1383 if (error == 0 && *fh_size < needfhsize) { 1384 error = E2BIG; 1385 } 1386 *fh_size = needfhsize; 1387 return error; 1388 } 1389 1390 int 1391 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1392 { 1393 struct mount *mp; 1394 fhandle_t *fhp; 1395 size_t fhsize; 1396 size_t fidsize; 1397 int error; 1398 1399 *fhpp = NULL; 1400 mp = vp->v_mount; 1401 fidsize = 0; 1402 error = VFS_VPTOFH(vp, NULL, &fidsize); 1403 KASSERT(error != 0); 1404 if (error != E2BIG) { 1405 goto out; 1406 } 1407 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1408 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1409 if (fhp == NULL) { 1410 error = ENOMEM; 1411 goto out; 1412 } 1413 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1414 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1415 if (error == 0) { 1416 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1417 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1418 *fhpp = fhp; 1419 } else { 1420 kmem_free(fhp, fhsize); 1421 } 1422 out: 1423 return error; 1424 } 1425 1426 void 1427 vfs_composefh_free(fhandle_t *fhp) 1428 { 1429 1430 vfs__fhfree(fhp); 1431 } 1432 1433 /* 1434 * vfs_fhtovp: lookup a vnode by a filehandle. 1435 */ 1436 1437 int 1438 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1439 { 1440 struct mount *mp; 1441 int error; 1442 1443 *vpp = NULL; 1444 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1445 if (mp == NULL) { 1446 error = ESTALE; 1447 goto out; 1448 } 1449 if (mp->mnt_op->vfs_fhtovp == NULL) { 1450 error = EOPNOTSUPP; 1451 goto out; 1452 } 1453 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1454 out: 1455 return error; 1456 } 1457 1458 /* 1459 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1460 * the needed size. 1461 */ 1462 1463 int 1464 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1465 { 1466 fhandle_t *fhp; 1467 int error; 1468 1469 *fhpp = NULL; 1470 if (fhsize > FHANDLE_SIZE_MAX) { 1471 return EINVAL; 1472 } 1473 if (fhsize < FHANDLE_SIZE_MIN) { 1474 return EINVAL; 1475 } 1476 again: 1477 fhp = kmem_alloc(fhsize, KM_SLEEP); 1478 if (fhp == NULL) { 1479 return ENOMEM; 1480 } 1481 error = copyin(ufhp, fhp, fhsize); 1482 if (error == 0) { 1483 /* XXX this check shouldn't be here */ 1484 if (FHANDLE_SIZE(fhp) == fhsize) { 1485 *fhpp = fhp; 1486 return 0; 1487 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1488 /* 1489 * a kludge for nfsv2 padded handles. 1490 */ 1491 size_t sz; 1492 1493 sz = FHANDLE_SIZE(fhp); 1494 kmem_free(fhp, fhsize); 1495 fhsize = sz; 1496 goto again; 1497 } else { 1498 /* 1499 * userland told us wrong size. 1500 */ 1501 error = EINVAL; 1502 } 1503 } 1504 kmem_free(fhp, fhsize); 1505 return error; 1506 } 1507 1508 void 1509 vfs_copyinfh_free(fhandle_t *fhp) 1510 { 1511 1512 vfs__fhfree(fhp); 1513 } 1514 1515 /* 1516 * Get file handle system call 1517 */ 1518 int 1519 sys___getfh30(struct lwp *l, void *v, register_t *retval) 1520 { 1521 struct sys___getfh30_args /* { 1522 syscallarg(char *) fname; 1523 syscallarg(fhandle_t *) fhp; 1524 syscallarg(size_t *) fh_size; 1525 } */ *uap = v; 1526 struct vnode *vp; 1527 fhandle_t *fh; 1528 int error; 1529 struct nameidata nd; 1530 size_t sz; 1531 size_t usz; 1532 1533 /* 1534 * Must be super user 1535 */ 1536 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1537 0, NULL, NULL, NULL); 1538 if (error) 1539 return (error); 1540 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1541 SCARG(uap, fname)); 1542 error = namei(&nd); 1543 if (error) 1544 return (error); 1545 vp = nd.ni_vp; 1546 error = vfs_composefh_alloc(vp, &fh); 1547 vput(vp); 1548 if (error != 0) { 1549 goto out; 1550 } 1551 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1552 if (error != 0) { 1553 goto out; 1554 } 1555 sz = FHANDLE_SIZE(fh); 1556 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1557 if (error != 0) { 1558 goto out; 1559 } 1560 if (usz >= sz) { 1561 error = copyout(fh, SCARG(uap, fhp), sz); 1562 } else { 1563 error = E2BIG; 1564 } 1565 out: 1566 vfs_composefh_free(fh); 1567 return (error); 1568 } 1569 1570 /* 1571 * Open a file given a file handle. 1572 * 1573 * Check permissions, allocate an open file structure, 1574 * and call the device open routine if any. 1575 */ 1576 1577 int 1578 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1579 register_t *retval) 1580 { 1581 struct filedesc *fdp = l->l_proc->p_fd; 1582 struct file *fp; 1583 struct vnode *vp = NULL; 1584 kauth_cred_t cred = l->l_cred; 1585 struct file *nfp; 1586 int type, indx, error=0; 1587 struct flock lf; 1588 struct vattr va; 1589 fhandle_t *fh; 1590 int flags; 1591 1592 /* 1593 * Must be super user 1594 */ 1595 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1596 0, NULL, NULL, NULL))) 1597 return (error); 1598 1599 flags = FFLAGS(oflags); 1600 if ((flags & (FREAD | FWRITE)) == 0) 1601 return (EINVAL); 1602 if ((flags & O_CREAT)) 1603 return (EINVAL); 1604 /* falloc() will use the file descriptor for us */ 1605 if ((error = falloc(l, &nfp, &indx)) != 0) 1606 return (error); 1607 fp = nfp; 1608 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1609 if (error != 0) { 1610 goto bad; 1611 } 1612 error = vfs_fhtovp(fh, &vp); 1613 if (error != 0) { 1614 goto bad; 1615 } 1616 1617 /* Now do an effective vn_open */ 1618 1619 if (vp->v_type == VSOCK) { 1620 error = EOPNOTSUPP; 1621 goto bad; 1622 } 1623 error = vn_openchk(vp, cred, flags); 1624 if (error != 0) 1625 goto bad; 1626 if (flags & O_TRUNC) { 1627 VOP_UNLOCK(vp, 0); /* XXX */ 1628 VOP_LEASE(vp, cred, LEASE_WRITE); 1629 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1630 VATTR_NULL(&va); 1631 va.va_size = 0; 1632 error = VOP_SETATTR(vp, &va, cred); 1633 if (error) 1634 goto bad; 1635 } 1636 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1637 goto bad; 1638 if (flags & FWRITE) 1639 vp->v_writecount++; 1640 1641 /* done with modified vn_open, now finish what sys_open does. */ 1642 1643 fp->f_flag = flags & FMASK; 1644 fp->f_type = DTYPE_VNODE; 1645 fp->f_ops = &vnops; 1646 fp->f_data = vp; 1647 if (flags & (O_EXLOCK | O_SHLOCK)) { 1648 lf.l_whence = SEEK_SET; 1649 lf.l_start = 0; 1650 lf.l_len = 0; 1651 if (flags & O_EXLOCK) 1652 lf.l_type = F_WRLCK; 1653 else 1654 lf.l_type = F_RDLCK; 1655 type = F_FLOCK; 1656 if ((flags & FNONBLOCK) == 0) 1657 type |= F_WAIT; 1658 VOP_UNLOCK(vp, 0); 1659 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1660 if (error) { 1661 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1662 FILE_UNUSE(fp, l); 1663 ffree(fp); 1664 fdremove(fdp, indx); 1665 return (error); 1666 } 1667 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1668 fp->f_flag |= FHASLOCK; 1669 } 1670 VOP_UNLOCK(vp, 0); 1671 *retval = indx; 1672 FILE_SET_MATURE(fp); 1673 FILE_UNUSE(fp, l); 1674 vfs_copyinfh_free(fh); 1675 return (0); 1676 1677 bad: 1678 FILE_UNUSE(fp, l); 1679 ffree(fp); 1680 fdremove(fdp, indx); 1681 if (vp != NULL) 1682 vput(vp); 1683 vfs_copyinfh_free(fh); 1684 return (error); 1685 } 1686 1687 int 1688 sys___fhopen40(struct lwp *l, void *v, register_t *retval) 1689 { 1690 struct sys___fhopen40_args /* { 1691 syscallarg(const void *) fhp; 1692 syscallarg(size_t) fh_size; 1693 syscallarg(int) flags; 1694 } */ *uap = v; 1695 1696 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1697 SCARG(uap, flags), retval); 1698 } 1699 1700 int 1701 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1702 { 1703 int error; 1704 fhandle_t *fh; 1705 struct vnode *vp; 1706 1707 /* 1708 * Must be super user 1709 */ 1710 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1711 0, NULL, NULL, NULL))) 1712 return (error); 1713 1714 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1715 if (error != 0) 1716 return error; 1717 1718 error = vfs_fhtovp(fh, &vp); 1719 vfs_copyinfh_free(fh); 1720 if (error != 0) 1721 return error; 1722 1723 error = vn_stat(vp, sb, l); 1724 vput(vp); 1725 return error; 1726 } 1727 1728 1729 /* ARGSUSED */ 1730 int 1731 sys___fhstat40(struct lwp *l, void *v, register_t *retval) 1732 { 1733 struct sys___fhstat40_args /* { 1734 syscallarg(const void *) fhp; 1735 syscallarg(size_t) fh_size; 1736 syscallarg(struct stat *) sb; 1737 } */ *uap = v; 1738 struct stat sb; 1739 int error; 1740 1741 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1742 if (error) 1743 return error; 1744 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1745 } 1746 1747 int 1748 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1749 int flags) 1750 { 1751 fhandle_t *fh; 1752 struct mount *mp; 1753 struct vnode *vp; 1754 int error; 1755 1756 /* 1757 * Must be super user 1758 */ 1759 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1760 0, NULL, NULL, NULL))) 1761 return error; 1762 1763 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1764 if (error != 0) 1765 return error; 1766 1767 error = vfs_fhtovp(fh, &vp); 1768 vfs_copyinfh_free(fh); 1769 if (error != 0) 1770 return error; 1771 1772 mp = vp->v_mount; 1773 error = dostatvfs(mp, sb, l, flags, 1); 1774 vput(vp); 1775 return error; 1776 } 1777 1778 /* ARGSUSED */ 1779 int 1780 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval) 1781 { 1782 struct sys___fhstatvfs140_args /* { 1783 syscallarg(const void *) fhp; 1784 syscallarg(size_t) fh_size; 1785 syscallarg(struct statvfs *) buf; 1786 syscallarg(int) flags; 1787 } */ *uap = v; 1788 struct statvfs *sb = STATVFSBUF_GET(); 1789 int error; 1790 1791 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1792 SCARG(uap, flags)); 1793 if (error == 0) 1794 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1795 STATVFSBUF_PUT(sb); 1796 return error; 1797 } 1798 1799 /* 1800 * Create a special file. 1801 */ 1802 /* ARGSUSED */ 1803 int 1804 sys_mknod(struct lwp *l, void *v, register_t *retval) 1805 { 1806 struct sys_mknod_args /* { 1807 syscallarg(const char *) path; 1808 syscallarg(int) mode; 1809 syscallarg(int) dev; 1810 } */ *uap = v; 1811 struct proc *p = l->l_proc; 1812 struct vnode *vp; 1813 struct vattr vattr; 1814 int error, optype; 1815 struct nameidata nd; 1816 char *path; 1817 const char *cpath; 1818 enum uio_seg seg = UIO_USERSPACE; 1819 1820 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1821 0, NULL, NULL, NULL)) != 0) 1822 return (error); 1823 1824 optype = VOP_MKNOD_DESCOFFSET; 1825 1826 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1827 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1828 1829 if ((error = namei(&nd)) != 0) 1830 goto out; 1831 vp = nd.ni_vp; 1832 if (vp != NULL) 1833 error = EEXIST; 1834 else { 1835 VATTR_NULL(&vattr); 1836 /* We will read cwdi->cwdi_cmask unlocked. */ 1837 vattr.va_mode = 1838 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1839 vattr.va_rdev = SCARG(uap, dev); 1840 1841 switch (SCARG(uap, mode) & S_IFMT) { 1842 case S_IFMT: /* used by badsect to flag bad sectors */ 1843 vattr.va_type = VBAD; 1844 break; 1845 case S_IFCHR: 1846 vattr.va_type = VCHR; 1847 break; 1848 case S_IFBLK: 1849 vattr.va_type = VBLK; 1850 break; 1851 case S_IFWHT: 1852 optype = VOP_WHITEOUT_DESCOFFSET; 1853 break; 1854 case S_IFREG: 1855 #if NVERIEXEC > 0 1856 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1857 O_CREAT); 1858 #endif /* NVERIEXEC > 0 */ 1859 vattr.va_type = VREG; 1860 vattr.va_rdev = VNOVAL; 1861 optype = VOP_CREATE_DESCOFFSET; 1862 break; 1863 default: 1864 error = EINVAL; 1865 break; 1866 } 1867 } 1868 if (!error) { 1869 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 1870 switch (optype) { 1871 case VOP_WHITEOUT_DESCOFFSET: 1872 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1873 if (error) 1874 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1875 vput(nd.ni_dvp); 1876 break; 1877 1878 case VOP_MKNOD_DESCOFFSET: 1879 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1880 &nd.ni_cnd, &vattr); 1881 if (error == 0) 1882 vput(nd.ni_vp); 1883 break; 1884 1885 case VOP_CREATE_DESCOFFSET: 1886 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1887 &nd.ni_cnd, &vattr); 1888 if (error == 0) 1889 vput(nd.ni_vp); 1890 break; 1891 } 1892 } else { 1893 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1894 if (nd.ni_dvp == vp) 1895 vrele(nd.ni_dvp); 1896 else 1897 vput(nd.ni_dvp); 1898 if (vp) 1899 vrele(vp); 1900 } 1901 out: 1902 VERIEXEC_PATH_PUT(path); 1903 return (error); 1904 } 1905 1906 /* 1907 * Create a named pipe. 1908 */ 1909 /* ARGSUSED */ 1910 int 1911 sys_mkfifo(struct lwp *l, void *v, register_t *retval) 1912 { 1913 struct sys_mkfifo_args /* { 1914 syscallarg(const char *) path; 1915 syscallarg(int) mode; 1916 } */ *uap = v; 1917 struct proc *p = l->l_proc; 1918 struct vattr vattr; 1919 int error; 1920 struct nameidata nd; 1921 1922 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1923 SCARG(uap, path)); 1924 if ((error = namei(&nd)) != 0) 1925 return (error); 1926 if (nd.ni_vp != NULL) { 1927 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1928 if (nd.ni_dvp == nd.ni_vp) 1929 vrele(nd.ni_dvp); 1930 else 1931 vput(nd.ni_dvp); 1932 vrele(nd.ni_vp); 1933 return (EEXIST); 1934 } 1935 VATTR_NULL(&vattr); 1936 vattr.va_type = VFIFO; 1937 /* We will read cwdi->cwdi_cmask unlocked. */ 1938 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1939 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 1940 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1941 if (error == 0) 1942 vput(nd.ni_vp); 1943 return (error); 1944 } 1945 1946 /* 1947 * Make a hard file link. 1948 */ 1949 /* ARGSUSED */ 1950 int 1951 sys_link(struct lwp *l, void *v, register_t *retval) 1952 { 1953 struct sys_link_args /* { 1954 syscallarg(const char *) path; 1955 syscallarg(const char *) link; 1956 } */ *uap = v; 1957 struct vnode *vp; 1958 struct nameidata nd; 1959 int error; 1960 1961 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1962 SCARG(uap, path)); 1963 if ((error = namei(&nd)) != 0) 1964 return (error); 1965 vp = nd.ni_vp; 1966 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1967 SCARG(uap, link)); 1968 if ((error = namei(&nd)) != 0) 1969 goto out; 1970 if (nd.ni_vp) { 1971 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1972 if (nd.ni_dvp == nd.ni_vp) 1973 vrele(nd.ni_dvp); 1974 else 1975 vput(nd.ni_dvp); 1976 vrele(nd.ni_vp); 1977 error = EEXIST; 1978 goto out; 1979 } 1980 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 1981 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 1982 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1983 out: 1984 vrele(vp); 1985 return (error); 1986 } 1987 1988 /* 1989 * Make a symbolic link. 1990 */ 1991 /* ARGSUSED */ 1992 int 1993 sys_symlink(struct lwp *l, void *v, register_t *retval) 1994 { 1995 struct sys_symlink_args /* { 1996 syscallarg(const char *) path; 1997 syscallarg(const char *) link; 1998 } */ *uap = v; 1999 struct proc *p = l->l_proc; 2000 struct vattr vattr; 2001 char *path; 2002 int error; 2003 struct nameidata nd; 2004 2005 path = PNBUF_GET(); 2006 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2007 if (error) 2008 goto out; 2009 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2010 SCARG(uap, link)); 2011 if ((error = namei(&nd)) != 0) 2012 goto out; 2013 if (nd.ni_vp) { 2014 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2015 if (nd.ni_dvp == nd.ni_vp) 2016 vrele(nd.ni_dvp); 2017 else 2018 vput(nd.ni_dvp); 2019 vrele(nd.ni_vp); 2020 error = EEXIST; 2021 goto out; 2022 } 2023 VATTR_NULL(&vattr); 2024 vattr.va_type = VLNK; 2025 /* We will read cwdi->cwdi_cmask unlocked. */ 2026 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2027 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 2028 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2029 if (error == 0) 2030 vput(nd.ni_vp); 2031 out: 2032 PNBUF_PUT(path); 2033 return (error); 2034 } 2035 2036 /* 2037 * Delete a whiteout from the filesystem. 2038 */ 2039 /* ARGSUSED */ 2040 int 2041 sys_undelete(struct lwp *l, void *v, register_t *retval) 2042 { 2043 struct sys_undelete_args /* { 2044 syscallarg(const char *) path; 2045 } */ *uap = v; 2046 int error; 2047 struct nameidata nd; 2048 2049 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2050 UIO_USERSPACE, SCARG(uap, path)); 2051 error = namei(&nd); 2052 if (error) 2053 return (error); 2054 2055 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2056 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2057 if (nd.ni_dvp == nd.ni_vp) 2058 vrele(nd.ni_dvp); 2059 else 2060 vput(nd.ni_dvp); 2061 if (nd.ni_vp) 2062 vrele(nd.ni_vp); 2063 return (EEXIST); 2064 } 2065 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 2066 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2067 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2068 vput(nd.ni_dvp); 2069 return (error); 2070 } 2071 2072 /* 2073 * Delete a name from the filesystem. 2074 */ 2075 /* ARGSUSED */ 2076 int 2077 sys_unlink(struct lwp *l, void *v, register_t *retval) 2078 { 2079 struct sys_unlink_args /* { 2080 syscallarg(const char *) path; 2081 } */ *uap = v; 2082 struct vnode *vp; 2083 int error; 2084 struct nameidata nd; 2085 char *path; 2086 const char *cpath; 2087 enum uio_seg seg = UIO_USERSPACE; 2088 2089 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 2090 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2091 2092 if ((error = namei(&nd)) != 0) 2093 goto out; 2094 vp = nd.ni_vp; 2095 2096 /* 2097 * The root of a mounted filesystem cannot be deleted. 2098 */ 2099 if (vp->v_vflag & VV_ROOT) { 2100 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2101 if (nd.ni_dvp == vp) 2102 vrele(nd.ni_dvp); 2103 else 2104 vput(nd.ni_dvp); 2105 vput(vp); 2106 error = EBUSY; 2107 goto out; 2108 } 2109 2110 #if NVERIEXEC > 0 2111 /* Handle remove requests for veriexec entries. */ 2112 if ((error = veriexec_removechk(l, nd.ni_vp, nd.ni_dirp)) != 0) { 2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2114 if (nd.ni_dvp == vp) 2115 vrele(nd.ni_dvp); 2116 else 2117 vput(nd.ni_dvp); 2118 vput(vp); 2119 goto out; 2120 } 2121 #endif /* NVERIEXEC > 0 */ 2122 2123 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 2124 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 2125 #ifdef FILEASSOC 2126 (void)fileassoc_file_delete(vp); 2127 #endif /* FILEASSOC */ 2128 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2129 out: 2130 VERIEXEC_PATH_PUT(path); 2131 return (error); 2132 } 2133 2134 /* 2135 * Reposition read/write file offset. 2136 */ 2137 int 2138 sys_lseek(struct lwp *l, void *v, register_t *retval) 2139 { 2140 struct sys_lseek_args /* { 2141 syscallarg(int) fd; 2142 syscallarg(int) pad; 2143 syscallarg(off_t) offset; 2144 syscallarg(int) whence; 2145 } */ *uap = v; 2146 struct proc *p = l->l_proc; 2147 kauth_cred_t cred = l->l_cred; 2148 struct filedesc *fdp = p->p_fd; 2149 struct file *fp; 2150 struct vnode *vp; 2151 struct vattr vattr; 2152 off_t newoff; 2153 int error; 2154 2155 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 2156 return (EBADF); 2157 2158 vp = (struct vnode *)fp->f_data; 2159 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2160 error = ESPIPE; 2161 mutex_exit(&fp->f_lock); 2162 goto out; 2163 } 2164 2165 switch (SCARG(uap, whence)) { 2166 case SEEK_CUR: 2167 newoff = fp->f_offset + SCARG(uap, offset); 2168 FILE_USE(fp); 2169 break; 2170 case SEEK_END: 2171 FILE_USE(fp); 2172 error = VOP_GETATTR(vp, &vattr, cred); 2173 if (error) { 2174 FILE_UNUSE(fp, l); 2175 goto out; 2176 } 2177 newoff = SCARG(uap, offset) + vattr.va_size; 2178 break; 2179 case SEEK_SET: 2180 FILE_USE(fp); 2181 newoff = SCARG(uap, offset); 2182 break; 2183 default: 2184 mutex_exit(&fp->f_lock); 2185 error = EINVAL; 2186 goto out; 2187 } 2188 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2189 mutex_enter(&fp->f_lock); 2190 *(off_t *)retval = fp->f_offset = newoff; 2191 mutex_exit(&fp->f_lock); 2192 } 2193 FILE_UNUSE(fp, l); 2194 out: 2195 return (error); 2196 } 2197 2198 /* 2199 * Positional read system call. 2200 */ 2201 int 2202 sys_pread(struct lwp *l, void *v, register_t *retval) 2203 { 2204 struct sys_pread_args /* { 2205 syscallarg(int) fd; 2206 syscallarg(void *) buf; 2207 syscallarg(size_t) nbyte; 2208 syscallarg(off_t) offset; 2209 } */ *uap = v; 2210 struct proc *p = l->l_proc; 2211 struct filedesc *fdp = p->p_fd; 2212 struct file *fp; 2213 struct vnode *vp; 2214 off_t offset; 2215 int error, fd = SCARG(uap, fd); 2216 2217 if ((fp = fd_getfile(fdp, fd)) == NULL) 2218 return (EBADF); 2219 2220 if ((fp->f_flag & FREAD) == 0) { 2221 mutex_exit(&fp->f_lock); 2222 return (EBADF); 2223 } 2224 2225 FILE_USE(fp); 2226 2227 vp = (struct vnode *)fp->f_data; 2228 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2229 error = ESPIPE; 2230 goto out; 2231 } 2232 2233 offset = SCARG(uap, offset); 2234 2235 /* 2236 * XXX This works because no file systems actually 2237 * XXX take any action on the seek operation. 2238 */ 2239 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2240 goto out; 2241 2242 /* dofileread() will unuse the descriptor for us */ 2243 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2244 &offset, 0, retval)); 2245 2246 out: 2247 FILE_UNUSE(fp, l); 2248 return (error); 2249 } 2250 2251 /* 2252 * Positional scatter read system call. 2253 */ 2254 int 2255 sys_preadv(struct lwp *l, void *v, register_t *retval) 2256 { 2257 struct sys_preadv_args /* { 2258 syscallarg(int) fd; 2259 syscallarg(const struct iovec *) iovp; 2260 syscallarg(int) iovcnt; 2261 syscallarg(off_t) offset; 2262 } */ *uap = v; 2263 2264 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2265 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval); 2266 } 2267 2268 /* 2269 * Positional write system call. 2270 */ 2271 int 2272 sys_pwrite(struct lwp *l, void *v, register_t *retval) 2273 { 2274 struct sys_pwrite_args /* { 2275 syscallarg(int) fd; 2276 syscallarg(const void *) buf; 2277 syscallarg(size_t) nbyte; 2278 syscallarg(off_t) offset; 2279 } */ *uap = v; 2280 struct proc *p = l->l_proc; 2281 struct filedesc *fdp = p->p_fd; 2282 struct file *fp; 2283 struct vnode *vp; 2284 off_t offset; 2285 int error, fd = SCARG(uap, fd); 2286 2287 if ((fp = fd_getfile(fdp, fd)) == NULL) 2288 return (EBADF); 2289 2290 if ((fp->f_flag & FWRITE) == 0) { 2291 mutex_exit(&fp->f_lock); 2292 return (EBADF); 2293 } 2294 2295 FILE_USE(fp); 2296 2297 vp = (struct vnode *)fp->f_data; 2298 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2299 error = ESPIPE; 2300 goto out; 2301 } 2302 2303 offset = SCARG(uap, offset); 2304 2305 /* 2306 * XXX This works because no file systems actually 2307 * XXX take any action on the seek operation. 2308 */ 2309 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2310 goto out; 2311 2312 /* dofilewrite() will unuse the descriptor for us */ 2313 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2314 &offset, 0, retval)); 2315 2316 out: 2317 FILE_UNUSE(fp, l); 2318 return (error); 2319 } 2320 2321 /* 2322 * Positional gather write system call. 2323 */ 2324 int 2325 sys_pwritev(struct lwp *l, void *v, register_t *retval) 2326 { 2327 struct sys_pwritev_args /* { 2328 syscallarg(int) fd; 2329 syscallarg(const struct iovec *) iovp; 2330 syscallarg(int) iovcnt; 2331 syscallarg(off_t) offset; 2332 } */ *uap = v; 2333 2334 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2335 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval); 2336 } 2337 2338 /* 2339 * Check access permissions. 2340 */ 2341 int 2342 sys_access(struct lwp *l, void *v, register_t *retval) 2343 { 2344 struct sys_access_args /* { 2345 syscallarg(const char *) path; 2346 syscallarg(int) flags; 2347 } */ *uap = v; 2348 kauth_cred_t cred; 2349 struct vnode *vp; 2350 int error, flags; 2351 struct nameidata nd; 2352 2353 cred = kauth_cred_dup(l->l_cred); 2354 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2355 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2356 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2357 SCARG(uap, path)); 2358 /* Override default credentials */ 2359 nd.ni_cnd.cn_cred = cred; 2360 if ((error = namei(&nd)) != 0) 2361 goto out; 2362 vp = nd.ni_vp; 2363 2364 /* Flags == 0 means only check for existence. */ 2365 if (SCARG(uap, flags)) { 2366 flags = 0; 2367 if (SCARG(uap, flags) & R_OK) 2368 flags |= VREAD; 2369 if (SCARG(uap, flags) & W_OK) 2370 flags |= VWRITE; 2371 if (SCARG(uap, flags) & X_OK) 2372 flags |= VEXEC; 2373 2374 error = VOP_ACCESS(vp, flags, cred); 2375 if (!error && (flags & VWRITE)) 2376 error = vn_writechk(vp); 2377 } 2378 vput(vp); 2379 out: 2380 kauth_cred_free(cred); 2381 return (error); 2382 } 2383 2384 /* 2385 * Common code for all sys_stat functions, including compat versions. 2386 */ 2387 int 2388 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags, 2389 struct stat *sb) 2390 { 2391 int error; 2392 struct nameidata nd; 2393 2394 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2395 UIO_USERSPACE, path); 2396 error = namei(&nd); 2397 if (error != 0) 2398 return error; 2399 error = vn_stat(nd.ni_vp, sb, l); 2400 vput(nd.ni_vp); 2401 return error; 2402 } 2403 2404 /* 2405 * Get file status; this version follows links. 2406 */ 2407 /* ARGSUSED */ 2408 int 2409 sys___stat30(struct lwp *l, void *v, register_t *retval) 2410 { 2411 struct sys___stat30_args /* { 2412 syscallarg(const char *) path; 2413 syscallarg(struct stat *) ub; 2414 } */ *uap = v; 2415 struct stat sb; 2416 int error; 2417 2418 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb); 2419 if (error) 2420 return error; 2421 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2422 } 2423 2424 /* 2425 * Get file status; this version does not follow links. 2426 */ 2427 /* ARGSUSED */ 2428 int 2429 sys___lstat30(struct lwp *l, void *v, register_t *retval) 2430 { 2431 struct sys___lstat30_args /* { 2432 syscallarg(const char *) path; 2433 syscallarg(struct stat *) ub; 2434 } */ *uap = v; 2435 struct stat sb; 2436 int error; 2437 2438 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb); 2439 if (error) 2440 return error; 2441 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2442 } 2443 2444 /* 2445 * Get configurable pathname variables. 2446 */ 2447 /* ARGSUSED */ 2448 int 2449 sys_pathconf(struct lwp *l, void *v, register_t *retval) 2450 { 2451 struct sys_pathconf_args /* { 2452 syscallarg(const char *) path; 2453 syscallarg(int) name; 2454 } */ *uap = v; 2455 int error; 2456 struct nameidata nd; 2457 2458 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2459 SCARG(uap, path)); 2460 if ((error = namei(&nd)) != 0) 2461 return (error); 2462 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2463 vput(nd.ni_vp); 2464 return (error); 2465 } 2466 2467 /* 2468 * Return target name of a symbolic link. 2469 */ 2470 /* ARGSUSED */ 2471 int 2472 sys_readlink(struct lwp *l, void *v, register_t *retval) 2473 { 2474 struct sys_readlink_args /* { 2475 syscallarg(const char *) path; 2476 syscallarg(char *) buf; 2477 syscallarg(size_t) count; 2478 } */ *uap = v; 2479 struct vnode *vp; 2480 struct iovec aiov; 2481 struct uio auio; 2482 int error; 2483 struct nameidata nd; 2484 2485 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2486 SCARG(uap, path)); 2487 if ((error = namei(&nd)) != 0) 2488 return (error); 2489 vp = nd.ni_vp; 2490 if (vp->v_type != VLNK) 2491 error = EINVAL; 2492 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2493 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2494 aiov.iov_base = SCARG(uap, buf); 2495 aiov.iov_len = SCARG(uap, count); 2496 auio.uio_iov = &aiov; 2497 auio.uio_iovcnt = 1; 2498 auio.uio_offset = 0; 2499 auio.uio_rw = UIO_READ; 2500 KASSERT(l == curlwp); 2501 auio.uio_vmspace = l->l_proc->p_vmspace; 2502 auio.uio_resid = SCARG(uap, count); 2503 error = VOP_READLINK(vp, &auio, l->l_cred); 2504 } 2505 vput(vp); 2506 *retval = SCARG(uap, count) - auio.uio_resid; 2507 return (error); 2508 } 2509 2510 /* 2511 * Change flags of a file given a path name. 2512 */ 2513 /* ARGSUSED */ 2514 int 2515 sys_chflags(struct lwp *l, void *v, register_t *retval) 2516 { 2517 struct sys_chflags_args /* { 2518 syscallarg(const char *) path; 2519 syscallarg(u_long) flags; 2520 } */ *uap = v; 2521 struct vnode *vp; 2522 int error; 2523 struct nameidata nd; 2524 2525 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2526 SCARG(uap, path)); 2527 if ((error = namei(&nd)) != 0) 2528 return (error); 2529 vp = nd.ni_vp; 2530 error = change_flags(vp, SCARG(uap, flags), l); 2531 vput(vp); 2532 return (error); 2533 } 2534 2535 /* 2536 * Change flags of a file given a file descriptor. 2537 */ 2538 /* ARGSUSED */ 2539 int 2540 sys_fchflags(struct lwp *l, void *v, register_t *retval) 2541 { 2542 struct sys_fchflags_args /* { 2543 syscallarg(int) fd; 2544 syscallarg(u_long) flags; 2545 } */ *uap = v; 2546 struct proc *p = l->l_proc; 2547 struct vnode *vp; 2548 struct file *fp; 2549 int error; 2550 2551 /* getvnode() will use the descriptor for us */ 2552 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2553 return (error); 2554 vp = (struct vnode *)fp->f_data; 2555 error = change_flags(vp, SCARG(uap, flags), l); 2556 VOP_UNLOCK(vp, 0); 2557 FILE_UNUSE(fp, l); 2558 return (error); 2559 } 2560 2561 /* 2562 * Change flags of a file given a path name; this version does 2563 * not follow links. 2564 */ 2565 int 2566 sys_lchflags(struct lwp *l, void *v, register_t *retval) 2567 { 2568 struct sys_lchflags_args /* { 2569 syscallarg(const char *) path; 2570 syscallarg(u_long) flags; 2571 } */ *uap = v; 2572 struct vnode *vp; 2573 int error; 2574 struct nameidata nd; 2575 2576 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2577 SCARG(uap, path)); 2578 if ((error = namei(&nd)) != 0) 2579 return (error); 2580 vp = nd.ni_vp; 2581 error = change_flags(vp, SCARG(uap, flags), l); 2582 vput(vp); 2583 return (error); 2584 } 2585 2586 /* 2587 * Common routine to change flags of a file. 2588 */ 2589 int 2590 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2591 { 2592 struct vattr vattr; 2593 int error; 2594 2595 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 2596 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2597 /* 2598 * Non-superusers cannot change the flags on devices, even if they 2599 * own them. 2600 */ 2601 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2602 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2603 goto out; 2604 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2605 error = EINVAL; 2606 goto out; 2607 } 2608 } 2609 VATTR_NULL(&vattr); 2610 vattr.va_flags = flags; 2611 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2612 out: 2613 return (error); 2614 } 2615 2616 /* 2617 * Change mode of a file given path name; this version follows links. 2618 */ 2619 /* ARGSUSED */ 2620 int 2621 sys_chmod(struct lwp *l, void *v, register_t *retval) 2622 { 2623 struct sys_chmod_args /* { 2624 syscallarg(const char *) path; 2625 syscallarg(int) mode; 2626 } */ *uap = v; 2627 int error; 2628 struct nameidata nd; 2629 2630 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2631 SCARG(uap, path)); 2632 if ((error = namei(&nd)) != 0) 2633 return (error); 2634 2635 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2636 2637 vrele(nd.ni_vp); 2638 return (error); 2639 } 2640 2641 /* 2642 * Change mode of a file given a file descriptor. 2643 */ 2644 /* ARGSUSED */ 2645 int 2646 sys_fchmod(struct lwp *l, void *v, register_t *retval) 2647 { 2648 struct sys_fchmod_args /* { 2649 syscallarg(int) fd; 2650 syscallarg(int) mode; 2651 } */ *uap = v; 2652 struct proc *p = l->l_proc; 2653 struct file *fp; 2654 int error; 2655 2656 /* getvnode() will use the descriptor for us */ 2657 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2658 return (error); 2659 2660 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l); 2661 FILE_UNUSE(fp, l); 2662 return (error); 2663 } 2664 2665 /* 2666 * Change mode of a file given path name; this version does not follow links. 2667 */ 2668 /* ARGSUSED */ 2669 int 2670 sys_lchmod(struct lwp *l, void *v, register_t *retval) 2671 { 2672 struct sys_lchmod_args /* { 2673 syscallarg(const char *) path; 2674 syscallarg(int) mode; 2675 } */ *uap = v; 2676 int error; 2677 struct nameidata nd; 2678 2679 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2680 SCARG(uap, path)); 2681 if ((error = namei(&nd)) != 0) 2682 return (error); 2683 2684 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2685 2686 vrele(nd.ni_vp); 2687 return (error); 2688 } 2689 2690 /* 2691 * Common routine to set mode given a vnode. 2692 */ 2693 static int 2694 change_mode(struct vnode *vp, int mode, struct lwp *l) 2695 { 2696 struct vattr vattr; 2697 int error; 2698 2699 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 2700 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2701 VATTR_NULL(&vattr); 2702 vattr.va_mode = mode & ALLPERMS; 2703 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2704 VOP_UNLOCK(vp, 0); 2705 return (error); 2706 } 2707 2708 /* 2709 * Set ownership given a path name; this version follows links. 2710 */ 2711 /* ARGSUSED */ 2712 int 2713 sys_chown(struct lwp *l, void *v, register_t *retval) 2714 { 2715 struct sys_chown_args /* { 2716 syscallarg(const char *) path; 2717 syscallarg(uid_t) uid; 2718 syscallarg(gid_t) gid; 2719 } */ *uap = v; 2720 int error; 2721 struct nameidata nd; 2722 2723 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2724 SCARG(uap, path)); 2725 if ((error = namei(&nd)) != 0) 2726 return (error); 2727 2728 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2729 2730 vrele(nd.ni_vp); 2731 return (error); 2732 } 2733 2734 /* 2735 * Set ownership given a path name; this version follows links. 2736 * Provides POSIX semantics. 2737 */ 2738 /* ARGSUSED */ 2739 int 2740 sys___posix_chown(struct lwp *l, void *v, register_t *retval) 2741 { 2742 struct sys_chown_args /* { 2743 syscallarg(const char *) path; 2744 syscallarg(uid_t) uid; 2745 syscallarg(gid_t) gid; 2746 } */ *uap = v; 2747 int error; 2748 struct nameidata nd; 2749 2750 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2751 SCARG(uap, path)); 2752 if ((error = namei(&nd)) != 0) 2753 return (error); 2754 2755 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2756 2757 vrele(nd.ni_vp); 2758 return (error); 2759 } 2760 2761 /* 2762 * Set ownership given a file descriptor. 2763 */ 2764 /* ARGSUSED */ 2765 int 2766 sys_fchown(struct lwp *l, void *v, register_t *retval) 2767 { 2768 struct sys_fchown_args /* { 2769 syscallarg(int) fd; 2770 syscallarg(uid_t) uid; 2771 syscallarg(gid_t) gid; 2772 } */ *uap = v; 2773 struct proc *p = l->l_proc; 2774 int error; 2775 struct file *fp; 2776 2777 /* getvnode() will use the descriptor for us */ 2778 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2779 return (error); 2780 2781 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2782 SCARG(uap, gid), l, 0); 2783 FILE_UNUSE(fp, l); 2784 return (error); 2785 } 2786 2787 /* 2788 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2789 */ 2790 /* ARGSUSED */ 2791 int 2792 sys___posix_fchown(struct lwp *l, void *v, register_t *retval) 2793 { 2794 struct sys_fchown_args /* { 2795 syscallarg(int) fd; 2796 syscallarg(uid_t) uid; 2797 syscallarg(gid_t) gid; 2798 } */ *uap = v; 2799 struct proc *p = l->l_proc; 2800 int error; 2801 struct file *fp; 2802 2803 /* getvnode() will use the descriptor for us */ 2804 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2805 return (error); 2806 2807 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2808 SCARG(uap, gid), l, 1); 2809 FILE_UNUSE(fp, l); 2810 return (error); 2811 } 2812 2813 /* 2814 * Set ownership given a path name; this version does not follow links. 2815 */ 2816 /* ARGSUSED */ 2817 int 2818 sys_lchown(struct lwp *l, void *v, register_t *retval) 2819 { 2820 struct sys_lchown_args /* { 2821 syscallarg(const char *) path; 2822 syscallarg(uid_t) uid; 2823 syscallarg(gid_t) gid; 2824 } */ *uap = v; 2825 int error; 2826 struct nameidata nd; 2827 2828 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2829 SCARG(uap, path)); 2830 if ((error = namei(&nd)) != 0) 2831 return (error); 2832 2833 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2834 2835 vrele(nd.ni_vp); 2836 return (error); 2837 } 2838 2839 /* 2840 * Set ownership given a path name; this version does not follow links. 2841 * Provides POSIX/XPG semantics. 2842 */ 2843 /* ARGSUSED */ 2844 int 2845 sys___posix_lchown(struct lwp *l, void *v, register_t *retval) 2846 { 2847 struct sys_lchown_args /* { 2848 syscallarg(const char *) path; 2849 syscallarg(uid_t) uid; 2850 syscallarg(gid_t) gid; 2851 } */ *uap = v; 2852 int error; 2853 struct nameidata nd; 2854 2855 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2856 SCARG(uap, path)); 2857 if ((error = namei(&nd)) != 0) 2858 return (error); 2859 2860 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2861 2862 vrele(nd.ni_vp); 2863 return (error); 2864 } 2865 2866 /* 2867 * Common routine to set ownership given a vnode. 2868 */ 2869 static int 2870 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2871 int posix_semantics) 2872 { 2873 struct vattr vattr; 2874 mode_t newmode; 2875 int error; 2876 2877 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 2878 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2879 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2880 goto out; 2881 2882 #define CHANGED(x) ((int)(x) != -1) 2883 newmode = vattr.va_mode; 2884 if (posix_semantics) { 2885 /* 2886 * POSIX/XPG semantics: if the caller is not the super-user, 2887 * clear set-user-id and set-group-id bits. Both POSIX and 2888 * the XPG consider the behaviour for calls by the super-user 2889 * implementation-defined; we leave the set-user-id and set- 2890 * group-id settings intact in that case. 2891 */ 2892 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2893 NULL) != 0) 2894 newmode &= ~(S_ISUID | S_ISGID); 2895 } else { 2896 /* 2897 * NetBSD semantics: when changing owner and/or group, 2898 * clear the respective bit(s). 2899 */ 2900 if (CHANGED(uid)) 2901 newmode &= ~S_ISUID; 2902 if (CHANGED(gid)) 2903 newmode &= ~S_ISGID; 2904 } 2905 /* Update va_mode iff altered. */ 2906 if (vattr.va_mode == newmode) 2907 newmode = VNOVAL; 2908 2909 VATTR_NULL(&vattr); 2910 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2911 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2912 vattr.va_mode = newmode; 2913 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2914 #undef CHANGED 2915 2916 out: 2917 VOP_UNLOCK(vp, 0); 2918 return (error); 2919 } 2920 2921 /* 2922 * Set the access and modification times given a path name; this 2923 * version follows links. 2924 */ 2925 /* ARGSUSED */ 2926 int 2927 sys_utimes(struct lwp *l, void *v, register_t *retval) 2928 { 2929 struct sys_utimes_args /* { 2930 syscallarg(const char *) path; 2931 syscallarg(const struct timeval *) tptr; 2932 } */ *uap = v; 2933 2934 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2935 SCARG(uap, tptr), UIO_USERSPACE); 2936 } 2937 2938 /* 2939 * Set the access and modification times given a file descriptor. 2940 */ 2941 /* ARGSUSED */ 2942 int 2943 sys_futimes(struct lwp *l, void *v, register_t *retval) 2944 { 2945 struct sys_futimes_args /* { 2946 syscallarg(int) fd; 2947 syscallarg(const struct timeval *) tptr; 2948 } */ *uap = v; 2949 int error; 2950 struct file *fp; 2951 2952 /* getvnode() will use the descriptor for us */ 2953 if ((error = getvnode(l->l_proc->p_fd, SCARG(uap, fd), &fp)) != 0) 2954 return (error); 2955 2956 error = do_sys_utimes(l, fp->f_data, NULL, 0, 2957 SCARG(uap, tptr), UIO_USERSPACE); 2958 2959 FILE_UNUSE(fp, l); 2960 return (error); 2961 } 2962 2963 /* 2964 * Set the access and modification times given a path name; this 2965 * version does not follow links. 2966 */ 2967 int 2968 sys_lutimes(struct lwp *l, void *v, register_t *retval) 2969 { 2970 struct sys_lutimes_args /* { 2971 syscallarg(const char *) path; 2972 syscallarg(const struct timeval *) tptr; 2973 } */ *uap = v; 2974 2975 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2976 SCARG(uap, tptr), UIO_USERSPACE); 2977 } 2978 2979 /* 2980 * Common routine to set access and modification times given a vnode. 2981 */ 2982 int 2983 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2984 const struct timeval *tptr, enum uio_seg seg) 2985 { 2986 struct vattr vattr; 2987 struct nameidata nd; 2988 int error; 2989 2990 VATTR_NULL(&vattr); 2991 if (tptr == NULL) { 2992 nanotime(&vattr.va_atime); 2993 vattr.va_mtime = vattr.va_atime; 2994 vattr.va_vaflags |= VA_UTIMES_NULL; 2995 } else { 2996 struct timeval tv[2]; 2997 2998 if (seg != UIO_SYSSPACE) { 2999 error = copyin(tptr, &tv, sizeof (tv)); 3000 if (error != 0) 3001 return error; 3002 tptr = tv; 3003 } 3004 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime); 3005 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime); 3006 } 3007 3008 if (vp == NULL) { 3009 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3010 if ((error = namei(&nd)) != 0) 3011 return (error); 3012 vp = nd.ni_vp; 3013 } else 3014 nd.ni_vp = NULL; 3015 3016 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 3017 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3018 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3019 VOP_UNLOCK(vp, 0); 3020 3021 if (nd.ni_vp != NULL) 3022 vrele(nd.ni_vp); 3023 3024 return (error); 3025 } 3026 3027 /* 3028 * Truncate a file given its path name. 3029 */ 3030 /* ARGSUSED */ 3031 int 3032 sys_truncate(struct lwp *l, void *v, register_t *retval) 3033 { 3034 struct sys_truncate_args /* { 3035 syscallarg(const char *) path; 3036 syscallarg(int) pad; 3037 syscallarg(off_t) length; 3038 } */ *uap = v; 3039 struct vnode *vp; 3040 struct vattr vattr; 3041 int error; 3042 struct nameidata nd; 3043 3044 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3045 SCARG(uap, path)); 3046 if ((error = namei(&nd)) != 0) 3047 return (error); 3048 vp = nd.ni_vp; 3049 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 3050 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3051 if (vp->v_type == VDIR) 3052 error = EISDIR; 3053 else if ((error = vn_writechk(vp)) == 0 && 3054 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3055 VATTR_NULL(&vattr); 3056 vattr.va_size = SCARG(uap, length); 3057 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3058 } 3059 vput(vp); 3060 return (error); 3061 } 3062 3063 /* 3064 * Truncate a file given a file descriptor. 3065 */ 3066 /* ARGSUSED */ 3067 int 3068 sys_ftruncate(struct lwp *l, void *v, register_t *retval) 3069 { 3070 struct sys_ftruncate_args /* { 3071 syscallarg(int) fd; 3072 syscallarg(int) pad; 3073 syscallarg(off_t) length; 3074 } */ *uap = v; 3075 struct proc *p = l->l_proc; 3076 struct vattr vattr; 3077 struct vnode *vp; 3078 struct file *fp; 3079 int error; 3080 3081 /* getvnode() will use the descriptor for us */ 3082 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3083 return (error); 3084 if ((fp->f_flag & FWRITE) == 0) { 3085 error = EINVAL; 3086 goto out; 3087 } 3088 vp = (struct vnode *)fp->f_data; 3089 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 3090 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3091 if (vp->v_type == VDIR) 3092 error = EISDIR; 3093 else if ((error = vn_writechk(vp)) == 0) { 3094 VATTR_NULL(&vattr); 3095 vattr.va_size = SCARG(uap, length); 3096 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3097 } 3098 VOP_UNLOCK(vp, 0); 3099 out: 3100 FILE_UNUSE(fp, l); 3101 return (error); 3102 } 3103 3104 /* 3105 * Sync an open file. 3106 */ 3107 /* ARGSUSED */ 3108 int 3109 sys_fsync(struct lwp *l, void *v, register_t *retval) 3110 { 3111 struct sys_fsync_args /* { 3112 syscallarg(int) fd; 3113 } */ *uap = v; 3114 struct proc *p = l->l_proc; 3115 struct vnode *vp; 3116 struct file *fp; 3117 int error; 3118 3119 /* getvnode() will use the descriptor for us */ 3120 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3121 return (error); 3122 vp = (struct vnode *)fp->f_data; 3123 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3124 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3125 if (error == 0 && bioopsp != NULL && 3126 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3127 (*bioopsp->io_fsync)(vp, 0); 3128 VOP_UNLOCK(vp, 0); 3129 FILE_UNUSE(fp, l); 3130 return (error); 3131 } 3132 3133 /* 3134 * Sync a range of file data. API modeled after that found in AIX. 3135 * 3136 * FDATASYNC indicates that we need only save enough metadata to be able 3137 * to re-read the written data. Note we duplicate AIX's requirement that 3138 * the file be open for writing. 3139 */ 3140 /* ARGSUSED */ 3141 int 3142 sys_fsync_range(struct lwp *l, void *v, register_t *retval) 3143 { 3144 struct sys_fsync_range_args /* { 3145 syscallarg(int) fd; 3146 syscallarg(int) flags; 3147 syscallarg(off_t) start; 3148 syscallarg(off_t) length; 3149 } */ *uap = v; 3150 struct proc *p = l->l_proc; 3151 struct vnode *vp; 3152 struct file *fp; 3153 int flags, nflags; 3154 off_t s, e, len; 3155 int error; 3156 3157 /* getvnode() will use the descriptor for us */ 3158 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3159 return (error); 3160 3161 if ((fp->f_flag & FWRITE) == 0) { 3162 error = EBADF; 3163 goto out; 3164 } 3165 3166 flags = SCARG(uap, flags); 3167 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3168 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3169 error = EINVAL; 3170 goto out; 3171 } 3172 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3173 if (flags & FDATASYNC) 3174 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3175 else 3176 nflags = FSYNC_WAIT; 3177 if (flags & FDISKSYNC) 3178 nflags |= FSYNC_CACHE; 3179 3180 len = SCARG(uap, length); 3181 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3182 if (len) { 3183 s = SCARG(uap, start); 3184 e = s + len; 3185 if (e < s) { 3186 error = EINVAL; 3187 goto out; 3188 } 3189 } else { 3190 e = 0; 3191 s = 0; 3192 } 3193 3194 vp = (struct vnode *)fp->f_data; 3195 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3196 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3197 3198 if (error == 0 && bioopsp != NULL && 3199 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3200 (*bioopsp->io_fsync)(vp, nflags); 3201 3202 VOP_UNLOCK(vp, 0); 3203 out: 3204 FILE_UNUSE(fp, l); 3205 return (error); 3206 } 3207 3208 /* 3209 * Sync the data of an open file. 3210 */ 3211 /* ARGSUSED */ 3212 int 3213 sys_fdatasync(struct lwp *l, void *v, register_t *retval) 3214 { 3215 struct sys_fdatasync_args /* { 3216 syscallarg(int) fd; 3217 } */ *uap = v; 3218 struct proc *p = l->l_proc; 3219 struct vnode *vp; 3220 struct file *fp; 3221 int error; 3222 3223 /* getvnode() will use the descriptor for us */ 3224 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3225 return (error); 3226 if ((fp->f_flag & FWRITE) == 0) { 3227 FILE_UNUSE(fp, l); 3228 return (EBADF); 3229 } 3230 vp = (struct vnode *)fp->f_data; 3231 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3232 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3233 VOP_UNLOCK(vp, 0); 3234 FILE_UNUSE(fp, l); 3235 return (error); 3236 } 3237 3238 /* 3239 * Rename files, (standard) BSD semantics frontend. 3240 */ 3241 /* ARGSUSED */ 3242 int 3243 sys_rename(struct lwp *l, void *v, register_t *retval) 3244 { 3245 struct sys_rename_args /* { 3246 syscallarg(const char *) from; 3247 syscallarg(const char *) to; 3248 } */ *uap = v; 3249 3250 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0)); 3251 } 3252 3253 /* 3254 * Rename files, POSIX semantics frontend. 3255 */ 3256 /* ARGSUSED */ 3257 int 3258 sys___posix_rename(struct lwp *l, void *v, register_t *retval) 3259 { 3260 struct sys___posix_rename_args /* { 3261 syscallarg(const char *) from; 3262 syscallarg(const char *) to; 3263 } */ *uap = v; 3264 3265 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1)); 3266 } 3267 3268 /* 3269 * Rename files. Source and destination must either both be directories, 3270 * or both not be directories. If target is a directory, it must be empty. 3271 * If `from' and `to' refer to the same object, the value of the `retain' 3272 * argument is used to determine whether `from' will be 3273 * 3274 * (retain == 0) deleted unless `from' and `to' refer to the same 3275 * object in the file system's name space (BSD). 3276 * (retain == 1) always retained (POSIX). 3277 */ 3278 static int 3279 rename_files(const char *from, const char *to, struct lwp *l, int retain) 3280 { 3281 struct vnode *tvp, *fvp, *tdvp; 3282 struct nameidata fromnd, tond; 3283 struct proc *p; 3284 int error; 3285 3286 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3287 UIO_USERSPACE, from); 3288 if ((error = namei(&fromnd)) != 0) 3289 return (error); 3290 if (fromnd.ni_dvp != fromnd.ni_vp) 3291 VOP_UNLOCK(fromnd.ni_dvp, 0); 3292 fvp = fromnd.ni_vp; 3293 NDINIT(&tond, RENAME, 3294 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3295 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3296 UIO_USERSPACE, to); 3297 if ((error = namei(&tond)) != 0) { 3298 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3299 vrele(fromnd.ni_dvp); 3300 vrele(fvp); 3301 goto out1; 3302 } 3303 tdvp = tond.ni_dvp; 3304 tvp = tond.ni_vp; 3305 3306 if (tvp != NULL) { 3307 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3308 error = ENOTDIR; 3309 goto out; 3310 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3311 error = EISDIR; 3312 goto out; 3313 } 3314 } 3315 3316 if (fvp == tdvp) 3317 error = EINVAL; 3318 3319 /* 3320 * Source and destination refer to the same object. 3321 */ 3322 if (fvp == tvp) { 3323 if (retain) 3324 error = -1; 3325 else if (fromnd.ni_dvp == tdvp && 3326 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3327 !memcmp(fromnd.ni_cnd.cn_nameptr, 3328 tond.ni_cnd.cn_nameptr, 3329 fromnd.ni_cnd.cn_namelen)) 3330 error = -1; 3331 } 3332 3333 #if NVERIEXEC > 0 3334 if (!error) { 3335 char *f1, *f2; 3336 3337 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3338 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3339 3340 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3341 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3342 3343 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3344 3345 free(f1, M_TEMP); 3346 free(f2, M_TEMP); 3347 } 3348 #endif /* NVERIEXEC > 0 */ 3349 3350 out: 3351 p = l->l_proc; 3352 if (!error) { 3353 VOP_LEASE(tdvp, l->l_cred, LEASE_WRITE); 3354 if (fromnd.ni_dvp != tdvp) 3355 VOP_LEASE(fromnd.ni_dvp, l->l_cred, LEASE_WRITE); 3356 if (tvp) { 3357 VOP_LEASE(tvp, l->l_cred, LEASE_WRITE); 3358 } 3359 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3360 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3361 } else { 3362 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3363 if (tdvp == tvp) 3364 vrele(tdvp); 3365 else 3366 vput(tdvp); 3367 if (tvp) 3368 vput(tvp); 3369 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3370 vrele(fromnd.ni_dvp); 3371 vrele(fvp); 3372 } 3373 vrele(tond.ni_startdir); 3374 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3375 out1: 3376 if (fromnd.ni_startdir) 3377 vrele(fromnd.ni_startdir); 3378 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3379 return (error == -1 ? 0 : error); 3380 } 3381 3382 /* 3383 * Make a directory file. 3384 */ 3385 /* ARGSUSED */ 3386 int 3387 sys_mkdir(struct lwp *l, void *v, register_t *retval) 3388 { 3389 struct sys_mkdir_args /* { 3390 syscallarg(const char *) path; 3391 syscallarg(int) mode; 3392 } */ *uap = v; 3393 struct proc *p = l->l_proc; 3394 struct vnode *vp; 3395 struct vattr vattr; 3396 int error; 3397 struct nameidata nd; 3398 3399 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3400 SCARG(uap, path)); 3401 if ((error = namei(&nd)) != 0) 3402 return (error); 3403 vp = nd.ni_vp; 3404 if (vp != NULL) { 3405 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3406 if (nd.ni_dvp == vp) 3407 vrele(nd.ni_dvp); 3408 else 3409 vput(nd.ni_dvp); 3410 vrele(vp); 3411 return (EEXIST); 3412 } 3413 VATTR_NULL(&vattr); 3414 vattr.va_type = VDIR; 3415 /* We will read cwdi->cwdi_cmask unlocked. */ 3416 vattr.va_mode = 3417 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3418 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 3419 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3420 if (!error) 3421 vput(nd.ni_vp); 3422 return (error); 3423 } 3424 3425 /* 3426 * Remove a directory file. 3427 */ 3428 /* ARGSUSED */ 3429 int 3430 sys_rmdir(struct lwp *l, void *v, register_t *retval) 3431 { 3432 struct sys_rmdir_args /* { 3433 syscallarg(const char *) path; 3434 } */ *uap = v; 3435 struct vnode *vp; 3436 int error; 3437 struct nameidata nd; 3438 3439 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3440 SCARG(uap, path)); 3441 if ((error = namei(&nd)) != 0) 3442 return (error); 3443 vp = nd.ni_vp; 3444 if (vp->v_type != VDIR) { 3445 error = ENOTDIR; 3446 goto out; 3447 } 3448 /* 3449 * No rmdir "." please. 3450 */ 3451 if (nd.ni_dvp == vp) { 3452 error = EINVAL; 3453 goto out; 3454 } 3455 /* 3456 * The root of a mounted filesystem cannot be deleted. 3457 */ 3458 if (vp->v_vflag & VV_ROOT) { 3459 error = EBUSY; 3460 goto out; 3461 } 3462 VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE); 3463 VOP_LEASE(vp, l->l_cred, LEASE_WRITE); 3464 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3465 return (error); 3466 3467 out: 3468 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3469 if (nd.ni_dvp == vp) 3470 vrele(nd.ni_dvp); 3471 else 3472 vput(nd.ni_dvp); 3473 vput(vp); 3474 return (error); 3475 } 3476 3477 /* 3478 * Read a block of directory entries in a file system independent format. 3479 */ 3480 int 3481 sys___getdents30(struct lwp *l, void *v, register_t *retval) 3482 { 3483 struct sys___getdents30_args /* { 3484 syscallarg(int) fd; 3485 syscallarg(char *) buf; 3486 syscallarg(size_t) count; 3487 } */ *uap = v; 3488 struct proc *p = l->l_proc; 3489 struct file *fp; 3490 int error, done; 3491 3492 /* getvnode() will use the descriptor for us */ 3493 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3494 return (error); 3495 if ((fp->f_flag & FREAD) == 0) { 3496 error = EBADF; 3497 goto out; 3498 } 3499 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3500 SCARG(uap, count), &done, l, 0, 0); 3501 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3502 *retval = done; 3503 out: 3504 FILE_UNUSE(fp, l); 3505 return (error); 3506 } 3507 3508 /* 3509 * Set the mode mask for creation of filesystem nodes. 3510 */ 3511 int 3512 sys_umask(struct lwp *l, void *v, register_t *retval) 3513 { 3514 struct sys_umask_args /* { 3515 syscallarg(mode_t) newmask; 3516 } */ *uap = v; 3517 struct proc *p = l->l_proc; 3518 struct cwdinfo *cwdi; 3519 3520 /* 3521 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3522 * important is that we serialize changes to the mask. The 3523 * rw_exit() will issue a write memory barrier on our behalf, 3524 * and force the changes out to other CPUs (as it must use an 3525 * atomic operation, draining the local CPU's store buffers). 3526 */ 3527 cwdi = p->p_cwdi; 3528 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3529 *retval = cwdi->cwdi_cmask; 3530 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3531 rw_exit(&cwdi->cwdi_lock); 3532 3533 return (0); 3534 } 3535 3536 /* 3537 * Void all references to file by ripping underlying filesystem 3538 * away from vnode. 3539 */ 3540 /* ARGSUSED */ 3541 int 3542 sys_revoke(struct lwp *l, void *v, register_t *retval) 3543 { 3544 struct sys_revoke_args /* { 3545 syscallarg(const char *) path; 3546 } */ *uap = v; 3547 struct vnode *vp; 3548 struct vattr vattr; 3549 int error; 3550 bool revoke; 3551 struct nameidata nd; 3552 3553 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3554 SCARG(uap, path)); 3555 if ((error = namei(&nd)) != 0) 3556 return (error); 3557 vp = nd.ni_vp; 3558 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3559 goto out; 3560 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid && 3561 (error = kauth_authorize_generic(l->l_cred, 3562 KAUTH_GENERIC_ISSUSER, NULL)) != 0) 3563 goto out; 3564 simple_lock(&vp->v_interlock); 3565 revoke = (vp->v_usecount > 1 || (vp->v_iflag & (VI_ALIASED|VI_LAYER))); 3566 simple_unlock(&vp->v_interlock); 3567 if (revoke) 3568 VOP_REVOKE(vp, REVOKEALL); 3569 out: 3570 vrele(vp); 3571 return (error); 3572 } 3573 3574 /* 3575 * Convert a user file descriptor to a kernel file entry. 3576 */ 3577 int 3578 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 3579 { 3580 struct vnode *vp; 3581 struct file *fp; 3582 3583 if ((fp = fd_getfile(fdp, fd)) == NULL) 3584 return (EBADF); 3585 3586 FILE_USE(fp); 3587 3588 if (fp->f_type != DTYPE_VNODE) { 3589 FILE_UNUSE(fp, NULL); 3590 return (EINVAL); 3591 } 3592 3593 vp = (struct vnode *)fp->f_data; 3594 if (vp->v_type == VBAD) { 3595 FILE_UNUSE(fp, NULL); 3596 return (EBADF); 3597 } 3598 3599 *fpp = fp; 3600 return (0); 3601 } 3602