1 /* $NetBSD: vfs_syscalls.c,v 1.331 2007/10/24 15:28:55 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.331 2007/10/24 15:28:55 pooka Exp $"); 41 42 #include "opt_compat_netbsd.h" 43 #include "opt_compat_43.h" 44 #include "opt_fileassoc.h" 45 #include "fss.h" 46 #include "veriexec.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/namei.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/file.h> 54 #include <sys/stat.h> 55 #include <sys/vnode.h> 56 #include <sys/mount.h> 57 #include <sys/proc.h> 58 #include <sys/uio.h> 59 #include <sys/malloc.h> 60 #include <sys/kmem.h> 61 #include <sys/dirent.h> 62 #include <sys/sysctl.h> 63 #include <sys/syscallargs.h> 64 #include <sys/vfs_syscalls.h> 65 #include <sys/ktrace.h> 66 #ifdef FILEASSOC 67 #include <sys/fileassoc.h> 68 #endif /* FILEASSOC */ 69 #include <sys/verified_exec.h> 70 #include <sys/kauth.h> 71 72 #include <miscfs/genfs/genfs.h> 73 #include <miscfs/syncfs/syncfs.h> 74 75 #ifdef COMPAT_30 76 #include "opt_nfsserver.h" 77 #include <nfs/rpcv2.h> 78 #endif 79 #include <nfs/nfsproto.h> 80 #ifdef COMPAT_30 81 #include <nfs/nfs.h> 82 #include <nfs/nfs_var.h> 83 #endif 84 85 #if NFSS > 0 86 #include <dev/fssvar.h> 87 #endif 88 89 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 90 91 static int change_dir(struct nameidata *, struct lwp *); 92 static int change_flags(struct vnode *, u_long, struct lwp *); 93 static int change_mode(struct vnode *, int, struct lwp *l); 94 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 95 static int rename_files(const char *, const char *, struct lwp *, int); 96 97 void checkdirs(struct vnode *); 98 99 int dovfsusermount = 0; 100 101 /* 102 * Virtual File System System Calls 103 */ 104 105 /* 106 * Mount a file system. 107 */ 108 109 #if defined(COMPAT_09) || defined(COMPAT_43) 110 /* 111 * This table is used to maintain compatibility with 4.3BSD 112 * and NetBSD 0.9 mount syscalls. Note, the order is important! 113 * 114 * Do not modify this table. It should only contain filesystems 115 * supported by NetBSD 0.9 and 4.3BSD. 116 */ 117 const char * const mountcompatnames[] = { 118 NULL, /* 0 = MOUNT_NONE */ 119 MOUNT_FFS, /* 1 = MOUNT_UFS */ 120 MOUNT_NFS, /* 2 */ 121 MOUNT_MFS, /* 3 */ 122 MOUNT_MSDOS, /* 4 */ 123 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 124 MOUNT_FDESC, /* 6 */ 125 MOUNT_KERNFS, /* 7 */ 126 NULL, /* 8 = MOUNT_DEVFS */ 127 MOUNT_AFS, /* 9 */ 128 }; 129 const int nmountcompatnames = sizeof(mountcompatnames) / 130 sizeof(mountcompatnames[0]); 131 #endif /* COMPAT_09 || COMPAT_43 */ 132 133 static int 134 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 135 void *data, size_t *data_len) 136 { 137 struct mount *mp; 138 int error = 0, saved_flags; 139 140 mp = vp->v_mount; 141 saved_flags = mp->mnt_flag; 142 143 /* We can operate only on VV_ROOT nodes. */ 144 if ((vp->v_vflag & VV_ROOT) == 0) { 145 error = EINVAL; 146 goto out; 147 } 148 149 /* 150 * We only allow the filesystem to be reloaded if it 151 * is currently mounted read-only. 152 */ 153 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 154 error = EOPNOTSUPP; /* Needs translation */ 155 goto out; 156 } 157 158 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 159 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 160 if (error) 161 goto out; 162 163 if (vfs_busy(mp, LK_NOWAIT, 0)) { 164 error = EPERM; 165 goto out; 166 } 167 168 mp->mnt_flag &= ~MNT_OP_FLAGS; 169 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 170 171 /* 172 * Set the mount level flags. 173 */ 174 if (flags & MNT_RDONLY) 175 mp->mnt_flag |= MNT_RDONLY; 176 else if (mp->mnt_flag & MNT_RDONLY) 177 mp->mnt_iflag |= IMNT_WANTRDWR; 178 mp->mnt_flag &= 179 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 180 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 181 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 182 mp->mnt_flag |= flags & 183 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 184 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 185 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 186 MNT_IGNORE); 187 188 error = VFS_MOUNT(mp, path, data, data_len, l); 189 190 #if defined(COMPAT_30) && defined(NFSSERVER) 191 if (error && data != NULL) { 192 int error2; 193 194 /* Update failed; let's try and see if it was an 195 * export request. */ 196 error2 = nfs_update_exports_30(mp, path, data, l); 197 198 /* Only update error code if the export request was 199 * understood but some problem occurred while 200 * processing it. */ 201 if (error2 != EJUSTRETURN) 202 error = error2; 203 } 204 #endif 205 if (mp->mnt_iflag & IMNT_WANTRDWR) 206 mp->mnt_flag &= ~MNT_RDONLY; 207 if (error) 208 mp->mnt_flag = saved_flags; 209 mp->mnt_flag &= ~MNT_OP_FLAGS; 210 mp->mnt_iflag &= ~IMNT_WANTRDWR; 211 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 212 if (mp->mnt_syncer == NULL) 213 error = vfs_allocate_syncvnode(mp); 214 } else { 215 if (mp->mnt_syncer != NULL) 216 vfs_deallocate_syncvnode(mp); 217 } 218 vfs_unbusy(mp); 219 220 out: 221 return (error); 222 } 223 224 static int 225 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 226 { 227 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 228 int error; 229 230 /* Copy file-system type from userspace. */ 231 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 232 if (error) { 233 #if defined(COMPAT_09) || defined(COMPAT_43) 234 /* 235 * Historically, filesystem types were identified by numbers. 236 * If we get an integer for the filesystem type instead of a 237 * string, we check to see if it matches one of the historic 238 * filesystem types. 239 */ 240 u_long fsindex = (u_long)fstype; 241 if (fsindex >= nmountcompatnames || 242 mountcompatnames[fsindex] == NULL) 243 return ENODEV; 244 strlcpy(fstypename, mountcompatnames[fsindex], 245 sizeof(fstypename)); 246 #else 247 return error; 248 #endif 249 } 250 251 #ifdef COMPAT_10 252 /* Accept `ufs' as an alias for `ffs'. */ 253 if (strcmp(fstypename, "ufs") == 0) 254 fstypename[0] = 'f'; 255 #endif 256 257 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL) 258 return ENODEV; 259 return 0; 260 } 261 262 static int 263 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 264 const char *path, int flags, void *data, size_t *data_len) 265 { 266 struct mount *mp = NULL; 267 struct vnode *vp = *vpp; 268 struct vattr va; 269 int error; 270 271 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 272 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 273 if (error) 274 return error; 275 276 /* Can't make a non-dir a mount-point (from here anyway). */ 277 if (vp->v_type != VDIR) 278 return ENOTDIR; 279 280 /* 281 * If the user is not root, ensure that they own the directory 282 * onto which we are attempting to mount. 283 */ 284 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 || 285 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 286 (error = kauth_authorize_generic(l->l_cred, 287 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 288 return error; 289 } 290 291 if (flags & MNT_EXPORTED) 292 return EINVAL; 293 294 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 295 return error; 296 297 /* 298 * Check if a file-system is not already mounted on this vnode. 299 */ 300 if (vp->v_mountedhere != NULL) 301 return EBUSY; 302 303 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO); 304 305 mp->mnt_op = vfsops; 306 307 TAILQ_INIT(&mp->mnt_vnodelist); 308 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 309 simple_lock_init(&mp->mnt_slock); 310 (void)vfs_busy(mp, LK_NOWAIT, 0); 311 312 mp->mnt_vnodecovered = vp; 313 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 314 mp->mnt_unmounter = NULL; 315 mount_initspecific(mp); 316 317 /* 318 * The underlying file system may refuse the mount for 319 * various reasons. Allow the user to force it to happen. 320 * 321 * Set the mount level flags. 322 */ 323 mp->mnt_flag = flags & 324 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 325 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 326 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 327 MNT_IGNORE | MNT_RDONLY); 328 329 error = VFS_MOUNT(mp, path, data, data_len, l); 330 mp->mnt_flag &= ~MNT_OP_FLAGS; 331 332 /* 333 * Put the new filesystem on the mount list after root. 334 */ 335 cache_purge(vp); 336 if (error != 0) { 337 vp->v_mountedhere = NULL; 338 mp->mnt_op->vfs_refcount--; 339 vfs_unbusy(mp); 340 vfs_destroy(mp); 341 return error; 342 } 343 344 mp->mnt_iflag &= ~IMNT_WANTRDWR; 345 vp->v_mountedhere = mp; 346 mutex_enter(&mountlist_lock); 347 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 348 mutex_exit(&mountlist_lock); 349 VOP_UNLOCK(vp, 0); 350 checkdirs(vp); 351 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 352 error = vfs_allocate_syncvnode(mp); 353 vfs_unbusy(mp); 354 (void) VFS_STATVFS(mp, &mp->mnt_stat, l); 355 error = VFS_START(mp, 0, l); 356 if (error) 357 vrele(vp); 358 *vpp = NULL; 359 return error; 360 } 361 362 static int 363 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 364 void *data, size_t *data_len) 365 { 366 struct mount *mp; 367 int error; 368 369 /* If MNT_GETARGS is specified, it should be the only flag. */ 370 if (flags & ~MNT_GETARGS) 371 return EINVAL; 372 373 mp = vp->v_mount; 374 375 /* XXX: probably some notion of "can see" here if we want isolation. */ 376 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 377 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 378 if (error) 379 return error; 380 381 if ((vp->v_vflag & VV_ROOT) == 0) 382 return EINVAL; 383 384 if (vfs_busy(mp, LK_NOWAIT, 0)) 385 return EPERM; 386 387 mp->mnt_flag &= ~MNT_OP_FLAGS; 388 mp->mnt_flag |= MNT_GETARGS; 389 error = VFS_MOUNT(mp, path, data, data_len, l); 390 mp->mnt_flag &= ~MNT_OP_FLAGS; 391 392 vfs_unbusy(mp); 393 return (error); 394 } 395 396 #ifdef COMPAT_40 397 /* ARGSUSED */ 398 int 399 compat_40_sys_mount(struct lwp *l, void *v, register_t *retval) 400 { 401 struct compat_40_sys_mount_args /* { 402 syscallarg(const char *) type; 403 syscallarg(const char *) path; 404 syscallarg(int) flags; 405 syscallarg(void *) data; 406 } */ *uap = v; 407 register_t dummy; 408 409 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 410 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 411 } 412 #endif 413 414 int 415 sys___mount50(struct lwp *l, void *v, register_t *retval) 416 { 417 struct sys___mount50_args /* { 418 syscallarg(const char *) type; 419 syscallarg(const char *) path; 420 syscallarg(int) flags; 421 syscallarg(void *) data; 422 syscallarg(size_t) data_len; 423 } */ *uap = v; 424 425 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 426 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 427 SCARG(uap, data_len), retval); 428 } 429 430 int 431 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 432 const char *path, int flags, void *data, enum uio_seg data_seg, 433 size_t data_len, register_t *retval) 434 { 435 struct vnode *vp; 436 struct nameidata nd; 437 void *data_buf = data; 438 int error; 439 440 /* 441 * Get vnode to be covered 442 */ 443 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l); 444 if ((error = namei(&nd)) != 0) 445 return (error); 446 vp = nd.ni_vp; 447 448 /* 449 * A lookup in VFS_MOUNT might result in an attempt to 450 * lock this vnode again, so make the lock recursive. 451 */ 452 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE); 453 454 if (vfsops == NULL) { 455 if (flags & (MNT_GETARGS | MNT_UPDATE)) 456 vfsops = vp->v_mount->mnt_op; 457 else { 458 /* 'type' is userspace */ 459 error = mount_get_vfsops(type, &vfsops); 460 if (error != 0) 461 goto done; 462 } 463 } 464 465 if (data != NULL && data_seg == UIO_USERSPACE) { 466 if (data_len == 0) { 467 /* No length supplied, use default for filesystem */ 468 data_len = vfsops->vfs_min_mount_data; 469 if (data_len > VFS_MAX_MOUNT_DATA) { 470 /* maybe a force loaded old LKM */ 471 error = EINVAL; 472 goto done; 473 } 474 #ifdef COMPAT_30 475 /* Hopefully a longer buffer won't make copyin() fail */ 476 if (flags & MNT_UPDATE 477 && data_len < sizeof (struct mnt_export_args30)) 478 data_len = sizeof (struct mnt_export_args30); 479 #endif 480 } 481 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 482 483 /* NFS needs the buffer even for mnt_getargs .... */ 484 error = copyin(data, data_buf, data_len); 485 if (error != 0) 486 goto done; 487 } 488 489 if (flags & MNT_GETARGS) { 490 if (data_len == 0) { 491 error = EINVAL; 492 goto done; 493 } 494 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 495 if (error != 0) 496 goto done; 497 if (data_seg == UIO_USERSPACE) 498 error = copyout(data_buf, data, data_len); 499 *retval = data_len; 500 } else if (flags & MNT_UPDATE) { 501 error = mount_update(l, vp, path, flags, data_buf, &data_len); 502 } else { 503 /* Locking is handled internally in mount_domount(). */ 504 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 505 &data_len); 506 } 507 508 done: 509 if (vp) 510 vput(vp); 511 if (data_buf != data) 512 free(data_buf, M_TEMP); 513 return (error); 514 } 515 516 /* 517 * Scan all active processes to see if any of them have a current 518 * or root directory onto which the new filesystem has just been 519 * mounted. If so, replace them with the new mount point. 520 */ 521 void 522 checkdirs(struct vnode *olddp) 523 { 524 struct cwdinfo *cwdi; 525 struct vnode *newdp; 526 struct proc *p; 527 528 if (olddp->v_usecount == 1) 529 return; 530 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 531 panic("mount: lost mount"); 532 mutex_enter(&proclist_lock); 533 PROCLIST_FOREACH(p, &allproc) { 534 cwdi = p->p_cwdi; 535 if (!cwdi) 536 continue; 537 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 538 if (cwdi->cwdi_cdir == olddp) { 539 vrele(cwdi->cwdi_cdir); 540 VREF(newdp); 541 cwdi->cwdi_cdir = newdp; 542 } 543 if (cwdi->cwdi_rdir == olddp) { 544 vrele(cwdi->cwdi_rdir); 545 VREF(newdp); 546 cwdi->cwdi_rdir = newdp; 547 } 548 rw_exit(&cwdi->cwdi_lock); 549 } 550 mutex_exit(&proclist_lock); 551 if (rootvnode == olddp) { 552 vrele(rootvnode); 553 VREF(newdp); 554 rootvnode = newdp; 555 } 556 vput(newdp); 557 } 558 559 /* 560 * Unmount a file system. 561 * 562 * Note: unmount takes a path to the vnode mounted on as argument, 563 * not special file (as before). 564 */ 565 /* ARGSUSED */ 566 int 567 sys_unmount(struct lwp *l, void *v, register_t *retval) 568 { 569 struct sys_unmount_args /* { 570 syscallarg(const char *) path; 571 syscallarg(int) flags; 572 } */ *uap = v; 573 struct vnode *vp; 574 struct mount *mp; 575 int error; 576 struct nameidata nd; 577 578 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 579 SCARG(uap, path), l); 580 if ((error = namei(&nd)) != 0) 581 return (error); 582 vp = nd.ni_vp; 583 mp = vp->v_mount; 584 585 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 586 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 587 if (error) { 588 vput(vp); 589 return (error); 590 } 591 592 /* 593 * Don't allow unmounting the root file system. 594 */ 595 if (mp->mnt_flag & MNT_ROOTFS) { 596 vput(vp); 597 return (EINVAL); 598 } 599 600 /* 601 * Must be the root of the filesystem 602 */ 603 if ((vp->v_vflag & VV_ROOT) == 0) { 604 vput(vp); 605 return (EINVAL); 606 } 607 vput(vp); 608 609 /* 610 * XXX Freeze syncer. Must do this before locking the 611 * mount point. See dounmount() for details. 612 */ 613 mutex_enter(&syncer_mutex); 614 615 if (vfs_busy(mp, 0, 0)) { 616 mutex_exit(&syncer_mutex); 617 return (EBUSY); 618 } 619 620 return (dounmount(mp, SCARG(uap, flags), l)); 621 } 622 623 /* 624 * Do the actual file system unmount. File system is assumed to have been 625 * marked busy by the caller. 626 */ 627 int 628 dounmount(struct mount *mp, int flags, struct lwp *l) 629 { 630 struct vnode *coveredvp; 631 int error; 632 int async; 633 int used_syncer; 634 635 #if NVERIEXEC > 0 636 error = veriexec_unmountchk(mp); 637 if (error) 638 return (error); 639 #endif /* NVERIEXEC > 0 */ 640 641 mutex_enter(&mountlist_lock); 642 vfs_unbusy(mp); 643 used_syncer = (mp->mnt_syncer != NULL); 644 645 /* 646 * XXX Syncer must be frozen when we get here. This should really 647 * be done on a per-mountpoint basis, but especially the softdep 648 * code possibly called from the syncer doesn't exactly work on a 649 * per-mountpoint basis, so the softdep code would become a maze 650 * of vfs_busy() calls. 651 * 652 * The caller of dounmount() must acquire syncer_mutex because 653 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 654 * order, and we must preserve that order to avoid deadlock. 655 * 656 * So, if the file system did not use the syncer, now is 657 * the time to release the syncer_mutex. 658 */ 659 if (used_syncer == 0) 660 mutex_exit(&syncer_mutex); 661 662 mp->mnt_iflag |= IMNT_UNMOUNT; 663 mp->mnt_unmounter = l; 664 mutex_exit(&mountlist_lock); /* XXX */ 665 lockmgr(&mp->mnt_lock, LK_DRAIN, NULL); 666 667 async = mp->mnt_flag & MNT_ASYNC; 668 mp->mnt_flag &= ~MNT_ASYNC; 669 cache_purgevfs(mp); /* remove cache entries for this file sys */ 670 if (mp->mnt_syncer != NULL) 671 vfs_deallocate_syncvnode(mp); 672 error = 0; 673 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 674 #if NFSS > 0 675 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 676 #endif 677 if (error == 0) 678 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l); 679 } 680 if (error == 0 || (flags & MNT_FORCE)) 681 error = VFS_UNMOUNT(mp, flags, l); 682 if (error) { 683 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 684 (void) vfs_allocate_syncvnode(mp); 685 mutex_enter(&mountlist_lock); 686 mp->mnt_iflag &= ~IMNT_UNMOUNT; 687 mp->mnt_unmounter = NULL; 688 mp->mnt_flag |= async; 689 mutex_exit(&mountlist_lock); /* XXX */ 690 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, 691 NULL); 692 if (used_syncer) 693 mutex_exit(&syncer_mutex); 694 simple_lock(&mp->mnt_slock); 695 while (mp->mnt_wcnt > 0) { 696 wakeup(mp); 697 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 698 0, &mp->mnt_slock); 699 } 700 simple_unlock(&mp->mnt_slock); 701 return (error); 702 } 703 mutex_enter(&mountlist_lock); 704 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 705 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 706 coveredvp->v_mountedhere = NULL; 707 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 708 panic("unmount: dangling vnode"); 709 mp->mnt_iflag |= IMNT_GONE; 710 mutex_exit(&mountlist_lock); 711 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); 712 if (coveredvp != NULLVP) 713 vrele(coveredvp); 714 if (used_syncer) 715 mutex_exit(&syncer_mutex); 716 simple_lock(&mp->mnt_slock); 717 while (mp->mnt_wcnt > 0) { 718 wakeup(mp); 719 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock); 720 } 721 simple_unlock(&mp->mnt_slock); 722 vfs_hooks_unmount(mp); 723 vfs_delref(mp->mnt_op); 724 vfs_destroy(mp); 725 return (0); 726 } 727 728 /* 729 * Sync each mounted filesystem. 730 */ 731 #ifdef DEBUG 732 int syncprt = 0; 733 struct ctldebug debug0 = { "syncprt", &syncprt }; 734 #endif 735 736 /* ARGSUSED */ 737 int 738 sys_sync(struct lwp *l, void *v, register_t *retval) 739 { 740 struct mount *mp, *nmp; 741 int asyncflag; 742 743 if (l == NULL) 744 l = &lwp0; 745 746 mutex_enter(&mountlist_lock); 747 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 748 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) { 749 nmp = mp->mnt_list.cqe_prev; 750 continue; 751 } 752 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 753 asyncflag = mp->mnt_flag & MNT_ASYNC; 754 mp->mnt_flag &= ~MNT_ASYNC; 755 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l); 756 if (asyncflag) 757 mp->mnt_flag |= MNT_ASYNC; 758 } 759 mutex_enter(&mountlist_lock); 760 nmp = mp->mnt_list.cqe_prev; 761 vfs_unbusy(mp); 762 763 } 764 mutex_exit(&mountlist_lock); 765 #ifdef DEBUG 766 if (syncprt) 767 vfs_bufstats(); 768 #endif /* DEBUG */ 769 return (0); 770 } 771 772 /* 773 * Change filesystem quotas. 774 */ 775 /* ARGSUSED */ 776 int 777 sys_quotactl(struct lwp *l, void *v, register_t *retval) 778 { 779 struct sys_quotactl_args /* { 780 syscallarg(const char *) path; 781 syscallarg(int) cmd; 782 syscallarg(int) uid; 783 syscallarg(void *) arg; 784 } */ *uap = v; 785 struct mount *mp; 786 int error; 787 struct nameidata nd; 788 789 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 790 SCARG(uap, path), l); 791 if ((error = namei(&nd)) != 0) 792 return (error); 793 mp = nd.ni_vp->v_mount; 794 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 795 SCARG(uap, arg), l); 796 vrele(nd.ni_vp); 797 return (error); 798 } 799 800 int 801 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 802 int root) 803 { 804 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 805 int error = 0; 806 807 /* 808 * If MNT_NOWAIT or MNT_LAZY is specified, do not 809 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 810 * overrides MNT_NOWAIT. 811 */ 812 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 813 (flags != MNT_WAIT && flags != 0)) { 814 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 815 goto done; 816 } 817 818 /* Get the filesystem stats now */ 819 memset(sp, 0, sizeof(*sp)); 820 if ((error = VFS_STATVFS(mp, sp, l)) != 0) { 821 return error; 822 } 823 824 if (cwdi->cwdi_rdir == NULL) 825 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 826 done: 827 if (cwdi->cwdi_rdir != NULL) { 828 size_t len; 829 char *bp; 830 char *path = PNBUF_GET(); 831 832 bp = path + MAXPATHLEN; 833 *--bp = '\0'; 834 rw_enter(&cwdi->cwdi_lock, RW_READER); 835 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 836 MAXPATHLEN / 2, 0, l); 837 rw_exit(&cwdi->cwdi_lock); 838 if (error) { 839 PNBUF_PUT(path); 840 return error; 841 } 842 len = strlen(bp); 843 /* 844 * for mount points that are below our root, we can see 845 * them, so we fix up the pathname and return them. The 846 * rest we cannot see, so we don't allow viewing the 847 * data. 848 */ 849 if (strncmp(bp, sp->f_mntonname, len) == 0) { 850 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 851 sizeof(sp->f_mntonname)); 852 if (sp->f_mntonname[0] == '\0') 853 (void)strlcpy(sp->f_mntonname, "/", 854 sizeof(sp->f_mntonname)); 855 } else { 856 if (root) 857 (void)strlcpy(sp->f_mntonname, "/", 858 sizeof(sp->f_mntonname)); 859 else 860 error = EPERM; 861 } 862 PNBUF_PUT(path); 863 } 864 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 865 return error; 866 } 867 868 /* 869 * Get filesystem statistics by path. 870 */ 871 int 872 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 873 { 874 struct mount *mp; 875 int error; 876 struct nameidata nd; 877 878 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l); 879 if ((error = namei(&nd)) != 0) 880 return error; 881 mp = nd.ni_vp->v_mount; 882 error = dostatvfs(mp, sb, l, flags, 1); 883 vrele(nd.ni_vp); 884 return error; 885 } 886 887 /* ARGSUSED */ 888 int 889 sys_statvfs1(struct lwp *l, void *v, register_t *retval) 890 { 891 struct sys_statvfs1_args /* { 892 syscallarg(const char *) path; 893 syscallarg(struct statvfs *) buf; 894 syscallarg(int) flags; 895 } */ *uap = v; 896 struct statvfs *sb; 897 int error; 898 899 sb = STATVFSBUF_GET(); 900 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 901 if (error == 0) 902 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 903 STATVFSBUF_PUT(sb); 904 return error; 905 } 906 907 /* 908 * Get filesystem statistics by fd. 909 */ 910 int 911 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 912 { 913 struct proc *p = l->l_proc; 914 struct file *fp; 915 struct mount *mp; 916 int error; 917 918 /* getvnode() will use the descriptor for us */ 919 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 920 return (error); 921 mp = ((struct vnode *)fp->f_data)->v_mount; 922 error = dostatvfs(mp, sb, l, flags, 1); 923 FILE_UNUSE(fp, l); 924 return error; 925 } 926 927 /* ARGSUSED */ 928 int 929 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval) 930 { 931 struct sys_fstatvfs1_args /* { 932 syscallarg(int) fd; 933 syscallarg(struct statvfs *) buf; 934 syscallarg(int) flags; 935 } */ *uap = v; 936 struct statvfs *sb; 937 int error; 938 939 sb = STATVFSBUF_GET(); 940 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 941 if (error == 0) 942 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 943 STATVFSBUF_PUT(sb); 944 return error; 945 } 946 947 948 /* 949 * Get statistics on all filesystems. 950 */ 951 int 952 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 953 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 954 register_t *retval) 955 { 956 int root = 0; 957 struct proc *p = l->l_proc; 958 struct mount *mp, *nmp; 959 struct statvfs *sb; 960 size_t count, maxcount; 961 int error = 0; 962 963 sb = STATVFSBUF_GET(); 964 maxcount = bufsize / entry_sz; 965 mutex_enter(&mountlist_lock); 966 count = 0; 967 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 968 mp = nmp) { 969 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) { 970 nmp = CIRCLEQ_NEXT(mp, mnt_list); 971 continue; 972 } 973 if (sfsp && count < maxcount) { 974 error = dostatvfs(mp, sb, l, flags, 0); 975 if (error) { 976 mutex_enter(&mountlist_lock); 977 nmp = CIRCLEQ_NEXT(mp, mnt_list); 978 vfs_unbusy(mp); 979 continue; 980 } 981 error = copyfn(sb, sfsp, entry_sz); 982 if (error) { 983 vfs_unbusy(mp); 984 goto out; 985 } 986 sfsp = (char *)sfsp + entry_sz; 987 root |= strcmp(sb->f_mntonname, "/") == 0; 988 } 989 count++; 990 mutex_enter(&mountlist_lock); 991 nmp = CIRCLEQ_NEXT(mp, mnt_list); 992 vfs_unbusy(mp); 993 } 994 995 mutex_exit(&mountlist_lock); 996 if (root == 0 && p->p_cwdi->cwdi_rdir) { 997 /* 998 * fake a root entry 999 */ 1000 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1001 sb, l, flags, 1); 1002 if (error != 0) 1003 goto out; 1004 if (sfsp) 1005 error = copyfn(sb, sfsp, entry_sz); 1006 count++; 1007 } 1008 if (sfsp && count > maxcount) 1009 *retval = maxcount; 1010 else 1011 *retval = count; 1012 out: 1013 STATVFSBUF_PUT(sb); 1014 return error; 1015 } 1016 1017 int 1018 sys_getvfsstat(struct lwp *l, void *v, register_t *retval) 1019 { 1020 struct sys_getvfsstat_args /* { 1021 syscallarg(struct statvfs *) buf; 1022 syscallarg(size_t) bufsize; 1023 syscallarg(int) flags; 1024 } */ *uap = v; 1025 1026 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1027 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1028 } 1029 1030 /* 1031 * Change current working directory to a given file descriptor. 1032 */ 1033 /* ARGSUSED */ 1034 int 1035 sys_fchdir(struct lwp *l, void *v, register_t *retval) 1036 { 1037 struct sys_fchdir_args /* { 1038 syscallarg(int) fd; 1039 } */ *uap = v; 1040 struct proc *p = l->l_proc; 1041 struct filedesc *fdp = p->p_fd; 1042 struct cwdinfo *cwdi; 1043 struct vnode *vp, *tdp; 1044 struct mount *mp; 1045 struct file *fp; 1046 int error; 1047 1048 /* getvnode() will use the descriptor for us */ 1049 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 1050 return (error); 1051 vp = (struct vnode *)fp->f_data; 1052 1053 VREF(vp); 1054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1055 if (vp->v_type != VDIR) 1056 error = ENOTDIR; 1057 else 1058 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 1059 if (error) { 1060 vput(vp); 1061 goto out; 1062 } 1063 while ((mp = vp->v_mountedhere) != NULL) { 1064 if (vfs_busy(mp, 0, 0)) 1065 continue; 1066 1067 vput(vp); 1068 error = VFS_ROOT(mp, &tdp); 1069 vfs_unbusy(mp); 1070 if (error) 1071 goto out; 1072 vp = tdp; 1073 } 1074 VOP_UNLOCK(vp, 0); 1075 1076 /* 1077 * Disallow changing to a directory not under the process's 1078 * current root directory (if there is one). 1079 */ 1080 cwdi = p->p_cwdi; 1081 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1082 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1083 vrele(vp); 1084 error = EPERM; /* operation not permitted */ 1085 } else { 1086 vrele(cwdi->cwdi_cdir); 1087 cwdi->cwdi_cdir = vp; 1088 } 1089 rw_exit(&cwdi->cwdi_lock); 1090 1091 out: 1092 FILE_UNUSE(fp, l); 1093 return (error); 1094 } 1095 1096 /* 1097 * Change this process's notion of the root directory to a given file 1098 * descriptor. 1099 */ 1100 int 1101 sys_fchroot(struct lwp *l, void *v, register_t *retval) 1102 { 1103 struct sys_fchroot_args *uap = v; 1104 struct proc *p = l->l_proc; 1105 struct filedesc *fdp = p->p_fd; 1106 struct cwdinfo *cwdi; 1107 struct vnode *vp; 1108 struct file *fp; 1109 int error; 1110 1111 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1112 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1113 return error; 1114 /* getvnode() will use the descriptor for us */ 1115 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 1116 return error; 1117 vp = (struct vnode *) fp->f_data; 1118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1119 if (vp->v_type != VDIR) 1120 error = ENOTDIR; 1121 else 1122 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 1123 VOP_UNLOCK(vp, 0); 1124 if (error) 1125 goto out; 1126 VREF(vp); 1127 1128 /* 1129 * Prevent escaping from chroot by putting the root under 1130 * the working directory. Silently chdir to / if we aren't 1131 * already there. 1132 */ 1133 cwdi = p->p_cwdi; 1134 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1135 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1136 /* 1137 * XXX would be more failsafe to change directory to a 1138 * deadfs node here instead 1139 */ 1140 vrele(cwdi->cwdi_cdir); 1141 VREF(vp); 1142 cwdi->cwdi_cdir = vp; 1143 } 1144 1145 if (cwdi->cwdi_rdir != NULL) 1146 vrele(cwdi->cwdi_rdir); 1147 cwdi->cwdi_rdir = vp; 1148 rw_exit(&cwdi->cwdi_lock); 1149 1150 out: 1151 FILE_UNUSE(fp, l); 1152 return (error); 1153 } 1154 1155 /* 1156 * Change current working directory (``.''). 1157 */ 1158 /* ARGSUSED */ 1159 int 1160 sys_chdir(struct lwp *l, void *v, register_t *retval) 1161 { 1162 struct sys_chdir_args /* { 1163 syscallarg(const char *) path; 1164 } */ *uap = v; 1165 struct proc *p = l->l_proc; 1166 struct cwdinfo *cwdi; 1167 int error; 1168 struct nameidata nd; 1169 1170 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1171 SCARG(uap, path), l); 1172 if ((error = change_dir(&nd, l)) != 0) 1173 return (error); 1174 cwdi = p->p_cwdi; 1175 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1176 vrele(cwdi->cwdi_cdir); 1177 cwdi->cwdi_cdir = nd.ni_vp; 1178 rw_exit(&cwdi->cwdi_lock); 1179 return (0); 1180 } 1181 1182 /* 1183 * Change notion of root (``/'') directory. 1184 */ 1185 /* ARGSUSED */ 1186 int 1187 sys_chroot(struct lwp *l, void *v, register_t *retval) 1188 { 1189 struct sys_chroot_args /* { 1190 syscallarg(const char *) path; 1191 } */ *uap = v; 1192 struct proc *p = l->l_proc; 1193 struct cwdinfo *cwdi; 1194 struct vnode *vp; 1195 int error; 1196 struct nameidata nd; 1197 1198 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1199 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1200 return (error); 1201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1202 SCARG(uap, path), l); 1203 if ((error = change_dir(&nd, l)) != 0) 1204 return (error); 1205 1206 cwdi = p->p_cwdi; 1207 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1208 if (cwdi->cwdi_rdir != NULL) 1209 vrele(cwdi->cwdi_rdir); 1210 vp = nd.ni_vp; 1211 cwdi->cwdi_rdir = vp; 1212 1213 /* 1214 * Prevent escaping from chroot by putting the root under 1215 * the working directory. Silently chdir to / if we aren't 1216 * already there. 1217 */ 1218 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1219 /* 1220 * XXX would be more failsafe to change directory to a 1221 * deadfs node here instead 1222 */ 1223 vrele(cwdi->cwdi_cdir); 1224 VREF(vp); 1225 cwdi->cwdi_cdir = vp; 1226 } 1227 rw_exit(&cwdi->cwdi_lock); 1228 1229 return (0); 1230 } 1231 1232 /* 1233 * Common routine for chroot and chdir. 1234 */ 1235 static int 1236 change_dir(struct nameidata *ndp, struct lwp *l) 1237 { 1238 struct vnode *vp; 1239 int error; 1240 1241 if ((error = namei(ndp)) != 0) 1242 return (error); 1243 vp = ndp->ni_vp; 1244 if (vp->v_type != VDIR) 1245 error = ENOTDIR; 1246 else 1247 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 1248 1249 if (error) 1250 vput(vp); 1251 else 1252 VOP_UNLOCK(vp, 0); 1253 return (error); 1254 } 1255 1256 /* 1257 * Check permissions, allocate an open file structure, 1258 * and call the device open routine if any. 1259 */ 1260 int 1261 sys_open(struct lwp *l, void *v, register_t *retval) 1262 { 1263 struct sys_open_args /* { 1264 syscallarg(const char *) path; 1265 syscallarg(int) flags; 1266 syscallarg(int) mode; 1267 } */ *uap = v; 1268 struct proc *p = l->l_proc; 1269 struct cwdinfo *cwdi = p->p_cwdi; 1270 struct filedesc *fdp = p->p_fd; 1271 struct file *fp; 1272 struct vnode *vp; 1273 int flags, cmode; 1274 int type, indx, error; 1275 struct flock lf; 1276 struct nameidata nd; 1277 1278 flags = FFLAGS(SCARG(uap, flags)); 1279 if ((flags & (FREAD | FWRITE)) == 0) 1280 return (EINVAL); 1281 /* falloc() will use the file descriptor for us */ 1282 if ((error = falloc(l, &fp, &indx)) != 0) 1283 return (error); 1284 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1285 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1286 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1287 SCARG(uap, path), l); 1288 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1289 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1290 rw_enter(&fdp->fd_lock, RW_WRITER); 1291 FILE_UNUSE(fp, l); 1292 fdp->fd_ofiles[indx] = NULL; 1293 rw_exit(&fdp->fd_lock); 1294 ffree(fp); 1295 if ((error == EDUPFD || error == EMOVEFD) && 1296 l->l_dupfd >= 0 && /* XXX from fdopen */ 1297 (error = 1298 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) { 1299 *retval = indx; 1300 return (0); 1301 } 1302 if (error == ERESTART) 1303 error = EINTR; 1304 fdremove(fdp, indx); 1305 return (error); 1306 } 1307 1308 l->l_dupfd = 0; 1309 vp = nd.ni_vp; 1310 fp->f_flag = flags & FMASK; 1311 fp->f_type = DTYPE_VNODE; 1312 fp->f_ops = &vnops; 1313 fp->f_data = vp; 1314 if (flags & (O_EXLOCK | O_SHLOCK)) { 1315 lf.l_whence = SEEK_SET; 1316 lf.l_start = 0; 1317 lf.l_len = 0; 1318 if (flags & O_EXLOCK) 1319 lf.l_type = F_WRLCK; 1320 else 1321 lf.l_type = F_RDLCK; 1322 type = F_FLOCK; 1323 if ((flags & FNONBLOCK) == 0) 1324 type |= F_WAIT; 1325 VOP_UNLOCK(vp, 0); 1326 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1327 if (error) { 1328 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1329 FILE_UNUSE(fp, l); 1330 ffree(fp); 1331 fdremove(fdp, indx); 1332 return (error); 1333 } 1334 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1335 fp->f_flag |= FHASLOCK; 1336 } 1337 VOP_UNLOCK(vp, 0); 1338 *retval = indx; 1339 FILE_SET_MATURE(fp); 1340 FILE_UNUSE(fp, l); 1341 return (0); 1342 } 1343 1344 static void 1345 vfs__fhfree(fhandle_t *fhp) 1346 { 1347 size_t fhsize; 1348 1349 if (fhp == NULL) { 1350 return; 1351 } 1352 fhsize = FHANDLE_SIZE(fhp); 1353 kmem_free(fhp, fhsize); 1354 } 1355 1356 /* 1357 * vfs_composefh: compose a filehandle. 1358 */ 1359 1360 int 1361 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1362 { 1363 struct mount *mp; 1364 struct fid *fidp; 1365 int error; 1366 size_t needfhsize; 1367 size_t fidsize; 1368 1369 mp = vp->v_mount; 1370 fidp = NULL; 1371 if (*fh_size < FHANDLE_SIZE_MIN) { 1372 fidsize = 0; 1373 } else { 1374 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1375 if (fhp != NULL) { 1376 memset(fhp, 0, *fh_size); 1377 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1378 fidp = &fhp->fh_fid; 1379 } 1380 } 1381 error = VFS_VPTOFH(vp, fidp, &fidsize); 1382 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1383 if (error == 0 && *fh_size < needfhsize) { 1384 error = E2BIG; 1385 } 1386 *fh_size = needfhsize; 1387 return error; 1388 } 1389 1390 int 1391 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1392 { 1393 struct mount *mp; 1394 fhandle_t *fhp; 1395 size_t fhsize; 1396 size_t fidsize; 1397 int error; 1398 1399 *fhpp = NULL; 1400 mp = vp->v_mount; 1401 fidsize = 0; 1402 error = VFS_VPTOFH(vp, NULL, &fidsize); 1403 KASSERT(error != 0); 1404 if (error != E2BIG) { 1405 goto out; 1406 } 1407 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1408 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1409 if (fhp == NULL) { 1410 error = ENOMEM; 1411 goto out; 1412 } 1413 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1414 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1415 if (error == 0) { 1416 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1417 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1418 *fhpp = fhp; 1419 } else { 1420 kmem_free(fhp, fhsize); 1421 } 1422 out: 1423 return error; 1424 } 1425 1426 void 1427 vfs_composefh_free(fhandle_t *fhp) 1428 { 1429 1430 vfs__fhfree(fhp); 1431 } 1432 1433 /* 1434 * vfs_fhtovp: lookup a vnode by a filehandle. 1435 */ 1436 1437 int 1438 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1439 { 1440 struct mount *mp; 1441 int error; 1442 1443 *vpp = NULL; 1444 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1445 if (mp == NULL) { 1446 error = ESTALE; 1447 goto out; 1448 } 1449 if (mp->mnt_op->vfs_fhtovp == NULL) { 1450 error = EOPNOTSUPP; 1451 goto out; 1452 } 1453 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1454 out: 1455 return error; 1456 } 1457 1458 /* 1459 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1460 * the needed size. 1461 */ 1462 1463 int 1464 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1465 { 1466 fhandle_t *fhp; 1467 int error; 1468 1469 *fhpp = NULL; 1470 if (fhsize > FHANDLE_SIZE_MAX) { 1471 return EINVAL; 1472 } 1473 if (fhsize < FHANDLE_SIZE_MIN) { 1474 return EINVAL; 1475 } 1476 again: 1477 fhp = kmem_alloc(fhsize, KM_SLEEP); 1478 if (fhp == NULL) { 1479 return ENOMEM; 1480 } 1481 error = copyin(ufhp, fhp, fhsize); 1482 if (error == 0) { 1483 /* XXX this check shouldn't be here */ 1484 if (FHANDLE_SIZE(fhp) == fhsize) { 1485 *fhpp = fhp; 1486 return 0; 1487 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1488 /* 1489 * a kludge for nfsv2 padded handles. 1490 */ 1491 size_t sz; 1492 1493 sz = FHANDLE_SIZE(fhp); 1494 kmem_free(fhp, fhsize); 1495 fhsize = sz; 1496 goto again; 1497 } else { 1498 /* 1499 * userland told us wrong size. 1500 */ 1501 error = EINVAL; 1502 } 1503 } 1504 kmem_free(fhp, fhsize); 1505 return error; 1506 } 1507 1508 void 1509 vfs_copyinfh_free(fhandle_t *fhp) 1510 { 1511 1512 vfs__fhfree(fhp); 1513 } 1514 1515 /* 1516 * Get file handle system call 1517 */ 1518 int 1519 sys___getfh30(struct lwp *l, void *v, register_t *retval) 1520 { 1521 struct sys___getfh30_args /* { 1522 syscallarg(char *) fname; 1523 syscallarg(fhandle_t *) fhp; 1524 syscallarg(size_t *) fh_size; 1525 } */ *uap = v; 1526 struct vnode *vp; 1527 fhandle_t *fh; 1528 int error; 1529 struct nameidata nd; 1530 size_t sz; 1531 size_t usz; 1532 1533 /* 1534 * Must be super user 1535 */ 1536 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1537 0, NULL, NULL, NULL); 1538 if (error) 1539 return (error); 1540 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1541 SCARG(uap, fname), l); 1542 error = namei(&nd); 1543 if (error) 1544 return (error); 1545 vp = nd.ni_vp; 1546 error = vfs_composefh_alloc(vp, &fh); 1547 vput(vp); 1548 if (error != 0) { 1549 goto out; 1550 } 1551 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1552 if (error != 0) { 1553 goto out; 1554 } 1555 sz = FHANDLE_SIZE(fh); 1556 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1557 if (error != 0) { 1558 goto out; 1559 } 1560 if (usz >= sz) { 1561 error = copyout(fh, SCARG(uap, fhp), sz); 1562 } else { 1563 error = E2BIG; 1564 } 1565 out: 1566 vfs_composefh_free(fh); 1567 return (error); 1568 } 1569 1570 /* 1571 * Open a file given a file handle. 1572 * 1573 * Check permissions, allocate an open file structure, 1574 * and call the device open routine if any. 1575 */ 1576 1577 int 1578 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1579 register_t *retval) 1580 { 1581 struct filedesc *fdp = l->l_proc->p_fd; 1582 struct file *fp; 1583 struct vnode *vp = NULL; 1584 kauth_cred_t cred = l->l_cred; 1585 struct file *nfp; 1586 int type, indx, error=0; 1587 struct flock lf; 1588 struct vattr va; 1589 fhandle_t *fh; 1590 int flags; 1591 1592 /* 1593 * Must be super user 1594 */ 1595 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1596 0, NULL, NULL, NULL))) 1597 return (error); 1598 1599 flags = FFLAGS(oflags); 1600 if ((flags & (FREAD | FWRITE)) == 0) 1601 return (EINVAL); 1602 if ((flags & O_CREAT)) 1603 return (EINVAL); 1604 /* falloc() will use the file descriptor for us */ 1605 if ((error = falloc(l, &nfp, &indx)) != 0) 1606 return (error); 1607 fp = nfp; 1608 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1609 if (error != 0) { 1610 goto bad; 1611 } 1612 error = vfs_fhtovp(fh, &vp); 1613 if (error != 0) { 1614 goto bad; 1615 } 1616 1617 /* Now do an effective vn_open */ 1618 1619 if (vp->v_type == VSOCK) { 1620 error = EOPNOTSUPP; 1621 goto bad; 1622 } 1623 if (flags & FREAD) { 1624 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0) 1625 goto bad; 1626 } 1627 if (flags & (FWRITE | O_TRUNC)) { 1628 if (vp->v_type == VDIR) { 1629 error = EISDIR; 1630 goto bad; 1631 } 1632 if ((error = vn_writechk(vp)) != 0 || 1633 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0) 1634 goto bad; 1635 } 1636 if (flags & O_TRUNC) { 1637 VOP_UNLOCK(vp, 0); /* XXX */ 1638 VOP_LEASE(vp, l, cred, LEASE_WRITE); 1639 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1640 VATTR_NULL(&va); 1641 va.va_size = 0; 1642 error = VOP_SETATTR(vp, &va, cred, l); 1643 if (error) 1644 goto bad; 1645 } 1646 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0) 1647 goto bad; 1648 if (flags & FWRITE) 1649 vp->v_writecount++; 1650 1651 /* done with modified vn_open, now finish what sys_open does. */ 1652 1653 fp->f_flag = flags & FMASK; 1654 fp->f_type = DTYPE_VNODE; 1655 fp->f_ops = &vnops; 1656 fp->f_data = vp; 1657 if (flags & (O_EXLOCK | O_SHLOCK)) { 1658 lf.l_whence = SEEK_SET; 1659 lf.l_start = 0; 1660 lf.l_len = 0; 1661 if (flags & O_EXLOCK) 1662 lf.l_type = F_WRLCK; 1663 else 1664 lf.l_type = F_RDLCK; 1665 type = F_FLOCK; 1666 if ((flags & FNONBLOCK) == 0) 1667 type |= F_WAIT; 1668 VOP_UNLOCK(vp, 0); 1669 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1670 if (error) { 1671 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1672 FILE_UNUSE(fp, l); 1673 ffree(fp); 1674 fdremove(fdp, indx); 1675 return (error); 1676 } 1677 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1678 fp->f_flag |= FHASLOCK; 1679 } 1680 VOP_UNLOCK(vp, 0); 1681 *retval = indx; 1682 FILE_SET_MATURE(fp); 1683 FILE_UNUSE(fp, l); 1684 vfs_copyinfh_free(fh); 1685 return (0); 1686 1687 bad: 1688 FILE_UNUSE(fp, l); 1689 ffree(fp); 1690 fdremove(fdp, indx); 1691 if (vp != NULL) 1692 vput(vp); 1693 vfs_copyinfh_free(fh); 1694 return (error); 1695 } 1696 1697 int 1698 sys___fhopen40(struct lwp *l, void *v, register_t *retval) 1699 { 1700 struct sys___fhopen40_args /* { 1701 syscallarg(const void *) fhp; 1702 syscallarg(size_t) fh_size; 1703 syscallarg(int) flags; 1704 } */ *uap = v; 1705 1706 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1707 SCARG(uap, flags), retval); 1708 } 1709 1710 int 1711 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1712 { 1713 int error; 1714 fhandle_t *fh; 1715 struct vnode *vp; 1716 1717 /* 1718 * Must be super user 1719 */ 1720 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1721 0, NULL, NULL, NULL))) 1722 return (error); 1723 1724 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1725 if (error != 0) 1726 return error; 1727 1728 error = vfs_fhtovp(fh, &vp); 1729 vfs_copyinfh_free(fh); 1730 if (error != 0) 1731 return error; 1732 1733 error = vn_stat(vp, sb, l); 1734 vput(vp); 1735 return error; 1736 } 1737 1738 1739 /* ARGSUSED */ 1740 int 1741 sys___fhstat40(struct lwp *l, void *v, register_t *retval) 1742 { 1743 struct sys___fhstat40_args /* { 1744 syscallarg(const void *) fhp; 1745 syscallarg(size_t) fh_size; 1746 syscallarg(struct stat *) sb; 1747 } */ *uap = v; 1748 struct stat sb; 1749 int error; 1750 1751 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1752 if (error) 1753 return error; 1754 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1755 } 1756 1757 int 1758 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1759 int flags) 1760 { 1761 fhandle_t *fh; 1762 struct mount *mp; 1763 struct vnode *vp; 1764 int error; 1765 1766 /* 1767 * Must be super user 1768 */ 1769 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1770 0, NULL, NULL, NULL))) 1771 return error; 1772 1773 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1774 if (error != 0) 1775 return error; 1776 1777 error = vfs_fhtovp(fh, &vp); 1778 vfs_copyinfh_free(fh); 1779 if (error != 0) 1780 return error; 1781 1782 mp = vp->v_mount; 1783 error = dostatvfs(mp, sb, l, flags, 1); 1784 vput(vp); 1785 return error; 1786 } 1787 1788 /* ARGSUSED */ 1789 int 1790 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval) 1791 { 1792 struct sys___fhstatvfs140_args /* { 1793 syscallarg(const void *) fhp; 1794 syscallarg(size_t) fh_size; 1795 syscallarg(struct statvfs *) buf; 1796 syscallarg(int) flags; 1797 } */ *uap = v; 1798 struct statvfs *sb = STATVFSBUF_GET(); 1799 int error; 1800 1801 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1802 SCARG(uap, flags)); 1803 if (error == 0) 1804 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1805 STATVFSBUF_PUT(sb); 1806 return error; 1807 } 1808 1809 /* 1810 * Create a special file. 1811 */ 1812 /* ARGSUSED */ 1813 int 1814 sys_mknod(struct lwp *l, void *v, register_t *retval) 1815 { 1816 struct sys_mknod_args /* { 1817 syscallarg(const char *) path; 1818 syscallarg(int) mode; 1819 syscallarg(int) dev; 1820 } */ *uap = v; 1821 struct proc *p = l->l_proc; 1822 struct vnode *vp; 1823 struct vattr vattr; 1824 int error, optype; 1825 struct nameidata nd; 1826 char *path; 1827 const char *cpath; 1828 enum uio_seg seg = UIO_USERSPACE; 1829 1830 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1831 0, NULL, NULL, NULL)) != 0) 1832 return (error); 1833 1834 optype = VOP_MKNOD_DESCOFFSET; 1835 1836 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1837 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath, l); 1838 1839 if ((error = namei(&nd)) != 0) 1840 goto out; 1841 vp = nd.ni_vp; 1842 if (vp != NULL) 1843 error = EEXIST; 1844 else { 1845 VATTR_NULL(&vattr); 1846 /* We will read cwdi->cwdi_cmask unlocked. */ 1847 vattr.va_mode = 1848 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1849 vattr.va_rdev = SCARG(uap, dev); 1850 1851 switch (SCARG(uap, mode) & S_IFMT) { 1852 case S_IFMT: /* used by badsect to flag bad sectors */ 1853 vattr.va_type = VBAD; 1854 break; 1855 case S_IFCHR: 1856 vattr.va_type = VCHR; 1857 break; 1858 case S_IFBLK: 1859 vattr.va_type = VBLK; 1860 break; 1861 case S_IFWHT: 1862 optype = VOP_WHITEOUT_DESCOFFSET; 1863 break; 1864 case S_IFREG: 1865 #if NVERIEXEC > 0 1866 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1867 O_CREAT); 1868 #endif /* NVERIEXEC > 0 */ 1869 vattr.va_type = VREG; 1870 vattr.va_rdev = VNOVAL; 1871 optype = VOP_CREATE_DESCOFFSET; 1872 break; 1873 default: 1874 error = EINVAL; 1875 break; 1876 } 1877 } 1878 if (!error) { 1879 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1880 switch (optype) { 1881 case VOP_WHITEOUT_DESCOFFSET: 1882 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1883 if (error) 1884 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1885 vput(nd.ni_dvp); 1886 break; 1887 1888 case VOP_MKNOD_DESCOFFSET: 1889 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1890 &nd.ni_cnd, &vattr); 1891 if (error == 0) 1892 vput(nd.ni_vp); 1893 break; 1894 1895 case VOP_CREATE_DESCOFFSET: 1896 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1897 &nd.ni_cnd, &vattr); 1898 if (error == 0) 1899 vput(nd.ni_vp); 1900 break; 1901 } 1902 } else { 1903 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1904 if (nd.ni_dvp == vp) 1905 vrele(nd.ni_dvp); 1906 else 1907 vput(nd.ni_dvp); 1908 if (vp) 1909 vrele(vp); 1910 } 1911 out: 1912 VERIEXEC_PATH_PUT(path); 1913 return (error); 1914 } 1915 1916 /* 1917 * Create a named pipe. 1918 */ 1919 /* ARGSUSED */ 1920 int 1921 sys_mkfifo(struct lwp *l, void *v, register_t *retval) 1922 { 1923 struct sys_mkfifo_args /* { 1924 syscallarg(const char *) path; 1925 syscallarg(int) mode; 1926 } */ *uap = v; 1927 struct proc *p = l->l_proc; 1928 struct vattr vattr; 1929 int error; 1930 struct nameidata nd; 1931 1932 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l); 1933 if ((error = namei(&nd)) != 0) 1934 return (error); 1935 if (nd.ni_vp != NULL) { 1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1937 if (nd.ni_dvp == nd.ni_vp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 vrele(nd.ni_vp); 1942 return (EEXIST); 1943 } 1944 VATTR_NULL(&vattr); 1945 vattr.va_type = VFIFO; 1946 /* We will read cwdi->cwdi_cmask unlocked. */ 1947 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1948 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1949 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1950 if (error == 0) 1951 vput(nd.ni_vp); 1952 return (error); 1953 } 1954 1955 /* 1956 * Make a hard file link. 1957 */ 1958 /* ARGSUSED */ 1959 int 1960 sys_link(struct lwp *l, void *v, register_t *retval) 1961 { 1962 struct sys_link_args /* { 1963 syscallarg(const char *) path; 1964 syscallarg(const char *) link; 1965 } */ *uap = v; 1966 struct vnode *vp; 1967 struct nameidata nd; 1968 int error; 1969 1970 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1971 SCARG(uap, path), l); 1972 if ((error = namei(&nd)) != 0) 1973 return (error); 1974 vp = nd.ni_vp; 1975 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1976 SCARG(uap, link), l); 1977 if ((error = namei(&nd)) != 0) 1978 goto out; 1979 if (nd.ni_vp) { 1980 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1981 if (nd.ni_dvp == nd.ni_vp) 1982 vrele(nd.ni_dvp); 1983 else 1984 vput(nd.ni_dvp); 1985 vrele(nd.ni_vp); 1986 error = EEXIST; 1987 goto out; 1988 } 1989 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1990 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 1991 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1992 out: 1993 vrele(vp); 1994 return (error); 1995 } 1996 1997 /* 1998 * Make a symbolic link. 1999 */ 2000 /* ARGSUSED */ 2001 int 2002 sys_symlink(struct lwp *l, void *v, register_t *retval) 2003 { 2004 struct sys_symlink_args /* { 2005 syscallarg(const char *) path; 2006 syscallarg(const char *) link; 2007 } */ *uap = v; 2008 struct proc *p = l->l_proc; 2009 struct vattr vattr; 2010 char *path; 2011 int error; 2012 struct nameidata nd; 2013 2014 path = PNBUF_GET(); 2015 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2016 if (error) 2017 goto out; 2018 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2019 SCARG(uap, link), l); 2020 if ((error = namei(&nd)) != 0) 2021 goto out; 2022 if (nd.ni_vp) { 2023 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2024 if (nd.ni_dvp == nd.ni_vp) 2025 vrele(nd.ni_dvp); 2026 else 2027 vput(nd.ni_dvp); 2028 vrele(nd.ni_vp); 2029 error = EEXIST; 2030 goto out; 2031 } 2032 VATTR_NULL(&vattr); 2033 vattr.va_type = VLNK; 2034 /* We will read cwdi->cwdi_cmask unlocked. */ 2035 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2036 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 2037 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2038 if (error == 0) 2039 vput(nd.ni_vp); 2040 out: 2041 PNBUF_PUT(path); 2042 return (error); 2043 } 2044 2045 /* 2046 * Delete a whiteout from the filesystem. 2047 */ 2048 /* ARGSUSED */ 2049 int 2050 sys_undelete(struct lwp *l, void *v, register_t *retval) 2051 { 2052 struct sys_undelete_args /* { 2053 syscallarg(const char *) path; 2054 } */ *uap = v; 2055 int error; 2056 struct nameidata nd; 2057 2058 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2059 UIO_USERSPACE, SCARG(uap, path), l); 2060 error = namei(&nd); 2061 if (error) 2062 return (error); 2063 2064 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2065 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2066 if (nd.ni_dvp == nd.ni_vp) 2067 vrele(nd.ni_dvp); 2068 else 2069 vput(nd.ni_dvp); 2070 if (nd.ni_vp) 2071 vrele(nd.ni_vp); 2072 return (EEXIST); 2073 } 2074 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 2075 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2076 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2077 vput(nd.ni_dvp); 2078 return (error); 2079 } 2080 2081 /* 2082 * Delete a name from the filesystem. 2083 */ 2084 /* ARGSUSED */ 2085 int 2086 sys_unlink(struct lwp *l, void *v, register_t *retval) 2087 { 2088 struct sys_unlink_args /* { 2089 syscallarg(const char *) path; 2090 } */ *uap = v; 2091 struct vnode *vp; 2092 int error; 2093 struct nameidata nd; 2094 char *path; 2095 const char *cpath; 2096 enum uio_seg seg = UIO_USERSPACE; 2097 2098 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 2099 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath, l); 2100 2101 if ((error = namei(&nd)) != 0) 2102 goto out; 2103 vp = nd.ni_vp; 2104 2105 /* 2106 * The root of a mounted filesystem cannot be deleted. 2107 */ 2108 if (vp->v_vflag & VV_ROOT) { 2109 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2110 if (nd.ni_dvp == vp) 2111 vrele(nd.ni_dvp); 2112 else 2113 vput(nd.ni_dvp); 2114 vput(vp); 2115 error = EBUSY; 2116 goto out; 2117 } 2118 2119 #if NVERIEXEC > 0 2120 /* Handle remove requests for veriexec entries. */ 2121 if ((error = veriexec_removechk(l, nd.ni_vp, nd.ni_dirp)) != 0) { 2122 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2123 if (nd.ni_dvp == vp) 2124 vrele(nd.ni_dvp); 2125 else 2126 vput(nd.ni_dvp); 2127 vput(vp); 2128 goto out; 2129 } 2130 #endif /* NVERIEXEC > 0 */ 2131 2132 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 2133 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2134 #ifdef FILEASSOC 2135 (void)fileassoc_file_delete(vp); 2136 #endif /* FILEASSOC */ 2137 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2138 out: 2139 VERIEXEC_PATH_PUT(path); 2140 return (error); 2141 } 2142 2143 /* 2144 * Reposition read/write file offset. 2145 */ 2146 int 2147 sys_lseek(struct lwp *l, void *v, register_t *retval) 2148 { 2149 struct sys_lseek_args /* { 2150 syscallarg(int) fd; 2151 syscallarg(int) pad; 2152 syscallarg(off_t) offset; 2153 syscallarg(int) whence; 2154 } */ *uap = v; 2155 struct proc *p = l->l_proc; 2156 kauth_cred_t cred = l->l_cred; 2157 struct filedesc *fdp = p->p_fd; 2158 struct file *fp; 2159 struct vnode *vp; 2160 struct vattr vattr; 2161 off_t newoff; 2162 int error; 2163 2164 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 2165 return (EBADF); 2166 2167 vp = (struct vnode *)fp->f_data; 2168 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2169 error = ESPIPE; 2170 mutex_exit(&fp->f_lock); 2171 goto out; 2172 } 2173 2174 switch (SCARG(uap, whence)) { 2175 case SEEK_CUR: 2176 newoff = fp->f_offset + SCARG(uap, offset); 2177 FILE_USE(fp); 2178 break; 2179 case SEEK_END: 2180 FILE_USE(fp); 2181 error = VOP_GETATTR(vp, &vattr, cred, l); 2182 if (error) { 2183 FILE_UNUSE(fp, l); 2184 goto out; 2185 } 2186 newoff = SCARG(uap, offset) + vattr.va_size; 2187 break; 2188 case SEEK_SET: 2189 FILE_USE(fp); 2190 newoff = SCARG(uap, offset); 2191 break; 2192 default: 2193 mutex_exit(&fp->f_lock); 2194 error = EINVAL; 2195 goto out; 2196 } 2197 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2198 mutex_enter(&fp->f_lock); 2199 *(off_t *)retval = fp->f_offset = newoff; 2200 mutex_exit(&fp->f_lock); 2201 } 2202 FILE_UNUSE(fp, l); 2203 out: 2204 return (error); 2205 } 2206 2207 /* 2208 * Positional read system call. 2209 */ 2210 int 2211 sys_pread(struct lwp *l, void *v, register_t *retval) 2212 { 2213 struct sys_pread_args /* { 2214 syscallarg(int) fd; 2215 syscallarg(void *) buf; 2216 syscallarg(size_t) nbyte; 2217 syscallarg(off_t) offset; 2218 } */ *uap = v; 2219 struct proc *p = l->l_proc; 2220 struct filedesc *fdp = p->p_fd; 2221 struct file *fp; 2222 struct vnode *vp; 2223 off_t offset; 2224 int error, fd = SCARG(uap, fd); 2225 2226 if ((fp = fd_getfile(fdp, fd)) == NULL) 2227 return (EBADF); 2228 2229 if ((fp->f_flag & FREAD) == 0) { 2230 mutex_exit(&fp->f_lock); 2231 return (EBADF); 2232 } 2233 2234 FILE_USE(fp); 2235 2236 vp = (struct vnode *)fp->f_data; 2237 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2238 error = ESPIPE; 2239 goto out; 2240 } 2241 2242 offset = SCARG(uap, offset); 2243 2244 /* 2245 * XXX This works because no file systems actually 2246 * XXX take any action on the seek operation. 2247 */ 2248 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2249 goto out; 2250 2251 /* dofileread() will unuse the descriptor for us */ 2252 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2253 &offset, 0, retval)); 2254 2255 out: 2256 FILE_UNUSE(fp, l); 2257 return (error); 2258 } 2259 2260 /* 2261 * Positional scatter read system call. 2262 */ 2263 int 2264 sys_preadv(struct lwp *l, void *v, register_t *retval) 2265 { 2266 struct sys_preadv_args /* { 2267 syscallarg(int) fd; 2268 syscallarg(const struct iovec *) iovp; 2269 syscallarg(int) iovcnt; 2270 syscallarg(off_t) offset; 2271 } */ *uap = v; 2272 2273 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2274 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval); 2275 } 2276 2277 /* 2278 * Positional write system call. 2279 */ 2280 int 2281 sys_pwrite(struct lwp *l, void *v, register_t *retval) 2282 { 2283 struct sys_pwrite_args /* { 2284 syscallarg(int) fd; 2285 syscallarg(const void *) buf; 2286 syscallarg(size_t) nbyte; 2287 syscallarg(off_t) offset; 2288 } */ *uap = v; 2289 struct proc *p = l->l_proc; 2290 struct filedesc *fdp = p->p_fd; 2291 struct file *fp; 2292 struct vnode *vp; 2293 off_t offset; 2294 int error, fd = SCARG(uap, fd); 2295 2296 if ((fp = fd_getfile(fdp, fd)) == NULL) 2297 return (EBADF); 2298 2299 if ((fp->f_flag & FWRITE) == 0) { 2300 mutex_exit(&fp->f_lock); 2301 return (EBADF); 2302 } 2303 2304 FILE_USE(fp); 2305 2306 vp = (struct vnode *)fp->f_data; 2307 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2308 error = ESPIPE; 2309 goto out; 2310 } 2311 2312 offset = SCARG(uap, offset); 2313 2314 /* 2315 * XXX This works because no file systems actually 2316 * XXX take any action on the seek operation. 2317 */ 2318 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2319 goto out; 2320 2321 /* dofilewrite() will unuse the descriptor for us */ 2322 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2323 &offset, 0, retval)); 2324 2325 out: 2326 FILE_UNUSE(fp, l); 2327 return (error); 2328 } 2329 2330 /* 2331 * Positional gather write system call. 2332 */ 2333 int 2334 sys_pwritev(struct lwp *l, void *v, register_t *retval) 2335 { 2336 struct sys_pwritev_args /* { 2337 syscallarg(int) fd; 2338 syscallarg(const struct iovec *) iovp; 2339 syscallarg(int) iovcnt; 2340 syscallarg(off_t) offset; 2341 } */ *uap = v; 2342 2343 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2344 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval); 2345 } 2346 2347 /* 2348 * Check access permissions. 2349 */ 2350 int 2351 sys_access(struct lwp *l, void *v, register_t *retval) 2352 { 2353 struct sys_access_args /* { 2354 syscallarg(const char *) path; 2355 syscallarg(int) flags; 2356 } */ *uap = v; 2357 kauth_cred_t cred; 2358 struct vnode *vp; 2359 int error, flags; 2360 struct nameidata nd; 2361 2362 cred = kauth_cred_dup(l->l_cred); 2363 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2364 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2365 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2366 SCARG(uap, path), l); 2367 /* Override default credentials */ 2368 nd.ni_cnd.cn_cred = cred; 2369 if ((error = namei(&nd)) != 0) 2370 goto out; 2371 vp = nd.ni_vp; 2372 2373 /* Flags == 0 means only check for existence. */ 2374 if (SCARG(uap, flags)) { 2375 flags = 0; 2376 if (SCARG(uap, flags) & R_OK) 2377 flags |= VREAD; 2378 if (SCARG(uap, flags) & W_OK) 2379 flags |= VWRITE; 2380 if (SCARG(uap, flags) & X_OK) 2381 flags |= VEXEC; 2382 2383 error = VOP_ACCESS(vp, flags, cred, l); 2384 if (!error && (flags & VWRITE)) 2385 error = vn_writechk(vp); 2386 } 2387 vput(vp); 2388 out: 2389 kauth_cred_free(cred); 2390 return (error); 2391 } 2392 2393 /* 2394 * Common code for all sys_stat functions, including compat versions. 2395 */ 2396 int 2397 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags, 2398 struct stat *sb) 2399 { 2400 int error; 2401 struct nameidata nd; 2402 2403 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2404 UIO_USERSPACE, path, l); 2405 error = namei(&nd); 2406 if (error != 0) 2407 return error; 2408 error = vn_stat(nd.ni_vp, sb, l); 2409 vput(nd.ni_vp); 2410 return error; 2411 } 2412 2413 /* 2414 * Get file status; this version follows links. 2415 */ 2416 /* ARGSUSED */ 2417 int 2418 sys___stat30(struct lwp *l, void *v, register_t *retval) 2419 { 2420 struct sys___stat30_args /* { 2421 syscallarg(const char *) path; 2422 syscallarg(struct stat *) ub; 2423 } */ *uap = v; 2424 struct stat sb; 2425 int error; 2426 2427 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb); 2428 if (error) 2429 return error; 2430 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2431 } 2432 2433 /* 2434 * Get file status; this version does not follow links. 2435 */ 2436 /* ARGSUSED */ 2437 int 2438 sys___lstat30(struct lwp *l, void *v, register_t *retval) 2439 { 2440 struct sys___lstat30_args /* { 2441 syscallarg(const char *) path; 2442 syscallarg(struct stat *) ub; 2443 } */ *uap = v; 2444 struct stat sb; 2445 int error; 2446 2447 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb); 2448 if (error) 2449 return error; 2450 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2451 } 2452 2453 /* 2454 * Get configurable pathname variables. 2455 */ 2456 /* ARGSUSED */ 2457 int 2458 sys_pathconf(struct lwp *l, void *v, register_t *retval) 2459 { 2460 struct sys_pathconf_args /* { 2461 syscallarg(const char *) path; 2462 syscallarg(int) name; 2463 } */ *uap = v; 2464 int error; 2465 struct nameidata nd; 2466 2467 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2468 SCARG(uap, path), l); 2469 if ((error = namei(&nd)) != 0) 2470 return (error); 2471 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2472 vput(nd.ni_vp); 2473 return (error); 2474 } 2475 2476 /* 2477 * Return target name of a symbolic link. 2478 */ 2479 /* ARGSUSED */ 2480 int 2481 sys_readlink(struct lwp *l, void *v, register_t *retval) 2482 { 2483 struct sys_readlink_args /* { 2484 syscallarg(const char *) path; 2485 syscallarg(char *) buf; 2486 syscallarg(size_t) count; 2487 } */ *uap = v; 2488 struct vnode *vp; 2489 struct iovec aiov; 2490 struct uio auio; 2491 int error; 2492 struct nameidata nd; 2493 2494 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2495 SCARG(uap, path), l); 2496 if ((error = namei(&nd)) != 0) 2497 return (error); 2498 vp = nd.ni_vp; 2499 if (vp->v_type != VLNK) 2500 error = EINVAL; 2501 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2502 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) { 2503 aiov.iov_base = SCARG(uap, buf); 2504 aiov.iov_len = SCARG(uap, count); 2505 auio.uio_iov = &aiov; 2506 auio.uio_iovcnt = 1; 2507 auio.uio_offset = 0; 2508 auio.uio_rw = UIO_READ; 2509 KASSERT(l == curlwp); 2510 auio.uio_vmspace = l->l_proc->p_vmspace; 2511 auio.uio_resid = SCARG(uap, count); 2512 error = VOP_READLINK(vp, &auio, l->l_cred); 2513 } 2514 vput(vp); 2515 *retval = SCARG(uap, count) - auio.uio_resid; 2516 return (error); 2517 } 2518 2519 /* 2520 * Change flags of a file given a path name. 2521 */ 2522 /* ARGSUSED */ 2523 int 2524 sys_chflags(struct lwp *l, void *v, register_t *retval) 2525 { 2526 struct sys_chflags_args /* { 2527 syscallarg(const char *) path; 2528 syscallarg(u_long) flags; 2529 } */ *uap = v; 2530 struct vnode *vp; 2531 int error; 2532 struct nameidata nd; 2533 2534 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2535 SCARG(uap, path), l); 2536 if ((error = namei(&nd)) != 0) 2537 return (error); 2538 vp = nd.ni_vp; 2539 error = change_flags(vp, SCARG(uap, flags), l); 2540 vput(vp); 2541 return (error); 2542 } 2543 2544 /* 2545 * Change flags of a file given a file descriptor. 2546 */ 2547 /* ARGSUSED */ 2548 int 2549 sys_fchflags(struct lwp *l, void *v, register_t *retval) 2550 { 2551 struct sys_fchflags_args /* { 2552 syscallarg(int) fd; 2553 syscallarg(u_long) flags; 2554 } */ *uap = v; 2555 struct proc *p = l->l_proc; 2556 struct vnode *vp; 2557 struct file *fp; 2558 int error; 2559 2560 /* getvnode() will use the descriptor for us */ 2561 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2562 return (error); 2563 vp = (struct vnode *)fp->f_data; 2564 error = change_flags(vp, SCARG(uap, flags), l); 2565 VOP_UNLOCK(vp, 0); 2566 FILE_UNUSE(fp, l); 2567 return (error); 2568 } 2569 2570 /* 2571 * Change flags of a file given a path name; this version does 2572 * not follow links. 2573 */ 2574 int 2575 sys_lchflags(struct lwp *l, void *v, register_t *retval) 2576 { 2577 struct sys_lchflags_args /* { 2578 syscallarg(const char *) path; 2579 syscallarg(u_long) flags; 2580 } */ *uap = v; 2581 struct vnode *vp; 2582 int error; 2583 struct nameidata nd; 2584 2585 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2586 SCARG(uap, path), l); 2587 if ((error = namei(&nd)) != 0) 2588 return (error); 2589 vp = nd.ni_vp; 2590 error = change_flags(vp, SCARG(uap, flags), l); 2591 vput(vp); 2592 return (error); 2593 } 2594 2595 /* 2596 * Common routine to change flags of a file. 2597 */ 2598 int 2599 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2600 { 2601 struct vattr vattr; 2602 int error; 2603 2604 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2605 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2606 /* 2607 * Non-superusers cannot change the flags on devices, even if they 2608 * own them. 2609 */ 2610 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2611 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 2612 goto out; 2613 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2614 error = EINVAL; 2615 goto out; 2616 } 2617 } 2618 VATTR_NULL(&vattr); 2619 vattr.va_flags = flags; 2620 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2621 out: 2622 return (error); 2623 } 2624 2625 /* 2626 * Change mode of a file given path name; this version follows links. 2627 */ 2628 /* ARGSUSED */ 2629 int 2630 sys_chmod(struct lwp *l, void *v, register_t *retval) 2631 { 2632 struct sys_chmod_args /* { 2633 syscallarg(const char *) path; 2634 syscallarg(int) mode; 2635 } */ *uap = v; 2636 int error; 2637 struct nameidata nd; 2638 2639 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2640 SCARG(uap, path), l); 2641 if ((error = namei(&nd)) != 0) 2642 return (error); 2643 2644 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2645 2646 vrele(nd.ni_vp); 2647 return (error); 2648 } 2649 2650 /* 2651 * Change mode of a file given a file descriptor. 2652 */ 2653 /* ARGSUSED */ 2654 int 2655 sys_fchmod(struct lwp *l, void *v, register_t *retval) 2656 { 2657 struct sys_fchmod_args /* { 2658 syscallarg(int) fd; 2659 syscallarg(int) mode; 2660 } */ *uap = v; 2661 struct proc *p = l->l_proc; 2662 struct file *fp; 2663 int error; 2664 2665 /* getvnode() will use the descriptor for us */ 2666 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2667 return (error); 2668 2669 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l); 2670 FILE_UNUSE(fp, l); 2671 return (error); 2672 } 2673 2674 /* 2675 * Change mode of a file given path name; this version does not follow links. 2676 */ 2677 /* ARGSUSED */ 2678 int 2679 sys_lchmod(struct lwp *l, void *v, register_t *retval) 2680 { 2681 struct sys_lchmod_args /* { 2682 syscallarg(const char *) path; 2683 syscallarg(int) mode; 2684 } */ *uap = v; 2685 int error; 2686 struct nameidata nd; 2687 2688 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2689 SCARG(uap, path), l); 2690 if ((error = namei(&nd)) != 0) 2691 return (error); 2692 2693 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2694 2695 vrele(nd.ni_vp); 2696 return (error); 2697 } 2698 2699 /* 2700 * Common routine to set mode given a vnode. 2701 */ 2702 static int 2703 change_mode(struct vnode *vp, int mode, struct lwp *l) 2704 { 2705 struct vattr vattr; 2706 int error; 2707 2708 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2709 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2710 VATTR_NULL(&vattr); 2711 vattr.va_mode = mode & ALLPERMS; 2712 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2713 VOP_UNLOCK(vp, 0); 2714 return (error); 2715 } 2716 2717 /* 2718 * Set ownership given a path name; this version follows links. 2719 */ 2720 /* ARGSUSED */ 2721 int 2722 sys_chown(struct lwp *l, void *v, register_t *retval) 2723 { 2724 struct sys_chown_args /* { 2725 syscallarg(const char *) path; 2726 syscallarg(uid_t) uid; 2727 syscallarg(gid_t) gid; 2728 } */ *uap = v; 2729 int error; 2730 struct nameidata nd; 2731 2732 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2733 SCARG(uap, path), l); 2734 if ((error = namei(&nd)) != 0) 2735 return (error); 2736 2737 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2738 2739 vrele(nd.ni_vp); 2740 return (error); 2741 } 2742 2743 /* 2744 * Set ownership given a path name; this version follows links. 2745 * Provides POSIX semantics. 2746 */ 2747 /* ARGSUSED */ 2748 int 2749 sys___posix_chown(struct lwp *l, void *v, register_t *retval) 2750 { 2751 struct sys_chown_args /* { 2752 syscallarg(const char *) path; 2753 syscallarg(uid_t) uid; 2754 syscallarg(gid_t) gid; 2755 } */ *uap = v; 2756 int error; 2757 struct nameidata nd; 2758 2759 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2760 SCARG(uap, path), l); 2761 if ((error = namei(&nd)) != 0) 2762 return (error); 2763 2764 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2765 2766 vrele(nd.ni_vp); 2767 return (error); 2768 } 2769 2770 /* 2771 * Set ownership given a file descriptor. 2772 */ 2773 /* ARGSUSED */ 2774 int 2775 sys_fchown(struct lwp *l, void *v, register_t *retval) 2776 { 2777 struct sys_fchown_args /* { 2778 syscallarg(int) fd; 2779 syscallarg(uid_t) uid; 2780 syscallarg(gid_t) gid; 2781 } */ *uap = v; 2782 struct proc *p = l->l_proc; 2783 int error; 2784 struct file *fp; 2785 2786 /* getvnode() will use the descriptor for us */ 2787 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2788 return (error); 2789 2790 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2791 SCARG(uap, gid), l, 0); 2792 FILE_UNUSE(fp, l); 2793 return (error); 2794 } 2795 2796 /* 2797 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2798 */ 2799 /* ARGSUSED */ 2800 int 2801 sys___posix_fchown(struct lwp *l, void *v, register_t *retval) 2802 { 2803 struct sys_fchown_args /* { 2804 syscallarg(int) fd; 2805 syscallarg(uid_t) uid; 2806 syscallarg(gid_t) gid; 2807 } */ *uap = v; 2808 struct proc *p = l->l_proc; 2809 int error; 2810 struct file *fp; 2811 2812 /* getvnode() will use the descriptor for us */ 2813 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2814 return (error); 2815 2816 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2817 SCARG(uap, gid), l, 1); 2818 FILE_UNUSE(fp, l); 2819 return (error); 2820 } 2821 2822 /* 2823 * Set ownership given a path name; this version does not follow links. 2824 */ 2825 /* ARGSUSED */ 2826 int 2827 sys_lchown(struct lwp *l, void *v, register_t *retval) 2828 { 2829 struct sys_lchown_args /* { 2830 syscallarg(const char *) path; 2831 syscallarg(uid_t) uid; 2832 syscallarg(gid_t) gid; 2833 } */ *uap = v; 2834 int error; 2835 struct nameidata nd; 2836 2837 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2838 SCARG(uap, path), l); 2839 if ((error = namei(&nd)) != 0) 2840 return (error); 2841 2842 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2843 2844 vrele(nd.ni_vp); 2845 return (error); 2846 } 2847 2848 /* 2849 * Set ownership given a path name; this version does not follow links. 2850 * Provides POSIX/XPG semantics. 2851 */ 2852 /* ARGSUSED */ 2853 int 2854 sys___posix_lchown(struct lwp *l, void *v, register_t *retval) 2855 { 2856 struct sys_lchown_args /* { 2857 syscallarg(const char *) path; 2858 syscallarg(uid_t) uid; 2859 syscallarg(gid_t) gid; 2860 } */ *uap = v; 2861 int error; 2862 struct nameidata nd; 2863 2864 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2865 SCARG(uap, path), l); 2866 if ((error = namei(&nd)) != 0) 2867 return (error); 2868 2869 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2870 2871 vrele(nd.ni_vp); 2872 return (error); 2873 } 2874 2875 /* 2876 * Common routine to set ownership given a vnode. 2877 */ 2878 static int 2879 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2880 int posix_semantics) 2881 { 2882 struct vattr vattr; 2883 mode_t newmode; 2884 int error; 2885 2886 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2887 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2888 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 2889 goto out; 2890 2891 #define CHANGED(x) ((int)(x) != -1) 2892 newmode = vattr.va_mode; 2893 if (posix_semantics) { 2894 /* 2895 * POSIX/XPG semantics: if the caller is not the super-user, 2896 * clear set-user-id and set-group-id bits. Both POSIX and 2897 * the XPG consider the behaviour for calls by the super-user 2898 * implementation-defined; we leave the set-user-id and set- 2899 * group-id settings intact in that case. 2900 */ 2901 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2902 NULL) != 0) 2903 newmode &= ~(S_ISUID | S_ISGID); 2904 } else { 2905 /* 2906 * NetBSD semantics: when changing owner and/or group, 2907 * clear the respective bit(s). 2908 */ 2909 if (CHANGED(uid)) 2910 newmode &= ~S_ISUID; 2911 if (CHANGED(gid)) 2912 newmode &= ~S_ISGID; 2913 } 2914 /* Update va_mode iff altered. */ 2915 if (vattr.va_mode == newmode) 2916 newmode = VNOVAL; 2917 2918 VATTR_NULL(&vattr); 2919 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2920 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2921 vattr.va_mode = newmode; 2922 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2923 #undef CHANGED 2924 2925 out: 2926 VOP_UNLOCK(vp, 0); 2927 return (error); 2928 } 2929 2930 /* 2931 * Set the access and modification times given a path name; this 2932 * version follows links. 2933 */ 2934 /* ARGSUSED */ 2935 int 2936 sys_utimes(struct lwp *l, void *v, register_t *retval) 2937 { 2938 struct sys_utimes_args /* { 2939 syscallarg(const char *) path; 2940 syscallarg(const struct timeval *) tptr; 2941 } */ *uap = v; 2942 2943 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2944 SCARG(uap, tptr), UIO_USERSPACE); 2945 } 2946 2947 /* 2948 * Set the access and modification times given a file descriptor. 2949 */ 2950 /* ARGSUSED */ 2951 int 2952 sys_futimes(struct lwp *l, void *v, register_t *retval) 2953 { 2954 struct sys_futimes_args /* { 2955 syscallarg(int) fd; 2956 syscallarg(const struct timeval *) tptr; 2957 } */ *uap = v; 2958 int error; 2959 struct file *fp; 2960 2961 /* getvnode() will use the descriptor for us */ 2962 if ((error = getvnode(l->l_proc->p_fd, SCARG(uap, fd), &fp)) != 0) 2963 return (error); 2964 2965 error = do_sys_utimes(l, fp->f_data, NULL, 0, 2966 SCARG(uap, tptr), UIO_USERSPACE); 2967 2968 FILE_UNUSE(fp, l); 2969 return (error); 2970 } 2971 2972 /* 2973 * Set the access and modification times given a path name; this 2974 * version does not follow links. 2975 */ 2976 int 2977 sys_lutimes(struct lwp *l, void *v, register_t *retval) 2978 { 2979 struct sys_lutimes_args /* { 2980 syscallarg(const char *) path; 2981 syscallarg(const struct timeval *) tptr; 2982 } */ *uap = v; 2983 2984 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2985 SCARG(uap, tptr), UIO_USERSPACE); 2986 } 2987 2988 /* 2989 * Common routine to set access and modification times given a vnode. 2990 */ 2991 int 2992 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2993 const struct timeval *tptr, enum uio_seg seg) 2994 { 2995 struct vattr vattr; 2996 struct nameidata nd; 2997 int error; 2998 2999 VATTR_NULL(&vattr); 3000 if (tptr == NULL) { 3001 nanotime(&vattr.va_atime); 3002 vattr.va_mtime = vattr.va_atime; 3003 vattr.va_vaflags |= VA_UTIMES_NULL; 3004 } else { 3005 struct timeval tv[2]; 3006 3007 if (seg != UIO_SYSSPACE) { 3008 error = copyin(tptr, &tv, sizeof (tv)); 3009 if (error != 0) 3010 return error; 3011 tptr = tv; 3012 } 3013 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime); 3014 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime); 3015 } 3016 3017 if (vp == NULL) { 3018 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path, l); 3019 if ((error = namei(&nd)) != 0) 3020 return (error); 3021 vp = nd.ni_vp; 3022 } else 3023 nd.ni_vp = NULL; 3024 3025 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3026 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3027 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 3028 VOP_UNLOCK(vp, 0); 3029 3030 if (nd.ni_vp != NULL) 3031 vrele(nd.ni_vp); 3032 3033 return (error); 3034 } 3035 3036 /* 3037 * Truncate a file given its path name. 3038 */ 3039 /* ARGSUSED */ 3040 int 3041 sys_truncate(struct lwp *l, void *v, register_t *retval) 3042 { 3043 struct sys_truncate_args /* { 3044 syscallarg(const char *) path; 3045 syscallarg(int) pad; 3046 syscallarg(off_t) length; 3047 } */ *uap = v; 3048 struct vnode *vp; 3049 struct vattr vattr; 3050 int error; 3051 struct nameidata nd; 3052 3053 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3054 SCARG(uap, path), l); 3055 if ((error = namei(&nd)) != 0) 3056 return (error); 3057 vp = nd.ni_vp; 3058 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3059 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3060 if (vp->v_type == VDIR) 3061 error = EISDIR; 3062 else if ((error = vn_writechk(vp)) == 0 && 3063 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) { 3064 VATTR_NULL(&vattr); 3065 vattr.va_size = SCARG(uap, length); 3066 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 3067 } 3068 vput(vp); 3069 return (error); 3070 } 3071 3072 /* 3073 * Truncate a file given a file descriptor. 3074 */ 3075 /* ARGSUSED */ 3076 int 3077 sys_ftruncate(struct lwp *l, void *v, register_t *retval) 3078 { 3079 struct sys_ftruncate_args /* { 3080 syscallarg(int) fd; 3081 syscallarg(int) pad; 3082 syscallarg(off_t) length; 3083 } */ *uap = v; 3084 struct proc *p = l->l_proc; 3085 struct vattr vattr; 3086 struct vnode *vp; 3087 struct file *fp; 3088 int error; 3089 3090 /* getvnode() will use the descriptor for us */ 3091 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3092 return (error); 3093 if ((fp->f_flag & FWRITE) == 0) { 3094 error = EINVAL; 3095 goto out; 3096 } 3097 vp = (struct vnode *)fp->f_data; 3098 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3099 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3100 if (vp->v_type == VDIR) 3101 error = EISDIR; 3102 else if ((error = vn_writechk(vp)) == 0) { 3103 VATTR_NULL(&vattr); 3104 vattr.va_size = SCARG(uap, length); 3105 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l); 3106 } 3107 VOP_UNLOCK(vp, 0); 3108 out: 3109 FILE_UNUSE(fp, l); 3110 return (error); 3111 } 3112 3113 /* 3114 * Sync an open file. 3115 */ 3116 /* ARGSUSED */ 3117 int 3118 sys_fsync(struct lwp *l, void *v, register_t *retval) 3119 { 3120 struct sys_fsync_args /* { 3121 syscallarg(int) fd; 3122 } */ *uap = v; 3123 struct proc *p = l->l_proc; 3124 struct vnode *vp; 3125 struct file *fp; 3126 int error; 3127 3128 /* getvnode() will use the descriptor for us */ 3129 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3130 return (error); 3131 vp = (struct vnode *)fp->f_data; 3132 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3133 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l); 3134 if (error == 0 && bioopsp != NULL && 3135 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3136 (*bioopsp->io_fsync)(vp, 0); 3137 VOP_UNLOCK(vp, 0); 3138 FILE_UNUSE(fp, l); 3139 return (error); 3140 } 3141 3142 /* 3143 * Sync a range of file data. API modeled after that found in AIX. 3144 * 3145 * FDATASYNC indicates that we need only save enough metadata to be able 3146 * to re-read the written data. Note we duplicate AIX's requirement that 3147 * the file be open for writing. 3148 */ 3149 /* ARGSUSED */ 3150 int 3151 sys_fsync_range(struct lwp *l, void *v, register_t *retval) 3152 { 3153 struct sys_fsync_range_args /* { 3154 syscallarg(int) fd; 3155 syscallarg(int) flags; 3156 syscallarg(off_t) start; 3157 syscallarg(off_t) length; 3158 } */ *uap = v; 3159 struct proc *p = l->l_proc; 3160 struct vnode *vp; 3161 struct file *fp; 3162 int flags, nflags; 3163 off_t s, e, len; 3164 int error; 3165 3166 /* getvnode() will use the descriptor for us */ 3167 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3168 return (error); 3169 3170 if ((fp->f_flag & FWRITE) == 0) { 3171 error = EBADF; 3172 goto out; 3173 } 3174 3175 flags = SCARG(uap, flags); 3176 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3177 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3178 error = EINVAL; 3179 goto out; 3180 } 3181 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3182 if (flags & FDATASYNC) 3183 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3184 else 3185 nflags = FSYNC_WAIT; 3186 if (flags & FDISKSYNC) 3187 nflags |= FSYNC_CACHE; 3188 3189 len = SCARG(uap, length); 3190 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3191 if (len) { 3192 s = SCARG(uap, start); 3193 e = s + len; 3194 if (e < s) { 3195 error = EINVAL; 3196 goto out; 3197 } 3198 } else { 3199 e = 0; 3200 s = 0; 3201 } 3202 3203 vp = (struct vnode *)fp->f_data; 3204 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3205 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l); 3206 3207 if (error == 0 && bioopsp != NULL && 3208 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3209 (*bioopsp->io_fsync)(vp, nflags); 3210 3211 VOP_UNLOCK(vp, 0); 3212 out: 3213 FILE_UNUSE(fp, l); 3214 return (error); 3215 } 3216 3217 /* 3218 * Sync the data of an open file. 3219 */ 3220 /* ARGSUSED */ 3221 int 3222 sys_fdatasync(struct lwp *l, void *v, register_t *retval) 3223 { 3224 struct sys_fdatasync_args /* { 3225 syscallarg(int) fd; 3226 } */ *uap = v; 3227 struct proc *p = l->l_proc; 3228 struct vnode *vp; 3229 struct file *fp; 3230 int error; 3231 3232 /* getvnode() will use the descriptor for us */ 3233 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3234 return (error); 3235 if ((fp->f_flag & FWRITE) == 0) { 3236 FILE_UNUSE(fp, l); 3237 return (EBADF); 3238 } 3239 vp = (struct vnode *)fp->f_data; 3240 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3241 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l); 3242 VOP_UNLOCK(vp, 0); 3243 FILE_UNUSE(fp, l); 3244 return (error); 3245 } 3246 3247 /* 3248 * Rename files, (standard) BSD semantics frontend. 3249 */ 3250 /* ARGSUSED */ 3251 int 3252 sys_rename(struct lwp *l, void *v, register_t *retval) 3253 { 3254 struct sys_rename_args /* { 3255 syscallarg(const char *) from; 3256 syscallarg(const char *) to; 3257 } */ *uap = v; 3258 3259 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0)); 3260 } 3261 3262 /* 3263 * Rename files, POSIX semantics frontend. 3264 */ 3265 /* ARGSUSED */ 3266 int 3267 sys___posix_rename(struct lwp *l, void *v, register_t *retval) 3268 { 3269 struct sys___posix_rename_args /* { 3270 syscallarg(const char *) from; 3271 syscallarg(const char *) to; 3272 } */ *uap = v; 3273 3274 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1)); 3275 } 3276 3277 /* 3278 * Rename files. Source and destination must either both be directories, 3279 * or both not be directories. If target is a directory, it must be empty. 3280 * If `from' and `to' refer to the same object, the value of the `retain' 3281 * argument is used to determine whether `from' will be 3282 * 3283 * (retain == 0) deleted unless `from' and `to' refer to the same 3284 * object in the file system's name space (BSD). 3285 * (retain == 1) always retained (POSIX). 3286 */ 3287 static int 3288 rename_files(const char *from, const char *to, struct lwp *l, int retain) 3289 { 3290 struct vnode *tvp, *fvp, *tdvp; 3291 struct nameidata fromnd, tond; 3292 struct proc *p; 3293 int error; 3294 3295 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3296 UIO_USERSPACE, from, l); 3297 if ((error = namei(&fromnd)) != 0) 3298 return (error); 3299 if (fromnd.ni_dvp != fromnd.ni_vp) 3300 VOP_UNLOCK(fromnd.ni_dvp, 0); 3301 fvp = fromnd.ni_vp; 3302 NDINIT(&tond, RENAME, 3303 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3304 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3305 UIO_USERSPACE, to, l); 3306 if ((error = namei(&tond)) != 0) { 3307 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3308 vrele(fromnd.ni_dvp); 3309 vrele(fvp); 3310 goto out1; 3311 } 3312 tdvp = tond.ni_dvp; 3313 tvp = tond.ni_vp; 3314 3315 if (tvp != NULL) { 3316 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3317 error = ENOTDIR; 3318 goto out; 3319 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3320 error = EISDIR; 3321 goto out; 3322 } 3323 } 3324 3325 if (fvp == tdvp) 3326 error = EINVAL; 3327 3328 /* 3329 * Source and destination refer to the same object. 3330 */ 3331 if (fvp == tvp) { 3332 if (retain) 3333 error = -1; 3334 else if (fromnd.ni_dvp == tdvp && 3335 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3336 !memcmp(fromnd.ni_cnd.cn_nameptr, 3337 tond.ni_cnd.cn_nameptr, 3338 fromnd.ni_cnd.cn_namelen)) 3339 error = -1; 3340 } 3341 3342 #if NVERIEXEC > 0 3343 if (!error) { 3344 char *f1, *f2; 3345 3346 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3347 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3348 3349 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3350 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3351 3352 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3353 3354 free(f1, M_TEMP); 3355 free(f2, M_TEMP); 3356 } 3357 #endif /* NVERIEXEC > 0 */ 3358 3359 out: 3360 p = l->l_proc; 3361 if (!error) { 3362 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE); 3363 if (fromnd.ni_dvp != tdvp) 3364 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3365 if (tvp) { 3366 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE); 3367 } 3368 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3369 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3370 } else { 3371 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3372 if (tdvp == tvp) 3373 vrele(tdvp); 3374 else 3375 vput(tdvp); 3376 if (tvp) 3377 vput(tvp); 3378 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3379 vrele(fromnd.ni_dvp); 3380 vrele(fvp); 3381 } 3382 vrele(tond.ni_startdir); 3383 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3384 out1: 3385 if (fromnd.ni_startdir) 3386 vrele(fromnd.ni_startdir); 3387 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3388 return (error == -1 ? 0 : error); 3389 } 3390 3391 /* 3392 * Make a directory file. 3393 */ 3394 /* ARGSUSED */ 3395 int 3396 sys_mkdir(struct lwp *l, void *v, register_t *retval) 3397 { 3398 struct sys_mkdir_args /* { 3399 syscallarg(const char *) path; 3400 syscallarg(int) mode; 3401 } */ *uap = v; 3402 struct proc *p = l->l_proc; 3403 struct vnode *vp; 3404 struct vattr vattr; 3405 int error; 3406 struct nameidata nd; 3407 3408 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3409 SCARG(uap, path), l); 3410 if ((error = namei(&nd)) != 0) 3411 return (error); 3412 vp = nd.ni_vp; 3413 if (vp != NULL) { 3414 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3415 if (nd.ni_dvp == vp) 3416 vrele(nd.ni_dvp); 3417 else 3418 vput(nd.ni_dvp); 3419 vrele(vp); 3420 return (EEXIST); 3421 } 3422 VATTR_NULL(&vattr); 3423 vattr.va_type = VDIR; 3424 /* We will read cwdi->cwdi_cmask unlocked. */ 3425 vattr.va_mode = 3426 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3427 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3428 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3429 if (!error) 3430 vput(nd.ni_vp); 3431 return (error); 3432 } 3433 3434 /* 3435 * Remove a directory file. 3436 */ 3437 /* ARGSUSED */ 3438 int 3439 sys_rmdir(struct lwp *l, void *v, register_t *retval) 3440 { 3441 struct sys_rmdir_args /* { 3442 syscallarg(const char *) path; 3443 } */ *uap = v; 3444 struct vnode *vp; 3445 int error; 3446 struct nameidata nd; 3447 3448 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3449 SCARG(uap, path), l); 3450 if ((error = namei(&nd)) != 0) 3451 return (error); 3452 vp = nd.ni_vp; 3453 if (vp->v_type != VDIR) { 3454 error = ENOTDIR; 3455 goto out; 3456 } 3457 /* 3458 * No rmdir "." please. 3459 */ 3460 if (nd.ni_dvp == vp) { 3461 error = EINVAL; 3462 goto out; 3463 } 3464 /* 3465 * The root of a mounted filesystem cannot be deleted. 3466 */ 3467 if (vp->v_vflag & VV_ROOT) { 3468 error = EBUSY; 3469 goto out; 3470 } 3471 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3472 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3473 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3474 return (error); 3475 3476 out: 3477 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3478 if (nd.ni_dvp == vp) 3479 vrele(nd.ni_dvp); 3480 else 3481 vput(nd.ni_dvp); 3482 vput(vp); 3483 return (error); 3484 } 3485 3486 /* 3487 * Read a block of directory entries in a file system independent format. 3488 */ 3489 int 3490 sys___getdents30(struct lwp *l, void *v, register_t *retval) 3491 { 3492 struct sys___getdents30_args /* { 3493 syscallarg(int) fd; 3494 syscallarg(char *) buf; 3495 syscallarg(size_t) count; 3496 } */ *uap = v; 3497 struct proc *p = l->l_proc; 3498 struct file *fp; 3499 int error, done; 3500 3501 /* getvnode() will use the descriptor for us */ 3502 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3503 return (error); 3504 if ((fp->f_flag & FREAD) == 0) { 3505 error = EBADF; 3506 goto out; 3507 } 3508 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3509 SCARG(uap, count), &done, l, 0, 0); 3510 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3511 *retval = done; 3512 out: 3513 FILE_UNUSE(fp, l); 3514 return (error); 3515 } 3516 3517 /* 3518 * Set the mode mask for creation of filesystem nodes. 3519 */ 3520 int 3521 sys_umask(struct lwp *l, void *v, register_t *retval) 3522 { 3523 struct sys_umask_args /* { 3524 syscallarg(mode_t) newmask; 3525 } */ *uap = v; 3526 struct proc *p = l->l_proc; 3527 struct cwdinfo *cwdi; 3528 3529 /* 3530 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3531 * important is that we serialize changes to the mask. The 3532 * rw_exit() will issue a write memory barrier on our behalf, 3533 * and force the changes out to other CPUs (as it must use an 3534 * atomic operation, draining the local CPU's store buffers). 3535 */ 3536 cwdi = p->p_cwdi; 3537 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3538 *retval = cwdi->cwdi_cmask; 3539 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3540 rw_exit(&cwdi->cwdi_lock); 3541 3542 return (0); 3543 } 3544 3545 /* 3546 * Void all references to file by ripping underlying filesystem 3547 * away from vnode. 3548 */ 3549 /* ARGSUSED */ 3550 int 3551 sys_revoke(struct lwp *l, void *v, register_t *retval) 3552 { 3553 struct sys_revoke_args /* { 3554 syscallarg(const char *) path; 3555 } */ *uap = v; 3556 struct vnode *vp; 3557 struct vattr vattr; 3558 int error; 3559 bool revoke; 3560 struct nameidata nd; 3561 3562 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3563 SCARG(uap, path), l); 3564 if ((error = namei(&nd)) != 0) 3565 return (error); 3566 vp = nd.ni_vp; 3567 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 3568 goto out; 3569 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid && 3570 (error = kauth_authorize_generic(l->l_cred, 3571 KAUTH_GENERIC_ISSUSER, NULL)) != 0) 3572 goto out; 3573 simple_lock(&vp->v_interlock); 3574 revoke = (vp->v_usecount > 1 || (vp->v_iflag & (VI_ALIASED|VI_LAYER))); 3575 simple_unlock(&vp->v_interlock); 3576 if (revoke) 3577 VOP_REVOKE(vp, REVOKEALL); 3578 out: 3579 vrele(vp); 3580 return (error); 3581 } 3582 3583 /* 3584 * Convert a user file descriptor to a kernel file entry. 3585 */ 3586 int 3587 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 3588 { 3589 struct vnode *vp; 3590 struct file *fp; 3591 3592 if ((fp = fd_getfile(fdp, fd)) == NULL) 3593 return (EBADF); 3594 3595 FILE_USE(fp); 3596 3597 if (fp->f_type != DTYPE_VNODE) { 3598 FILE_UNUSE(fp, NULL); 3599 return (EINVAL); 3600 } 3601 3602 vp = (struct vnode *)fp->f_data; 3603 if (vp->v_type == VBAD) { 3604 FILE_UNUSE(fp, NULL); 3605 return (EBADF); 3606 } 3607 3608 *fpp = fp; 3609 return (0); 3610 } 3611