1 /* $NetBSD: vfs_syscalls.c,v 1.402 2010/01/08 11:35:10 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.402 2010/01/08 11:35:10 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 
134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0) { 173 error = EOPNOTSUPP; /* Needs translation */ 174 goto out; 175 } 176 177 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 179 if (error) 180 goto out; 181 182 if (vfs_busy(mp, NULL)) { 183 error = EPERM; 184 goto out; 185 } 186 187 mutex_enter(&mp->mnt_updating); 188 189 mp->mnt_flag &= ~MNT_OP_FLAGS; 190 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 191 192 /* 193 * Set the mount level flags. 
194 */ 195 if (flags & MNT_RDONLY) 196 mp->mnt_flag |= MNT_RDONLY; 197 else if (mp->mnt_flag & MNT_RDONLY) 198 mp->mnt_iflag |= IMNT_WANTRDWR; 199 mp->mnt_flag &= 200 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 201 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 202 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 203 MNT_LOG); 204 mp->mnt_flag |= flags & 205 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 206 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 207 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 208 MNT_LOG | MNT_IGNORE); 209 210 error = VFS_MOUNT(mp, path, data, data_len); 211 212 if (error && data != NULL) { 213 int error2; 214 215 /* 216 * Update failed; let's try and see if it was an 217 * export request. For compat with 3.0 and earlier. 218 */ 219 error2 = vfs_hooks_reexport(mp, path, data); 220 221 /* 222 * Only update error code if the export request was 223 * understood but some problem occurred while 224 * processing it. 225 */ 226 if (error2 != EJUSTRETURN) 227 error = error2; 228 } 229 230 if (mp->mnt_iflag & IMNT_WANTRDWR) 231 mp->mnt_flag &= ~MNT_RDONLY; 232 if (error) 233 mp->mnt_flag = saved_flags; 234 mp->mnt_flag &= ~MNT_OP_FLAGS; 235 mp->mnt_iflag &= ~IMNT_WANTRDWR; 236 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 237 if (mp->mnt_syncer == NULL) 238 error = vfs_allocate_syncvnode(mp); 239 } else { 240 if (mp->mnt_syncer != NULL) 241 vfs_deallocate_syncvnode(mp); 242 } 243 mutex_exit(&mp->mnt_updating); 244 vfs_unbusy(mp, false, NULL); 245 246 out: 247 return (error); 248 } 249 250 static int 251 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 252 { 253 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 254 int error; 255 256 /* Copy file-system type from userspace. */ 257 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 258 if (error) { 259 /* 260 * Historically, filesystem types were identified by numbers. 
261 * If we get an integer for the filesystem type instead of a 262 * string, we check to see if it matches one of the historic 263 * filesystem types. 264 */ 265 u_long fsindex = (u_long)fstype; 266 if (fsindex >= nmountcompatnames || 267 mountcompatnames[fsindex] == NULL) 268 return ENODEV; 269 strlcpy(fstypename, mountcompatnames[fsindex], 270 sizeof(fstypename)); 271 } 272 273 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 274 if (strcmp(fstypename, "ufs") == 0) 275 fstypename[0] = 'f'; 276 277 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 278 return 0; 279 280 /* If we can autoload a vfs module, try again */ 281 mutex_enter(&module_lock); 282 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 283 mutex_exit(&module_lock); 284 285 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 286 return 0; 287 288 return ENODEV; 289 } 290 291 static int 292 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 293 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 294 { 295 struct mount *mp; 296 struct vnode *vp = *vpp; 297 struct vattr va; 298 int error; 299 300 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 301 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 302 if (error) 303 return error; 304 305 /* Can't make a non-dir a mount-point (from here anyway). */ 306 if (vp->v_type != VDIR) 307 return ENOTDIR; 308 309 /* 310 * If the user is not root, ensure that they own the directory 311 * onto which we are attempting to mount. 312 */ 313 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 314 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 315 (error = kauth_authorize_generic(l->l_cred, 316 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 317 return error; 318 } 319 320 if (flags & MNT_EXPORTED) 321 return EINVAL; 322 323 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 324 return error; 325 326 /* 327 * Check if a file-system is not already mounted on this vnode. 
328 */ 329 if (vp->v_mountedhere != NULL) 330 return EBUSY; 331 332 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) 333 return ENOMEM; 334 335 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 336 337 /* 338 * The underlying file system may refuse the mount for 339 * various reasons. Allow the user to force it to happen. 340 * 341 * Set the mount level flags. 342 */ 343 mp->mnt_flag = flags & 344 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 345 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 346 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 347 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 348 349 mutex_enter(&mp->mnt_updating); 350 error = VFS_MOUNT(mp, path, data, data_len); 351 mp->mnt_flag &= ~MNT_OP_FLAGS; 352 353 /* 354 * Put the new filesystem on the mount list after root. 355 */ 356 cache_purge(vp); 357 if (error != 0) { 358 vp->v_mountedhere = NULL; 359 mutex_exit(&mp->mnt_updating); 360 vfs_unbusy(mp, false, NULL); 361 vfs_destroy(mp); 362 return error; 363 } 364 365 mp->mnt_iflag &= ~IMNT_WANTRDWR; 366 mutex_enter(&mountlist_lock); 367 vp->v_mountedhere = mp; 368 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 369 mutex_exit(&mountlist_lock); 370 vn_restorerecurse(vp, recurse); 371 VOP_UNLOCK(vp, 0); 372 checkdirs(vp); 373 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 374 error = vfs_allocate_syncvnode(mp); 375 /* Hold an additional reference to the mount across VFS_START(). */ 376 mutex_exit(&mp->mnt_updating); 377 vfs_unbusy(mp, true, NULL); 378 (void) VFS_STATVFS(mp, &mp->mnt_stat); 379 error = VFS_START(mp, 0); 380 if (error) 381 vrele(vp); 382 /* Drop reference held for VFS_START(). */ 383 vfs_destroy(mp); 384 *vpp = NULL; 385 return error; 386 } 387 388 static int 389 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 390 void *data, size_t *data_len) 391 { 392 struct mount *mp; 393 int error; 394 395 /* If MNT_GETARGS is specified, it should be the only flag. 
*/ 396 if (flags & ~MNT_GETARGS) 397 return EINVAL; 398 399 mp = vp->v_mount; 400 401 /* XXX: probably some notion of "can see" here if we want isolation. */ 402 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 403 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 404 if (error) 405 return error; 406 407 if ((vp->v_vflag & VV_ROOT) == 0) 408 return EINVAL; 409 410 if (vfs_busy(mp, NULL)) 411 return EPERM; 412 413 mutex_enter(&mp->mnt_updating); 414 mp->mnt_flag &= ~MNT_OP_FLAGS; 415 mp->mnt_flag |= MNT_GETARGS; 416 error = VFS_MOUNT(mp, path, data, data_len); 417 mp->mnt_flag &= ~MNT_OP_FLAGS; 418 mutex_exit(&mp->mnt_updating); 419 420 vfs_unbusy(mp, false, NULL); 421 return (error); 422 } 423 424 int 425 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 426 { 427 /* { 428 syscallarg(const char *) type; 429 syscallarg(const char *) path; 430 syscallarg(int) flags; 431 syscallarg(void *) data; 432 syscallarg(size_t) data_len; 433 } */ 434 435 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 436 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 437 SCARG(uap, data_len), retval); 438 } 439 440 int 441 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 442 const char *path, int flags, void *data, enum uio_seg data_seg, 443 size_t data_len, register_t *retval) 444 { 445 struct vnode *vp; 446 void *data_buf = data; 447 u_int recurse; 448 int error; 449 450 /* 451 * Get vnode to be covered 452 */ 453 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 454 if (error != 0) 455 return (error); 456 457 /* 458 * A lookup in VFS_MOUNT might result in an attempt to 459 * lock this vnode again, so make the lock recursive. 
460 */ 461 if (vfsops == NULL) { 462 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 463 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 464 recurse = vn_setrecurse(vp); 465 vfsops = vp->v_mount->mnt_op; 466 } else { 467 /* 'type' is userspace */ 468 error = mount_get_vfsops(type, &vfsops); 469 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 470 recurse = vn_setrecurse(vp); 471 if (error != 0) 472 goto done; 473 } 474 } else { 475 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 476 recurse = vn_setrecurse(vp); 477 } 478 479 if (data != NULL && data_seg == UIO_USERSPACE) { 480 if (data_len == 0) { 481 /* No length supplied, use default for filesystem */ 482 data_len = vfsops->vfs_min_mount_data; 483 if (data_len > VFS_MAX_MOUNT_DATA) { 484 error = EINVAL; 485 goto done; 486 } 487 /* 488 * Hopefully a longer buffer won't make copyin() fail. 489 * For compatibility with 3.0 and earlier. 490 */ 491 if (flags & MNT_UPDATE 492 && data_len < sizeof (struct mnt_export_args30)) 493 data_len = sizeof (struct mnt_export_args30); 494 } 495 data_buf = kmem_alloc(data_len, KM_SLEEP); 496 497 /* NFS needs the buffer even for mnt_getargs .... */ 498 error = copyin(data, data_buf, data_len); 499 if (error != 0) 500 goto done; 501 } 502 503 if (flags & MNT_GETARGS) { 504 if (data_len == 0) { 505 error = EINVAL; 506 goto done; 507 } 508 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 509 if (error != 0) 510 goto done; 511 if (data_seg == UIO_USERSPACE) 512 error = copyout(data_buf, data, data_len); 513 *retval = data_len; 514 } else if (flags & MNT_UPDATE) { 515 error = mount_update(l, vp, path, flags, data_buf, &data_len); 516 } else { 517 /* Locking is handled internally in mount_domount(). 
*/ 518 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 519 &data_len, recurse); 520 } 521 522 done: 523 if (vp != NULL) { 524 vn_restorerecurse(vp, recurse); 525 vput(vp); 526 } 527 if (data_buf != data) 528 kmem_free(data_buf, data_len); 529 return (error); 530 } 531 532 /* 533 * Scan all active processes to see if any of them have a current 534 * or root directory onto which the new filesystem has just been 535 * mounted. If so, replace them with the new mount point. 536 */ 537 void 538 checkdirs(struct vnode *olddp) 539 { 540 struct cwdinfo *cwdi; 541 struct vnode *newdp, *rele1, *rele2; 542 struct proc *p; 543 bool retry; 544 545 if (olddp->v_usecount == 1) 546 return; 547 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 548 panic("mount: lost mount"); 549 550 do { 551 retry = false; 552 mutex_enter(proc_lock); 553 PROCLIST_FOREACH(p, &allproc) { 554 if ((p->p_flag & PK_MARKER) != 0) 555 continue; 556 if ((cwdi = p->p_cwdi) == NULL) 557 continue; 558 /* 559 * Can't change to the old directory any more, 560 * so even if we see a stale value it's not a 561 * problem. 562 */ 563 if (cwdi->cwdi_cdir != olddp && 564 cwdi->cwdi_rdir != olddp) 565 continue; 566 retry = true; 567 rele1 = NULL; 568 rele2 = NULL; 569 atomic_inc_uint(&cwdi->cwdi_refcnt); 570 mutex_exit(proc_lock); 571 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 572 if (cwdi->cwdi_cdir == olddp) { 573 rele1 = cwdi->cwdi_cdir; 574 vref(newdp); 575 cwdi->cwdi_cdir = newdp; 576 } 577 if (cwdi->cwdi_rdir == olddp) { 578 rele2 = cwdi->cwdi_rdir; 579 vref(newdp); 580 cwdi->cwdi_rdir = newdp; 581 } 582 rw_exit(&cwdi->cwdi_lock); 583 cwdfree(cwdi); 584 if (rele1 != NULL) 585 vrele(rele1); 586 if (rele2 != NULL) 587 vrele(rele2); 588 mutex_enter(proc_lock); 589 break; 590 } 591 mutex_exit(proc_lock); 592 } while (retry); 593 594 if (rootvnode == olddp) { 595 vrele(rootvnode); 596 vref(newdp); 597 rootvnode = newdp; 598 } 599 vput(newdp); 600 } 601 602 /* 603 * Unmount a file system. 
604 * 605 * Note: unmount takes a path to the vnode mounted on as argument, 606 * not special file (as before). 607 */ 608 /* ARGSUSED */ 609 int 610 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 611 { 612 /* { 613 syscallarg(const char *) path; 614 syscallarg(int) flags; 615 } */ 616 struct vnode *vp; 617 struct mount *mp; 618 int error; 619 struct nameidata nd; 620 621 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 622 SCARG(uap, path)); 623 if ((error = namei(&nd)) != 0) 624 return (error); 625 vp = nd.ni_vp; 626 mp = vp->v_mount; 627 atomic_inc_uint(&mp->mnt_refcnt); 628 VOP_UNLOCK(vp, 0); 629 630 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 631 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 632 if (error) { 633 vrele(vp); 634 vfs_destroy(mp); 635 return (error); 636 } 637 638 /* 639 * Don't allow unmounting the root file system. 640 */ 641 if (mp->mnt_flag & MNT_ROOTFS) { 642 vrele(vp); 643 vfs_destroy(mp); 644 return (EINVAL); 645 } 646 647 /* 648 * Must be the root of the filesystem 649 */ 650 if ((vp->v_vflag & VV_ROOT) == 0) { 651 vrele(vp); 652 vfs_destroy(mp); 653 return (EINVAL); 654 } 655 656 vrele(vp); 657 error = dounmount(mp, SCARG(uap, flags), l); 658 vfs_destroy(mp); 659 return error; 660 } 661 662 /* 663 * Do the actual file system unmount. File system is assumed to have 664 * been locked by the caller. 665 * 666 * => Caller hold reference to the mount, explicitly for dounmount(). 667 */ 668 int 669 dounmount(struct mount *mp, int flags, struct lwp *l) 670 { 671 struct vnode *coveredvp; 672 int error; 673 int async; 674 int used_syncer; 675 676 #if NVERIEXEC > 0 677 error = veriexec_unmountchk(mp); 678 if (error) 679 return (error); 680 #endif /* NVERIEXEC > 0 */ 681 682 /* 683 * XXX Freeze syncer. Must do this before locking the 684 * mount point. See dounmount() for details. 
685 */ 686 mutex_enter(&syncer_mutex); 687 rw_enter(&mp->mnt_unmounting, RW_WRITER); 688 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 689 rw_exit(&mp->mnt_unmounting); 690 mutex_exit(&syncer_mutex); 691 return ENOENT; 692 } 693 694 used_syncer = (mp->mnt_syncer != NULL); 695 696 /* 697 * XXX Syncer must be frozen when we get here. This should really 698 * be done on a per-mountpoint basis, but the syncer doesn't work 699 * like that. 700 * 701 * The caller of dounmount() must acquire syncer_mutex because 702 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 703 * order, and we must preserve that order to avoid deadlock. 704 * 705 * So, if the file system did not use the syncer, now is 706 * the time to release the syncer_mutex. 707 */ 708 if (used_syncer == 0) 709 mutex_exit(&syncer_mutex); 710 711 mp->mnt_iflag |= IMNT_UNMOUNT; 712 async = mp->mnt_flag & MNT_ASYNC; 713 mp->mnt_flag &= ~MNT_ASYNC; 714 cache_purgevfs(mp); /* remove cache entries for this file sys */ 715 if (mp->mnt_syncer != NULL) 716 vfs_deallocate_syncvnode(mp); 717 error = 0; 718 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 719 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 720 } 721 vfs_scrubvnlist(mp); 722 if (error == 0 || (flags & MNT_FORCE)) 723 error = VFS_UNMOUNT(mp, flags); 724 if (error) { 725 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 726 (void) vfs_allocate_syncvnode(mp); 727 mp->mnt_iflag &= ~IMNT_UNMOUNT; 728 mp->mnt_flag |= async; 729 rw_exit(&mp->mnt_unmounting); 730 if (used_syncer) 731 mutex_exit(&syncer_mutex); 732 return (error); 733 } 734 vfs_scrubvnlist(mp); 735 mutex_enter(&mountlist_lock); 736 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 737 coveredvp->v_mountedhere = NULL; 738 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 739 mp->mnt_iflag |= IMNT_GONE; 740 mutex_exit(&mountlist_lock); 741 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 742 panic("unmount: dangling vnode"); 743 if (used_syncer) 744 mutex_exit(&syncer_mutex); 745 vfs_hooks_unmount(mp); 746 
rw_exit(&mp->mnt_unmounting); 747 vfs_destroy(mp); /* reference from mount() */ 748 if (coveredvp != NULLVP) 749 vrele(coveredvp); 750 return (0); 751 } 752 753 /* 754 * Sync each mounted filesystem. 755 */ 756 #ifdef DEBUG 757 int syncprt = 0; 758 struct ctldebug debug0 = { "syncprt", &syncprt }; 759 #endif 760 761 /* ARGSUSED */ 762 int 763 sys_sync(struct lwp *l, const void *v, register_t *retval) 764 { 765 struct mount *mp, *nmp; 766 int asyncflag; 767 768 if (l == NULL) 769 l = &lwp0; 770 771 mutex_enter(&mountlist_lock); 772 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 773 mp = nmp) { 774 if (vfs_busy(mp, &nmp)) { 775 continue; 776 } 777 mutex_enter(&mp->mnt_updating); 778 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 779 asyncflag = mp->mnt_flag & MNT_ASYNC; 780 mp->mnt_flag &= ~MNT_ASYNC; 781 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 782 if (asyncflag) 783 mp->mnt_flag |= MNT_ASYNC; 784 } 785 mutex_exit(&mp->mnt_updating); 786 vfs_unbusy(mp, false, &nmp); 787 } 788 mutex_exit(&mountlist_lock); 789 #ifdef DEBUG 790 if (syncprt) 791 vfs_bufstats(); 792 #endif /* DEBUG */ 793 return (0); 794 } 795 796 /* 797 * Change filesystem quotas. 
798 */ 799 /* ARGSUSED */ 800 int 801 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 802 { 803 /* { 804 syscallarg(const char *) path; 805 syscallarg(int) cmd; 806 syscallarg(int) uid; 807 syscallarg(void *) arg; 808 } */ 809 struct mount *mp; 810 int error; 811 struct vnode *vp; 812 813 error = namei_simple_user(SCARG(uap, path), 814 NSM_FOLLOW_TRYEMULROOT, &vp); 815 if (error != 0) 816 return (error); 817 mp = vp->v_mount; 818 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 819 SCARG(uap, arg)); 820 vrele(vp); 821 return (error); 822 } 823 824 int 825 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 826 int root) 827 { 828 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 829 int error = 0; 830 831 /* 832 * If MNT_NOWAIT or MNT_LAZY is specified, do not 833 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 834 * overrides MNT_NOWAIT. 835 */ 836 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 837 (flags != MNT_WAIT && flags != 0)) { 838 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 839 goto done; 840 } 841 842 /* Get the filesystem stats now */ 843 memset(sp, 0, sizeof(*sp)); 844 if ((error = VFS_STATVFS(mp, sp)) != 0) { 845 return error; 846 } 847 848 if (cwdi->cwdi_rdir == NULL) 849 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 850 done: 851 if (cwdi->cwdi_rdir != NULL) { 852 size_t len; 853 char *bp; 854 char c; 855 char *path = PNBUF_GET(); 856 857 bp = path + MAXPATHLEN; 858 *--bp = '\0'; 859 rw_enter(&cwdi->cwdi_lock, RW_READER); 860 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 861 MAXPATHLEN / 2, 0, l); 862 rw_exit(&cwdi->cwdi_lock); 863 if (error) { 864 PNBUF_PUT(path); 865 return error; 866 } 867 len = strlen(bp); 868 if (len != 1) { 869 /* 870 * for mount points that are below our root, we can see 871 * them, so we fix up the pathname and return them. The 872 * rest we cannot see, so we don't allow viewing the 873 * data. 
874 */ 875 if (strncmp(bp, sp->f_mntonname, len) == 0 && 876 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 877 (void)strlcpy(sp->f_mntonname, 878 c == '\0' ? "/" : &sp->f_mntonname[len], 879 sizeof(sp->f_mntonname)); 880 } else { 881 if (root) 882 (void)strlcpy(sp->f_mntonname, "/", 883 sizeof(sp->f_mntonname)); 884 else 885 error = EPERM; 886 } 887 } 888 PNBUF_PUT(path); 889 } 890 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 891 return error; 892 } 893 894 /* 895 * Get filesystem statistics by path. 896 */ 897 int 898 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 899 { 900 struct mount *mp; 901 int error; 902 struct vnode *vp; 903 904 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 905 if (error != 0) 906 return error; 907 mp = vp->v_mount; 908 error = dostatvfs(mp, sb, l, flags, 1); 909 vrele(vp); 910 return error; 911 } 912 913 /* ARGSUSED */ 914 int 915 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 916 { 917 /* { 918 syscallarg(const char *) path; 919 syscallarg(struct statvfs *) buf; 920 syscallarg(int) flags; 921 } */ 922 struct statvfs *sb; 923 int error; 924 925 sb = STATVFSBUF_GET(); 926 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 927 if (error == 0) 928 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 929 STATVFSBUF_PUT(sb); 930 return error; 931 } 932 933 /* 934 * Get filesystem statistics by fd. 
935 */ 936 int 937 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 938 { 939 file_t *fp; 940 struct mount *mp; 941 int error; 942 943 /* fd_getvnode() will use the descriptor for us */ 944 if ((error = fd_getvnode(fd, &fp)) != 0) 945 return (error); 946 mp = ((struct vnode *)fp->f_data)->v_mount; 947 error = dostatvfs(mp, sb, curlwp, flags, 1); 948 fd_putfile(fd); 949 return error; 950 } 951 952 /* ARGSUSED */ 953 int 954 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 955 { 956 /* { 957 syscallarg(int) fd; 958 syscallarg(struct statvfs *) buf; 959 syscallarg(int) flags; 960 } */ 961 struct statvfs *sb; 962 int error; 963 964 sb = STATVFSBUF_GET(); 965 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 966 if (error == 0) 967 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 968 STATVFSBUF_PUT(sb); 969 return error; 970 } 971 972 973 /* 974 * Get statistics on all filesystems. 975 */ 976 int 977 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 978 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 979 register_t *retval) 980 { 981 int root = 0; 982 struct proc *p = l->l_proc; 983 struct mount *mp, *nmp; 984 struct statvfs *sb; 985 size_t count, maxcount; 986 int error = 0; 987 988 sb = STATVFSBUF_GET(); 989 maxcount = bufsize / entry_sz; 990 mutex_enter(&mountlist_lock); 991 count = 0; 992 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 993 mp = nmp) { 994 if (vfs_busy(mp, &nmp)) { 995 continue; 996 } 997 if (sfsp && count < maxcount) { 998 error = dostatvfs(mp, sb, l, flags, 0); 999 if (error) { 1000 vfs_unbusy(mp, false, &nmp); 1001 error = 0; 1002 continue; 1003 } 1004 error = copyfn(sb, sfsp, entry_sz); 1005 if (error) { 1006 vfs_unbusy(mp, false, NULL); 1007 goto out; 1008 } 1009 sfsp = (char *)sfsp + entry_sz; 1010 root |= strcmp(sb->f_mntonname, "/") == 0; 1011 } 1012 count++; 1013 vfs_unbusy(mp, false, &nmp); 1014 } 1015 
mutex_exit(&mountlist_lock); 1016 1017 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1018 /* 1019 * fake a root entry 1020 */ 1021 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1022 sb, l, flags, 1); 1023 if (error != 0) 1024 goto out; 1025 if (sfsp) { 1026 error = copyfn(sb, sfsp, entry_sz); 1027 if (error != 0) 1028 goto out; 1029 } 1030 count++; 1031 } 1032 if (sfsp && count > maxcount) 1033 *retval = maxcount; 1034 else 1035 *retval = count; 1036 out: 1037 STATVFSBUF_PUT(sb); 1038 return error; 1039 } 1040 1041 int 1042 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1043 { 1044 /* { 1045 syscallarg(struct statvfs *) buf; 1046 syscallarg(size_t) bufsize; 1047 syscallarg(int) flags; 1048 } */ 1049 1050 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1051 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1052 } 1053 1054 /* 1055 * Change current working directory to a given file descriptor. 1056 */ 1057 /* ARGSUSED */ 1058 int 1059 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1060 { 1061 /* { 1062 syscallarg(int) fd; 1063 } */ 1064 struct proc *p = l->l_proc; 1065 struct cwdinfo *cwdi; 1066 struct vnode *vp, *tdp; 1067 struct mount *mp; 1068 file_t *fp; 1069 int error, fd; 1070 1071 /* fd_getvnode() will use the descriptor for us */ 1072 fd = SCARG(uap, fd); 1073 if ((error = fd_getvnode(fd, &fp)) != 0) 1074 return (error); 1075 vp = fp->f_data; 1076 1077 vref(vp); 1078 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1079 if (vp->v_type != VDIR) 1080 error = ENOTDIR; 1081 else 1082 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1083 if (error) { 1084 vput(vp); 1085 goto out; 1086 } 1087 while ((mp = vp->v_mountedhere) != NULL) { 1088 error = vfs_busy(mp, NULL); 1089 vput(vp); 1090 if (error != 0) 1091 goto out; 1092 error = VFS_ROOT(mp, &tdp); 1093 vfs_unbusy(mp, false, NULL); 1094 if (error) 1095 goto out; 1096 vp = tdp; 1097 } 1098 VOP_UNLOCK(vp, 0); 1099 1100 /* 1101 * 
Disallow changing to a directory not under the process's 1102 * current root directory (if there is one). 1103 */ 1104 cwdi = p->p_cwdi; 1105 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1106 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1107 vrele(vp); 1108 error = EPERM; /* operation not permitted */ 1109 } else { 1110 vrele(cwdi->cwdi_cdir); 1111 cwdi->cwdi_cdir = vp; 1112 } 1113 rw_exit(&cwdi->cwdi_lock); 1114 1115 out: 1116 fd_putfile(fd); 1117 return (error); 1118 } 1119 1120 /* 1121 * Change this process's notion of the root directory to a given file 1122 * descriptor. 1123 */ 1124 int 1125 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1126 { 1127 struct proc *p = l->l_proc; 1128 struct vnode *vp; 1129 file_t *fp; 1130 int error, fd = SCARG(uap, fd); 1131 1132 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1133 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1134 return error; 1135 /* fd_getvnode() will use the descriptor for us */ 1136 if ((error = fd_getvnode(fd, &fp)) != 0) 1137 return error; 1138 vp = fp->f_data; 1139 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1140 if (vp->v_type != VDIR) 1141 error = ENOTDIR; 1142 else 1143 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1144 VOP_UNLOCK(vp, 0); 1145 if (error) 1146 goto out; 1147 vref(vp); 1148 1149 change_root(p->p_cwdi, vp, l); 1150 1151 out: 1152 fd_putfile(fd); 1153 return (error); 1154 } 1155 1156 /* 1157 * Change current working directory (``.''). 
*/
/*
 * The path is resolved and checked by chdir_lookup(); the resulting
 * vnode replaces cwdi_cdir while cwdi_lock is held as writer.
 */
/* ARGSUSED */
int
sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	int error;
	struct vnode *vp;

	if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
	    &vp, l)) != 0)
		return (error);
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	/* Release the previous cwd; vp takes its place. */
	vrele(cwdi->cwdi_cdir);
	cwdi->cwdi_cdir = vp;
	rw_exit(&cwdi->cwdi_lock);
	return (0);
}

/*
 * Change notion of root (``/'') directory.
 *
 * Requires the KAUTH_SYSTEM_CHROOT authorization; the path is resolved
 * by chdir_lookup() and installed by change_root().
 */
/* ARGSUSED */
int
sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */
	struct proc *p = l->l_proc;
	int error;
	struct vnode *vp;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
	    KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
		return (error);
	if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
	    &vp, l)) != 0)
		return (error);

	change_root(p->p_cwdi, vp, l);

	return (0);
}

/*
 * Common routine for chroot and fchroot.
 * NB: callers need to properly authorize the change root operation.
 *
 * Stores vp in cwdi->cwdi_rdir without taking a new reference, so the
 * caller's reference on vp is consumed.  Any previous root is released.
 */
void
change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
{

	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir != NULL)
		vrele(cwdi->cwdi_rdir);
	cwdi->cwdi_rdir = vp;

	/*
	 * Prevent escaping from chroot by putting the root under
	 * the working directory.  Silently chdir to / if we aren't
	 * already there.
	 */
	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
		/*
		 * XXX would be more failsafe to change directory to a
		 * deadfs node here instead
		 */
		vrele(cwdi->cwdi_cdir);
		/* vp is now held twice: once as root, once as cwd. */
		vref(vp);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);
}

/*
 * Common routine for chroot and chdir.
 *
 * Looks up path (following symlinks, leaf locked), requires the result
 * to be a directory (else ENOTDIR) that passes VOP_ACCESS() for VEXEC.
 * On success *vpp is returned unlocked; on failure it has been vput()
 * and must not be used by the caller.
 */
int
chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, where,
	    path);
	if ((error = namei(&nd)) != 0)
		return (error);
	*vpp = nd.ni_vp;
	if ((*vpp)->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);

	if (error)
		vput(*vpp);
	else
		VOP_UNLOCK(*vpp, 0);
	return (error);
}

/*
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * On success the new descriptor index is stored in *retval.
 */
int
sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	file_t *fp;
	struct vnode *vp;
	int flags, cmode;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	flags = FFLAGS(SCARG(uap, flags));
	/* At least one of read/write access must be requested. */
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((error = fd_allocfile(&fp, &indx)) != 0)
		return (error);
	/* We're going to read cwdi->cwdi_cmask unlocked here. */
	cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, path));
	l->l_dupfd = -indx - 1;		/* XXX check for fdopen */
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		fd_abort(p, fp, indx);
		/*
		 * EDUPFD/EMOVEFD with a non-negative l_dupfd is turned
		 * into a duplicate of that descriptor via fd_dupopen().
		 */
		if ((error == EDUPFD || error == EMOVEFD) &&
		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
			*retval = indx;
			return (0);
		}
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	l->l_dupfd = 0;
	vp = nd.ni_vp;
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: start 0, length 0, relative to SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked around VOP_ADVLOCK(). */
		VOP_UNLOCK(vp, 0);
		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
		if (error) {
			(void) vn_close(vp, fp->f_flag, fp->f_cred);
			fd_abort(p, fp, indx);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_or_uint(&fp->f_flag, FHASLOCK);
	}
	VOP_UNLOCK(vp, 0);
	*retval = indx;
	fd_affix(p, fp, indx);
	return (0);
}

/*
 * Free a file handle with kmem_free(), using the size that
 * FHANDLE_SIZE() computes from the handle itself.  NULL is a no-op.
 */
static void
vfs__fhfree(fhandle_t *fhp)
{
	size_t fhsize;

	if (fhp == NULL) {
		return;
	}
	fhsize = FHANDLE_SIZE(fhp);
	kmem_free(fhp, fhsize);
}

/*
 * vfs_composefh: compose a filehandle.
1356 */ 1357 1358 int 1359 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1360 { 1361 struct mount *mp; 1362 struct fid *fidp; 1363 int error; 1364 size_t needfhsize; 1365 size_t fidsize; 1366 1367 mp = vp->v_mount; 1368 fidp = NULL; 1369 if (*fh_size < FHANDLE_SIZE_MIN) { 1370 fidsize = 0; 1371 } else { 1372 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1373 if (fhp != NULL) { 1374 memset(fhp, 0, *fh_size); 1375 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1376 fidp = &fhp->fh_fid; 1377 } 1378 } 1379 error = VFS_VPTOFH(vp, fidp, &fidsize); 1380 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1381 if (error == 0 && *fh_size < needfhsize) { 1382 error = E2BIG; 1383 } 1384 *fh_size = needfhsize; 1385 return error; 1386 } 1387 1388 int 1389 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1390 { 1391 struct mount *mp; 1392 fhandle_t *fhp; 1393 size_t fhsize; 1394 size_t fidsize; 1395 int error; 1396 1397 *fhpp = NULL; 1398 mp = vp->v_mount; 1399 fidsize = 0; 1400 error = VFS_VPTOFH(vp, NULL, &fidsize); 1401 KASSERT(error != 0); 1402 if (error != E2BIG) { 1403 goto out; 1404 } 1405 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1406 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1407 if (fhp == NULL) { 1408 error = ENOMEM; 1409 goto out; 1410 } 1411 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1412 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1413 if (error == 0) { 1414 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1415 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1416 *fhpp = fhp; 1417 } else { 1418 kmem_free(fhp, fhsize); 1419 } 1420 out: 1421 return error; 1422 } 1423 1424 void 1425 vfs_composefh_free(fhandle_t *fhp) 1426 { 1427 1428 vfs__fhfree(fhp); 1429 } 1430 1431 /* 1432 * vfs_fhtovp: lookup a vnode by a filehandle. 
*/

/*
 * Returns ESTALE if no mounted file system matches the handle's fsid,
 * EOPNOTSUPP if that file system has no vfs_fhtovp operation, and
 * otherwise the result of VFS_FHTOVP().  *vpp is set to NULL first.
 */
int
vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
{
	struct mount *mp;
	int error;

	*vpp = NULL;
	mp = vfs_getvfs(FHANDLE_FSID(fhp));
	if (mp == NULL) {
		error = ESTALE;
		goto out;
	}
	if (mp->mnt_op->vfs_fhtovp == NULL) {
		error = EOPNOTSUPP;
		goto out;
	}
	error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
out:
	return error;
}

/*
 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
 * the needed size.
 *
 * fhsize must lie within [FHANDLE_SIZE_MIN, FHANDLE_SIZE_MAX], else
 * EINVAL.  On success *fhpp points to a kmem-allocated copy whose
 * FHANDLE_SIZE() equals the size finally used; on failure *fhpp is
 * NULL and nothing remains allocated.
 */
int
vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
{
	fhandle_t *fhp;
	int error;

	*fhpp = NULL;
	if (fhsize > FHANDLE_SIZE_MAX) {
		return EINVAL;
	}
	if (fhsize < FHANDLE_SIZE_MIN) {
		return EINVAL;
	}
again:
	fhp = kmem_alloc(fhsize, KM_SLEEP);
	if (fhp == NULL) {
		return ENOMEM;
	}
	error = copyin(ufhp, fhp, fhsize);
	if (error == 0) {
		/* XXX this check shouldn't be here */
		if (FHANDLE_SIZE(fhp) == fhsize) {
			*fhpp = fhp;
			return 0;
		} else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
			/*
			 * a kludge for nfsv2 padded handles.
			 *
			 * Retry with the smaller size the handle itself
			 * claims; the buffer is freed with the size it was
			 * allocated with before fhsize is changed.
			 */
			size_t sz;

			sz = FHANDLE_SIZE(fhp);
			kmem_free(fhp, fhsize);
			fhsize = sz;
			goto again;
		} else {
			/*
			 * userland told us wrong size.
			 */
			error = EINVAL;
		}
	}
	kmem_free(fhp, fhsize);
	return error;
}

/*
 * Release a handle obtained from vfs_copyinfh_alloc().
 */
void
vfs_copyinfh_free(fhandle_t *fhp)
{

	vfs__fhfree(fhp);
}

/*
 * Get file handle system call
 *
 * The handle size is always written back to *fh_size once it is known.
 * The handle itself is copied out only if the size the caller supplied
 * in *fh_size is large enough; otherwise E2BIG is returned.
 */
int
sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
{
	/* {
		syscallarg(char *) fname;
		syscallarg(fhandle_t *) fhp;
		syscallarg(size_t *) fh_size;
	} */
	struct vnode *vp;
	fhandle_t *fh;
	int error;
	struct nameidata nd;
	size_t sz;
	size_t usz;

	/*
	 * Must be super user
	 */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
	    0, NULL, NULL, NULL);
	if (error)
		return (error);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, fname));
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	/* The vnode is only needed to build the handle. */
	error = vfs_composefh_alloc(vp, &fh);
	vput(vp);
	if (error != 0) {
		/* fh is NULL here; freeing it at out: is a no-op. */
		goto out;
	}
	error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
	if (error != 0) {
		goto out;
	}
	sz = FHANDLE_SIZE(fh);
	error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
	if (error != 0) {
		goto out;
	}
	if (usz >= sz) {
		error = copyout(fh, SCARG(uap, fhp), sz);
	} else {
		error = E2BIG;
	}
out:
	vfs_composefh_free(fh);
	return (error);
}

/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
1573 */ 1574 1575 int 1576 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1577 register_t *retval) 1578 { 1579 file_t *fp; 1580 struct vnode *vp = NULL; 1581 kauth_cred_t cred = l->l_cred; 1582 file_t *nfp; 1583 int type, indx, error=0; 1584 struct flock lf; 1585 struct vattr va; 1586 fhandle_t *fh; 1587 int flags; 1588 proc_t *p; 1589 1590 p = curproc; 1591 1592 /* 1593 * Must be super user 1594 */ 1595 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1596 0, NULL, NULL, NULL))) 1597 return (error); 1598 1599 flags = FFLAGS(oflags); 1600 if ((flags & (FREAD | FWRITE)) == 0) 1601 return (EINVAL); 1602 if ((flags & O_CREAT)) 1603 return (EINVAL); 1604 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1605 return (error); 1606 fp = nfp; 1607 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1608 if (error != 0) { 1609 goto bad; 1610 } 1611 error = vfs_fhtovp(fh, &vp); 1612 if (error != 0) { 1613 goto bad; 1614 } 1615 1616 /* Now do an effective vn_open */ 1617 1618 if (vp->v_type == VSOCK) { 1619 error = EOPNOTSUPP; 1620 goto bad; 1621 } 1622 error = vn_openchk(vp, cred, flags); 1623 if (error != 0) 1624 goto bad; 1625 if (flags & O_TRUNC) { 1626 VOP_UNLOCK(vp, 0); /* XXX */ 1627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1628 vattr_null(&va); 1629 va.va_size = 0; 1630 error = VOP_SETATTR(vp, &va, cred); 1631 if (error) 1632 goto bad; 1633 } 1634 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1635 goto bad; 1636 if (flags & FWRITE) { 1637 mutex_enter(&vp->v_interlock); 1638 vp->v_writecount++; 1639 mutex_exit(&vp->v_interlock); 1640 } 1641 1642 /* done with modified vn_open, now finish what sys_open does. 
*/ 1643 1644 fp->f_flag = flags & FMASK; 1645 fp->f_type = DTYPE_VNODE; 1646 fp->f_ops = &vnops; 1647 fp->f_data = vp; 1648 if (flags & (O_EXLOCK | O_SHLOCK)) { 1649 lf.l_whence = SEEK_SET; 1650 lf.l_start = 0; 1651 lf.l_len = 0; 1652 if (flags & O_EXLOCK) 1653 lf.l_type = F_WRLCK; 1654 else 1655 lf.l_type = F_RDLCK; 1656 type = F_FLOCK; 1657 if ((flags & FNONBLOCK) == 0) 1658 type |= F_WAIT; 1659 VOP_UNLOCK(vp, 0); 1660 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1661 if (error) { 1662 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1663 fd_abort(p, fp, indx); 1664 return (error); 1665 } 1666 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1667 atomic_or_uint(&fp->f_flag, FHASLOCK); 1668 } 1669 VOP_UNLOCK(vp, 0); 1670 *retval = indx; 1671 fd_affix(p, fp, indx); 1672 vfs_copyinfh_free(fh); 1673 return (0); 1674 1675 bad: 1676 fd_abort(p, fp, indx); 1677 if (vp != NULL) 1678 vput(vp); 1679 vfs_copyinfh_free(fh); 1680 return (error); 1681 } 1682 1683 int 1684 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1685 { 1686 /* { 1687 syscallarg(const void *) fhp; 1688 syscallarg(size_t) fh_size; 1689 syscallarg(int) flags; 1690 } */ 1691 1692 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1693 SCARG(uap, flags), retval); 1694 } 1695 1696 int 1697 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1698 { 1699 int error; 1700 fhandle_t *fh; 1701 struct vnode *vp; 1702 1703 /* 1704 * Must be super user 1705 */ 1706 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1707 0, NULL, NULL, NULL))) 1708 return (error); 1709 1710 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1711 if (error != 0) 1712 return error; 1713 1714 error = vfs_fhtovp(fh, &vp); 1715 vfs_copyinfh_free(fh); 1716 if (error != 0) 1717 return error; 1718 1719 error = vn_stat(vp, sb); 1720 vput(vp); 1721 return error; 1722 } 1723 1724 1725 /* ARGSUSED */ 1726 int 1727 sys___fhstat50(struct lwp *l, const struct 
sys___fhstat50_args *uap, register_t *retval) 1728 { 1729 /* { 1730 syscallarg(const void *) fhp; 1731 syscallarg(size_t) fh_size; 1732 syscallarg(struct stat *) sb; 1733 } */ 1734 struct stat sb; 1735 int error; 1736 1737 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1738 if (error) 1739 return error; 1740 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1741 } 1742 1743 int 1744 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1745 int flags) 1746 { 1747 fhandle_t *fh; 1748 struct mount *mp; 1749 struct vnode *vp; 1750 int error; 1751 1752 /* 1753 * Must be super user 1754 */ 1755 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1756 0, NULL, NULL, NULL))) 1757 return error; 1758 1759 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1760 if (error != 0) 1761 return error; 1762 1763 error = vfs_fhtovp(fh, &vp); 1764 vfs_copyinfh_free(fh); 1765 if (error != 0) 1766 return error; 1767 1768 mp = vp->v_mount; 1769 error = dostatvfs(mp, sb, l, flags, 1); 1770 vput(vp); 1771 return error; 1772 } 1773 1774 /* ARGSUSED */ 1775 int 1776 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1777 { 1778 /* { 1779 syscallarg(const void *) fhp; 1780 syscallarg(size_t) fh_size; 1781 syscallarg(struct statvfs *) buf; 1782 syscallarg(int) flags; 1783 } */ 1784 struct statvfs *sb = STATVFSBUF_GET(); 1785 int error; 1786 1787 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1788 SCARG(uap, flags)); 1789 if (error == 0) 1790 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1791 STATVFSBUF_PUT(sb); 1792 return error; 1793 } 1794 1795 /* 1796 * Create a special file. 
*/
/* ARGSUSED */
int
sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(mode_t) mode;
		syscallarg(dev_t) dev;
	} */
	return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
	    SCARG(uap, dev), retval, UIO_USERSPACE);
}

/*
 * Common code for mknod: create the node named by pathname, which is
 * read from the address space selected by seg.
 *
 * The file type bits of mode select the operation: VOP_MKNOD() for
 * S_IFMT (bad-sector marker), S_IFCHR and S_IFBLK, VOP_WHITEOUT() for
 * S_IFWHT, and VOP_CREATE() for S_IFREG.  Any other type yields
 * EINVAL, and an existing name yields EEXIST.  Requires the
 * KAUTH_SYSTEM_MKNOD authorization.
 */
int
do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
    register_t *retval, enum uio_seg seg)
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error, optype;
	struct nameidata nd;
	char *path;
	const char *cpath;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
	    0, NULL, NULL, NULL)) != 0)
		return (error);

	/* Default operation; the switch below may override it. */
	optype = VOP_MKNOD_DESCOFFSET;

	VERIEXEC_PATH_GET(pathname, seg, cpath, path);
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);

	if ((error = namei(&nd)) != 0)
		goto out;
	vp = nd.ni_vp;
	if (vp != NULL)
		error = EEXIST;
	else {
		vattr_null(&vattr);
		/* We will read cwdi->cwdi_cmask unlocked. */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
		vattr.va_rdev = dev;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			optype = VOP_WHITEOUT_DESCOFFSET;
			break;
		case S_IFREG:
#if NVERIEXEC > 0
			error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
			    O_CREAT);
#endif /* NVERIEXEC > 0 */
			vattr.va_type = VREG;
			vattr.va_rdev = VNOVAL;
			optype = VOP_CREATE_DESCOFFSET;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	if (!error) {
		switch (optype) {
		case VOP_WHITEOUT_DESCOFFSET:
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
			if (error)
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			break;

		/*
		 * NOTE(review): unlike the whiteout case, ni_dvp is not
		 * released here after VOP_MKNOD()/VOP_CREATE(); presumably
		 * those operations dispose of it themselves -- confirm.
		 */
		case VOP_MKNOD_DESCOFFSET:
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vput(nd.ni_vp);
			break;

		case VOP_CREATE_DESCOFFSET:
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vput(nd.ni_vp);
			break;
		}
	} else {
		/*
		 * Abort the pending create.  If parent and leaf are the
		 * same vnode only the reference is dropped for the parent;
		 * otherwise it is vput().
		 */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
out:
	VERIEXEC_PATH_PUT(path);
	return (error);
}

/*
 * Create a named pipe.
*/
/*
 * Fails with EEXIST if the name already exists.  The permission bits
 * of mode are masked with the process's cwdi_cmask.
 */
/* ARGSUSED */
int
sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(int) mode;
	} */
	struct proc *p = l->l_proc;
	struct vattr vattr;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, path));
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* Name exists: abort the create and release both vnodes. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	vattr_null(&vattr);
	vattr.va_type = VFIFO;
	/* We will read cwdi->cwdi_cmask unlocked. */
	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error == 0)
		vput(nd.ni_vp);
	return (error);
}

/*
 * Make a hard file link.
 *
 * path names the existing file and link the new name.  Fails with
 * EEXIST if link already exists.
 */
/* ARGSUSED */
int
sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(const char *) link;
	} */
	struct vnode *vp;
	struct nameidata nd;
	int error;

	/* Resolve the existing file first; its reference is dropped at out:. */
	error = namei_simple_user(SCARG(uap, path),
				NSM_FOLLOW_TRYEMULROOT, &vp);
	if (error != 0)
		return (error);
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, link));
	if ((error = namei(&nd)) != 0)
		goto out;
	if (nd.ni_vp) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		error = EEXIST;
		goto out;
	}
	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
out:
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
*/
/*
 * path is the link target text, copied into a PNBUF_GET() buffer of
 * MAXPATHLEN bytes; link is the name to create.  Fails with EEXIST if
 * link already exists.  The buffer is returned on every path.
 */
/* ARGSUSED */
int
sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(const char *) link;
	} */
	struct proc *p = l->l_proc;
	struct vattr vattr;
	char *path;
	int error;
	struct nameidata nd;

	path = PNBUF_GET();
	error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
	if (error)
		goto out;
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, link));
	if ((error = namei(&nd)) != 0)
		goto out;
	if (nd.ni_vp) {
		/* Name exists: abort the create and release both vnodes. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		error = EEXIST;
		goto out;
	}
	vattr_null(&vattr);
	vattr.va_type = VLNK;
	/* We will read cwdi->cwdi_cmask unlocked. */
	vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
	if (error == 0)
		vput(nd.ni_vp);
out:
	PNBUF_PUT(path);
	return (error);
}

/*
 * Delete a whiteout from the filesystem.
*/
/*
 * Returns EEXIST unless the lookup found no vnode and flagged the name
 * as a whiteout (ISWHITEOUT).
 */
/* ARGSUSED */
int
sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */
	int error;
	struct nameidata nd;

	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
	    UIO_USERSPACE, SCARG(uap, path));
	error = namei(&nd);
	if (error)
		return (error);

	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		return (EEXIST);
	}
	if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	vput(nd.ni_dvp);
	return (error);
}

/*
 * Delete a name from the filesystem.
 */
/* ARGSUSED */
int
sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */

	return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
}

/*
 * Common code for unlink: remove the name arg, which is read from the
 * address space selected by seg.  Returns EBUSY for the root of a
 * mounted file system.
 */
int
do_sys_unlink(const char *arg, enum uio_seg seg)
{
	struct vnode *vp;
	int error;
	struct nameidata nd;
	char *path;
	const char *cpath;

	VERIEXEC_PATH_GET(arg, seg, cpath, path);
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);

	if ((error = namei(&nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vput(vp);
		error = EBUSY;
		goto out;
	}

#if NVERIEXEC > 0
	/* Handle remove requests for veriexec entries. */
	if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vput(vp);
		goto out;
	}
#endif /* NVERIEXEC > 0 */

#ifdef FILEASSOC
	(void)fileassoc_file_delete(vp);
#endif /* FILEASSOC */
	/*
	 * NOTE(review): neither vnode is released here after
	 * VOP_REMOVE(); presumably the operation disposes of both --
	 * confirm.
	 */
	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
out:
	VERIEXEC_PATH_PUT(path);
	return (error);
}

/*
 * Reposition read/write file offset.
 *
 * Returns ESPIPE for descriptors that are not vnodes or that refer to
 * a FIFO, and EINVAL for an unknown whence.  On success the new offset
 * is stored both in the file and, as an off_t, in *retval.
 */
int
sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(int) whence;
	} */
	kauth_cred_t cred = l->l_cred;
	file_t *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t newoff;
	int error, fd;

	fd = SCARG(uap, fd);

	if ((fp = fd_getfile(fd)) == NULL)
		return (EBADF);

	vp = fp->f_data;
	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	switch (SCARG(uap, whence)) {
	case SEEK_CUR:
		newoff = fp->f_offset + SCARG(uap, offset);
		break;
	case SEEK_END:
		/* Relative to the current file size. */
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error) {
			goto out;
		}
		newoff = SCARG(uap, offset) + vattr.va_size;
		break;
	case SEEK_SET:
		newoff = SCARG(uap, offset);
		break;
	default:
		error = EINVAL;
		goto out;
	}
	/* The offset is committed only if the file system accepts it. */
	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
		*(off_t *)retval = fp->f_offset = newoff;
	}
out:
	fd_putfile(fd);
	return (error);
}

/*
 * Positional read system call.
2192 */ 2193 int 2194 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2195 { 2196 /* { 2197 syscallarg(int) fd; 2198 syscallarg(void *) buf; 2199 syscallarg(size_t) nbyte; 2200 syscallarg(off_t) offset; 2201 } */ 2202 file_t *fp; 2203 struct vnode *vp; 2204 off_t offset; 2205 int error, fd = SCARG(uap, fd); 2206 2207 if ((fp = fd_getfile(fd)) == NULL) 2208 return (EBADF); 2209 2210 if ((fp->f_flag & FREAD) == 0) { 2211 fd_putfile(fd); 2212 return (EBADF); 2213 } 2214 2215 vp = fp->f_data; 2216 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2217 error = ESPIPE; 2218 goto out; 2219 } 2220 2221 offset = SCARG(uap, offset); 2222 2223 /* 2224 * XXX This works because no file systems actually 2225 * XXX take any action on the seek operation. 2226 */ 2227 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2228 goto out; 2229 2230 /* dofileread() will unuse the descriptor for us */ 2231 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2232 &offset, 0, retval)); 2233 2234 out: 2235 fd_putfile(fd); 2236 return (error); 2237 } 2238 2239 /* 2240 * Positional scatter read system call. 2241 */ 2242 int 2243 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2244 { 2245 /* { 2246 syscallarg(int) fd; 2247 syscallarg(const struct iovec *) iovp; 2248 syscallarg(int) iovcnt; 2249 syscallarg(off_t) offset; 2250 } */ 2251 off_t offset = SCARG(uap, offset); 2252 2253 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2254 SCARG(uap, iovcnt), &offset, 0, retval); 2255 } 2256 2257 /* 2258 * Positional write system call. 
2259 */ 2260 int 2261 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2262 { 2263 /* { 2264 syscallarg(int) fd; 2265 syscallarg(const void *) buf; 2266 syscallarg(size_t) nbyte; 2267 syscallarg(off_t) offset; 2268 } */ 2269 file_t *fp; 2270 struct vnode *vp; 2271 off_t offset; 2272 int error, fd = SCARG(uap, fd); 2273 2274 if ((fp = fd_getfile(fd)) == NULL) 2275 return (EBADF); 2276 2277 if ((fp->f_flag & FWRITE) == 0) { 2278 fd_putfile(fd); 2279 return (EBADF); 2280 } 2281 2282 vp = fp->f_data; 2283 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2284 error = ESPIPE; 2285 goto out; 2286 } 2287 2288 offset = SCARG(uap, offset); 2289 2290 /* 2291 * XXX This works because no file systems actually 2292 * XXX take any action on the seek operation. 2293 */ 2294 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2295 goto out; 2296 2297 /* dofilewrite() will unuse the descriptor for us */ 2298 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2299 &offset, 0, retval)); 2300 2301 out: 2302 fd_putfile(fd); 2303 return (error); 2304 } 2305 2306 /* 2307 * Positional gather write system call. 2308 */ 2309 int 2310 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2311 { 2312 /* { 2313 syscallarg(int) fd; 2314 syscallarg(const struct iovec *) iovp; 2315 syscallarg(int) iovcnt; 2316 syscallarg(off_t) offset; 2317 } */ 2318 off_t offset = SCARG(uap, offset); 2319 2320 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2321 SCARG(uap, iovcnt), &offset, 0, retval); 2322 } 2323 2324 /* 2325 * Check access permissions. 
2326 */ 2327 int 2328 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2329 { 2330 /* { 2331 syscallarg(const char *) path; 2332 syscallarg(int) flags; 2333 } */ 2334 kauth_cred_t cred; 2335 struct vnode *vp; 2336 int error, flags; 2337 struct nameidata nd; 2338 2339 cred = kauth_cred_dup(l->l_cred); 2340 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2341 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2342 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2343 SCARG(uap, path)); 2344 /* Override default credentials */ 2345 nd.ni_cnd.cn_cred = cred; 2346 if ((error = namei(&nd)) != 0) 2347 goto out; 2348 vp = nd.ni_vp; 2349 2350 /* Flags == 0 means only check for existence. */ 2351 if (SCARG(uap, flags)) { 2352 flags = 0; 2353 if (SCARG(uap, flags) & R_OK) 2354 flags |= VREAD; 2355 if (SCARG(uap, flags) & W_OK) 2356 flags |= VWRITE; 2357 if (SCARG(uap, flags) & X_OK) 2358 flags |= VEXEC; 2359 2360 error = VOP_ACCESS(vp, flags, cred); 2361 if (!error && (flags & VWRITE)) 2362 error = vn_writechk(vp); 2363 } 2364 vput(vp); 2365 out: 2366 kauth_cred_free(cred); 2367 return (error); 2368 } 2369 2370 /* 2371 * Common code for all sys_stat functions, including compat versions. 2372 */ 2373 int 2374 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2375 { 2376 int error; 2377 struct nameidata nd; 2378 2379 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2380 UIO_USERSPACE, path); 2381 error = namei(&nd); 2382 if (error != 0) 2383 return error; 2384 error = vn_stat(nd.ni_vp, sb); 2385 vput(nd.ni_vp); 2386 return error; 2387 } 2388 2389 /* 2390 * Get file status; this version follows links. 
2391 */ 2392 /* ARGSUSED */ 2393 int 2394 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2395 { 2396 /* { 2397 syscallarg(const char *) path; 2398 syscallarg(struct stat *) ub; 2399 } */ 2400 struct stat sb; 2401 int error; 2402 2403 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2404 if (error) 2405 return error; 2406 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2407 } 2408 2409 /* 2410 * Get file status; this version does not follow links. 2411 */ 2412 /* ARGSUSED */ 2413 int 2414 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2415 { 2416 /* { 2417 syscallarg(const char *) path; 2418 syscallarg(struct stat *) ub; 2419 } */ 2420 struct stat sb; 2421 int error; 2422 2423 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2424 if (error) 2425 return error; 2426 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2427 } 2428 2429 /* 2430 * Get configurable pathname variables. 2431 */ 2432 /* ARGSUSED */ 2433 int 2434 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2435 { 2436 /* { 2437 syscallarg(const char *) path; 2438 syscallarg(int) name; 2439 } */ 2440 int error; 2441 struct nameidata nd; 2442 2443 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2444 SCARG(uap, path)); 2445 if ((error = namei(&nd)) != 0) 2446 return (error); 2447 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2448 vput(nd.ni_vp); 2449 return (error); 2450 } 2451 2452 /* 2453 * Return target name of a symbolic link. 
2454 */ 2455 /* ARGSUSED */ 2456 int 2457 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2458 { 2459 /* { 2460 syscallarg(const char *) path; 2461 syscallarg(char *) buf; 2462 syscallarg(size_t) count; 2463 } */ 2464 struct vnode *vp; 2465 struct iovec aiov; 2466 struct uio auio; 2467 int error; 2468 struct nameidata nd; 2469 2470 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2471 SCARG(uap, path)); 2472 if ((error = namei(&nd)) != 0) 2473 return (error); 2474 vp = nd.ni_vp; 2475 if (vp->v_type != VLNK) 2476 error = EINVAL; 2477 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2478 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2479 aiov.iov_base = SCARG(uap, buf); 2480 aiov.iov_len = SCARG(uap, count); 2481 auio.uio_iov = &aiov; 2482 auio.uio_iovcnt = 1; 2483 auio.uio_offset = 0; 2484 auio.uio_rw = UIO_READ; 2485 KASSERT(l == curlwp); 2486 auio.uio_vmspace = l->l_proc->p_vmspace; 2487 auio.uio_resid = SCARG(uap, count); 2488 error = VOP_READLINK(vp, &auio, l->l_cred); 2489 } 2490 vput(vp); 2491 *retval = SCARG(uap, count) - auio.uio_resid; 2492 return (error); 2493 } 2494 2495 /* 2496 * Change flags of a file given a path name. 2497 */ 2498 /* ARGSUSED */ 2499 int 2500 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2501 { 2502 /* { 2503 syscallarg(const char *) path; 2504 syscallarg(u_long) flags; 2505 } */ 2506 struct vnode *vp; 2507 int error; 2508 2509 error = namei_simple_user(SCARG(uap, path), 2510 NSM_FOLLOW_TRYEMULROOT, &vp); 2511 if (error != 0) 2512 return (error); 2513 error = change_flags(vp, SCARG(uap, flags), l); 2514 vput(vp); 2515 return (error); 2516 } 2517 2518 /* 2519 * Change flags of a file given a file descriptor. 
2520 */ 2521 /* ARGSUSED */ 2522 int 2523 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2524 { 2525 /* { 2526 syscallarg(int) fd; 2527 syscallarg(u_long) flags; 2528 } */ 2529 struct vnode *vp; 2530 file_t *fp; 2531 int error; 2532 2533 /* fd_getvnode() will use the descriptor for us */ 2534 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2535 return (error); 2536 vp = fp->f_data; 2537 error = change_flags(vp, SCARG(uap, flags), l); 2538 VOP_UNLOCK(vp, 0); 2539 fd_putfile(SCARG(uap, fd)); 2540 return (error); 2541 } 2542 2543 /* 2544 * Change flags of a file given a path name; this version does 2545 * not follow links. 2546 */ 2547 int 2548 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2549 { 2550 /* { 2551 syscallarg(const char *) path; 2552 syscallarg(u_long) flags; 2553 } */ 2554 struct vnode *vp; 2555 int error; 2556 2557 error = namei_simple_user(SCARG(uap, path), 2558 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2559 if (error != 0) 2560 return (error); 2561 error = change_flags(vp, SCARG(uap, flags), l); 2562 vput(vp); 2563 return (error); 2564 } 2565 2566 /* 2567 * Common routine to change flags of a file. 2568 */ 2569 int 2570 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2571 { 2572 struct vattr vattr; 2573 int error; 2574 2575 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2576 /* 2577 * Non-superusers cannot change the flags on devices, even if they 2578 * own them. 2579 */ 2580 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2581 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2582 goto out; 2583 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2584 error = EINVAL; 2585 goto out; 2586 } 2587 } 2588 vattr_null(&vattr); 2589 vattr.va_flags = flags; 2590 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2591 out: 2592 return (error); 2593 } 2594 2595 /* 2596 * Change mode of a file given path name; this version follows links. 
2597 */ 2598 /* ARGSUSED */ 2599 int 2600 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2601 { 2602 /* { 2603 syscallarg(const char *) path; 2604 syscallarg(int) mode; 2605 } */ 2606 int error; 2607 struct vnode *vp; 2608 2609 error = namei_simple_user(SCARG(uap, path), 2610 NSM_FOLLOW_TRYEMULROOT, &vp); 2611 if (error != 0) 2612 return (error); 2613 2614 error = change_mode(vp, SCARG(uap, mode), l); 2615 2616 vrele(vp); 2617 return (error); 2618 } 2619 2620 /* 2621 * Change mode of a file given a file descriptor. 2622 */ 2623 /* ARGSUSED */ 2624 int 2625 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2626 { 2627 /* { 2628 syscallarg(int) fd; 2629 syscallarg(int) mode; 2630 } */ 2631 file_t *fp; 2632 int error; 2633 2634 /* fd_getvnode() will use the descriptor for us */ 2635 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2636 return (error); 2637 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2638 fd_putfile(SCARG(uap, fd)); 2639 return (error); 2640 } 2641 2642 /* 2643 * Change mode of a file given path name; this version does not follow links. 2644 */ 2645 /* ARGSUSED */ 2646 int 2647 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2648 { 2649 /* { 2650 syscallarg(const char *) path; 2651 syscallarg(int) mode; 2652 } */ 2653 int error; 2654 struct vnode *vp; 2655 2656 error = namei_simple_user(SCARG(uap, path), 2657 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2658 if (error != 0) 2659 return (error); 2660 2661 error = change_mode(vp, SCARG(uap, mode), l); 2662 2663 vrele(vp); 2664 return (error); 2665 } 2666 2667 /* 2668 * Common routine to set mode given a vnode. 
2669 */ 2670 static int 2671 change_mode(struct vnode *vp, int mode, struct lwp *l) 2672 { 2673 struct vattr vattr; 2674 int error; 2675 2676 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2677 vattr_null(&vattr); 2678 vattr.va_mode = mode & ALLPERMS; 2679 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2680 VOP_UNLOCK(vp, 0); 2681 return (error); 2682 } 2683 2684 /* 2685 * Set ownership given a path name; this version follows links. 2686 */ 2687 /* ARGSUSED */ 2688 int 2689 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2690 { 2691 /* { 2692 syscallarg(const char *) path; 2693 syscallarg(uid_t) uid; 2694 syscallarg(gid_t) gid; 2695 } */ 2696 int error; 2697 struct vnode *vp; 2698 2699 error = namei_simple_user(SCARG(uap, path), 2700 NSM_FOLLOW_TRYEMULROOT, &vp); 2701 if (error != 0) 2702 return (error); 2703 2704 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2705 2706 vrele(vp); 2707 return (error); 2708 } 2709 2710 /* 2711 * Set ownership given a path name; this version follows links. 2712 * Provides POSIX semantics. 2713 */ 2714 /* ARGSUSED */ 2715 int 2716 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2717 { 2718 /* { 2719 syscallarg(const char *) path; 2720 syscallarg(uid_t) uid; 2721 syscallarg(gid_t) gid; 2722 } */ 2723 int error; 2724 struct vnode *vp; 2725 2726 error = namei_simple_user(SCARG(uap, path), 2727 NSM_FOLLOW_TRYEMULROOT, &vp); 2728 if (error != 0) 2729 return (error); 2730 2731 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2732 2733 vrele(vp); 2734 return (error); 2735 } 2736 2737 /* 2738 * Set ownership given a file descriptor. 
2739 */ 2740 /* ARGSUSED */ 2741 int 2742 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2743 { 2744 /* { 2745 syscallarg(int) fd; 2746 syscallarg(uid_t) uid; 2747 syscallarg(gid_t) gid; 2748 } */ 2749 int error; 2750 file_t *fp; 2751 2752 /* fd_getvnode() will use the descriptor for us */ 2753 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2754 return (error); 2755 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2756 l, 0); 2757 fd_putfile(SCARG(uap, fd)); 2758 return (error); 2759 } 2760 2761 /* 2762 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2763 */ 2764 /* ARGSUSED */ 2765 int 2766 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2767 { 2768 /* { 2769 syscallarg(int) fd; 2770 syscallarg(uid_t) uid; 2771 syscallarg(gid_t) gid; 2772 } */ 2773 int error; 2774 file_t *fp; 2775 2776 /* fd_getvnode() will use the descriptor for us */ 2777 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2778 return (error); 2779 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2780 l, 1); 2781 fd_putfile(SCARG(uap, fd)); 2782 return (error); 2783 } 2784 2785 /* 2786 * Set ownership given a path name; this version does not follow links. 2787 */ 2788 /* ARGSUSED */ 2789 int 2790 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2791 { 2792 /* { 2793 syscallarg(const char *) path; 2794 syscallarg(uid_t) uid; 2795 syscallarg(gid_t) gid; 2796 } */ 2797 int error; 2798 struct vnode *vp; 2799 2800 error = namei_simple_user(SCARG(uap, path), 2801 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2802 if (error != 0) 2803 return (error); 2804 2805 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2806 2807 vrele(vp); 2808 return (error); 2809 } 2810 2811 /* 2812 * Set ownership given a path name; this version does not follow links. 2813 * Provides POSIX/XPG semantics. 
2814 */ 2815 /* ARGSUSED */ 2816 int 2817 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2818 { 2819 /* { 2820 syscallarg(const char *) path; 2821 syscallarg(uid_t) uid; 2822 syscallarg(gid_t) gid; 2823 } */ 2824 int error; 2825 struct vnode *vp; 2826 2827 error = namei_simple_user(SCARG(uap, path), 2828 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2829 if (error != 0) 2830 return (error); 2831 2832 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2833 2834 vrele(vp); 2835 return (error); 2836 } 2837 2838 /* 2839 * Common routine to set ownership given a vnode. 2840 */ 2841 static int 2842 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2843 int posix_semantics) 2844 { 2845 struct vattr vattr; 2846 mode_t newmode; 2847 int error; 2848 2849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2850 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2851 goto out; 2852 2853 #define CHANGED(x) ((int)(x) != -1) 2854 newmode = vattr.va_mode; 2855 if (posix_semantics) { 2856 /* 2857 * POSIX/XPG semantics: if the caller is not the super-user, 2858 * clear set-user-id and set-group-id bits. Both POSIX and 2859 * the XPG consider the behaviour for calls by the super-user 2860 * implementation-defined; we leave the set-user-id and set- 2861 * group-id settings intact in that case. 2862 */ 2863 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2864 NULL) != 0) 2865 newmode &= ~(S_ISUID | S_ISGID); 2866 } else { 2867 /* 2868 * NetBSD semantics: when changing owner and/or group, 2869 * clear the respective bit(s). 2870 */ 2871 if (CHANGED(uid)) 2872 newmode &= ~S_ISUID; 2873 if (CHANGED(gid)) 2874 newmode &= ~S_ISGID; 2875 } 2876 /* Update va_mode iff altered. */ 2877 if (vattr.va_mode == newmode) 2878 newmode = VNOVAL; 2879 2880 vattr_null(&vattr); 2881 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2882 vattr.va_gid = CHANGED(gid) ? 
gid : (gid_t)VNOVAL; 2883 vattr.va_mode = newmode; 2884 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2885 #undef CHANGED 2886 2887 out: 2888 VOP_UNLOCK(vp, 0); 2889 return (error); 2890 } 2891 2892 /* 2893 * Set the access and modification times given a path name; this 2894 * version follows links. 2895 */ 2896 /* ARGSUSED */ 2897 int 2898 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2899 register_t *retval) 2900 { 2901 /* { 2902 syscallarg(const char *) path; 2903 syscallarg(const struct timeval *) tptr; 2904 } */ 2905 2906 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2907 SCARG(uap, tptr), UIO_USERSPACE); 2908 } 2909 2910 /* 2911 * Set the access and modification times given a file descriptor. 2912 */ 2913 /* ARGSUSED */ 2914 int 2915 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2916 register_t *retval) 2917 { 2918 /* { 2919 syscallarg(int) fd; 2920 syscallarg(const struct timeval *) tptr; 2921 } */ 2922 int error; 2923 file_t *fp; 2924 2925 /* fd_getvnode() will use the descriptor for us */ 2926 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2927 return (error); 2928 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2929 UIO_USERSPACE); 2930 fd_putfile(SCARG(uap, fd)); 2931 return (error); 2932 } 2933 2934 /* 2935 * Set the access and modification times given a path name; this 2936 * version does not follow links. 2937 */ 2938 int 2939 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2940 register_t *retval) 2941 { 2942 /* { 2943 syscallarg(const char *) path; 2944 syscallarg(const struct timeval *) tptr; 2945 } */ 2946 2947 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2948 SCARG(uap, tptr), UIO_USERSPACE); 2949 } 2950 2951 /* 2952 * Common routine to set access and modification times given a vnode. 
2953 */ 2954 int 2955 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2956 const struct timeval *tptr, enum uio_seg seg) 2957 { 2958 struct vattr vattr; 2959 int error, dorele = 0; 2960 namei_simple_flags_t sflags; 2961 2962 bool vanull, setbirthtime; 2963 struct timespec ts[2]; 2964 2965 /* 2966 * I have checked all callers and they pass either FOLLOW, 2967 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 2968 * is 0. More to the point, they don't pass anything else. 2969 * Let's keep it that way at least until the namei interfaces 2970 * are fully sanitized. 2971 */ 2972 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 2973 sflags = (flag == FOLLOW) ? 2974 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 2975 2976 if (tptr == NULL) { 2977 vanull = true; 2978 nanotime(&ts[0]); 2979 ts[1] = ts[0]; 2980 } else { 2981 struct timeval tv[2]; 2982 2983 vanull = false; 2984 if (seg != UIO_SYSSPACE) { 2985 error = copyin(tptr, tv, sizeof (tv)); 2986 if (error != 0) 2987 return error; 2988 tptr = tv; 2989 } 2990 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 2991 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 2992 } 2993 2994 if (vp == NULL) { 2995 /* note: SEG describes TPTR, not PATH; PATH is always user */ 2996 error = namei_simple_user(path, sflags, &vp); 2997 if (error != 0) 2998 return error; 2999 dorele = 1; 3000 } 3001 3002 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3003 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3004 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3005 vattr_null(&vattr); 3006 vattr.va_atime = ts[0]; 3007 vattr.va_mtime = ts[1]; 3008 if (setbirthtime) 3009 vattr.va_birthtime = ts[1]; 3010 if (vanull) 3011 vattr.va_vaflags |= VA_UTIMES_NULL; 3012 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3013 VOP_UNLOCK(vp, 0); 3014 3015 if (dorele != 0) 3016 vrele(vp); 3017 3018 return error; 3019 } 3020 3021 /* 3022 * Truncate a file given its path name. 
3023 */ 3024 /* ARGSUSED */ 3025 int 3026 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3027 { 3028 /* { 3029 syscallarg(const char *) path; 3030 syscallarg(int) pad; 3031 syscallarg(off_t) length; 3032 } */ 3033 struct vnode *vp; 3034 struct vattr vattr; 3035 int error; 3036 3037 error = namei_simple_user(SCARG(uap, path), 3038 NSM_FOLLOW_TRYEMULROOT, &vp); 3039 if (error != 0) 3040 return (error); 3041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3042 if (vp->v_type == VDIR) 3043 error = EISDIR; 3044 else if ((error = vn_writechk(vp)) == 0 && 3045 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3046 vattr_null(&vattr); 3047 vattr.va_size = SCARG(uap, length); 3048 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3049 } 3050 vput(vp); 3051 return (error); 3052 } 3053 3054 /* 3055 * Truncate a file given a file descriptor. 3056 */ 3057 /* ARGSUSED */ 3058 int 3059 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3060 { 3061 /* { 3062 syscallarg(int) fd; 3063 syscallarg(int) pad; 3064 syscallarg(off_t) length; 3065 } */ 3066 struct vattr vattr; 3067 struct vnode *vp; 3068 file_t *fp; 3069 int error; 3070 3071 /* fd_getvnode() will use the descriptor for us */ 3072 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3073 return (error); 3074 if ((fp->f_flag & FWRITE) == 0) { 3075 error = EINVAL; 3076 goto out; 3077 } 3078 vp = fp->f_data; 3079 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3080 if (vp->v_type == VDIR) 3081 error = EISDIR; 3082 else if ((error = vn_writechk(vp)) == 0) { 3083 vattr_null(&vattr); 3084 vattr.va_size = SCARG(uap, length); 3085 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3086 } 3087 VOP_UNLOCK(vp, 0); 3088 out: 3089 fd_putfile(SCARG(uap, fd)); 3090 return (error); 3091 } 3092 3093 /* 3094 * Sync an open file. 
3095 */ 3096 /* ARGSUSED */ 3097 int 3098 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3099 { 3100 /* { 3101 syscallarg(int) fd; 3102 } */ 3103 struct vnode *vp; 3104 file_t *fp; 3105 int error; 3106 3107 /* fd_getvnode() will use the descriptor for us */ 3108 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3109 return (error); 3110 vp = fp->f_data; 3111 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3112 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3113 VOP_UNLOCK(vp, 0); 3114 fd_putfile(SCARG(uap, fd)); 3115 return (error); 3116 } 3117 3118 /* 3119 * Sync a range of file data. API modeled after that found in AIX. 3120 * 3121 * FDATASYNC indicates that we need only save enough metadata to be able 3122 * to re-read the written data. Note we duplicate AIX's requirement that 3123 * the file be open for writing. 3124 */ 3125 /* ARGSUSED */ 3126 int 3127 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3128 { 3129 /* { 3130 syscallarg(int) fd; 3131 syscallarg(int) flags; 3132 syscallarg(off_t) start; 3133 syscallarg(off_t) length; 3134 } */ 3135 struct vnode *vp; 3136 file_t *fp; 3137 int flags, nflags; 3138 off_t s, e, len; 3139 int error; 3140 3141 /* fd_getvnode() will use the descriptor for us */ 3142 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3143 return (error); 3144 3145 if ((fp->f_flag & FWRITE) == 0) { 3146 error = EBADF; 3147 goto out; 3148 } 3149 3150 flags = SCARG(uap, flags); 3151 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3152 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3153 error = EINVAL; 3154 goto out; 3155 } 3156 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3157 if (flags & FDATASYNC) 3158 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3159 else 3160 nflags = FSYNC_WAIT; 3161 if (flags & FDISKSYNC) 3162 nflags |= FSYNC_CACHE; 3163 3164 len = SCARG(uap, length); 3165 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3166 if 
(len) { 3167 s = SCARG(uap, start); 3168 e = s + len; 3169 if (e < s) { 3170 error = EINVAL; 3171 goto out; 3172 } 3173 } else { 3174 e = 0; 3175 s = 0; 3176 } 3177 3178 vp = fp->f_data; 3179 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3180 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3181 VOP_UNLOCK(vp, 0); 3182 out: 3183 fd_putfile(SCARG(uap, fd)); 3184 return (error); 3185 } 3186 3187 /* 3188 * Sync the data of an open file. 3189 */ 3190 /* ARGSUSED */ 3191 int 3192 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3193 { 3194 /* { 3195 syscallarg(int) fd; 3196 } */ 3197 struct vnode *vp; 3198 file_t *fp; 3199 int error; 3200 3201 /* fd_getvnode() will use the descriptor for us */ 3202 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3203 return (error); 3204 if ((fp->f_flag & FWRITE) == 0) { 3205 fd_putfile(SCARG(uap, fd)); 3206 return (EBADF); 3207 } 3208 vp = fp->f_data; 3209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3210 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3211 VOP_UNLOCK(vp, 0); 3212 fd_putfile(SCARG(uap, fd)); 3213 return (error); 3214 } 3215 3216 /* 3217 * Rename files, (standard) BSD semantics frontend. 3218 */ 3219 /* ARGSUSED */ 3220 int 3221 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3222 { 3223 /* { 3224 syscallarg(const char *) from; 3225 syscallarg(const char *) to; 3226 } */ 3227 3228 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3229 } 3230 3231 /* 3232 * Rename files, POSIX semantics frontend. 3233 */ 3234 /* ARGSUSED */ 3235 int 3236 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3237 { 3238 /* { 3239 syscallarg(const char *) from; 3240 syscallarg(const char *) to; 3241 } */ 3242 3243 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3244 } 3245 3246 /* 3247 * Rename files. 
* Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 * If `from' and `to' refer to the same object, the value of the `retain'
 * argument is used to determine whether `from' will be
 *
 * (retain == 0)	deleted unless `from' and `to' refer to the same
 *		object in the file system's name space (BSD).
 * (retain == 1)	always retained (POSIX).
 *
 * `seg' is passed to NDINIT() for both path names.  Internally the
 * value -1 in `error' is used as a "nothing to do" marker: it skips
 * the VOP_RENAME() call like any other error, but is converted to 0
 * before returning.
 */
int
do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
{
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	struct mount *fs;
	struct lwp *l = curlwp;
	struct proc *p;		/* assigned below but never read here */
	uint32_t saveflag;
	int error;

	/* First lookup of the source; its parent comes back locked. */
	NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME,
	    seg, from);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp, 0);
	fvp = fromnd.ni_vp;

	/* Take the per-filesystem rename lock of the source's mount. */
	fs = fvp->v_mount;
	error = VFS_RENAMELOCK_ENTER(fs);
	if (error) {
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}

	/*
	 * close, partially, yet another race - ideally we should only
	 * go as far as getting fromnd.ni_dvp before getting the per-fs
	 * lock, and then continue to get fromnd.ni_vp, but we can't do
	 * that with namei as it stands.
	 *
	 * This still won't prevent rmdir from nuking fromnd.ni_vp
	 * under us. The real fix is to get the locks in the right
	 * order and do the lookups in the right places, but that's a
	 * major rototill.
	 *
	 * Preserve the SAVESTART in cn_flags, because who knows what
	 * might happen if we don't.
	 *
	 * Note: this logic (as well as this whole function) is cloned
	 * in nfs_serv.c. Proceed accordingly.
	 */
	vrele(fvp);
	/* Refuse to rename "." or "..". */
	if ((fromnd.ni_cnd.cn_namelen == 1 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.') ||
	    (fromnd.ni_cnd.cn_namelen == 2 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.' &&
	     fromnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	/* Redo the source lookup now that the rename lock is held. */
	saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
	fromnd.ni_cnd.cn_flags &= ~SAVESTART;
	vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
	fromnd.ni_cnd.cn_flags |= saveflag;
	if (error) {
		VOP_UNLOCK(fromnd.ni_dvp, 0);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	VOP_UNLOCK(fromnd.ni_vp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp, 0);
	fvp = fromnd.ni_vp;

	/* Look up the target; CREATEDIR is added when the source is a dir. */
	NDINIT(&tond, RENAME,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
	      | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0),
	    seg, to);
	if ((error = namei(&tond)) != 0) {
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}

	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;

	/*
	 * Source and destination refer to the same object.
	 *
	 * With `retain', or when both names are the same entry in the
	 * same directory, mark the operation as a no-op (-1).  Note that
	 * this assignment can replace the EINVAL set just above.
	 */
	if (fvp == tvp) {
		if (retain)
			error = -1;
		else if (fromnd.ni_dvp == tdvp &&
		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
		    !memcmp(fromnd.ni_cnd.cn_nameptr,
		          tond.ni_cnd.cn_nameptr,
		          fromnd.ni_cnd.cn_namelen))
		error = -1;
	}

#if NVERIEXEC > 0
	if (!error) {
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/* Hand NUL-terminated copies of both names to veriexec. */
		f1_len = fromnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tond.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(l, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);
	}
#endif /* NVERIEXEC > 0 */

out:
	p = l->l_proc;
	if (!error) {
		/*
		 * No vnode is released on this path; presumably
		 * VOP_RENAME() disposes of them itself -- confirm against
		 * the VOP_RENAME contract.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		VFS_RENAMELOCK_EXIT(fs);
	} else {
		/* Error or no-op: undo both lookups by hand. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
	/* -1 is the internal "nothing to do" marker; report success. */
	return (error == -1 ? 0 : error);
}

/*
 * Make a directory file.
3422 */ 3423 /* ARGSUSED */ 3424 int 3425 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3426 { 3427 /* { 3428 syscallarg(const char *) path; 3429 syscallarg(int) mode; 3430 } */ 3431 3432 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3433 } 3434 3435 int 3436 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3437 { 3438 struct proc *p = curlwp->l_proc; 3439 struct vnode *vp; 3440 struct vattr vattr; 3441 int error; 3442 struct nameidata nd; 3443 3444 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, 3445 seg, path); 3446 if ((error = namei(&nd)) != 0) 3447 return (error); 3448 vp = nd.ni_vp; 3449 if (vp != NULL) { 3450 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3451 if (nd.ni_dvp == vp) 3452 vrele(nd.ni_dvp); 3453 else 3454 vput(nd.ni_dvp); 3455 vrele(vp); 3456 return (EEXIST); 3457 } 3458 vattr_null(&vattr); 3459 vattr.va_type = VDIR; 3460 /* We will read cwdi->cwdi_cmask unlocked. */ 3461 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3462 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3463 if (!error) 3464 vput(nd.ni_vp); 3465 return (error); 3466 } 3467 3468 /* 3469 * Remove a directory file. 3470 */ 3471 /* ARGSUSED */ 3472 int 3473 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3474 { 3475 /* { 3476 syscallarg(const char *) path; 3477 } */ 3478 struct vnode *vp; 3479 int error; 3480 struct nameidata nd; 3481 3482 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3483 SCARG(uap, path)); 3484 if ((error = namei(&nd)) != 0) 3485 return (error); 3486 vp = nd.ni_vp; 3487 if (vp->v_type != VDIR) { 3488 error = ENOTDIR; 3489 goto out; 3490 } 3491 /* 3492 * No rmdir "." please. 3493 */ 3494 if (nd.ni_dvp == vp) { 3495 error = EINVAL; 3496 goto out; 3497 } 3498 /* 3499 * The root of a mounted filesystem cannot be deleted. 
3500 */ 3501 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3502 error = EBUSY; 3503 goto out; 3504 } 3505 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3506 return (error); 3507 3508 out: 3509 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3510 if (nd.ni_dvp == vp) 3511 vrele(nd.ni_dvp); 3512 else 3513 vput(nd.ni_dvp); 3514 vput(vp); 3515 return (error); 3516 } 3517 3518 /* 3519 * Read a block of directory entries in a file system independent format. 3520 */ 3521 int 3522 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3523 { 3524 /* { 3525 syscallarg(int) fd; 3526 syscallarg(char *) buf; 3527 syscallarg(size_t) count; 3528 } */ 3529 file_t *fp; 3530 int error, done; 3531 3532 /* fd_getvnode() will use the descriptor for us */ 3533 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3534 return (error); 3535 if ((fp->f_flag & FREAD) == 0) { 3536 error = EBADF; 3537 goto out; 3538 } 3539 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3540 SCARG(uap, count), &done, l, 0, 0); 3541 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3542 *retval = done; 3543 out: 3544 fd_putfile(SCARG(uap, fd)); 3545 return (error); 3546 } 3547 3548 /* 3549 * Set the mode mask for creation of filesystem nodes. 3550 */ 3551 int 3552 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3553 { 3554 /* { 3555 syscallarg(mode_t) newmask; 3556 } */ 3557 struct proc *p = l->l_proc; 3558 struct cwdinfo *cwdi; 3559 3560 /* 3561 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3562 * important is that we serialize changes to the mask. The 3563 * rw_exit() will issue a write memory barrier on our behalf, 3564 * and force the changes out to other CPUs (as it must use an 3565 * atomic operation, draining the local CPU's store buffers). 
3566 */ 3567 cwdi = p->p_cwdi; 3568 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3569 *retval = cwdi->cwdi_cmask; 3570 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3571 rw_exit(&cwdi->cwdi_lock); 3572 3573 return (0); 3574 } 3575 3576 int 3577 dorevoke(struct vnode *vp, kauth_cred_t cred) 3578 { 3579 struct vattr vattr; 3580 int error; 3581 3582 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3583 return error; 3584 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3585 (error = kauth_authorize_generic(cred, 3586 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3587 VOP_REVOKE(vp, REVOKEALL); 3588 return (error); 3589 } 3590 3591 /* 3592 * Void all references to file by ripping underlying filesystem 3593 * away from vnode. 3594 */ 3595 /* ARGSUSED */ 3596 int 3597 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3598 { 3599 /* { 3600 syscallarg(const char *) path; 3601 } */ 3602 struct vnode *vp; 3603 int error; 3604 3605 error = namei_simple_user(SCARG(uap, path), 3606 NSM_FOLLOW_TRYEMULROOT, &vp); 3607 if (error != 0) 3608 return (error); 3609 error = dorevoke(vp, l->l_cred); 3610 vrele(vp); 3611 return (error); 3612 } 3613