1 /* $NetBSD: vfs_syscalls.c,v 1.394 2009/05/02 14:13:28 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.394 2009/05/02 14:13:28 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_dir(struct nameidata *, struct lwp *); 114 static int change_flags(struct vnode *, u_long, struct lwp *); 115 static int change_mode(struct vnode *, int, struct lwp *l); 116 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 117 118 void checkdirs(struct vnode *); 119 120 /* 121 * Virtual File System System Calls 122 */ 123 124 /* 125 * Mount a file system. 126 */ 127 128 /* 129 * This table is used to maintain compatibility with 4.3BSD 130 * and NetBSD 0.9 mount syscalls - and possibly other systems. 131 * Note, the order is important! 132 * 133 * Do not modify this table. It should only contain filesystems 134 * supported by NetBSD 0.9 and 4.3BSD. 135 */ 136 const char * const mountcompatnames[] = { 137 NULL, /* 0 = MOUNT_NONE */ 138 MOUNT_FFS, /* 1 = MOUNT_UFS */ 139 MOUNT_NFS, /* 2 */ 140 MOUNT_MFS, /* 3 */ 141 MOUNT_MSDOS, /* 4 */ 142 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 143 MOUNT_FDESC, /* 6 */ 144 MOUNT_KERNFS, /* 7 */ 145 NULL, /* 8 = MOUNT_DEVFS */ 146 MOUNT_AFS, /* 9 */ 147 }; 148 const int nmountcompatnames = sizeof(mountcompatnames) / 149 sizeof(mountcompatnames[0]); 150 151 static int 152 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 153 void *data, size_t *data_len) 154 { 155 struct mount *mp; 156 int error = 0, saved_flags; 157 158 mp = vp->v_mount; 159 saved_flags = mp->mnt_flag; 160 161 /* We can operate only on VV_ROOT nodes. */ 162 if ((vp->v_vflag & VV_ROOT) == 0) { 163 error = EINVAL; 164 goto out; 165 } 166 167 /* 168 * We only allow the filesystem to be reloaded if it 169 * is currently mounted read-only. Additionally, we 170 * prevent read-write to read-only downgrades. 171 */ 172 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 173 (mp->mnt_flag & MNT_RDONLY) == 0) { 174 error = EOPNOTSUPP; /* Needs translation */ 175 goto out; 176 } 177 178 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 179 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 180 if (error) 181 goto out; 182 183 if (vfs_busy(mp, NULL)) { 184 error = EPERM; 185 goto out; 186 } 187 188 mutex_enter(&mp->mnt_updating); 189 190 mp->mnt_flag &= ~MNT_OP_FLAGS; 191 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 192 193 /* 194 * Set the mount level flags. 195 */ 196 if (flags & MNT_RDONLY) 197 mp->mnt_flag |= MNT_RDONLY; 198 else if (mp->mnt_flag & MNT_RDONLY) 199 mp->mnt_iflag |= IMNT_WANTRDWR; 200 mp->mnt_flag &= 201 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 202 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 203 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 204 MNT_LOG); 205 mp->mnt_flag |= flags & 206 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 209 MNT_LOG | MNT_IGNORE); 210 211 error = VFS_MOUNT(mp, path, data, data_len); 212 213 if (error && data != NULL) { 214 int error2; 215 216 /* 217 * Update failed; let's try and see if it was an 218 * export request. For compat with 3.0 and earlier. 219 */ 220 error2 = vfs_hooks_reexport(mp, path, data); 221 222 /* 223 * Only update error code if the export request was 224 * understood but some problem occurred while 225 * processing it. 226 */ 227 if (error2 != EJUSTRETURN) 228 error = error2; 229 } 230 231 if (mp->mnt_iflag & IMNT_WANTRDWR) 232 mp->mnt_flag &= ~MNT_RDONLY; 233 if (error) 234 mp->mnt_flag = saved_flags; 235 mp->mnt_flag &= ~MNT_OP_FLAGS; 236 mp->mnt_iflag &= ~IMNT_WANTRDWR; 237 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 238 if (mp->mnt_syncer == NULL) 239 error = vfs_allocate_syncvnode(mp); 240 } else { 241 if (mp->mnt_syncer != NULL) 242 vfs_deallocate_syncvnode(mp); 243 } 244 mutex_exit(&mp->mnt_updating); 245 vfs_unbusy(mp, false, NULL); 246 247 out: 248 return (error); 249 } 250 251 static int 252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 253 { 254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 255 int error; 256 257 /* Copy file-system type from userspace. */ 258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 259 if (error) { 260 /* 261 * Historically, filesystem types were identified by numbers. 262 * If we get an integer for the filesystem type instead of a 263 * string, we check to see if it matches one of the historic 264 * filesystem types. 265 */ 266 u_long fsindex = (u_long)fstype; 267 if (fsindex >= nmountcompatnames || 268 mountcompatnames[fsindex] == NULL) 269 return ENODEV; 270 strlcpy(fstypename, mountcompatnames[fsindex], 271 sizeof(fstypename)); 272 } 273 274 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 275 if (strcmp(fstypename, "ufs") == 0) 276 fstypename[0] = 'f'; 277 278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 279 return 0; 280 281 /* If we can autoload a vfs module, try again */ 282 mutex_enter(&module_lock); 283 (void)module_autoload(fstype, MODULE_CLASS_VFS); 284 mutex_exit(&module_lock); 285 286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 287 return 0; 288 289 return ENODEV; 290 } 291 292 static int 293 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 294 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 295 { 296 struct mount *mp; 297 struct vnode *vp = *vpp; 298 struct vattr va; 299 int error; 300 301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 303 if (error) 304 return error; 305 306 /* Can't make a non-dir a mount-point (from here anyway). */ 307 if (vp->v_type != VDIR) 308 return ENOTDIR; 309 310 /* 311 * If the user is not root, ensure that they own the directory 312 * onto which we are attempting to mount. 313 */ 314 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 315 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 316 (error = kauth_authorize_generic(l->l_cred, 317 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 318 return error; 319 } 320 321 if (flags & MNT_EXPORTED) 322 return EINVAL; 323 324 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 325 return error; 326 327 /* 328 * Check if a file-system is not already mounted on this vnode. 329 */ 330 if (vp->v_mountedhere != NULL) 331 return EBUSY; 332 333 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) 334 return ENOMEM; 335 336 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 337 338 /* 339 * The underlying file system may refuse the mount for 340 * various reasons. Allow the user to force it to happen. 341 * 342 * Set the mount level flags. 343 */ 344 mp->mnt_flag = flags & 345 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 346 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 347 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 348 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 349 350 mutex_enter(&mp->mnt_updating); 351 error = VFS_MOUNT(mp, path, data, data_len); 352 mp->mnt_flag &= ~MNT_OP_FLAGS; 353 354 /* 355 * Put the new filesystem on the mount list after root. 356 */ 357 cache_purge(vp); 358 if (error != 0) { 359 vp->v_mountedhere = NULL; 360 mutex_exit(&mp->mnt_updating); 361 vfs_unbusy(mp, false, NULL); 362 vfs_destroy(mp); 363 return error; 364 } 365 366 mp->mnt_iflag &= ~IMNT_WANTRDWR; 367 mutex_enter(&mountlist_lock); 368 vp->v_mountedhere = mp; 369 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 370 mutex_exit(&mountlist_lock); 371 vn_restorerecurse(vp, recurse); 372 VOP_UNLOCK(vp, 0); 373 checkdirs(vp); 374 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 375 error = vfs_allocate_syncvnode(mp); 376 /* Hold an additional reference to the mount across VFS_START(). */ 377 mutex_exit(&mp->mnt_updating); 378 vfs_unbusy(mp, true, NULL); 379 (void) VFS_STATVFS(mp, &mp->mnt_stat); 380 error = VFS_START(mp, 0); 381 if (error) 382 vrele(vp); 383 /* Drop reference held for VFS_START(). */ 384 vfs_destroy(mp); 385 *vpp = NULL; 386 return error; 387 } 388 389 static int 390 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 391 void *data, size_t *data_len) 392 { 393 struct mount *mp; 394 int error; 395 396 /* If MNT_GETARGS is specified, it should be the only flag. */ 397 if (flags & ~MNT_GETARGS) 398 return EINVAL; 399 400 mp = vp->v_mount; 401 402 /* XXX: probably some notion of "can see" here if we want isolation. */ 403 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 404 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 405 if (error) 406 return error; 407 408 if ((vp->v_vflag & VV_ROOT) == 0) 409 return EINVAL; 410 411 if (vfs_busy(mp, NULL)) 412 return EPERM; 413 414 mutex_enter(&mp->mnt_updating); 415 mp->mnt_flag &= ~MNT_OP_FLAGS; 416 mp->mnt_flag |= MNT_GETARGS; 417 error = VFS_MOUNT(mp, path, data, data_len); 418 mp->mnt_flag &= ~MNT_OP_FLAGS; 419 mutex_exit(&mp->mnt_updating); 420 421 vfs_unbusy(mp, false, NULL); 422 return (error); 423 } 424 425 int 426 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 427 { 428 /* { 429 syscallarg(const char *) type; 430 syscallarg(const char *) path; 431 syscallarg(int) flags; 432 syscallarg(void *) data; 433 syscallarg(size_t) data_len; 434 } */ 435 436 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 437 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 438 SCARG(uap, data_len), retval); 439 } 440 441 int 442 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 443 const char *path, int flags, void *data, enum uio_seg data_seg, 444 size_t data_len, register_t *retval) 445 { 446 struct vnode *vp; 447 struct nameidata nd; 448 void *data_buf = data; 449 u_int recurse; 450 int error; 451 452 /* 453 * Get vnode to be covered 454 */ 455 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 456 if ((error = namei(&nd)) != 0) 457 return (error); 458 vp = nd.ni_vp; 459 460 /* 461 * A lookup in VFS_MOUNT might result in an attempt to 462 * lock this vnode again, so make the lock recursive. 463 */ 464 if (vfsops == NULL) { 465 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 466 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 467 recurse = vn_setrecurse(vp); 468 vfsops = vp->v_mount->mnt_op; 469 } else { 470 /* 'type' is userspace */ 471 error = mount_get_vfsops(type, &vfsops); 472 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 473 recurse = vn_setrecurse(vp); 474 if (error != 0) 475 goto done; 476 } 477 } else { 478 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 479 recurse = vn_setrecurse(vp); 480 } 481 482 if (data != NULL && data_seg == UIO_USERSPACE) { 483 if (data_len == 0) { 484 /* No length supplied, use default for filesystem */ 485 data_len = vfsops->vfs_min_mount_data; 486 if (data_len > VFS_MAX_MOUNT_DATA) { 487 error = EINVAL; 488 goto done; 489 } 490 /* 491 * Hopefully a longer buffer won't make copyin() fail. 492 * For compatibility with 3.0 and earlier. 493 */ 494 if (flags & MNT_UPDATE 495 && data_len < sizeof (struct mnt_export_args30)) 496 data_len = sizeof (struct mnt_export_args30); 497 } 498 data_buf = kmem_alloc(data_len, KM_SLEEP); 499 500 /* NFS needs the buffer even for mnt_getargs .... */ 501 error = copyin(data, data_buf, data_len); 502 if (error != 0) 503 goto done; 504 } 505 506 if (flags & MNT_GETARGS) { 507 if (data_len == 0) { 508 error = EINVAL; 509 goto done; 510 } 511 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 512 if (error != 0) 513 goto done; 514 if (data_seg == UIO_USERSPACE) 515 error = copyout(data_buf, data, data_len); 516 *retval = data_len; 517 } else if (flags & MNT_UPDATE) { 518 error = mount_update(l, vp, path, flags, data_buf, &data_len); 519 } else { 520 /* Locking is handled internally in mount_domount(). */ 521 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 522 &data_len, recurse); 523 } 524 525 done: 526 if (vp != NULL) { 527 vn_restorerecurse(vp, recurse); 528 vput(vp); 529 } 530 if (data_buf != data) 531 kmem_free(data_buf, data_len); 532 return (error); 533 } 534 535 /* 536 * Scan all active processes to see if any of them have a current 537 * or root directory onto which the new filesystem has just been 538 * mounted. If so, replace them with the new mount point. 539 */ 540 void 541 checkdirs(struct vnode *olddp) 542 { 543 struct cwdinfo *cwdi; 544 struct vnode *newdp, *rele1, *rele2; 545 struct proc *p; 546 bool retry; 547 548 if (olddp->v_usecount == 1) 549 return; 550 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 551 panic("mount: lost mount"); 552 553 do { 554 retry = false; 555 mutex_enter(proc_lock); 556 PROCLIST_FOREACH(p, &allproc) { 557 if ((p->p_flag & PK_MARKER) != 0) 558 continue; 559 if ((cwdi = p->p_cwdi) == NULL) 560 continue; 561 /* 562 * Can't change to the old directory any more, 563 * so even if we see a stale value it's not a 564 * problem. 565 */ 566 if (cwdi->cwdi_cdir != olddp && 567 cwdi->cwdi_rdir != olddp) 568 continue; 569 retry = true; 570 rele1 = NULL; 571 rele2 = NULL; 572 atomic_inc_uint(&cwdi->cwdi_refcnt); 573 mutex_exit(proc_lock); 574 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 575 if (cwdi->cwdi_cdir == olddp) { 576 rele1 = cwdi->cwdi_cdir; 577 VREF(newdp); 578 cwdi->cwdi_cdir = newdp; 579 } 580 if (cwdi->cwdi_rdir == olddp) { 581 rele2 = cwdi->cwdi_rdir; 582 VREF(newdp); 583 cwdi->cwdi_rdir = newdp; 584 } 585 rw_exit(&cwdi->cwdi_lock); 586 cwdfree(cwdi); 587 if (rele1 != NULL) 588 vrele(rele1); 589 if (rele2 != NULL) 590 vrele(rele2); 591 mutex_enter(proc_lock); 592 break; 593 } 594 mutex_exit(proc_lock); 595 } while (retry); 596 597 if (rootvnode == olddp) { 598 vrele(rootvnode); 599 VREF(newdp); 600 rootvnode = newdp; 601 } 602 vput(newdp); 603 } 604 605 /* 606 * Unmount a file system. 607 * 608 * Note: unmount takes a path to the vnode mounted on as argument, 609 * not special file (as before). 610 */ 611 /* ARGSUSED */ 612 int 613 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 614 { 615 /* { 616 syscallarg(const char *) path; 617 syscallarg(int) flags; 618 } */ 619 struct vnode *vp; 620 struct mount *mp; 621 int error; 622 struct nameidata nd; 623 624 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 625 SCARG(uap, path)); 626 if ((error = namei(&nd)) != 0) 627 return (error); 628 vp = nd.ni_vp; 629 mp = vp->v_mount; 630 atomic_inc_uint(&mp->mnt_refcnt); 631 VOP_UNLOCK(vp, 0); 632 633 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 634 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 635 if (error) { 636 vrele(vp); 637 vfs_destroy(mp); 638 return (error); 639 } 640 641 /* 642 * Don't allow unmounting the root file system. 643 */ 644 if (mp->mnt_flag & MNT_ROOTFS) { 645 vrele(vp); 646 vfs_destroy(mp); 647 return (EINVAL); 648 } 649 650 /* 651 * Must be the root of the filesystem 652 */ 653 if ((vp->v_vflag & VV_ROOT) == 0) { 654 vrele(vp); 655 vfs_destroy(mp); 656 return (EINVAL); 657 } 658 659 vrele(vp); 660 error = dounmount(mp, SCARG(uap, flags), l); 661 vfs_destroy(mp); 662 return error; 663 } 664 665 /* 666 * Do the actual file system unmount. File system is assumed to have 667 * been locked by the caller. 668 * 669 * => Caller hold reference to the mount, explicitly for dounmount(). 670 */ 671 int 672 dounmount(struct mount *mp, int flags, struct lwp *l) 673 { 674 struct vnode *coveredvp; 675 int error; 676 int async; 677 int used_syncer; 678 679 #if NVERIEXEC > 0 680 error = veriexec_unmountchk(mp); 681 if (error) 682 return (error); 683 #endif /* NVERIEXEC > 0 */ 684 685 /* 686 * XXX Freeze syncer. Must do this before locking the 687 * mount point. See dounmount() for details. 688 */ 689 mutex_enter(&syncer_mutex); 690 rw_enter(&mp->mnt_unmounting, RW_WRITER); 691 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 692 rw_exit(&mp->mnt_unmounting); 693 mutex_exit(&syncer_mutex); 694 return ENOENT; 695 } 696 697 used_syncer = (mp->mnt_syncer != NULL); 698 699 /* 700 * XXX Syncer must be frozen when we get here. This should really 701 * be done on a per-mountpoint basis, but the syncer doesn't work 702 * like that. 703 * 704 * The caller of dounmount() must acquire syncer_mutex because 705 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 706 * order, and we must preserve that order to avoid deadlock. 707 * 708 * So, if the file system did not use the syncer, now is 709 * the time to release the syncer_mutex. 710 */ 711 if (used_syncer == 0) 712 mutex_exit(&syncer_mutex); 713 714 mp->mnt_iflag |= IMNT_UNMOUNT; 715 async = mp->mnt_flag & MNT_ASYNC; 716 mp->mnt_flag &= ~MNT_ASYNC; 717 cache_purgevfs(mp); /* remove cache entries for this file sys */ 718 if (mp->mnt_syncer != NULL) 719 vfs_deallocate_syncvnode(mp); 720 error = 0; 721 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 722 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 723 } 724 vfs_scrubvnlist(mp); 725 if (error == 0 || (flags & MNT_FORCE)) 726 error = VFS_UNMOUNT(mp, flags); 727 if (error) { 728 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 729 (void) vfs_allocate_syncvnode(mp); 730 mp->mnt_iflag &= ~IMNT_UNMOUNT; 731 mp->mnt_flag |= async; 732 rw_exit(&mp->mnt_unmounting); 733 if (used_syncer) 734 mutex_exit(&syncer_mutex); 735 return (error); 736 } 737 vfs_scrubvnlist(mp); 738 mutex_enter(&mountlist_lock); 739 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 740 coveredvp->v_mountedhere = NULL; 741 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 742 mp->mnt_iflag |= IMNT_GONE; 743 mutex_exit(&mountlist_lock); 744 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 745 panic("unmount: dangling vnode"); 746 if (used_syncer) 747 mutex_exit(&syncer_mutex); 748 vfs_hooks_unmount(mp); 749 rw_exit(&mp->mnt_unmounting); 750 vfs_destroy(mp); /* reference from mount() */ 751 if (coveredvp != NULLVP) 752 vrele(coveredvp); 753 return (0); 754 } 755 756 /* 757 * Sync each mounted filesystem. 758 */ 759 #ifdef DEBUG 760 int syncprt = 0; 761 struct ctldebug debug0 = { "syncprt", &syncprt }; 762 #endif 763 764 /* ARGSUSED */ 765 int 766 sys_sync(struct lwp *l, const void *v, register_t *retval) 767 { 768 struct mount *mp, *nmp; 769 int asyncflag; 770 771 if (l == NULL) 772 l = &lwp0; 773 774 mutex_enter(&mountlist_lock); 775 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 776 mp = nmp) { 777 if (vfs_busy(mp, &nmp)) { 778 continue; 779 } 780 mutex_enter(&mp->mnt_updating); 781 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 782 asyncflag = mp->mnt_flag & MNT_ASYNC; 783 mp->mnt_flag &= ~MNT_ASYNC; 784 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 785 if (asyncflag) 786 mp->mnt_flag |= MNT_ASYNC; 787 } 788 mutex_exit(&mp->mnt_updating); 789 vfs_unbusy(mp, false, &nmp); 790 } 791 mutex_exit(&mountlist_lock); 792 #ifdef DEBUG 793 if (syncprt) 794 vfs_bufstats(); 795 #endif /* DEBUG */ 796 return (0); 797 } 798 799 /* 800 * Change filesystem quotas. 801 */ 802 /* ARGSUSED */ 803 int 804 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 805 { 806 /* { 807 syscallarg(const char *) path; 808 syscallarg(int) cmd; 809 syscallarg(int) uid; 810 syscallarg(void *) arg; 811 } */ 812 struct mount *mp; 813 int error; 814 struct nameidata nd; 815 816 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 817 SCARG(uap, path)); 818 if ((error = namei(&nd)) != 0) 819 return (error); 820 mp = nd.ni_vp->v_mount; 821 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 822 SCARG(uap, arg)); 823 vrele(nd.ni_vp); 824 return (error); 825 } 826 827 int 828 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 829 int root) 830 { 831 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 832 int error = 0; 833 834 /* 835 * If MNT_NOWAIT or MNT_LAZY is specified, do not 836 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 837 * overrides MNT_NOWAIT. 838 */ 839 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 840 (flags != MNT_WAIT && flags != 0)) { 841 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 842 goto done; 843 } 844 845 /* Get the filesystem stats now */ 846 memset(sp, 0, sizeof(*sp)); 847 if ((error = VFS_STATVFS(mp, sp)) != 0) { 848 return error; 849 } 850 851 if (cwdi->cwdi_rdir == NULL) 852 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 853 done: 854 if (cwdi->cwdi_rdir != NULL) { 855 size_t len; 856 char *bp; 857 char c; 858 char *path = PNBUF_GET(); 859 860 bp = path + MAXPATHLEN; 861 *--bp = '\0'; 862 rw_enter(&cwdi->cwdi_lock, RW_READER); 863 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 864 MAXPATHLEN / 2, 0, l); 865 rw_exit(&cwdi->cwdi_lock); 866 if (error) { 867 PNBUF_PUT(path); 868 return error; 869 } 870 len = strlen(bp); 871 if (len != 1) { 872 /* 873 * for mount points that are below our root, we can see 874 * them, so we fix up the pathname and return them. The 875 * rest we cannot see, so we don't allow viewing the 876 * data. 877 */ 878 if (strncmp(bp, sp->f_mntonname, len) == 0 && 879 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 880 (void)strlcpy(sp->f_mntonname, 881 c == '\0' ? "/" : &sp->f_mntonname[len], 882 sizeof(sp->f_mntonname)); 883 } else { 884 if (root) 885 (void)strlcpy(sp->f_mntonname, "/", 886 sizeof(sp->f_mntonname)); 887 else 888 error = EPERM; 889 } 890 } 891 PNBUF_PUT(path); 892 } 893 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 894 return error; 895 } 896 897 /* 898 * Get filesystem statistics by path. 899 */ 900 int 901 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 902 { 903 struct mount *mp; 904 int error; 905 struct nameidata nd; 906 907 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 908 if ((error = namei(&nd)) != 0) 909 return error; 910 mp = nd.ni_vp->v_mount; 911 error = dostatvfs(mp, sb, l, flags, 1); 912 vrele(nd.ni_vp); 913 return error; 914 } 915 916 /* ARGSUSED */ 917 int 918 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 919 { 920 /* { 921 syscallarg(const char *) path; 922 syscallarg(struct statvfs *) buf; 923 syscallarg(int) flags; 924 } */ 925 struct statvfs *sb; 926 int error; 927 928 sb = STATVFSBUF_GET(); 929 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 930 if (error == 0) 931 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 932 STATVFSBUF_PUT(sb); 933 return error; 934 } 935 936 /* 937 * Get filesystem statistics by fd. 938 */ 939 int 940 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 941 { 942 file_t *fp; 943 struct mount *mp; 944 int error; 945 946 /* fd_getvnode() will use the descriptor for us */ 947 if ((error = fd_getvnode(fd, &fp)) != 0) 948 return (error); 949 mp = ((struct vnode *)fp->f_data)->v_mount; 950 error = dostatvfs(mp, sb, curlwp, flags, 1); 951 fd_putfile(fd); 952 return error; 953 } 954 955 /* ARGSUSED */ 956 int 957 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 958 { 959 /* { 960 syscallarg(int) fd; 961 syscallarg(struct statvfs *) buf; 962 syscallarg(int) flags; 963 } */ 964 struct statvfs *sb; 965 int error; 966 967 sb = STATVFSBUF_GET(); 968 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 969 if (error == 0) 970 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 971 STATVFSBUF_PUT(sb); 972 return error; 973 } 974 975 976 /* 977 * Get statistics on all filesystems. 978 */ 979 int 980 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 981 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 982 register_t *retval) 983 { 984 int root = 0; 985 struct proc *p = l->l_proc; 986 struct mount *mp, *nmp; 987 struct statvfs *sb; 988 size_t count, maxcount; 989 int error = 0; 990 991 sb = STATVFSBUF_GET(); 992 maxcount = bufsize / entry_sz; 993 mutex_enter(&mountlist_lock); 994 count = 0; 995 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 996 mp = nmp) { 997 if (vfs_busy(mp, &nmp)) { 998 continue; 999 } 1000 if (sfsp && count < maxcount) { 1001 error = dostatvfs(mp, sb, l, flags, 0); 1002 if (error) { 1003 vfs_unbusy(mp, false, &nmp); 1004 error = 0; 1005 continue; 1006 } 1007 error = copyfn(sb, sfsp, entry_sz); 1008 if (error) { 1009 vfs_unbusy(mp, false, NULL); 1010 goto out; 1011 } 1012 sfsp = (char *)sfsp + entry_sz; 1013 root |= strcmp(sb->f_mntonname, "/") == 0; 1014 } 1015 count++; 1016 vfs_unbusy(mp, false, &nmp); 1017 } 1018 mutex_exit(&mountlist_lock); 1019 1020 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1021 /* 1022 * fake a root entry 1023 */ 1024 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1025 sb, l, flags, 1); 1026 if (error != 0) 1027 goto out; 1028 if (sfsp) { 1029 error = copyfn(sb, sfsp, entry_sz); 1030 if (error != 0) 1031 goto out; 1032 } 1033 count++; 1034 } 1035 if (sfsp && count > maxcount) 1036 *retval = maxcount; 1037 else 1038 *retval = count; 1039 out: 1040 STATVFSBUF_PUT(sb); 1041 return error; 1042 } 1043 1044 int 1045 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1046 { 1047 /* { 1048 syscallarg(struct statvfs *) buf; 1049 syscallarg(size_t) bufsize; 1050 syscallarg(int) flags; 1051 } */ 1052 1053 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1054 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1055 } 1056 1057 /* 1058 * Change current working directory to a given file descriptor. 1059 */ 1060 /* ARGSUSED */ 1061 int 1062 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1063 { 1064 /* { 1065 syscallarg(int) fd; 1066 } */ 1067 struct proc *p = l->l_proc; 1068 struct cwdinfo *cwdi; 1069 struct vnode *vp, *tdp; 1070 struct mount *mp; 1071 file_t *fp; 1072 int error, fd; 1073 1074 /* fd_getvnode() will use the descriptor for us */ 1075 fd = SCARG(uap, fd); 1076 if ((error = fd_getvnode(fd, &fp)) != 0) 1077 return (error); 1078 vp = fp->f_data; 1079 1080 VREF(vp); 1081 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1082 if (vp->v_type != VDIR) 1083 error = ENOTDIR; 1084 else 1085 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1086 if (error) { 1087 vput(vp); 1088 goto out; 1089 } 1090 while ((mp = vp->v_mountedhere) != NULL) { 1091 error = vfs_busy(mp, NULL); 1092 vput(vp); 1093 if (error != 0) 1094 goto out; 1095 error = VFS_ROOT(mp, &tdp); 1096 vfs_unbusy(mp, false, NULL); 1097 if (error) 1098 goto out; 1099 vp = tdp; 1100 } 1101 VOP_UNLOCK(vp, 0); 1102 1103 /* 1104 * Disallow changing to a directory not under the process's 1105 * current root directory (if there is one). 1106 */ 1107 cwdi = p->p_cwdi; 1108 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1109 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1110 vrele(vp); 1111 error = EPERM; /* operation not permitted */ 1112 } else { 1113 vrele(cwdi->cwdi_cdir); 1114 cwdi->cwdi_cdir = vp; 1115 } 1116 rw_exit(&cwdi->cwdi_lock); 1117 1118 out: 1119 fd_putfile(fd); 1120 return (error); 1121 } 1122 1123 /* 1124 * Change this process's notion of the root directory to a given file 1125 * descriptor. 1126 */ 1127 int 1128 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1129 { 1130 struct proc *p = l->l_proc; 1131 struct cwdinfo *cwdi; 1132 struct vnode *vp; 1133 file_t *fp; 1134 int error, fd = SCARG(uap, fd); 1135 1136 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1137 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1138 return error; 1139 /* fd_getvnode() will use the descriptor for us */ 1140 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1141 return error; 1142 vp = fp->f_data; 1143 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1144 if (vp->v_type != VDIR) 1145 error = ENOTDIR; 1146 else 1147 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1148 VOP_UNLOCK(vp, 0); 1149 if (error) 1150 goto out; 1151 VREF(vp); 1152 1153 /* 1154 * Prevent escaping from chroot by putting the root under 1155 * the working directory. Silently chdir to / if we aren't 1156 * already there. 1157 */ 1158 cwdi = p->p_cwdi; 1159 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1160 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1161 /* 1162 * XXX would be more failsafe to change directory to a 1163 * deadfs node here instead 1164 */ 1165 vrele(cwdi->cwdi_cdir); 1166 VREF(vp); 1167 cwdi->cwdi_cdir = vp; 1168 } 1169 1170 if (cwdi->cwdi_rdir != NULL) 1171 vrele(cwdi->cwdi_rdir); 1172 cwdi->cwdi_rdir = vp; 1173 rw_exit(&cwdi->cwdi_lock); 1174 1175 out: 1176 fd_putfile(fd); 1177 return (error); 1178 } 1179 1180 /* 1181 * Change current working directory (``.''). 1182 */ 1183 /* ARGSUSED */ 1184 int 1185 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1186 { 1187 /* { 1188 syscallarg(const char *) path; 1189 } */ 1190 struct proc *p = l->l_proc; 1191 struct cwdinfo *cwdi; 1192 int error; 1193 struct nameidata nd; 1194 1195 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1196 SCARG(uap, path)); 1197 if ((error = change_dir(&nd, l)) != 0) 1198 return (error); 1199 cwdi = p->p_cwdi; 1200 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1201 vrele(cwdi->cwdi_cdir); 1202 cwdi->cwdi_cdir = nd.ni_vp; 1203 rw_exit(&cwdi->cwdi_lock); 1204 return (0); 1205 } 1206 1207 /* 1208 * Change notion of root (``/'') directory. 1209 */ 1210 /* ARGSUSED */ 1211 int 1212 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1213 { 1214 /* { 1215 syscallarg(const char *) path; 1216 } */ 1217 struct proc *p = l->l_proc; 1218 struct cwdinfo *cwdi; 1219 struct vnode *vp; 1220 int error; 1221 struct nameidata nd; 1222 1223 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1224 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1225 return (error); 1226 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1227 SCARG(uap, path)); 1228 if ((error = change_dir(&nd, l)) != 0) 1229 return (error); 1230 1231 cwdi = p->p_cwdi; 1232 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1233 if (cwdi->cwdi_rdir != NULL) 1234 vrele(cwdi->cwdi_rdir); 1235 vp = nd.ni_vp; 1236 cwdi->cwdi_rdir = vp; 1237 1238 /* 1239 * Prevent escaping from chroot by putting the root under 1240 * the working directory. Silently chdir to / if we aren't 1241 * already there. 1242 */ 1243 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1244 /* 1245 * XXX would be more failsafe to change directory to a 1246 * deadfs node here instead 1247 */ 1248 vrele(cwdi->cwdi_cdir); 1249 VREF(vp); 1250 cwdi->cwdi_cdir = vp; 1251 } 1252 rw_exit(&cwdi->cwdi_lock); 1253 1254 return (0); 1255 } 1256 1257 /* 1258 * Common routine for chroot and chdir. 1259 */ 1260 static int 1261 change_dir(struct nameidata *ndp, struct lwp *l) 1262 { 1263 struct vnode *vp; 1264 int error; 1265 1266 if ((error = namei(ndp)) != 0) 1267 return (error); 1268 vp = ndp->ni_vp; 1269 if (vp->v_type != VDIR) 1270 error = ENOTDIR; 1271 else 1272 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1273 1274 if (error) 1275 vput(vp); 1276 else 1277 VOP_UNLOCK(vp, 0); 1278 return (error); 1279 } 1280 1281 /* 1282 * Check permissions, allocate an open file structure, 1283 * and call the device open routine if any. 1284 */ 1285 int 1286 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1287 { 1288 /* { 1289 syscallarg(const char *) path; 1290 syscallarg(int) flags; 1291 syscallarg(int) mode; 1292 } */ 1293 struct proc *p = l->l_proc; 1294 struct cwdinfo *cwdi = p->p_cwdi; 1295 file_t *fp; 1296 struct vnode *vp; 1297 int flags, cmode; 1298 int type, indx, error; 1299 struct flock lf; 1300 struct nameidata nd; 1301 1302 flags = FFLAGS(SCARG(uap, flags)); 1303 if ((flags & (FREAD | FWRITE)) == 0) 1304 return (EINVAL); 1305 if ((error = fd_allocfile(&fp, &indx)) != 0) 1306 return (error); 1307 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1308 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1309 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1310 SCARG(uap, path)); 1311 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1312 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1313 fd_abort(p, fp, indx); 1314 if ((error == EDUPFD || error == EMOVEFD) && 1315 l->l_dupfd >= 0 && /* XXX from fdopen */ 1316 (error = 1317 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1318 *retval = indx; 1319 return (0); 1320 } 1321 if (error == ERESTART) 1322 error = EINTR; 1323 return (error); 1324 } 1325 1326 l->l_dupfd = 0; 1327 vp = nd.ni_vp; 1328 fp->f_flag = flags & FMASK; 1329 fp->f_type = DTYPE_VNODE; 1330 fp->f_ops = &vnops; 1331 fp->f_data = vp; 1332 if (flags & (O_EXLOCK | O_SHLOCK)) { 1333 lf.l_whence = SEEK_SET; 1334 lf.l_start = 0; 1335 lf.l_len = 0; 1336 if (flags & O_EXLOCK) 1337 lf.l_type = F_WRLCK; 1338 else 1339 lf.l_type = F_RDLCK; 1340 type = F_FLOCK; 1341 if ((flags & FNONBLOCK) == 0) 1342 type |= F_WAIT; 1343 VOP_UNLOCK(vp, 0); 1344 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1345 if (error) { 1346 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1347 fd_abort(p, fp, indx); 1348 return (error); 1349 } 1350 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1351 atomic_or_uint(&fp->f_flag, FHASLOCK); 1352 } 1353 VOP_UNLOCK(vp, 0); 1354 *retval = indx; 1355 fd_affix(p, fp, indx); 1356 return (0); 1357 } 1358 1359 static void 1360 vfs__fhfree(fhandle_t *fhp) 1361 { 1362 size_t fhsize; 1363 1364 if (fhp == NULL) { 1365 return; 1366 } 1367 fhsize = FHANDLE_SIZE(fhp); 1368 kmem_free(fhp, fhsize); 1369 } 1370 1371 /* 1372 * vfs_composefh: compose a filehandle. 1373 */ 1374 1375 int 1376 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1377 { 1378 struct mount *mp; 1379 struct fid *fidp; 1380 int error; 1381 size_t needfhsize; 1382 size_t fidsize; 1383 1384 mp = vp->v_mount; 1385 fidp = NULL; 1386 if (*fh_size < FHANDLE_SIZE_MIN) { 1387 fidsize = 0; 1388 } else { 1389 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1390 if (fhp != NULL) { 1391 memset(fhp, 0, *fh_size); 1392 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1393 fidp = &fhp->fh_fid; 1394 } 1395 } 1396 error = VFS_VPTOFH(vp, fidp, &fidsize); 1397 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1398 if (error == 0 && *fh_size < needfhsize) { 1399 error = E2BIG; 1400 } 1401 *fh_size = needfhsize; 1402 return error; 1403 } 1404 1405 int 1406 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1407 { 1408 struct mount *mp; 1409 fhandle_t *fhp; 1410 size_t fhsize; 1411 size_t fidsize; 1412 int error; 1413 1414 *fhpp = NULL; 1415 mp = vp->v_mount; 1416 fidsize = 0; 1417 error = VFS_VPTOFH(vp, NULL, &fidsize); 1418 KASSERT(error != 0); 1419 if (error != E2BIG) { 1420 goto out; 1421 } 1422 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1423 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1424 if (fhp == NULL) { 1425 error = ENOMEM; 1426 goto out; 1427 } 1428 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1429 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1430 if (error == 0) { 1431 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1432 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1433 *fhpp = fhp; 1434 } else { 1435 kmem_free(fhp, fhsize); 1436 } 1437 out: 1438 return error; 1439 } 1440 1441 void 1442 vfs_composefh_free(fhandle_t *fhp) 1443 { 1444 1445 vfs__fhfree(fhp); 1446 } 1447 1448 /* 1449 * vfs_fhtovp: lookup a vnode by a filehandle. 1450 */ 1451 1452 int 1453 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1454 { 1455 struct mount *mp; 1456 int error; 1457 1458 *vpp = NULL; 1459 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1460 if (mp == NULL) { 1461 error = ESTALE; 1462 goto out; 1463 } 1464 if (mp->mnt_op->vfs_fhtovp == NULL) { 1465 error = EOPNOTSUPP; 1466 goto out; 1467 } 1468 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1469 out: 1470 return error; 1471 } 1472 1473 /* 1474 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1475 * the needed size. 1476 */ 1477 1478 int 1479 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1480 { 1481 fhandle_t *fhp; 1482 int error; 1483 1484 *fhpp = NULL; 1485 if (fhsize > FHANDLE_SIZE_MAX) { 1486 return EINVAL; 1487 } 1488 if (fhsize < FHANDLE_SIZE_MIN) { 1489 return EINVAL; 1490 } 1491 again: 1492 fhp = kmem_alloc(fhsize, KM_SLEEP); 1493 if (fhp == NULL) { 1494 return ENOMEM; 1495 } 1496 error = copyin(ufhp, fhp, fhsize); 1497 if (error == 0) { 1498 /* XXX this check shouldn't be here */ 1499 if (FHANDLE_SIZE(fhp) == fhsize) { 1500 *fhpp = fhp; 1501 return 0; 1502 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1503 /* 1504 * a kludge for nfsv2 padded handles. 1505 */ 1506 size_t sz; 1507 1508 sz = FHANDLE_SIZE(fhp); 1509 kmem_free(fhp, fhsize); 1510 fhsize = sz; 1511 goto again; 1512 } else { 1513 /* 1514 * userland told us wrong size. 1515 */ 1516 error = EINVAL; 1517 } 1518 } 1519 kmem_free(fhp, fhsize); 1520 return error; 1521 } 1522 1523 void 1524 vfs_copyinfh_free(fhandle_t *fhp) 1525 { 1526 1527 vfs__fhfree(fhp); 1528 } 1529 1530 /* 1531 * Get file handle system call 1532 */ 1533 int 1534 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1535 { 1536 /* { 1537 syscallarg(char *) fname; 1538 syscallarg(fhandle_t *) fhp; 1539 syscallarg(size_t *) fh_size; 1540 } */ 1541 struct vnode *vp; 1542 fhandle_t *fh; 1543 int error; 1544 struct nameidata nd; 1545 size_t sz; 1546 size_t usz; 1547 1548 /* 1549 * Must be super user 1550 */ 1551 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1552 0, NULL, NULL, NULL); 1553 if (error) 1554 return (error); 1555 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1556 SCARG(uap, fname)); 1557 error = namei(&nd); 1558 if (error) 1559 return (error); 1560 vp = nd.ni_vp; 1561 error = vfs_composefh_alloc(vp, &fh); 1562 vput(vp); 1563 if (error != 0) { 1564 goto out; 1565 } 1566 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1567 if (error != 0) { 1568 goto out; 1569 } 1570 sz = FHANDLE_SIZE(fh); 1571 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1572 if (error != 0) { 1573 goto out; 1574 } 1575 if (usz >= sz) { 1576 error = copyout(fh, SCARG(uap, fhp), sz); 1577 } else { 1578 error = E2BIG; 1579 } 1580 out: 1581 vfs_composefh_free(fh); 1582 return (error); 1583 } 1584 1585 /* 1586 * Open a file given a file handle. 1587 * 1588 * Check permissions, allocate an open file structure, 1589 * and call the device open routine if any. 1590 */ 1591 1592 int 1593 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1594 register_t *retval) 1595 { 1596 file_t *fp; 1597 struct vnode *vp = NULL; 1598 kauth_cred_t cred = l->l_cred; 1599 file_t *nfp; 1600 int type, indx, error=0; 1601 struct flock lf; 1602 struct vattr va; 1603 fhandle_t *fh; 1604 int flags; 1605 proc_t *p; 1606 1607 p = curproc; 1608 1609 /* 1610 * Must be super user 1611 */ 1612 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1613 0, NULL, NULL, NULL))) 1614 return (error); 1615 1616 flags = FFLAGS(oflags); 1617 if ((flags & (FREAD | FWRITE)) == 0) 1618 return (EINVAL); 1619 if ((flags & O_CREAT)) 1620 return (EINVAL); 1621 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1622 return (error); 1623 fp = nfp; 1624 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1625 if (error != 0) { 1626 goto bad; 1627 } 1628 error = vfs_fhtovp(fh, &vp); 1629 if (error != 0) { 1630 goto bad; 1631 } 1632 1633 /* Now do an effective vn_open */ 1634 1635 if (vp->v_type == VSOCK) { 1636 error = EOPNOTSUPP; 1637 goto bad; 1638 } 1639 error = vn_openchk(vp, cred, flags); 1640 if (error != 0) 1641 goto bad; 1642 if (flags & O_TRUNC) { 1643 VOP_UNLOCK(vp, 0); /* XXX */ 1644 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1645 VATTR_NULL(&va); 1646 va.va_size = 0; 1647 error = VOP_SETATTR(vp, &va, cred); 1648 if (error) 1649 goto bad; 1650 } 1651 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1652 goto bad; 1653 if (flags & FWRITE) { 1654 mutex_enter(&vp->v_interlock); 1655 vp->v_writecount++; 1656 mutex_exit(&vp->v_interlock); 1657 } 1658 1659 /* done with modified vn_open, now finish what sys_open does. */ 1660 1661 fp->f_flag = flags & FMASK; 1662 fp->f_type = DTYPE_VNODE; 1663 fp->f_ops = &vnops; 1664 fp->f_data = vp; 1665 if (flags & (O_EXLOCK | O_SHLOCK)) { 1666 lf.l_whence = SEEK_SET; 1667 lf.l_start = 0; 1668 lf.l_len = 0; 1669 if (flags & O_EXLOCK) 1670 lf.l_type = F_WRLCK; 1671 else 1672 lf.l_type = F_RDLCK; 1673 type = F_FLOCK; 1674 if ((flags & FNONBLOCK) == 0) 1675 type |= F_WAIT; 1676 VOP_UNLOCK(vp, 0); 1677 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1678 if (error) { 1679 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1680 fd_abort(p, fp, indx); 1681 return (error); 1682 } 1683 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1684 atomic_or_uint(&fp->f_flag, FHASLOCK); 1685 } 1686 VOP_UNLOCK(vp, 0); 1687 *retval = indx; 1688 fd_affix(p, fp, indx); 1689 vfs_copyinfh_free(fh); 1690 return (0); 1691 1692 bad: 1693 fd_abort(p, fp, indx); 1694 if (vp != NULL) 1695 vput(vp); 1696 vfs_copyinfh_free(fh); 1697 return (error); 1698 } 1699 1700 int 1701 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1702 { 1703 /* { 1704 syscallarg(const void *) fhp; 1705 syscallarg(size_t) fh_size; 1706 syscallarg(int) flags; 1707 } */ 1708 1709 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1710 SCARG(uap, flags), retval); 1711 } 1712 1713 int 1714 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1715 { 1716 int error; 1717 fhandle_t *fh; 1718 struct vnode *vp; 1719 1720 /* 1721 * Must be super user 1722 */ 1723 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1724 0, NULL, NULL, NULL))) 1725 return (error); 1726 1727 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1728 if (error != 0) 1729 return error; 1730 1731 error = vfs_fhtovp(fh, &vp); 1732 vfs_copyinfh_free(fh); 1733 if (error != 0) 1734 return error; 1735 1736 error = vn_stat(vp, sb); 1737 vput(vp); 1738 return error; 1739 } 1740 1741 1742 /* ARGSUSED */ 1743 int 1744 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1745 { 1746 /* { 1747 syscallarg(const void *) fhp; 1748 syscallarg(size_t) fh_size; 1749 syscallarg(struct stat *) sb; 1750 } */ 1751 struct stat sb; 1752 int error; 1753 1754 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1755 if (error) 1756 return error; 1757 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1758 } 1759 1760 int 1761 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1762 int flags) 1763 { 1764 fhandle_t *fh; 1765 struct mount *mp; 1766 struct vnode *vp; 1767 int error; 1768 1769 /* 1770 * Must be super user 1771 */ 1772 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1773 0, NULL, NULL, NULL))) 1774 return error; 1775 1776 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1777 if (error != 0) 1778 return error; 1779 1780 error = vfs_fhtovp(fh, &vp); 1781 vfs_copyinfh_free(fh); 1782 if (error != 0) 1783 return error; 1784 1785 mp = vp->v_mount; 1786 error = dostatvfs(mp, sb, l, flags, 1); 1787 vput(vp); 1788 return error; 1789 } 1790 1791 /* ARGSUSED */ 1792 int 1793 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1794 { 1795 /* { 1796 syscallarg(const void *) fhp; 1797 syscallarg(size_t) fh_size; 1798 syscallarg(struct statvfs *) buf; 1799 syscallarg(int) flags; 1800 } */ 1801 struct statvfs *sb = STATVFSBUF_GET(); 1802 int error; 1803 1804 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1805 SCARG(uap, flags)); 1806 if (error == 0) 1807 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1808 STATVFSBUF_PUT(sb); 1809 return error; 1810 } 1811 1812 /* 1813 * Create a special file. 1814 */ 1815 /* ARGSUSED */ 1816 int 1817 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1818 register_t *retval) 1819 { 1820 /* { 1821 syscallarg(const char *) path; 1822 syscallarg(mode_t) mode; 1823 syscallarg(dev_t) dev; 1824 } */ 1825 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1826 SCARG(uap, dev), retval); 1827 } 1828 1829 int 1830 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1831 register_t *retval) 1832 { 1833 struct proc *p = l->l_proc; 1834 struct vnode *vp; 1835 struct vattr vattr; 1836 int error, optype; 1837 struct nameidata nd; 1838 char *path; 1839 const char *cpath; 1840 enum uio_seg seg = UIO_USERSPACE; 1841 1842 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1843 0, NULL, NULL, NULL)) != 0) 1844 return (error); 1845 1846 optype = VOP_MKNOD_DESCOFFSET; 1847 1848 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1849 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1850 1851 if ((error = namei(&nd)) != 0) 1852 goto out; 1853 vp = nd.ni_vp; 1854 if (vp != NULL) 1855 error = EEXIST; 1856 else { 1857 VATTR_NULL(&vattr); 1858 /* We will read cwdi->cwdi_cmask unlocked. */ 1859 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1860 vattr.va_rdev = dev; 1861 1862 switch (mode & S_IFMT) { 1863 case S_IFMT: /* used by badsect to flag bad sectors */ 1864 vattr.va_type = VBAD; 1865 break; 1866 case S_IFCHR: 1867 vattr.va_type = VCHR; 1868 break; 1869 case S_IFBLK: 1870 vattr.va_type = VBLK; 1871 break; 1872 case S_IFWHT: 1873 optype = VOP_WHITEOUT_DESCOFFSET; 1874 break; 1875 case S_IFREG: 1876 #if NVERIEXEC > 0 1877 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1878 O_CREAT); 1879 #endif /* NVERIEXEC > 0 */ 1880 vattr.va_type = VREG; 1881 vattr.va_rdev = VNOVAL; 1882 optype = VOP_CREATE_DESCOFFSET; 1883 break; 1884 default: 1885 error = EINVAL; 1886 break; 1887 } 1888 } 1889 if (!error) { 1890 switch (optype) { 1891 case VOP_WHITEOUT_DESCOFFSET: 1892 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1893 if (error) 1894 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1895 vput(nd.ni_dvp); 1896 break; 1897 1898 case VOP_MKNOD_DESCOFFSET: 1899 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1900 &nd.ni_cnd, &vattr); 1901 if (error == 0) 1902 vput(nd.ni_vp); 1903 break; 1904 1905 case VOP_CREATE_DESCOFFSET: 1906 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1907 &nd.ni_cnd, &vattr); 1908 if (error == 0) 1909 vput(nd.ni_vp); 1910 break; 1911 } 1912 } else { 1913 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1914 if (nd.ni_dvp == vp) 1915 vrele(nd.ni_dvp); 1916 else 1917 vput(nd.ni_dvp); 1918 if (vp) 1919 vrele(vp); 1920 } 1921 out: 1922 VERIEXEC_PATH_PUT(path); 1923 return (error); 1924 } 1925 1926 /* 1927 * Create a named pipe. 1928 */ 1929 /* ARGSUSED */ 1930 int 1931 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1932 { 1933 /* { 1934 syscallarg(const char *) path; 1935 syscallarg(int) mode; 1936 } */ 1937 struct proc *p = l->l_proc; 1938 struct vattr vattr; 1939 int error; 1940 struct nameidata nd; 1941 1942 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1943 SCARG(uap, path)); 1944 if ((error = namei(&nd)) != 0) 1945 return (error); 1946 if (nd.ni_vp != NULL) { 1947 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1948 if (nd.ni_dvp == nd.ni_vp) 1949 vrele(nd.ni_dvp); 1950 else 1951 vput(nd.ni_dvp); 1952 vrele(nd.ni_vp); 1953 return (EEXIST); 1954 } 1955 VATTR_NULL(&vattr); 1956 vattr.va_type = VFIFO; 1957 /* We will read cwdi->cwdi_cmask unlocked. */ 1958 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1959 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1960 if (error == 0) 1961 vput(nd.ni_vp); 1962 return (error); 1963 } 1964 1965 /* 1966 * Make a hard file link. 1967 */ 1968 /* ARGSUSED */ 1969 int 1970 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1971 { 1972 /* { 1973 syscallarg(const char *) path; 1974 syscallarg(const char *) link; 1975 } */ 1976 struct vnode *vp; 1977 struct nameidata nd; 1978 int error; 1979 1980 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1981 SCARG(uap, path)); 1982 if ((error = namei(&nd)) != 0) 1983 return (error); 1984 vp = nd.ni_vp; 1985 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1986 SCARG(uap, link)); 1987 if ((error = namei(&nd)) != 0) 1988 goto out; 1989 if (nd.ni_vp) { 1990 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1991 if (nd.ni_dvp == nd.ni_vp) 1992 vrele(nd.ni_dvp); 1993 else 1994 vput(nd.ni_dvp); 1995 vrele(nd.ni_vp); 1996 error = EEXIST; 1997 goto out; 1998 } 1999 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2000 out: 2001 vrele(vp); 2002 return (error); 2003 } 2004 2005 /* 2006 * Make a symbolic link. 2007 */ 2008 /* ARGSUSED */ 2009 int 2010 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2011 { 2012 /* { 2013 syscallarg(const char *) path; 2014 syscallarg(const char *) link; 2015 } */ 2016 struct proc *p = l->l_proc; 2017 struct vattr vattr; 2018 char *path; 2019 int error; 2020 struct nameidata nd; 2021 2022 path = PNBUF_GET(); 2023 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2024 if (error) 2025 goto out; 2026 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2027 SCARG(uap, link)); 2028 if ((error = namei(&nd)) != 0) 2029 goto out; 2030 if (nd.ni_vp) { 2031 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2032 if (nd.ni_dvp == nd.ni_vp) 2033 vrele(nd.ni_dvp); 2034 else 2035 vput(nd.ni_dvp); 2036 vrele(nd.ni_vp); 2037 error = EEXIST; 2038 goto out; 2039 } 2040 VATTR_NULL(&vattr); 2041 vattr.va_type = VLNK; 2042 /* We will read cwdi->cwdi_cmask unlocked. */ 2043 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2044 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2045 if (error == 0) 2046 vput(nd.ni_vp); 2047 out: 2048 PNBUF_PUT(path); 2049 return (error); 2050 } 2051 2052 /* 2053 * Delete a whiteout from the filesystem. 2054 */ 2055 /* ARGSUSED */ 2056 int 2057 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2058 { 2059 /* { 2060 syscallarg(const char *) path; 2061 } */ 2062 int error; 2063 struct nameidata nd; 2064 2065 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2066 UIO_USERSPACE, SCARG(uap, path)); 2067 error = namei(&nd); 2068 if (error) 2069 return (error); 2070 2071 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2072 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2073 if (nd.ni_dvp == nd.ni_vp) 2074 vrele(nd.ni_dvp); 2075 else 2076 vput(nd.ni_dvp); 2077 if (nd.ni_vp) 2078 vrele(nd.ni_vp); 2079 return (EEXIST); 2080 } 2081 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2082 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2083 vput(nd.ni_dvp); 2084 return (error); 2085 } 2086 2087 /* 2088 * Delete a name from the filesystem. 2089 */ 2090 /* ARGSUSED */ 2091 int 2092 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2093 { 2094 /* { 2095 syscallarg(const char *) path; 2096 } */ 2097 2098 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2099 } 2100 2101 int 2102 do_sys_unlink(const char *arg, enum uio_seg seg) 2103 { 2104 struct vnode *vp; 2105 int error; 2106 struct nameidata nd; 2107 char *path; 2108 const char *cpath; 2109 2110 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2111 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2112 2113 if ((error = namei(&nd)) != 0) 2114 goto out; 2115 vp = nd.ni_vp; 2116 2117 /* 2118 * The root of a mounted filesystem cannot be deleted. 2119 */ 2120 if (vp->v_vflag & VV_ROOT) { 2121 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2122 if (nd.ni_dvp == vp) 2123 vrele(nd.ni_dvp); 2124 else 2125 vput(nd.ni_dvp); 2126 vput(vp); 2127 error = EBUSY; 2128 goto out; 2129 } 2130 2131 #if NVERIEXEC > 0 2132 /* Handle remove requests for veriexec entries. */ 2133 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2134 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2135 if (nd.ni_dvp == vp) 2136 vrele(nd.ni_dvp); 2137 else 2138 vput(nd.ni_dvp); 2139 vput(vp); 2140 goto out; 2141 } 2142 #endif /* NVERIEXEC > 0 */ 2143 2144 #ifdef FILEASSOC 2145 (void)fileassoc_file_delete(vp); 2146 #endif /* FILEASSOC */ 2147 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2148 out: 2149 VERIEXEC_PATH_PUT(path); 2150 return (error); 2151 } 2152 2153 /* 2154 * Reposition read/write file offset. 2155 */ 2156 int 2157 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2158 { 2159 /* { 2160 syscallarg(int) fd; 2161 syscallarg(int) pad; 2162 syscallarg(off_t) offset; 2163 syscallarg(int) whence; 2164 } */ 2165 kauth_cred_t cred = l->l_cred; 2166 file_t *fp; 2167 struct vnode *vp; 2168 struct vattr vattr; 2169 off_t newoff; 2170 int error, fd; 2171 2172 fd = SCARG(uap, fd); 2173 2174 if ((fp = fd_getfile(fd)) == NULL) 2175 return (EBADF); 2176 2177 vp = fp->f_data; 2178 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2179 error = ESPIPE; 2180 goto out; 2181 } 2182 2183 switch (SCARG(uap, whence)) { 2184 case SEEK_CUR: 2185 newoff = fp->f_offset + SCARG(uap, offset); 2186 break; 2187 case SEEK_END: 2188 error = VOP_GETATTR(vp, &vattr, cred); 2189 if (error) { 2190 goto out; 2191 } 2192 newoff = SCARG(uap, offset) + vattr.va_size; 2193 break; 2194 case SEEK_SET: 2195 newoff = SCARG(uap, offset); 2196 break; 2197 default: 2198 error = EINVAL; 2199 goto out; 2200 } 2201 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2202 *(off_t *)retval = fp->f_offset = newoff; 2203 } 2204 out: 2205 fd_putfile(fd); 2206 return (error); 2207 } 2208 2209 /* 2210 * Positional read system call. 2211 */ 2212 int 2213 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2214 { 2215 /* { 2216 syscallarg(int) fd; 2217 syscallarg(void *) buf; 2218 syscallarg(size_t) nbyte; 2219 syscallarg(off_t) offset; 2220 } */ 2221 file_t *fp; 2222 struct vnode *vp; 2223 off_t offset; 2224 int error, fd = SCARG(uap, fd); 2225 2226 if ((fp = fd_getfile(fd)) == NULL) 2227 return (EBADF); 2228 2229 if ((fp->f_flag & FREAD) == 0) { 2230 fd_putfile(fd); 2231 return (EBADF); 2232 } 2233 2234 vp = fp->f_data; 2235 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2236 error = ESPIPE; 2237 goto out; 2238 } 2239 2240 offset = SCARG(uap, offset); 2241 2242 /* 2243 * XXX This works because no file systems actually 2244 * XXX take any action on the seek operation. 2245 */ 2246 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2247 goto out; 2248 2249 /* dofileread() will unuse the descriptor for us */ 2250 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2251 &offset, 0, retval)); 2252 2253 out: 2254 fd_putfile(fd); 2255 return (error); 2256 } 2257 2258 /* 2259 * Positional scatter read system call. 2260 */ 2261 int 2262 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2263 { 2264 /* { 2265 syscallarg(int) fd; 2266 syscallarg(const struct iovec *) iovp; 2267 syscallarg(int) iovcnt; 2268 syscallarg(off_t) offset; 2269 } */ 2270 off_t offset = SCARG(uap, offset); 2271 2272 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2273 SCARG(uap, iovcnt), &offset, 0, retval); 2274 } 2275 2276 /* 2277 * Positional write system call. 2278 */ 2279 int 2280 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2281 { 2282 /* { 2283 syscallarg(int) fd; 2284 syscallarg(const void *) buf; 2285 syscallarg(size_t) nbyte; 2286 syscallarg(off_t) offset; 2287 } */ 2288 file_t *fp; 2289 struct vnode *vp; 2290 off_t offset; 2291 int error, fd = SCARG(uap, fd); 2292 2293 if ((fp = fd_getfile(fd)) == NULL) 2294 return (EBADF); 2295 2296 if ((fp->f_flag & FWRITE) == 0) { 2297 fd_putfile(fd); 2298 return (EBADF); 2299 } 2300 2301 vp = fp->f_data; 2302 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2303 error = ESPIPE; 2304 goto out; 2305 } 2306 2307 offset = SCARG(uap, offset); 2308 2309 /* 2310 * XXX This works because no file systems actually 2311 * XXX take any action on the seek operation. 2312 */ 2313 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2314 goto out; 2315 2316 /* dofilewrite() will unuse the descriptor for us */ 2317 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2318 &offset, 0, retval)); 2319 2320 out: 2321 fd_putfile(fd); 2322 return (error); 2323 } 2324 2325 /* 2326 * Positional gather write system call. 2327 */ 2328 int 2329 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2330 { 2331 /* { 2332 syscallarg(int) fd; 2333 syscallarg(const struct iovec *) iovp; 2334 syscallarg(int) iovcnt; 2335 syscallarg(off_t) offset; 2336 } */ 2337 off_t offset = SCARG(uap, offset); 2338 2339 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2340 SCARG(uap, iovcnt), &offset, 0, retval); 2341 } 2342 2343 /* 2344 * Check access permissions. 2345 */ 2346 int 2347 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2348 { 2349 /* { 2350 syscallarg(const char *) path; 2351 syscallarg(int) flags; 2352 } */ 2353 kauth_cred_t cred; 2354 struct vnode *vp; 2355 int error, flags; 2356 struct nameidata nd; 2357 2358 cred = kauth_cred_dup(l->l_cred); 2359 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2360 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2361 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2362 SCARG(uap, path)); 2363 /* Override default credentials */ 2364 nd.ni_cnd.cn_cred = cred; 2365 if ((error = namei(&nd)) != 0) 2366 goto out; 2367 vp = nd.ni_vp; 2368 2369 /* Flags == 0 means only check for existence. */ 2370 if (SCARG(uap, flags)) { 2371 flags = 0; 2372 if (SCARG(uap, flags) & R_OK) 2373 flags |= VREAD; 2374 if (SCARG(uap, flags) & W_OK) 2375 flags |= VWRITE; 2376 if (SCARG(uap, flags) & X_OK) 2377 flags |= VEXEC; 2378 2379 error = VOP_ACCESS(vp, flags, cred); 2380 if (!error && (flags & VWRITE)) 2381 error = vn_writechk(vp); 2382 } 2383 vput(vp); 2384 out: 2385 kauth_cred_free(cred); 2386 return (error); 2387 } 2388 2389 /* 2390 * Common code for all sys_stat functions, including compat versions. 2391 */ 2392 int 2393 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2394 { 2395 int error; 2396 struct nameidata nd; 2397 2398 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2399 UIO_USERSPACE, path); 2400 error = namei(&nd); 2401 if (error != 0) 2402 return error; 2403 error = vn_stat(nd.ni_vp, sb); 2404 vput(nd.ni_vp); 2405 return error; 2406 } 2407 2408 /* 2409 * Get file status; this version follows links. 2410 */ 2411 /* ARGSUSED */ 2412 int 2413 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2414 { 2415 /* { 2416 syscallarg(const char *) path; 2417 syscallarg(struct stat *) ub; 2418 } */ 2419 struct stat sb; 2420 int error; 2421 2422 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2423 if (error) 2424 return error; 2425 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2426 } 2427 2428 /* 2429 * Get file status; this version does not follow links. 2430 */ 2431 /* ARGSUSED */ 2432 int 2433 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2434 { 2435 /* { 2436 syscallarg(const char *) path; 2437 syscallarg(struct stat *) ub; 2438 } */ 2439 struct stat sb; 2440 int error; 2441 2442 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2443 if (error) 2444 return error; 2445 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2446 } 2447 2448 /* 2449 * Get configurable pathname variables. 2450 */ 2451 /* ARGSUSED */ 2452 int 2453 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2454 { 2455 /* { 2456 syscallarg(const char *) path; 2457 syscallarg(int) name; 2458 } */ 2459 int error; 2460 struct nameidata nd; 2461 2462 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2463 SCARG(uap, path)); 2464 if ((error = namei(&nd)) != 0) 2465 return (error); 2466 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2467 vput(nd.ni_vp); 2468 return (error); 2469 } 2470 2471 /* 2472 * Return target name of a symbolic link. 2473 */ 2474 /* ARGSUSED */ 2475 int 2476 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2477 { 2478 /* { 2479 syscallarg(const char *) path; 2480 syscallarg(char *) buf; 2481 syscallarg(size_t) count; 2482 } */ 2483 struct vnode *vp; 2484 struct iovec aiov; 2485 struct uio auio; 2486 int error; 2487 struct nameidata nd; 2488 2489 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2490 SCARG(uap, path)); 2491 if ((error = namei(&nd)) != 0) 2492 return (error); 2493 vp = nd.ni_vp; 2494 if (vp->v_type != VLNK) 2495 error = EINVAL; 2496 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2497 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2498 aiov.iov_base = SCARG(uap, buf); 2499 aiov.iov_len = SCARG(uap, count); 2500 auio.uio_iov = &aiov; 2501 auio.uio_iovcnt = 1; 2502 auio.uio_offset = 0; 2503 auio.uio_rw = UIO_READ; 2504 KASSERT(l == curlwp); 2505 auio.uio_vmspace = l->l_proc->p_vmspace; 2506 auio.uio_resid = SCARG(uap, count); 2507 error = VOP_READLINK(vp, &auio, l->l_cred); 2508 } 2509 vput(vp); 2510 *retval = SCARG(uap, count) - auio.uio_resid; 2511 return (error); 2512 } 2513 2514 /* 2515 * Change flags of a file given a path name. 2516 */ 2517 /* ARGSUSED */ 2518 int 2519 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2520 { 2521 /* { 2522 syscallarg(const char *) path; 2523 syscallarg(u_long) flags; 2524 } */ 2525 struct vnode *vp; 2526 int error; 2527 struct nameidata nd; 2528 2529 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2530 SCARG(uap, path)); 2531 if ((error = namei(&nd)) != 0) 2532 return (error); 2533 vp = nd.ni_vp; 2534 error = change_flags(vp, SCARG(uap, flags), l); 2535 vput(vp); 2536 return (error); 2537 } 2538 2539 /* 2540 * Change flags of a file given a file descriptor. 2541 */ 2542 /* ARGSUSED */ 2543 int 2544 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2545 { 2546 /* { 2547 syscallarg(int) fd; 2548 syscallarg(u_long) flags; 2549 } */ 2550 struct vnode *vp; 2551 file_t *fp; 2552 int error; 2553 2554 /* fd_getvnode() will use the descriptor for us */ 2555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2556 return (error); 2557 vp = fp->f_data; 2558 error = change_flags(vp, SCARG(uap, flags), l); 2559 VOP_UNLOCK(vp, 0); 2560 fd_putfile(SCARG(uap, fd)); 2561 return (error); 2562 } 2563 2564 /* 2565 * Change flags of a file given a path name; this version does 2566 * not follow links. 2567 */ 2568 int 2569 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2570 { 2571 /* { 2572 syscallarg(const char *) path; 2573 syscallarg(u_long) flags; 2574 } */ 2575 struct vnode *vp; 2576 int error; 2577 struct nameidata nd; 2578 2579 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2580 SCARG(uap, path)); 2581 if ((error = namei(&nd)) != 0) 2582 return (error); 2583 vp = nd.ni_vp; 2584 error = change_flags(vp, SCARG(uap, flags), l); 2585 vput(vp); 2586 return (error); 2587 } 2588 2589 /* 2590 * Common routine to change flags of a file. 2591 */ 2592 int 2593 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2594 { 2595 struct vattr vattr; 2596 int error; 2597 2598 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2599 /* 2600 * Non-superusers cannot change the flags on devices, even if they 2601 * own them. 2602 */ 2603 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2604 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2605 goto out; 2606 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2607 error = EINVAL; 2608 goto out; 2609 } 2610 } 2611 VATTR_NULL(&vattr); 2612 vattr.va_flags = flags; 2613 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2614 out: 2615 return (error); 2616 } 2617 2618 /* 2619 * Change mode of a file given path name; this version follows links. 2620 */ 2621 /* ARGSUSED */ 2622 int 2623 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2624 { 2625 /* { 2626 syscallarg(const char *) path; 2627 syscallarg(int) mode; 2628 } */ 2629 int error; 2630 struct nameidata nd; 2631 2632 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2633 SCARG(uap, path)); 2634 if ((error = namei(&nd)) != 0) 2635 return (error); 2636 2637 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2638 2639 vrele(nd.ni_vp); 2640 return (error); 2641 } 2642 2643 /* 2644 * Change mode of a file given a file descriptor. 2645 */ 2646 /* ARGSUSED */ 2647 int 2648 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2649 { 2650 /* { 2651 syscallarg(int) fd; 2652 syscallarg(int) mode; 2653 } */ 2654 file_t *fp; 2655 int error; 2656 2657 /* fd_getvnode() will use the descriptor for us */ 2658 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2659 return (error); 2660 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2661 fd_putfile(SCARG(uap, fd)); 2662 return (error); 2663 } 2664 2665 /* 2666 * Change mode of a file given path name; this version does not follow links. 2667 */ 2668 /* ARGSUSED */ 2669 int 2670 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2671 { 2672 /* { 2673 syscallarg(const char *) path; 2674 syscallarg(int) mode; 2675 } */ 2676 int error; 2677 struct nameidata nd; 2678 2679 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2680 SCARG(uap, path)); 2681 if ((error = namei(&nd)) != 0) 2682 return (error); 2683 2684 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2685 2686 vrele(nd.ni_vp); 2687 return (error); 2688 } 2689 2690 /* 2691 * Common routine to set mode given a vnode. 2692 */ 2693 static int 2694 change_mode(struct vnode *vp, int mode, struct lwp *l) 2695 { 2696 struct vattr vattr; 2697 int error; 2698 2699 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2700 VATTR_NULL(&vattr); 2701 vattr.va_mode = mode & ALLPERMS; 2702 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2703 VOP_UNLOCK(vp, 0); 2704 return (error); 2705 } 2706 2707 /* 2708 * Set ownership given a path name; this version follows links. 2709 */ 2710 /* ARGSUSED */ 2711 int 2712 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2713 { 2714 /* { 2715 syscallarg(const char *) path; 2716 syscallarg(uid_t) uid; 2717 syscallarg(gid_t) gid; 2718 } */ 2719 int error; 2720 struct nameidata nd; 2721 2722 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2723 SCARG(uap, path)); 2724 if ((error = namei(&nd)) != 0) 2725 return (error); 2726 2727 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2728 2729 vrele(nd.ni_vp); 2730 return (error); 2731 } 2732 2733 /* 2734 * Set ownership given a path name; this version follows links. 2735 * Provides POSIX semantics. 2736 */ 2737 /* ARGSUSED */ 2738 int 2739 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2740 { 2741 /* { 2742 syscallarg(const char *) path; 2743 syscallarg(uid_t) uid; 2744 syscallarg(gid_t) gid; 2745 } */ 2746 int error; 2747 struct nameidata nd; 2748 2749 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2750 SCARG(uap, path)); 2751 if ((error = namei(&nd)) != 0) 2752 return (error); 2753 2754 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2755 2756 vrele(nd.ni_vp); 2757 return (error); 2758 } 2759 2760 /* 2761 * Set ownership given a file descriptor. 2762 */ 2763 /* ARGSUSED */ 2764 int 2765 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2766 { 2767 /* { 2768 syscallarg(int) fd; 2769 syscallarg(uid_t) uid; 2770 syscallarg(gid_t) gid; 2771 } */ 2772 int error; 2773 file_t *fp; 2774 2775 /* fd_getvnode() will use the descriptor for us */ 2776 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2777 return (error); 2778 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2779 l, 0); 2780 fd_putfile(SCARG(uap, fd)); 2781 return (error); 2782 } 2783 2784 /* 2785 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2786 */ 2787 /* ARGSUSED */ 2788 int 2789 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2790 { 2791 /* { 2792 syscallarg(int) fd; 2793 syscallarg(uid_t) uid; 2794 syscallarg(gid_t) gid; 2795 } */ 2796 int error; 2797 file_t *fp; 2798 2799 /* fd_getvnode() will use the descriptor for us */ 2800 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2801 return (error); 2802 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2803 l, 1); 2804 fd_putfile(SCARG(uap, fd)); 2805 return (error); 2806 } 2807 2808 /* 2809 * Set ownership given a path name; this version does not follow links. 2810 */ 2811 /* ARGSUSED */ 2812 int 2813 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2814 { 2815 /* { 2816 syscallarg(const char *) path; 2817 syscallarg(uid_t) uid; 2818 syscallarg(gid_t) gid; 2819 } */ 2820 int error; 2821 struct nameidata nd; 2822 2823 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2824 SCARG(uap, path)); 2825 if ((error = namei(&nd)) != 0) 2826 return (error); 2827 2828 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2829 2830 vrele(nd.ni_vp); 2831 return (error); 2832 } 2833 2834 /* 2835 * Set ownership given a path name; this version does not follow links. 2836 * Provides POSIX/XPG semantics. 2837 */ 2838 /* ARGSUSED */ 2839 int 2840 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2841 { 2842 /* { 2843 syscallarg(const char *) path; 2844 syscallarg(uid_t) uid; 2845 syscallarg(gid_t) gid; 2846 } */ 2847 int error; 2848 struct nameidata nd; 2849 2850 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2851 SCARG(uap, path)); 2852 if ((error = namei(&nd)) != 0) 2853 return (error); 2854 2855 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2856 2857 vrele(nd.ni_vp); 2858 return (error); 2859 } 2860 2861 /* 2862 * Common routine to set ownership given a vnode. 2863 */ 2864 static int 2865 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2866 int posix_semantics) 2867 { 2868 struct vattr vattr; 2869 mode_t newmode; 2870 int error; 2871 2872 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2873 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2874 goto out; 2875 2876 #define CHANGED(x) ((int)(x) != -1) 2877 newmode = vattr.va_mode; 2878 if (posix_semantics) { 2879 /* 2880 * POSIX/XPG semantics: if the caller is not the super-user, 2881 * clear set-user-id and set-group-id bits. Both POSIX and 2882 * the XPG consider the behaviour for calls by the super-user 2883 * implementation-defined; we leave the set-user-id and set- 2884 * group-id settings intact in that case. 2885 */ 2886 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2887 NULL) != 0) 2888 newmode &= ~(S_ISUID | S_ISGID); 2889 } else { 2890 /* 2891 * NetBSD semantics: when changing owner and/or group, 2892 * clear the respective bit(s). 2893 */ 2894 if (CHANGED(uid)) 2895 newmode &= ~S_ISUID; 2896 if (CHANGED(gid)) 2897 newmode &= ~S_ISGID; 2898 } 2899 /* Update va_mode iff altered. */ 2900 if (vattr.va_mode == newmode) 2901 newmode = VNOVAL; 2902 2903 VATTR_NULL(&vattr); 2904 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2905 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2906 vattr.va_mode = newmode; 2907 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2908 #undef CHANGED 2909 2910 out: 2911 VOP_UNLOCK(vp, 0); 2912 return (error); 2913 } 2914 2915 /* 2916 * Set the access and modification times given a path name; this 2917 * version follows links. 2918 */ 2919 /* ARGSUSED */ 2920 int 2921 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2922 register_t *retval) 2923 { 2924 /* { 2925 syscallarg(const char *) path; 2926 syscallarg(const struct timeval *) tptr; 2927 } */ 2928 2929 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2930 SCARG(uap, tptr), UIO_USERSPACE); 2931 } 2932 2933 /* 2934 * Set the access and modification times given a file descriptor. 2935 */ 2936 /* ARGSUSED */ 2937 int 2938 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2939 register_t *retval) 2940 { 2941 /* { 2942 syscallarg(int) fd; 2943 syscallarg(const struct timeval *) tptr; 2944 } */ 2945 int error; 2946 file_t *fp; 2947 2948 /* fd_getvnode() will use the descriptor for us */ 2949 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2950 return (error); 2951 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2952 UIO_USERSPACE); 2953 fd_putfile(SCARG(uap, fd)); 2954 return (error); 2955 } 2956 2957 /* 2958 * Set the access and modification times given a path name; this 2959 * version does not follow links. 2960 */ 2961 int 2962 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2963 register_t *retval) 2964 { 2965 /* { 2966 syscallarg(const char *) path; 2967 syscallarg(const struct timeval *) tptr; 2968 } */ 2969 2970 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2971 SCARG(uap, tptr), UIO_USERSPACE); 2972 } 2973 2974 /* 2975 * Common routine to set access and modification times given a vnode. 2976 */ 2977 int 2978 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2979 const struct timeval *tptr, enum uio_seg seg) 2980 { 2981 struct vattr vattr; 2982 struct nameidata nd; 2983 int error; 2984 bool vanull, setbirthtime; 2985 struct timespec ts[2]; 2986 2987 if (tptr == NULL) { 2988 vanull = true; 2989 nanotime(&ts[0]); 2990 ts[1] = ts[0]; 2991 } else { 2992 struct timeval tv[2]; 2993 2994 vanull = false; 2995 if (seg != UIO_SYSSPACE) { 2996 error = copyin(tptr, tv, sizeof (tv)); 2997 if (error != 0) 2998 return error; 2999 tptr = tv; 3000 } 3001 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3002 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3003 } 3004 3005 if (vp == NULL) { 3006 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3007 if ((error = namei(&nd)) != 0) 3008 return error; 3009 vp = nd.ni_vp; 3010 } else 3011 nd.ni_vp = NULL; 3012 3013 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3014 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3015 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3016 VATTR_NULL(&vattr); 3017 vattr.va_atime = ts[0]; 3018 vattr.va_mtime = ts[1]; 3019 if (setbirthtime) 3020 vattr.va_birthtime = ts[1]; 3021 if (vanull) 3022 vattr.va_vaflags |= VA_UTIMES_NULL; 3023 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3024 VOP_UNLOCK(vp, 0); 3025 3026 if (nd.ni_vp != NULL) 3027 vrele(nd.ni_vp); 3028 3029 return error; 3030 } 3031 3032 /* 3033 * Truncate a file given its path name. 3034 */ 3035 /* ARGSUSED */ 3036 int 3037 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3038 { 3039 /* { 3040 syscallarg(const char *) path; 3041 syscallarg(int) pad; 3042 syscallarg(off_t) length; 3043 } */ 3044 struct vnode *vp; 3045 struct vattr vattr; 3046 int error; 3047 struct nameidata nd; 3048 3049 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3050 SCARG(uap, path)); 3051 if ((error = namei(&nd)) != 0) 3052 return (error); 3053 vp = nd.ni_vp; 3054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3055 if (vp->v_type == VDIR) 3056 error = EISDIR; 3057 else if ((error = vn_writechk(vp)) == 0 && 3058 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3059 VATTR_NULL(&vattr); 3060 vattr.va_size = SCARG(uap, length); 3061 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3062 } 3063 vput(vp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Truncate a file given a file descriptor. 3069 */ 3070 /* ARGSUSED */ 3071 int 3072 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3073 { 3074 /* { 3075 syscallarg(int) fd; 3076 syscallarg(int) pad; 3077 syscallarg(off_t) length; 3078 } */ 3079 struct vattr vattr; 3080 struct vnode *vp; 3081 file_t *fp; 3082 int error; 3083 3084 /* fd_getvnode() will use the descriptor for us */ 3085 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3086 return (error); 3087 if ((fp->f_flag & FWRITE) == 0) { 3088 error = EINVAL; 3089 goto out; 3090 } 3091 vp = fp->f_data; 3092 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3093 if (vp->v_type == VDIR) 3094 error = EISDIR; 3095 else if ((error = vn_writechk(vp)) == 0) { 3096 VATTR_NULL(&vattr); 3097 vattr.va_size = SCARG(uap, length); 3098 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3099 } 3100 VOP_UNLOCK(vp, 0); 3101 out: 3102 fd_putfile(SCARG(uap, fd)); 3103 return (error); 3104 } 3105 3106 /* 3107 * Sync an open file. 3108 */ 3109 /* ARGSUSED */ 3110 int 3111 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3112 { 3113 /* { 3114 syscallarg(int) fd; 3115 } */ 3116 struct vnode *vp; 3117 file_t *fp; 3118 int error; 3119 3120 /* fd_getvnode() will use the descriptor for us */ 3121 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3122 return (error); 3123 vp = fp->f_data; 3124 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3125 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3126 VOP_UNLOCK(vp, 0); 3127 fd_putfile(SCARG(uap, fd)); 3128 return (error); 3129 } 3130 3131 /* 3132 * Sync a range of file data. API modeled after that found in AIX. 3133 * 3134 * FDATASYNC indicates that we need only save enough metadata to be able 3135 * to re-read the written data. Note we duplicate AIX's requirement that 3136 * the file be open for writing. 3137 */ 3138 /* ARGSUSED */ 3139 int 3140 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3141 { 3142 /* { 3143 syscallarg(int) fd; 3144 syscallarg(int) flags; 3145 syscallarg(off_t) start; 3146 syscallarg(off_t) length; 3147 } */ 3148 struct vnode *vp; 3149 file_t *fp; 3150 int flags, nflags; 3151 off_t s, e, len; 3152 int error; 3153 3154 /* fd_getvnode() will use the descriptor for us */ 3155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3156 return (error); 3157 3158 if ((fp->f_flag & FWRITE) == 0) { 3159 error = EBADF; 3160 goto out; 3161 } 3162 3163 flags = SCARG(uap, flags); 3164 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3165 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3166 error = EINVAL; 3167 goto out; 3168 } 3169 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3170 if (flags & FDATASYNC) 3171 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3172 else 3173 nflags = FSYNC_WAIT; 3174 if (flags & FDISKSYNC) 3175 nflags |= FSYNC_CACHE; 3176 3177 len = SCARG(uap, length); 3178 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3179 if (len) { 3180 s = SCARG(uap, start); 3181 e = s + len; 3182 if (e < s) { 3183 error = EINVAL; 3184 goto out; 3185 } 3186 } else { 3187 e = 0; 3188 s = 0; 3189 } 3190 3191 vp = fp->f_data; 3192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3193 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3194 VOP_UNLOCK(vp, 0); 3195 out: 3196 fd_putfile(SCARG(uap, fd)); 3197 return (error); 3198 } 3199 3200 /* 3201 * Sync the data of an open file. 3202 */ 3203 /* ARGSUSED */ 3204 int 3205 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3206 { 3207 /* { 3208 syscallarg(int) fd; 3209 } */ 3210 struct vnode *vp; 3211 file_t *fp; 3212 int error; 3213 3214 /* fd_getvnode() will use the descriptor for us */ 3215 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3216 return (error); 3217 if ((fp->f_flag & FWRITE) == 0) { 3218 fd_putfile(SCARG(uap, fd)); 3219 return (EBADF); 3220 } 3221 vp = fp->f_data; 3222 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3223 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3224 VOP_UNLOCK(vp, 0); 3225 fd_putfile(SCARG(uap, fd)); 3226 return (error); 3227 } 3228 3229 /* 3230 * Rename files, (standard) BSD semantics frontend. 3231 */ 3232 /* ARGSUSED */ 3233 int 3234 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3235 { 3236 /* { 3237 syscallarg(const char *) from; 3238 syscallarg(const char *) to; 3239 } */ 3240 3241 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3242 } 3243 3244 /* 3245 * Rename files, POSIX semantics frontend. 3246 */ 3247 /* ARGSUSED */ 3248 int 3249 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3250 { 3251 /* { 3252 syscallarg(const char *) from; 3253 syscallarg(const char *) to; 3254 } */ 3255 3256 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3257 } 3258 3259 /* 3260 * Rename files. Source and destination must either both be directories, 3261 * or both not be directories. If target is a directory, it must be empty. 3262 * If `from' and `to' refer to the same object, the value of the `retain' 3263 * argument is used to determine whether `from' will be 3264 * 3265 * (retain == 0) deleted unless `from' and `to' refer to the same 3266 * object in the file system's name space (BSD). 3267 * (retain == 1) always retained (POSIX). 3268 */ 3269 int 3270 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3271 { 3272 struct vnode *tvp, *fvp, *tdvp; 3273 struct nameidata fromnd, tond; 3274 struct mount *fs; 3275 struct lwp *l = curlwp; 3276 struct proc *p; 3277 uint32_t saveflag; 3278 int error; 3279 3280 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3281 seg, from); 3282 if ((error = namei(&fromnd)) != 0) 3283 return (error); 3284 if (fromnd.ni_dvp != fromnd.ni_vp) 3285 VOP_UNLOCK(fromnd.ni_dvp, 0); 3286 fvp = fromnd.ni_vp; 3287 3288 fs = fvp->v_mount; 3289 error = VFS_RENAMELOCK_ENTER(fs); 3290 if (error) { 3291 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3292 vrele(fromnd.ni_dvp); 3293 vrele(fvp); 3294 goto out1; 3295 } 3296 3297 /* 3298 * close, partially, yet another race - ideally we should only 3299 * go as far as getting fromnd.ni_dvp before getting the per-fs 3300 * lock, and then continue to get fromnd.ni_vp, but we can't do 3301 * that with namei as it stands. 3302 * 3303 * This still won't prevent rmdir from nuking fromnd.ni_vp 3304 * under us. The real fix is to get the locks in the right 3305 * order and do the lookups in the right places, but that's a 3306 * major rototill. 3307 * 3308 * Preserve the SAVESTART in cn_flags, because who knows what 3309 * might happen if we don't. 3310 * 3311 * Note: this logic (as well as this whole function) is cloned 3312 * in nfs_serv.c. Proceed accordingly. 3313 */ 3314 vrele(fvp); 3315 if ((fromnd.ni_cnd.cn_namelen == 1 && 3316 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3317 (fromnd.ni_cnd.cn_namelen == 2 && 3318 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3319 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3320 error = EINVAL; 3321 VFS_RENAMELOCK_EXIT(fs); 3322 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3323 vrele(fromnd.ni_dvp); 3324 goto out1; 3325 } 3326 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3327 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3328 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3329 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3330 fromnd.ni_cnd.cn_flags |= saveflag; 3331 if (error) { 3332 VOP_UNLOCK(fromnd.ni_dvp, 0); 3333 VFS_RENAMELOCK_EXIT(fs); 3334 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3335 vrele(fromnd.ni_dvp); 3336 goto out1; 3337 } 3338 VOP_UNLOCK(fromnd.ni_vp, 0); 3339 if (fromnd.ni_dvp != fromnd.ni_vp) 3340 VOP_UNLOCK(fromnd.ni_dvp, 0); 3341 fvp = fromnd.ni_vp; 3342 3343 NDINIT(&tond, RENAME, 3344 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3345 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3346 seg, to); 3347 if ((error = namei(&tond)) != 0) { 3348 VFS_RENAMELOCK_EXIT(fs); 3349 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3350 vrele(fromnd.ni_dvp); 3351 vrele(fvp); 3352 goto out1; 3353 } 3354 tdvp = tond.ni_dvp; 3355 tvp = tond.ni_vp; 3356 3357 if (tvp != NULL) { 3358 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3359 error = ENOTDIR; 3360 goto out; 3361 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3362 error = EISDIR; 3363 goto out; 3364 } 3365 } 3366 3367 if (fvp == tdvp) 3368 error = EINVAL; 3369 3370 /* 3371 * Source and destination refer to the same object. 3372 */ 3373 if (fvp == tvp) { 3374 if (retain) 3375 error = -1; 3376 else if (fromnd.ni_dvp == tdvp && 3377 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3378 !memcmp(fromnd.ni_cnd.cn_nameptr, 3379 tond.ni_cnd.cn_nameptr, 3380 fromnd.ni_cnd.cn_namelen)) 3381 error = -1; 3382 } 3383 3384 #if NVERIEXEC > 0 3385 if (!error) { 3386 char *f1, *f2; 3387 size_t f1_len; 3388 size_t f2_len; 3389 3390 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3391 f1 = kmem_alloc(f1_len, KM_SLEEP); 3392 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3393 3394 f2_len = tond.ni_cnd.cn_namelen + 1; 3395 f2 = kmem_alloc(f2_len, KM_SLEEP); 3396 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3397 3398 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3399 3400 kmem_free(f1, f1_len); 3401 kmem_free(f2, f2_len); 3402 } 3403 #endif /* NVERIEXEC > 0 */ 3404 3405 out: 3406 p = l->l_proc; 3407 if (!error) { 3408 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3409 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3410 VFS_RENAMELOCK_EXIT(fs); 3411 } else { 3412 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3413 if (tdvp == tvp) 3414 vrele(tdvp); 3415 else 3416 vput(tdvp); 3417 if (tvp) 3418 vput(tvp); 3419 VFS_RENAMELOCK_EXIT(fs); 3420 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3421 vrele(fromnd.ni_dvp); 3422 vrele(fvp); 3423 } 3424 vrele(tond.ni_startdir); 3425 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3426 out1: 3427 if (fromnd.ni_startdir) 3428 vrele(fromnd.ni_startdir); 3429 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3430 return (error == -1 ? 0 : error); 3431 } 3432 3433 /* 3434 * Make a directory file. 3435 */ 3436 /* ARGSUSED */ 3437 int 3438 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3439 { 3440 /* { 3441 syscallarg(const char *) path; 3442 syscallarg(int) mode; 3443 } */ 3444 struct proc *p = l->l_proc; 3445 struct vnode *vp; 3446 struct vattr vattr; 3447 int error; 3448 struct nameidata nd; 3449 3450 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3451 SCARG(uap, path)); 3452 if ((error = namei(&nd)) != 0) 3453 return (error); 3454 vp = nd.ni_vp; 3455 if (vp != NULL) { 3456 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3457 if (nd.ni_dvp == vp) 3458 vrele(nd.ni_dvp); 3459 else 3460 vput(nd.ni_dvp); 3461 vrele(vp); 3462 return (EEXIST); 3463 } 3464 VATTR_NULL(&vattr); 3465 vattr.va_type = VDIR; 3466 /* We will read cwdi->cwdi_cmask unlocked. */ 3467 vattr.va_mode = 3468 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3469 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3470 if (!error) 3471 vput(nd.ni_vp); 3472 return (error); 3473 } 3474 3475 /* 3476 * Remove a directory file. 3477 */ 3478 /* ARGSUSED */ 3479 int 3480 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3481 { 3482 /* { 3483 syscallarg(const char *) path; 3484 } */ 3485 struct vnode *vp; 3486 int error; 3487 struct nameidata nd; 3488 3489 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3490 SCARG(uap, path)); 3491 if ((error = namei(&nd)) != 0) 3492 return (error); 3493 vp = nd.ni_vp; 3494 if (vp->v_type != VDIR) { 3495 error = ENOTDIR; 3496 goto out; 3497 } 3498 /* 3499 * No rmdir "." please. 3500 */ 3501 if (nd.ni_dvp == vp) { 3502 error = EINVAL; 3503 goto out; 3504 } 3505 /* 3506 * The root of a mounted filesystem cannot be deleted. 3507 */ 3508 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3509 error = EBUSY; 3510 goto out; 3511 } 3512 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3513 return (error); 3514 3515 out: 3516 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3517 if (nd.ni_dvp == vp) 3518 vrele(nd.ni_dvp); 3519 else 3520 vput(nd.ni_dvp); 3521 vput(vp); 3522 return (error); 3523 } 3524 3525 /* 3526 * Read a block of directory entries in a file system independent format. 3527 */ 3528 int 3529 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3530 { 3531 /* { 3532 syscallarg(int) fd; 3533 syscallarg(char *) buf; 3534 syscallarg(size_t) count; 3535 } */ 3536 file_t *fp; 3537 int error, done; 3538 3539 /* fd_getvnode() will use the descriptor for us */ 3540 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3541 return (error); 3542 if ((fp->f_flag & FREAD) == 0) { 3543 error = EBADF; 3544 goto out; 3545 } 3546 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3547 SCARG(uap, count), &done, l, 0, 0); 3548 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3549 *retval = done; 3550 out: 3551 fd_putfile(SCARG(uap, fd)); 3552 return (error); 3553 } 3554 3555 /* 3556 * Set the mode mask for creation of filesystem nodes. 3557 */ 3558 int 3559 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3560 { 3561 /* { 3562 syscallarg(mode_t) newmask; 3563 } */ 3564 struct proc *p = l->l_proc; 3565 struct cwdinfo *cwdi; 3566 3567 /* 3568 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3569 * important is that we serialize changes to the mask. The 3570 * rw_exit() will issue a write memory barrier on our behalf, 3571 * and force the changes out to other CPUs (as it must use an 3572 * atomic operation, draining the local CPU's store buffers). 3573 */ 3574 cwdi = p->p_cwdi; 3575 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3576 *retval = cwdi->cwdi_cmask; 3577 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3578 rw_exit(&cwdi->cwdi_lock); 3579 3580 return (0); 3581 } 3582 3583 int 3584 dorevoke(struct vnode *vp, kauth_cred_t cred) 3585 { 3586 struct vattr vattr; 3587 int error; 3588 3589 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3590 return error; 3591 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3592 (error = kauth_authorize_generic(cred, 3593 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3594 VOP_REVOKE(vp, REVOKEALL); 3595 return (error); 3596 } 3597 3598 /* 3599 * Void all references to file by ripping underlying filesystem 3600 * away from vnode. 3601 */ 3602 /* ARGSUSED */ 3603 int 3604 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3605 { 3606 /* { 3607 syscallarg(const char *) path; 3608 } */ 3609 struct vnode *vp; 3610 int error; 3611 struct nameidata nd; 3612 3613 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3614 SCARG(uap, path)); 3615 if ((error = namei(&nd)) != 0) 3616 return (error); 3617 vp = nd.ni_vp; 3618 error = dorevoke(vp, l->l_cred); 3619 vrele(vp); 3620 return (error); 3621 } 3622