1 /* $NetBSD: vfs_syscalls.c,v 1.403 2010/01/15 01:00:46 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.403 2010/01/15 01:00:46 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0) { 173 error = EOPNOTSUPP; /* Needs translation */ 174 goto out; 175 } 176 177 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 179 if (error) 180 goto out; 181 182 if (vfs_busy(mp, NULL)) { 183 error = EPERM; 184 goto out; 185 } 186 187 mutex_enter(&mp->mnt_updating); 188 189 mp->mnt_flag &= ~MNT_OP_FLAGS; 190 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 191 192 /* 193 * Set the mount level flags. 194 */ 195 if (flags & MNT_RDONLY) 196 mp->mnt_flag |= MNT_RDONLY; 197 else if (mp->mnt_flag & MNT_RDONLY) 198 mp->mnt_iflag |= IMNT_WANTRDWR; 199 mp->mnt_flag &= 200 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 201 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 202 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 203 MNT_LOG); 204 mp->mnt_flag |= flags & 205 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 206 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 207 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 208 MNT_LOG | MNT_IGNORE); 209 210 error = VFS_MOUNT(mp, path, data, data_len); 211 212 if (error && data != NULL) { 213 int error2; 214 215 /* 216 * Update failed; let's try and see if it was an 217 * export request. For compat with 3.0 and earlier. 218 */ 219 error2 = vfs_hooks_reexport(mp, path, data); 220 221 /* 222 * Only update error code if the export request was 223 * understood but some problem occurred while 224 * processing it. 225 */ 226 if (error2 != EJUSTRETURN) 227 error = error2; 228 } 229 230 if (mp->mnt_iflag & IMNT_WANTRDWR) 231 mp->mnt_flag &= ~MNT_RDONLY; 232 if (error) 233 mp->mnt_flag = saved_flags; 234 mp->mnt_flag &= ~MNT_OP_FLAGS; 235 mp->mnt_iflag &= ~IMNT_WANTRDWR; 236 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 237 if (mp->mnt_syncer == NULL) 238 error = vfs_allocate_syncvnode(mp); 239 } else { 240 if (mp->mnt_syncer != NULL) 241 vfs_deallocate_syncvnode(mp); 242 } 243 mutex_exit(&mp->mnt_updating); 244 vfs_unbusy(mp, false, NULL); 245 246 out: 247 return (error); 248 } 249 250 static int 251 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 252 { 253 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 254 int error; 255 256 /* Copy file-system type from userspace. */ 257 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 258 if (error) { 259 /* 260 * Historically, filesystem types were identified by numbers. 261 * If we get an integer for the filesystem type instead of a 262 * string, we check to see if it matches one of the historic 263 * filesystem types. 264 */ 265 u_long fsindex = (u_long)fstype; 266 if (fsindex >= nmountcompatnames || 267 mountcompatnames[fsindex] == NULL) 268 return ENODEV; 269 strlcpy(fstypename, mountcompatnames[fsindex], 270 sizeof(fstypename)); 271 } 272 273 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 274 if (strcmp(fstypename, "ufs") == 0) 275 fstypename[0] = 'f'; 276 277 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 278 return 0; 279 280 /* If we can autoload a vfs module, try again */ 281 mutex_enter(&module_lock); 282 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 283 mutex_exit(&module_lock); 284 285 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 286 return 0; 287 288 return ENODEV; 289 } 290 291 static int 292 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 293 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 294 { 295 struct mount *mp; 296 struct vnode *vp = *vpp; 297 struct vattr va; 298 int error; 299 300 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 301 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 302 if (error) { 303 vfs_delref(vfsops); 304 return error; 305 } 306 307 /* Can't make a non-dir a mount-point (from here anyway). */ 308 if (vp->v_type != VDIR) { 309 vfs_delref(vfsops); 310 return ENOTDIR; 311 } 312 313 /* 314 * If the user is not root, ensure that they own the directory 315 * onto which we are attempting to mount. 316 */ 317 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 318 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 319 (error = kauth_authorize_generic(l->l_cred, 320 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 321 vfs_delref(vfsops); 322 return error; 323 } 324 325 if (flags & MNT_EXPORTED) { 326 vfs_delref(vfsops); 327 return EINVAL; 328 } 329 330 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) { 331 vfs_delref(vfsops); 332 return error; 333 } 334 335 /* 336 * Check if a file-system is not already mounted on this vnode. 337 */ 338 if (vp->v_mountedhere != NULL) { 339 vfs_delref(vfsops); 340 return EBUSY; 341 } 342 343 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 344 vfs_delref(vfsops); 345 return ENOMEM; 346 } 347 348 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 349 350 /* 351 * The underlying file system may refuse the mount for 352 * various reasons. Allow the user to force it to happen. 353 * 354 * Set the mount level flags. 355 */ 356 mp->mnt_flag = flags & 357 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 358 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 359 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 360 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 361 362 mutex_enter(&mp->mnt_updating); 363 error = VFS_MOUNT(mp, path, data, data_len); 364 mp->mnt_flag &= ~MNT_OP_FLAGS; 365 366 /* 367 * Put the new filesystem on the mount list after root. 368 */ 369 cache_purge(vp); 370 if (error != 0) { 371 vp->v_mountedhere = NULL; 372 mutex_exit(&mp->mnt_updating); 373 vfs_unbusy(mp, false, NULL); 374 vfs_destroy(mp); 375 return error; 376 } 377 378 mp->mnt_iflag &= ~IMNT_WANTRDWR; 379 mutex_enter(&mountlist_lock); 380 vp->v_mountedhere = mp; 381 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 382 mutex_exit(&mountlist_lock); 383 vn_restorerecurse(vp, recurse); 384 VOP_UNLOCK(vp, 0); 385 checkdirs(vp); 386 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 387 error = vfs_allocate_syncvnode(mp); 388 /* Hold an additional reference to the mount across VFS_START(). */ 389 mutex_exit(&mp->mnt_updating); 390 vfs_unbusy(mp, true, NULL); 391 (void) VFS_STATVFS(mp, &mp->mnt_stat); 392 error = VFS_START(mp, 0); 393 if (error) 394 vrele(vp); 395 /* Drop reference held for VFS_START(). */ 396 vfs_destroy(mp); 397 *vpp = NULL; 398 return error; 399 } 400 401 static int 402 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 403 void *data, size_t *data_len) 404 { 405 struct mount *mp; 406 int error; 407 408 /* If MNT_GETARGS is specified, it should be the only flag. */ 409 if (flags & ~MNT_GETARGS) 410 return EINVAL; 411 412 mp = vp->v_mount; 413 414 /* XXX: probably some notion of "can see" here if we want isolation. */ 415 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 416 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 417 if (error) 418 return error; 419 420 if ((vp->v_vflag & VV_ROOT) == 0) 421 return EINVAL; 422 423 if (vfs_busy(mp, NULL)) 424 return EPERM; 425 426 mutex_enter(&mp->mnt_updating); 427 mp->mnt_flag &= ~MNT_OP_FLAGS; 428 mp->mnt_flag |= MNT_GETARGS; 429 error = VFS_MOUNT(mp, path, data, data_len); 430 mp->mnt_flag &= ~MNT_OP_FLAGS; 431 mutex_exit(&mp->mnt_updating); 432 433 vfs_unbusy(mp, false, NULL); 434 return (error); 435 } 436 437 int 438 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 439 { 440 /* { 441 syscallarg(const char *) type; 442 syscallarg(const char *) path; 443 syscallarg(int) flags; 444 syscallarg(void *) data; 445 syscallarg(size_t) data_len; 446 } */ 447 448 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 449 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 450 SCARG(uap, data_len), retval); 451 } 452 453 int 454 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 455 const char *path, int flags, void *data, enum uio_seg data_seg, 456 size_t data_len, register_t *retval) 457 { 458 struct vnode *vp; 459 void *data_buf = data; 460 u_int recurse; 461 bool vfsopsrele = false; 462 int error; 463 464 /* XXX: The calling convention of this routine is totally bizarre */ 465 if (vfsops) 466 vfsopsrele = true; 467 468 /* 469 * Get vnode to be covered 470 */ 471 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 472 if (error != 0) { 473 /* XXXgcc */ 474 vp = NULL; 475 recurse = 0; 476 goto done; 477 } 478 479 /* 480 * A lookup in VFS_MOUNT might result in an attempt to 481 * lock this vnode again, so make the lock recursive. 482 */ 483 if (vfsops == NULL) { 484 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 485 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 486 recurse = vn_setrecurse(vp); 487 vfsops = vp->v_mount->mnt_op; 488 } else { 489 /* 'type' is userspace */ 490 error = mount_get_vfsops(type, &vfsops); 491 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 492 recurse = vn_setrecurse(vp); 493 if (error != 0) 494 goto done; 495 vfsopsrele = true; 496 } 497 } else { 498 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 499 recurse = vn_setrecurse(vp); 500 } 501 502 if (data != NULL && data_seg == UIO_USERSPACE) { 503 if (data_len == 0) { 504 /* No length supplied, use default for filesystem */ 505 data_len = vfsops->vfs_min_mount_data; 506 if (data_len > VFS_MAX_MOUNT_DATA) { 507 error = EINVAL; 508 goto done; 509 } 510 /* 511 * Hopefully a longer buffer won't make copyin() fail. 512 * For compatibility with 3.0 and earlier. 513 */ 514 if (flags & MNT_UPDATE 515 && data_len < sizeof (struct mnt_export_args30)) 516 data_len = sizeof (struct mnt_export_args30); 517 } 518 data_buf = kmem_alloc(data_len, KM_SLEEP); 519 520 /* NFS needs the buffer even for mnt_getargs .... */ 521 error = copyin(data, data_buf, data_len); 522 if (error != 0) 523 goto done; 524 } 525 526 if (flags & MNT_GETARGS) { 527 if (data_len == 0) { 528 error = EINVAL; 529 goto done; 530 } 531 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 532 if (error != 0) 533 goto done; 534 if (data_seg == UIO_USERSPACE) 535 error = copyout(data_buf, data, data_len); 536 *retval = data_len; 537 } else if (flags & MNT_UPDATE) { 538 error = mount_update(l, vp, path, flags, data_buf, &data_len); 539 } else { 540 /* Locking is handled internally in mount_domount(). */ 541 KASSERT(vfsopsrele == true); 542 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 543 &data_len, recurse); 544 vfsopsrele = false; 545 } 546 547 done: 548 if (vfsopsrele) 549 vfs_delref(vfsops); 550 if (vp != NULL) { 551 vn_restorerecurse(vp, recurse); 552 vput(vp); 553 } 554 if (data_buf != data) 555 kmem_free(data_buf, data_len); 556 return (error); 557 } 558 559 /* 560 * Scan all active processes to see if any of them have a current 561 * or root directory onto which the new filesystem has just been 562 * mounted. If so, replace them with the new mount point. 563 */ 564 void 565 checkdirs(struct vnode *olddp) 566 { 567 struct cwdinfo *cwdi; 568 struct vnode *newdp, *rele1, *rele2; 569 struct proc *p; 570 bool retry; 571 572 if (olddp->v_usecount == 1) 573 return; 574 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 575 panic("mount: lost mount"); 576 577 do { 578 retry = false; 579 mutex_enter(proc_lock); 580 PROCLIST_FOREACH(p, &allproc) { 581 if ((p->p_flag & PK_MARKER) != 0) 582 continue; 583 if ((cwdi = p->p_cwdi) == NULL) 584 continue; 585 /* 586 * Can't change to the old directory any more, 587 * so even if we see a stale value it's not a 588 * problem. 589 */ 590 if (cwdi->cwdi_cdir != olddp && 591 cwdi->cwdi_rdir != olddp) 592 continue; 593 retry = true; 594 rele1 = NULL; 595 rele2 = NULL; 596 atomic_inc_uint(&cwdi->cwdi_refcnt); 597 mutex_exit(proc_lock); 598 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 599 if (cwdi->cwdi_cdir == olddp) { 600 rele1 = cwdi->cwdi_cdir; 601 vref(newdp); 602 cwdi->cwdi_cdir = newdp; 603 } 604 if (cwdi->cwdi_rdir == olddp) { 605 rele2 = cwdi->cwdi_rdir; 606 vref(newdp); 607 cwdi->cwdi_rdir = newdp; 608 } 609 rw_exit(&cwdi->cwdi_lock); 610 cwdfree(cwdi); 611 if (rele1 != NULL) 612 vrele(rele1); 613 if (rele2 != NULL) 614 vrele(rele2); 615 mutex_enter(proc_lock); 616 break; 617 } 618 mutex_exit(proc_lock); 619 } while (retry); 620 621 if (rootvnode == olddp) { 622 vrele(rootvnode); 623 vref(newdp); 624 rootvnode = newdp; 625 } 626 vput(newdp); 627 } 628 629 /* 630 * Unmount a file system. 631 * 632 * Note: unmount takes a path to the vnode mounted on as argument, 633 * not special file (as before). 634 */ 635 /* ARGSUSED */ 636 int 637 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 638 { 639 /* { 640 syscallarg(const char *) path; 641 syscallarg(int) flags; 642 } */ 643 struct vnode *vp; 644 struct mount *mp; 645 int error; 646 struct nameidata nd; 647 648 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 649 SCARG(uap, path)); 650 if ((error = namei(&nd)) != 0) 651 return (error); 652 vp = nd.ni_vp; 653 mp = vp->v_mount; 654 atomic_inc_uint(&mp->mnt_refcnt); 655 VOP_UNLOCK(vp, 0); 656 657 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 658 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 659 if (error) { 660 vrele(vp); 661 vfs_destroy(mp); 662 return (error); 663 } 664 665 /* 666 * Don't allow unmounting the root file system. 667 */ 668 if (mp->mnt_flag & MNT_ROOTFS) { 669 vrele(vp); 670 vfs_destroy(mp); 671 return (EINVAL); 672 } 673 674 /* 675 * Must be the root of the filesystem 676 */ 677 if ((vp->v_vflag & VV_ROOT) == 0) { 678 vrele(vp); 679 vfs_destroy(mp); 680 return (EINVAL); 681 } 682 683 vrele(vp); 684 error = dounmount(mp, SCARG(uap, flags), l); 685 vfs_destroy(mp); 686 return error; 687 } 688 689 /* 690 * Do the actual file system unmount. File system is assumed to have 691 * been locked by the caller. 692 * 693 * => Caller hold reference to the mount, explicitly for dounmount(). 694 */ 695 int 696 dounmount(struct mount *mp, int flags, struct lwp *l) 697 { 698 struct vnode *coveredvp; 699 int error; 700 int async; 701 int used_syncer; 702 703 #if NVERIEXEC > 0 704 error = veriexec_unmountchk(mp); 705 if (error) 706 return (error); 707 #endif /* NVERIEXEC > 0 */ 708 709 /* 710 * XXX Freeze syncer. Must do this before locking the 711 * mount point. See dounmount() for details. 712 */ 713 mutex_enter(&syncer_mutex); 714 rw_enter(&mp->mnt_unmounting, RW_WRITER); 715 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 716 rw_exit(&mp->mnt_unmounting); 717 mutex_exit(&syncer_mutex); 718 return ENOENT; 719 } 720 721 used_syncer = (mp->mnt_syncer != NULL); 722 723 /* 724 * XXX Syncer must be frozen when we get here. This should really 725 * be done on a per-mountpoint basis, but the syncer doesn't work 726 * like that. 727 * 728 * The caller of dounmount() must acquire syncer_mutex because 729 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 730 * order, and we must preserve that order to avoid deadlock. 731 * 732 * So, if the file system did not use the syncer, now is 733 * the time to release the syncer_mutex. 734 */ 735 if (used_syncer == 0) 736 mutex_exit(&syncer_mutex); 737 738 mp->mnt_iflag |= IMNT_UNMOUNT; 739 async = mp->mnt_flag & MNT_ASYNC; 740 mp->mnt_flag &= ~MNT_ASYNC; 741 cache_purgevfs(mp); /* remove cache entries for this file sys */ 742 if (mp->mnt_syncer != NULL) 743 vfs_deallocate_syncvnode(mp); 744 error = 0; 745 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 746 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 747 } 748 vfs_scrubvnlist(mp); 749 if (error == 0 || (flags & MNT_FORCE)) 750 error = VFS_UNMOUNT(mp, flags); 751 if (error) { 752 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 753 (void) vfs_allocate_syncvnode(mp); 754 mp->mnt_iflag &= ~IMNT_UNMOUNT; 755 mp->mnt_flag |= async; 756 rw_exit(&mp->mnt_unmounting); 757 if (used_syncer) 758 mutex_exit(&syncer_mutex); 759 return (error); 760 } 761 vfs_scrubvnlist(mp); 762 mutex_enter(&mountlist_lock); 763 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 764 coveredvp->v_mountedhere = NULL; 765 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 766 mp->mnt_iflag |= IMNT_GONE; 767 mutex_exit(&mountlist_lock); 768 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 769 panic("unmount: dangling vnode"); 770 if (used_syncer) 771 mutex_exit(&syncer_mutex); 772 vfs_hooks_unmount(mp); 773 rw_exit(&mp->mnt_unmounting); 774 vfs_destroy(mp); /* reference from mount() */ 775 if (coveredvp != NULLVP) 776 vrele(coveredvp); 777 return (0); 778 } 779 780 /* 781 * Sync each mounted filesystem. 782 */ 783 #ifdef DEBUG 784 int syncprt = 0; 785 struct ctldebug debug0 = { "syncprt", &syncprt }; 786 #endif 787 788 /* ARGSUSED */ 789 int 790 sys_sync(struct lwp *l, const void *v, register_t *retval) 791 { 792 struct mount *mp, *nmp; 793 int asyncflag; 794 795 if (l == NULL) 796 l = &lwp0; 797 798 mutex_enter(&mountlist_lock); 799 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 800 mp = nmp) { 801 if (vfs_busy(mp, &nmp)) { 802 continue; 803 } 804 mutex_enter(&mp->mnt_updating); 805 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 806 asyncflag = mp->mnt_flag & MNT_ASYNC; 807 mp->mnt_flag &= ~MNT_ASYNC; 808 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 809 if (asyncflag) 810 mp->mnt_flag |= MNT_ASYNC; 811 } 812 mutex_exit(&mp->mnt_updating); 813 vfs_unbusy(mp, false, &nmp); 814 } 815 mutex_exit(&mountlist_lock); 816 #ifdef DEBUG 817 if (syncprt) 818 vfs_bufstats(); 819 #endif /* DEBUG */ 820 return (0); 821 } 822 823 /* 824 * Change filesystem quotas. 825 */ 826 /* ARGSUSED */ 827 int 828 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 829 { 830 /* { 831 syscallarg(const char *) path; 832 syscallarg(int) cmd; 833 syscallarg(int) uid; 834 syscallarg(void *) arg; 835 } */ 836 struct mount *mp; 837 int error; 838 struct vnode *vp; 839 840 error = namei_simple_user(SCARG(uap, path), 841 NSM_FOLLOW_TRYEMULROOT, &vp); 842 if (error != 0) 843 return (error); 844 mp = vp->v_mount; 845 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 846 SCARG(uap, arg)); 847 vrele(vp); 848 return (error); 849 } 850 851 int 852 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 853 int root) 854 { 855 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 856 int error = 0; 857 858 /* 859 * If MNT_NOWAIT or MNT_LAZY is specified, do not 860 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 861 * overrides MNT_NOWAIT. 862 */ 863 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 864 (flags != MNT_WAIT && flags != 0)) { 865 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 866 goto done; 867 } 868 869 /* Get the filesystem stats now */ 870 memset(sp, 0, sizeof(*sp)); 871 if ((error = VFS_STATVFS(mp, sp)) != 0) { 872 return error; 873 } 874 875 if (cwdi->cwdi_rdir == NULL) 876 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 877 done: 878 if (cwdi->cwdi_rdir != NULL) { 879 size_t len; 880 char *bp; 881 char c; 882 char *path = PNBUF_GET(); 883 884 bp = path + MAXPATHLEN; 885 *--bp = '\0'; 886 rw_enter(&cwdi->cwdi_lock, RW_READER); 887 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 888 MAXPATHLEN / 2, 0, l); 889 rw_exit(&cwdi->cwdi_lock); 890 if (error) { 891 PNBUF_PUT(path); 892 return error; 893 } 894 len = strlen(bp); 895 if (len != 1) { 896 /* 897 * for mount points that are below our root, we can see 898 * them, so we fix up the pathname and return them. The 899 * rest we cannot see, so we don't allow viewing the 900 * data. 901 */ 902 if (strncmp(bp, sp->f_mntonname, len) == 0 && 903 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 904 (void)strlcpy(sp->f_mntonname, 905 c == '\0' ? "/" : &sp->f_mntonname[len], 906 sizeof(sp->f_mntonname)); 907 } else { 908 if (root) 909 (void)strlcpy(sp->f_mntonname, "/", 910 sizeof(sp->f_mntonname)); 911 else 912 error = EPERM; 913 } 914 } 915 PNBUF_PUT(path); 916 } 917 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 918 return error; 919 } 920 921 /* 922 * Get filesystem statistics by path. 923 */ 924 int 925 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 926 { 927 struct mount *mp; 928 int error; 929 struct vnode *vp; 930 931 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 932 if (error != 0) 933 return error; 934 mp = vp->v_mount; 935 error = dostatvfs(mp, sb, l, flags, 1); 936 vrele(vp); 937 return error; 938 } 939 940 /* ARGSUSED */ 941 int 942 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 943 { 944 /* { 945 syscallarg(const char *) path; 946 syscallarg(struct statvfs *) buf; 947 syscallarg(int) flags; 948 } */ 949 struct statvfs *sb; 950 int error; 951 952 sb = STATVFSBUF_GET(); 953 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 954 if (error == 0) 955 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 956 STATVFSBUF_PUT(sb); 957 return error; 958 } 959 960 /* 961 * Get filesystem statistics by fd. 962 */ 963 int 964 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 965 { 966 file_t *fp; 967 struct mount *mp; 968 int error; 969 970 /* fd_getvnode() will use the descriptor for us */ 971 if ((error = fd_getvnode(fd, &fp)) != 0) 972 return (error); 973 mp = ((struct vnode *)fp->f_data)->v_mount; 974 error = dostatvfs(mp, sb, curlwp, flags, 1); 975 fd_putfile(fd); 976 return error; 977 } 978 979 /* ARGSUSED */ 980 int 981 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 982 { 983 /* { 984 syscallarg(int) fd; 985 syscallarg(struct statvfs *) buf; 986 syscallarg(int) flags; 987 } */ 988 struct statvfs *sb; 989 int error; 990 991 sb = STATVFSBUF_GET(); 992 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 993 if (error == 0) 994 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 995 STATVFSBUF_PUT(sb); 996 return error; 997 } 998 999 1000 /* 1001 * Get statistics on all filesystems. 1002 */ 1003 int 1004 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1005 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1006 register_t *retval) 1007 { 1008 int root = 0; 1009 struct proc *p = l->l_proc; 1010 struct mount *mp, *nmp; 1011 struct statvfs *sb; 1012 size_t count, maxcount; 1013 int error = 0; 1014 1015 sb = STATVFSBUF_GET(); 1016 maxcount = bufsize / entry_sz; 1017 mutex_enter(&mountlist_lock); 1018 count = 0; 1019 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1020 mp = nmp) { 1021 if (vfs_busy(mp, &nmp)) { 1022 continue; 1023 } 1024 if (sfsp && count < maxcount) { 1025 error = dostatvfs(mp, sb, l, flags, 0); 1026 if (error) { 1027 vfs_unbusy(mp, false, &nmp); 1028 error = 0; 1029 continue; 1030 } 1031 error = copyfn(sb, sfsp, entry_sz); 1032 if (error) { 1033 vfs_unbusy(mp, false, NULL); 1034 goto out; 1035 } 1036 sfsp = (char *)sfsp + entry_sz; 1037 root |= strcmp(sb->f_mntonname, "/") == 0; 1038 } 1039 count++; 1040 vfs_unbusy(mp, false, &nmp); 1041 } 1042 mutex_exit(&mountlist_lock); 1043 1044 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1045 /* 1046 * fake a root entry 1047 */ 1048 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1049 sb, l, flags, 1); 1050 if (error != 0) 1051 goto out; 1052 if (sfsp) { 1053 error = copyfn(sb, sfsp, entry_sz); 1054 if (error != 0) 1055 goto out; 1056 } 1057 count++; 1058 } 1059 if (sfsp && count > maxcount) 1060 *retval = maxcount; 1061 else 1062 *retval = count; 1063 out: 1064 STATVFSBUF_PUT(sb); 1065 return error; 1066 } 1067 1068 int 1069 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1070 { 1071 /* { 1072 syscallarg(struct statvfs *) buf; 1073 syscallarg(size_t) bufsize; 1074 syscallarg(int) flags; 1075 } */ 1076 1077 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1078 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1079 } 1080 1081 /* 1082 * Change current working directory to a given file descriptor. 1083 */ 1084 /* ARGSUSED */ 1085 int 1086 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1087 { 1088 /* { 1089 syscallarg(int) fd; 1090 } */ 1091 struct proc *p = l->l_proc; 1092 struct cwdinfo *cwdi; 1093 struct vnode *vp, *tdp; 1094 struct mount *mp; 1095 file_t *fp; 1096 int error, fd; 1097 1098 /* fd_getvnode() will use the descriptor for us */ 1099 fd = SCARG(uap, fd); 1100 if ((error = fd_getvnode(fd, &fp)) != 0) 1101 return (error); 1102 vp = fp->f_data; 1103 1104 vref(vp); 1105 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1106 if (vp->v_type != VDIR) 1107 error = ENOTDIR; 1108 else 1109 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1110 if (error) { 1111 vput(vp); 1112 goto out; 1113 } 1114 while ((mp = vp->v_mountedhere) != NULL) { 1115 error = vfs_busy(mp, NULL); 1116 vput(vp); 1117 if (error != 0) 1118 goto out; 1119 error = VFS_ROOT(mp, &tdp); 1120 vfs_unbusy(mp, false, NULL); 1121 if (error) 1122 goto out; 1123 vp = tdp; 1124 } 1125 VOP_UNLOCK(vp, 0); 1126 1127 /* 1128 * Disallow changing to a directory not under the process's 1129 * current root directory (if there is one). 1130 */ 1131 cwdi = p->p_cwdi; 1132 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1133 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1134 vrele(vp); 1135 error = EPERM; /* operation not permitted */ 1136 } else { 1137 vrele(cwdi->cwdi_cdir); 1138 cwdi->cwdi_cdir = vp; 1139 } 1140 rw_exit(&cwdi->cwdi_lock); 1141 1142 out: 1143 fd_putfile(fd); 1144 return (error); 1145 } 1146 1147 /* 1148 * Change this process's notion of the root directory to a given file 1149 * descriptor. 1150 */ 1151 int 1152 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1153 { 1154 struct proc *p = l->l_proc; 1155 struct vnode *vp; 1156 file_t *fp; 1157 int error, fd = SCARG(uap, fd); 1158 1159 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1160 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1161 return error; 1162 /* fd_getvnode() will use the descriptor for us */ 1163 if ((error = fd_getvnode(fd, &fp)) != 0) 1164 return error; 1165 vp = fp->f_data; 1166 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1167 if (vp->v_type != VDIR) 1168 error = ENOTDIR; 1169 else 1170 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1171 VOP_UNLOCK(vp, 0); 1172 if (error) 1173 goto out; 1174 vref(vp); 1175 1176 change_root(p->p_cwdi, vp, l); 1177 1178 out: 1179 fd_putfile(fd); 1180 return (error); 1181 } 1182 1183 /* 1184 * Change current working directory (``.''). 1185 */ 1186 /* ARGSUSED */ 1187 int 1188 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1189 { 1190 /* { 1191 syscallarg(const char *) path; 1192 } */ 1193 struct proc *p = l->l_proc; 1194 struct cwdinfo *cwdi; 1195 int error; 1196 struct vnode *vp; 1197 1198 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1199 &vp, l)) != 0) 1200 return (error); 1201 cwdi = p->p_cwdi; 1202 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1203 vrele(cwdi->cwdi_cdir); 1204 cwdi->cwdi_cdir = vp; 1205 rw_exit(&cwdi->cwdi_lock); 1206 return (0); 1207 } 1208 1209 /* 1210 * Change notion of root (``/'') directory. 1211 */ 1212 /* ARGSUSED */ 1213 int 1214 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1215 { 1216 /* { 1217 syscallarg(const char *) path; 1218 } */ 1219 struct proc *p = l->l_proc; 1220 int error; 1221 struct vnode *vp; 1222 1223 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1224 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1225 return (error); 1226 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1227 &vp, l)) != 0) 1228 return (error); 1229 1230 change_root(p->p_cwdi, vp, l); 1231 1232 return (0); 1233 } 1234 1235 /* 1236 * Common routine for chroot and fchroot. 1237 * NB: callers need to properly authorize the change root operation. 1238 */ 1239 void 1240 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1241 { 1242 1243 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1244 if (cwdi->cwdi_rdir != NULL) 1245 vrele(cwdi->cwdi_rdir); 1246 cwdi->cwdi_rdir = vp; 1247 1248 /* 1249 * Prevent escaping from chroot by putting the root under 1250 * the working directory. Silently chdir to / if we aren't 1251 * already there. 1252 */ 1253 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1254 /* 1255 * XXX would be more failsafe to change directory to a 1256 * deadfs node here instead 1257 */ 1258 vrele(cwdi->cwdi_cdir); 1259 vref(vp); 1260 cwdi->cwdi_cdir = vp; 1261 } 1262 rw_exit(&cwdi->cwdi_lock); 1263 } 1264 1265 /* 1266 * Common routine for chroot and chdir. 1267 */ 1268 int 1269 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1270 { 1271 struct nameidata nd; 1272 int error; 1273 1274 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, where, 1275 path); 1276 if ((error = namei(&nd)) != 0) 1277 return (error); 1278 *vpp = nd.ni_vp; 1279 if ((*vpp)->v_type != VDIR) 1280 error = ENOTDIR; 1281 else 1282 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1283 1284 if (error) 1285 vput(*vpp); 1286 else 1287 VOP_UNLOCK(*vpp, 0); 1288 return (error); 1289 } 1290 1291 /* 1292 * Check permissions, allocate an open file structure, 1293 * and call the device open routine if any. 1294 */ 1295 int 1296 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1297 { 1298 /* { 1299 syscallarg(const char *) path; 1300 syscallarg(int) flags; 1301 syscallarg(int) mode; 1302 } */ 1303 struct proc *p = l->l_proc; 1304 struct cwdinfo *cwdi = p->p_cwdi; 1305 file_t *fp; 1306 struct vnode *vp; 1307 int flags, cmode; 1308 int type, indx, error; 1309 struct flock lf; 1310 struct nameidata nd; 1311 1312 flags = FFLAGS(SCARG(uap, flags)); 1313 if ((flags & (FREAD | FWRITE)) == 0) 1314 return (EINVAL); 1315 if ((error = fd_allocfile(&fp, &indx)) != 0) 1316 return (error); 1317 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1318 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1319 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1320 SCARG(uap, path)); 1321 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1322 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1323 fd_abort(p, fp, indx); 1324 if ((error == EDUPFD || error == EMOVEFD) && 1325 l->l_dupfd >= 0 && /* XXX from fdopen */ 1326 (error = 1327 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1328 *retval = indx; 1329 return (0); 1330 } 1331 if (error == ERESTART) 1332 error = EINTR; 1333 return (error); 1334 } 1335 1336 l->l_dupfd = 0; 1337 vp = nd.ni_vp; 1338 fp->f_flag = flags & FMASK; 1339 fp->f_type = DTYPE_VNODE; 1340 fp->f_ops = &vnops; 1341 fp->f_data = vp; 1342 if (flags & (O_EXLOCK | O_SHLOCK)) { 1343 lf.l_whence = SEEK_SET; 1344 lf.l_start = 0; 1345 lf.l_len = 0; 1346 if (flags & O_EXLOCK) 1347 lf.l_type = F_WRLCK; 1348 else 1349 lf.l_type = F_RDLCK; 1350 type = F_FLOCK; 1351 if ((flags & FNONBLOCK) == 0) 1352 type |= F_WAIT; 1353 VOP_UNLOCK(vp, 0); 1354 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1355 if (error) { 1356 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1357 fd_abort(p, fp, indx); 1358 return (error); 1359 } 1360 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1361 atomic_or_uint(&fp->f_flag, FHASLOCK); 1362 } 1363 VOP_UNLOCK(vp, 0); 1364 *retval = indx; 1365 fd_affix(p, fp, indx); 1366 return (0); 1367 } 1368 1369 static void 1370 vfs__fhfree(fhandle_t *fhp) 1371 { 1372 size_t fhsize; 1373 1374 if (fhp == NULL) { 1375 return; 1376 } 1377 fhsize = FHANDLE_SIZE(fhp); 1378 kmem_free(fhp, fhsize); 1379 } 1380 1381 /* 1382 * vfs_composefh: compose a filehandle. 1383 */ 1384 1385 int 1386 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1387 { 1388 struct mount *mp; 1389 struct fid *fidp; 1390 int error; 1391 size_t needfhsize; 1392 size_t fidsize; 1393 1394 mp = vp->v_mount; 1395 fidp = NULL; 1396 if (*fh_size < FHANDLE_SIZE_MIN) { 1397 fidsize = 0; 1398 } else { 1399 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1400 if (fhp != NULL) { 1401 memset(fhp, 0, *fh_size); 1402 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1403 fidp = &fhp->fh_fid; 1404 } 1405 } 1406 error = VFS_VPTOFH(vp, fidp, &fidsize); 1407 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1408 if (error == 0 && *fh_size < needfhsize) { 1409 error = E2BIG; 1410 } 1411 *fh_size = needfhsize; 1412 return error; 1413 } 1414 1415 int 1416 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1417 { 1418 struct mount *mp; 1419 fhandle_t *fhp; 1420 size_t fhsize; 1421 size_t fidsize; 1422 int error; 1423 1424 *fhpp = NULL; 1425 mp = vp->v_mount; 1426 fidsize = 0; 1427 error = VFS_VPTOFH(vp, NULL, &fidsize); 1428 KASSERT(error != 0); 1429 if (error != E2BIG) { 1430 goto out; 1431 } 1432 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1433 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1434 if (fhp == NULL) { 1435 error = ENOMEM; 1436 goto out; 1437 } 1438 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1439 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1440 if (error == 0) { 1441 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1442 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1443 *fhpp = fhp; 1444 } else { 1445 kmem_free(fhp, fhsize); 1446 } 1447 out: 1448 return error; 1449 } 1450 1451 void 1452 vfs_composefh_free(fhandle_t *fhp) 1453 { 1454 1455 vfs__fhfree(fhp); 1456 } 1457 1458 /* 1459 * vfs_fhtovp: lookup a vnode by a filehandle. 1460 */ 1461 1462 int 1463 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1464 { 1465 struct mount *mp; 1466 int error; 1467 1468 *vpp = NULL; 1469 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1470 if (mp == NULL) { 1471 error = ESTALE; 1472 goto out; 1473 } 1474 if (mp->mnt_op->vfs_fhtovp == NULL) { 1475 error = EOPNOTSUPP; 1476 goto out; 1477 } 1478 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1479 out: 1480 return error; 1481 } 1482 1483 /* 1484 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1485 * the needed size. 1486 */ 1487 1488 int 1489 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1490 { 1491 fhandle_t *fhp; 1492 int error; 1493 1494 *fhpp = NULL; 1495 if (fhsize > FHANDLE_SIZE_MAX) { 1496 return EINVAL; 1497 } 1498 if (fhsize < FHANDLE_SIZE_MIN) { 1499 return EINVAL; 1500 } 1501 again: 1502 fhp = kmem_alloc(fhsize, KM_SLEEP); 1503 if (fhp == NULL) { 1504 return ENOMEM; 1505 } 1506 error = copyin(ufhp, fhp, fhsize); 1507 if (error == 0) { 1508 /* XXX this check shouldn't be here */ 1509 if (FHANDLE_SIZE(fhp) == fhsize) { 1510 *fhpp = fhp; 1511 return 0; 1512 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1513 /* 1514 * a kludge for nfsv2 padded handles. 1515 */ 1516 size_t sz; 1517 1518 sz = FHANDLE_SIZE(fhp); 1519 kmem_free(fhp, fhsize); 1520 fhsize = sz; 1521 goto again; 1522 } else { 1523 /* 1524 * userland told us wrong size. 1525 */ 1526 error = EINVAL; 1527 } 1528 } 1529 kmem_free(fhp, fhsize); 1530 return error; 1531 } 1532 1533 void 1534 vfs_copyinfh_free(fhandle_t *fhp) 1535 { 1536 1537 vfs__fhfree(fhp); 1538 } 1539 1540 /* 1541 * Get file handle system call 1542 */ 1543 int 1544 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1545 { 1546 /* { 1547 syscallarg(char *) fname; 1548 syscallarg(fhandle_t *) fhp; 1549 syscallarg(size_t *) fh_size; 1550 } */ 1551 struct vnode *vp; 1552 fhandle_t *fh; 1553 int error; 1554 struct nameidata nd; 1555 size_t sz; 1556 size_t usz; 1557 1558 /* 1559 * Must be super user 1560 */ 1561 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1562 0, NULL, NULL, NULL); 1563 if (error) 1564 return (error); 1565 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1566 SCARG(uap, fname)); 1567 error = namei(&nd); 1568 if (error) 1569 return (error); 1570 vp = nd.ni_vp; 1571 error = vfs_composefh_alloc(vp, &fh); 1572 vput(vp); 1573 if (error != 0) { 1574 goto out; 1575 } 1576 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1577 if (error != 0) { 1578 goto out; 1579 } 1580 sz = FHANDLE_SIZE(fh); 1581 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1582 if (error != 0) { 1583 goto out; 1584 } 1585 if (usz >= sz) { 1586 error = copyout(fh, SCARG(uap, fhp), sz); 1587 } else { 1588 error = E2BIG; 1589 } 1590 out: 1591 vfs_composefh_free(fh); 1592 return (error); 1593 } 1594 1595 /* 1596 * Open a file given a file handle. 1597 * 1598 * Check permissions, allocate an open file structure, 1599 * and call the device open routine if any. 1600 */ 1601 1602 int 1603 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1604 register_t *retval) 1605 { 1606 file_t *fp; 1607 struct vnode *vp = NULL; 1608 kauth_cred_t cred = l->l_cred; 1609 file_t *nfp; 1610 int type, indx, error=0; 1611 struct flock lf; 1612 struct vattr va; 1613 fhandle_t *fh; 1614 int flags; 1615 proc_t *p; 1616 1617 p = curproc; 1618 1619 /* 1620 * Must be super user 1621 */ 1622 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1623 0, NULL, NULL, NULL))) 1624 return (error); 1625 1626 flags = FFLAGS(oflags); 1627 if ((flags & (FREAD | FWRITE)) == 0) 1628 return (EINVAL); 1629 if ((flags & O_CREAT)) 1630 return (EINVAL); 1631 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1632 return (error); 1633 fp = nfp; 1634 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1635 if (error != 0) { 1636 goto bad; 1637 } 1638 error = vfs_fhtovp(fh, &vp); 1639 if (error != 0) { 1640 goto bad; 1641 } 1642 1643 /* Now do an effective vn_open */ 1644 1645 if (vp->v_type == VSOCK) { 1646 error = EOPNOTSUPP; 1647 goto bad; 1648 } 1649 error = vn_openchk(vp, cred, flags); 1650 if (error != 0) 1651 goto bad; 1652 if (flags & O_TRUNC) { 1653 VOP_UNLOCK(vp, 0); /* XXX */ 1654 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1655 vattr_null(&va); 1656 va.va_size = 0; 1657 error = VOP_SETATTR(vp, &va, cred); 1658 if (error) 1659 goto bad; 1660 } 1661 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1662 goto bad; 1663 if (flags & FWRITE) { 1664 mutex_enter(&vp->v_interlock); 1665 vp->v_writecount++; 1666 mutex_exit(&vp->v_interlock); 1667 } 1668 1669 /* done with modified vn_open, now finish what sys_open does. */ 1670 1671 fp->f_flag = flags & FMASK; 1672 fp->f_type = DTYPE_VNODE; 1673 fp->f_ops = &vnops; 1674 fp->f_data = vp; 1675 if (flags & (O_EXLOCK | O_SHLOCK)) { 1676 lf.l_whence = SEEK_SET; 1677 lf.l_start = 0; 1678 lf.l_len = 0; 1679 if (flags & O_EXLOCK) 1680 lf.l_type = F_WRLCK; 1681 else 1682 lf.l_type = F_RDLCK; 1683 type = F_FLOCK; 1684 if ((flags & FNONBLOCK) == 0) 1685 type |= F_WAIT; 1686 VOP_UNLOCK(vp, 0); 1687 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1688 if (error) { 1689 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1690 fd_abort(p, fp, indx); 1691 return (error); 1692 } 1693 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1694 atomic_or_uint(&fp->f_flag, FHASLOCK); 1695 } 1696 VOP_UNLOCK(vp, 0); 1697 *retval = indx; 1698 fd_affix(p, fp, indx); 1699 vfs_copyinfh_free(fh); 1700 return (0); 1701 1702 bad: 1703 fd_abort(p, fp, indx); 1704 if (vp != NULL) 1705 vput(vp); 1706 vfs_copyinfh_free(fh); 1707 return (error); 1708 } 1709 1710 int 1711 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1712 { 1713 /* { 1714 syscallarg(const void *) fhp; 1715 syscallarg(size_t) fh_size; 1716 syscallarg(int) flags; 1717 } */ 1718 1719 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1720 SCARG(uap, flags), retval); 1721 } 1722 1723 int 1724 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1725 { 1726 int error; 1727 fhandle_t *fh; 1728 struct vnode *vp; 1729 1730 /* 1731 * Must be super user 1732 */ 1733 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1734 0, NULL, NULL, NULL))) 1735 return (error); 1736 1737 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1738 if (error != 0) 1739 return error; 1740 1741 error = vfs_fhtovp(fh, &vp); 1742 vfs_copyinfh_free(fh); 1743 if (error != 0) 1744 return error; 1745 1746 error = vn_stat(vp, sb); 1747 vput(vp); 1748 return error; 1749 } 1750 1751 1752 /* ARGSUSED */ 1753 int 1754 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1755 { 1756 /* { 1757 syscallarg(const void *) fhp; 1758 syscallarg(size_t) fh_size; 1759 syscallarg(struct stat *) sb; 1760 } */ 1761 struct stat sb; 1762 int error; 1763 1764 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1765 if (error) 1766 return error; 1767 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1768 } 1769 1770 int 1771 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1772 int flags) 1773 { 1774 fhandle_t *fh; 1775 struct mount *mp; 1776 struct vnode *vp; 1777 int error; 1778 1779 /* 1780 * Must be super user 1781 */ 1782 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1783 0, NULL, NULL, NULL))) 1784 return error; 1785 1786 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1787 if (error != 0) 1788 return error; 1789 1790 error = vfs_fhtovp(fh, &vp); 1791 vfs_copyinfh_free(fh); 1792 if (error != 0) 1793 return error; 1794 1795 mp = vp->v_mount; 1796 error = dostatvfs(mp, sb, l, flags, 1); 1797 vput(vp); 1798 return error; 1799 } 1800 1801 /* ARGSUSED */ 1802 int 1803 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1804 { 1805 /* { 1806 syscallarg(const void *) fhp; 1807 syscallarg(size_t) fh_size; 1808 syscallarg(struct statvfs *) buf; 1809 syscallarg(int) flags; 1810 } */ 1811 struct statvfs *sb = STATVFSBUF_GET(); 1812 int error; 1813 1814 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1815 SCARG(uap, flags)); 1816 if (error == 0) 1817 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1818 STATVFSBUF_PUT(sb); 1819 return error; 1820 } 1821 1822 /* 1823 * Create a special file. 1824 */ 1825 /* ARGSUSED */ 1826 int 1827 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1828 register_t *retval) 1829 { 1830 /* { 1831 syscallarg(const char *) path; 1832 syscallarg(mode_t) mode; 1833 syscallarg(dev_t) dev; 1834 } */ 1835 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1836 SCARG(uap, dev), retval, UIO_USERSPACE); 1837 } 1838 1839 int 1840 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1841 register_t *retval, enum uio_seg seg) 1842 { 1843 struct proc *p = l->l_proc; 1844 struct vnode *vp; 1845 struct vattr vattr; 1846 int error, optype; 1847 struct nameidata nd; 1848 char *path; 1849 const char *cpath; 1850 1851 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1852 0, NULL, NULL, NULL)) != 0) 1853 return (error); 1854 1855 optype = VOP_MKNOD_DESCOFFSET; 1856 1857 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1858 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1859 1860 if ((error = namei(&nd)) != 0) 1861 goto out; 1862 vp = nd.ni_vp; 1863 if (vp != NULL) 1864 error = EEXIST; 1865 else { 1866 vattr_null(&vattr); 1867 /* We will read cwdi->cwdi_cmask unlocked. */ 1868 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1869 vattr.va_rdev = dev; 1870 1871 switch (mode & S_IFMT) { 1872 case S_IFMT: /* used by badsect to flag bad sectors */ 1873 vattr.va_type = VBAD; 1874 break; 1875 case S_IFCHR: 1876 vattr.va_type = VCHR; 1877 break; 1878 case S_IFBLK: 1879 vattr.va_type = VBLK; 1880 break; 1881 case S_IFWHT: 1882 optype = VOP_WHITEOUT_DESCOFFSET; 1883 break; 1884 case S_IFREG: 1885 #if NVERIEXEC > 0 1886 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1887 O_CREAT); 1888 #endif /* NVERIEXEC > 0 */ 1889 vattr.va_type = VREG; 1890 vattr.va_rdev = VNOVAL; 1891 optype = VOP_CREATE_DESCOFFSET; 1892 break; 1893 default: 1894 error = EINVAL; 1895 break; 1896 } 1897 } 1898 if (!error) { 1899 switch (optype) { 1900 case VOP_WHITEOUT_DESCOFFSET: 1901 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1902 if (error) 1903 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1904 vput(nd.ni_dvp); 1905 break; 1906 1907 case VOP_MKNOD_DESCOFFSET: 1908 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1909 &nd.ni_cnd, &vattr); 1910 if (error == 0) 1911 vput(nd.ni_vp); 1912 break; 1913 1914 case VOP_CREATE_DESCOFFSET: 1915 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1916 &nd.ni_cnd, &vattr); 1917 if (error == 0) 1918 vput(nd.ni_vp); 1919 break; 1920 } 1921 } else { 1922 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1923 if (nd.ni_dvp == vp) 1924 vrele(nd.ni_dvp); 1925 else 1926 vput(nd.ni_dvp); 1927 if (vp) 1928 vrele(vp); 1929 } 1930 out: 1931 VERIEXEC_PATH_PUT(path); 1932 return (error); 1933 } 1934 1935 /* 1936 * Create a named pipe. 1937 */ 1938 /* ARGSUSED */ 1939 int 1940 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1941 { 1942 /* { 1943 syscallarg(const char *) path; 1944 syscallarg(int) mode; 1945 } */ 1946 struct proc *p = l->l_proc; 1947 struct vattr vattr; 1948 int error; 1949 struct nameidata nd; 1950 1951 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1952 SCARG(uap, path)); 1953 if ((error = namei(&nd)) != 0) 1954 return (error); 1955 if (nd.ni_vp != NULL) { 1956 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1957 if (nd.ni_dvp == nd.ni_vp) 1958 vrele(nd.ni_dvp); 1959 else 1960 vput(nd.ni_dvp); 1961 vrele(nd.ni_vp); 1962 return (EEXIST); 1963 } 1964 vattr_null(&vattr); 1965 vattr.va_type = VFIFO; 1966 /* We will read cwdi->cwdi_cmask unlocked. */ 1967 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1968 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1969 if (error == 0) 1970 vput(nd.ni_vp); 1971 return (error); 1972 } 1973 1974 /* 1975 * Make a hard file link. 1976 */ 1977 /* ARGSUSED */ 1978 int 1979 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1980 { 1981 /* { 1982 syscallarg(const char *) path; 1983 syscallarg(const char *) link; 1984 } */ 1985 struct vnode *vp; 1986 struct nameidata nd; 1987 int error; 1988 1989 error = namei_simple_user(SCARG(uap, path), 1990 NSM_FOLLOW_TRYEMULROOT, &vp); 1991 if (error != 0) 1992 return (error); 1993 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1994 SCARG(uap, link)); 1995 if ((error = namei(&nd)) != 0) 1996 goto out; 1997 if (nd.ni_vp) { 1998 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1999 if (nd.ni_dvp == nd.ni_vp) 2000 vrele(nd.ni_dvp); 2001 else 2002 vput(nd.ni_dvp); 2003 vrele(nd.ni_vp); 2004 error = EEXIST; 2005 goto out; 2006 } 2007 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2008 out: 2009 vrele(vp); 2010 return (error); 2011 } 2012 2013 /* 2014 * Make a symbolic link. 2015 */ 2016 /* ARGSUSED */ 2017 int 2018 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2019 { 2020 /* { 2021 syscallarg(const char *) path; 2022 syscallarg(const char *) link; 2023 } */ 2024 struct proc *p = l->l_proc; 2025 struct vattr vattr; 2026 char *path; 2027 int error; 2028 struct nameidata nd; 2029 2030 path = PNBUF_GET(); 2031 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2032 if (error) 2033 goto out; 2034 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2035 SCARG(uap, link)); 2036 if ((error = namei(&nd)) != 0) 2037 goto out; 2038 if (nd.ni_vp) { 2039 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2040 if (nd.ni_dvp == nd.ni_vp) 2041 vrele(nd.ni_dvp); 2042 else 2043 vput(nd.ni_dvp); 2044 vrele(nd.ni_vp); 2045 error = EEXIST; 2046 goto out; 2047 } 2048 vattr_null(&vattr); 2049 vattr.va_type = VLNK; 2050 /* We will read cwdi->cwdi_cmask unlocked. */ 2051 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2052 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2053 if (error == 0) 2054 vput(nd.ni_vp); 2055 out: 2056 PNBUF_PUT(path); 2057 return (error); 2058 } 2059 2060 /* 2061 * Delete a whiteout from the filesystem. 2062 */ 2063 /* ARGSUSED */ 2064 int 2065 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2066 { 2067 /* { 2068 syscallarg(const char *) path; 2069 } */ 2070 int error; 2071 struct nameidata nd; 2072 2073 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2074 UIO_USERSPACE, SCARG(uap, path)); 2075 error = namei(&nd); 2076 if (error) 2077 return (error); 2078 2079 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2080 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2081 if (nd.ni_dvp == nd.ni_vp) 2082 vrele(nd.ni_dvp); 2083 else 2084 vput(nd.ni_dvp); 2085 if (nd.ni_vp) 2086 vrele(nd.ni_vp); 2087 return (EEXIST); 2088 } 2089 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2090 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2091 vput(nd.ni_dvp); 2092 return (error); 2093 } 2094 2095 /* 2096 * Delete a name from the filesystem. 2097 */ 2098 /* ARGSUSED */ 2099 int 2100 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2101 { 2102 /* { 2103 syscallarg(const char *) path; 2104 } */ 2105 2106 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2107 } 2108 2109 int 2110 do_sys_unlink(const char *arg, enum uio_seg seg) 2111 { 2112 struct vnode *vp; 2113 int error; 2114 struct nameidata nd; 2115 char *path; 2116 const char *cpath; 2117 2118 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2119 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2120 2121 if ((error = namei(&nd)) != 0) 2122 goto out; 2123 vp = nd.ni_vp; 2124 2125 /* 2126 * The root of a mounted filesystem cannot be deleted. 2127 */ 2128 if (vp->v_vflag & VV_ROOT) { 2129 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2130 if (nd.ni_dvp == vp) 2131 vrele(nd.ni_dvp); 2132 else 2133 vput(nd.ni_dvp); 2134 vput(vp); 2135 error = EBUSY; 2136 goto out; 2137 } 2138 2139 #if NVERIEXEC > 0 2140 /* Handle remove requests for veriexec entries. */ 2141 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2142 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2143 if (nd.ni_dvp == vp) 2144 vrele(nd.ni_dvp); 2145 else 2146 vput(nd.ni_dvp); 2147 vput(vp); 2148 goto out; 2149 } 2150 #endif /* NVERIEXEC > 0 */ 2151 2152 #ifdef FILEASSOC 2153 (void)fileassoc_file_delete(vp); 2154 #endif /* FILEASSOC */ 2155 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2156 out: 2157 VERIEXEC_PATH_PUT(path); 2158 return (error); 2159 } 2160 2161 /* 2162 * Reposition read/write file offset. 2163 */ 2164 int 2165 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2166 { 2167 /* { 2168 syscallarg(int) fd; 2169 syscallarg(int) pad; 2170 syscallarg(off_t) offset; 2171 syscallarg(int) whence; 2172 } */ 2173 kauth_cred_t cred = l->l_cred; 2174 file_t *fp; 2175 struct vnode *vp; 2176 struct vattr vattr; 2177 off_t newoff; 2178 int error, fd; 2179 2180 fd = SCARG(uap, fd); 2181 2182 if ((fp = fd_getfile(fd)) == NULL) 2183 return (EBADF); 2184 2185 vp = fp->f_data; 2186 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2187 error = ESPIPE; 2188 goto out; 2189 } 2190 2191 switch (SCARG(uap, whence)) { 2192 case SEEK_CUR: 2193 newoff = fp->f_offset + SCARG(uap, offset); 2194 break; 2195 case SEEK_END: 2196 error = VOP_GETATTR(vp, &vattr, cred); 2197 if (error) { 2198 goto out; 2199 } 2200 newoff = SCARG(uap, offset) + vattr.va_size; 2201 break; 2202 case SEEK_SET: 2203 newoff = SCARG(uap, offset); 2204 break; 2205 default: 2206 error = EINVAL; 2207 goto out; 2208 } 2209 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2210 *(off_t *)retval = fp->f_offset = newoff; 2211 } 2212 out: 2213 fd_putfile(fd); 2214 return (error); 2215 } 2216 2217 /* 2218 * Positional read system call. 2219 */ 2220 int 2221 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2222 { 2223 /* { 2224 syscallarg(int) fd; 2225 syscallarg(void *) buf; 2226 syscallarg(size_t) nbyte; 2227 syscallarg(off_t) offset; 2228 } */ 2229 file_t *fp; 2230 struct vnode *vp; 2231 off_t offset; 2232 int error, fd = SCARG(uap, fd); 2233 2234 if ((fp = fd_getfile(fd)) == NULL) 2235 return (EBADF); 2236 2237 if ((fp->f_flag & FREAD) == 0) { 2238 fd_putfile(fd); 2239 return (EBADF); 2240 } 2241 2242 vp = fp->f_data; 2243 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2244 error = ESPIPE; 2245 goto out; 2246 } 2247 2248 offset = SCARG(uap, offset); 2249 2250 /* 2251 * XXX This works because no file systems actually 2252 * XXX take any action on the seek operation. 2253 */ 2254 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2255 goto out; 2256 2257 /* dofileread() will unuse the descriptor for us */ 2258 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2259 &offset, 0, retval)); 2260 2261 out: 2262 fd_putfile(fd); 2263 return (error); 2264 } 2265 2266 /* 2267 * Positional scatter read system call. 2268 */ 2269 int 2270 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2271 { 2272 /* { 2273 syscallarg(int) fd; 2274 syscallarg(const struct iovec *) iovp; 2275 syscallarg(int) iovcnt; 2276 syscallarg(off_t) offset; 2277 } */ 2278 off_t offset = SCARG(uap, offset); 2279 2280 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2281 SCARG(uap, iovcnt), &offset, 0, retval); 2282 } 2283 2284 /* 2285 * Positional write system call. 2286 */ 2287 int 2288 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2289 { 2290 /* { 2291 syscallarg(int) fd; 2292 syscallarg(const void *) buf; 2293 syscallarg(size_t) nbyte; 2294 syscallarg(off_t) offset; 2295 } */ 2296 file_t *fp; 2297 struct vnode *vp; 2298 off_t offset; 2299 int error, fd = SCARG(uap, fd); 2300 2301 if ((fp = fd_getfile(fd)) == NULL) 2302 return (EBADF); 2303 2304 if ((fp->f_flag & FWRITE) == 0) { 2305 fd_putfile(fd); 2306 return (EBADF); 2307 } 2308 2309 vp = fp->f_data; 2310 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2311 error = ESPIPE; 2312 goto out; 2313 } 2314 2315 offset = SCARG(uap, offset); 2316 2317 /* 2318 * XXX This works because no file systems actually 2319 * XXX take any action on the seek operation. 2320 */ 2321 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2322 goto out; 2323 2324 /* dofilewrite() will unuse the descriptor for us */ 2325 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2326 &offset, 0, retval)); 2327 2328 out: 2329 fd_putfile(fd); 2330 return (error); 2331 } 2332 2333 /* 2334 * Positional gather write system call. 2335 */ 2336 int 2337 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2338 { 2339 /* { 2340 syscallarg(int) fd; 2341 syscallarg(const struct iovec *) iovp; 2342 syscallarg(int) iovcnt; 2343 syscallarg(off_t) offset; 2344 } */ 2345 off_t offset = SCARG(uap, offset); 2346 2347 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2348 SCARG(uap, iovcnt), &offset, 0, retval); 2349 } 2350 2351 /* 2352 * Check access permissions. 2353 */ 2354 int 2355 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2356 { 2357 /* { 2358 syscallarg(const char *) path; 2359 syscallarg(int) flags; 2360 } */ 2361 kauth_cred_t cred; 2362 struct vnode *vp; 2363 int error, flags; 2364 struct nameidata nd; 2365 2366 cred = kauth_cred_dup(l->l_cred); 2367 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2368 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2369 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2370 SCARG(uap, path)); 2371 /* Override default credentials */ 2372 nd.ni_cnd.cn_cred = cred; 2373 if ((error = namei(&nd)) != 0) 2374 goto out; 2375 vp = nd.ni_vp; 2376 2377 /* Flags == 0 means only check for existence. */ 2378 if (SCARG(uap, flags)) { 2379 flags = 0; 2380 if (SCARG(uap, flags) & R_OK) 2381 flags |= VREAD; 2382 if (SCARG(uap, flags) & W_OK) 2383 flags |= VWRITE; 2384 if (SCARG(uap, flags) & X_OK) 2385 flags |= VEXEC; 2386 2387 error = VOP_ACCESS(vp, flags, cred); 2388 if (!error && (flags & VWRITE)) 2389 error = vn_writechk(vp); 2390 } 2391 vput(vp); 2392 out: 2393 kauth_cred_free(cred); 2394 return (error); 2395 } 2396 2397 /* 2398 * Common code for all sys_stat functions, including compat versions. 2399 */ 2400 int 2401 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2402 { 2403 int error; 2404 struct nameidata nd; 2405 2406 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2407 UIO_USERSPACE, path); 2408 error = namei(&nd); 2409 if (error != 0) 2410 return error; 2411 error = vn_stat(nd.ni_vp, sb); 2412 vput(nd.ni_vp); 2413 return error; 2414 } 2415 2416 /* 2417 * Get file status; this version follows links. 2418 */ 2419 /* ARGSUSED */ 2420 int 2421 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2422 { 2423 /* { 2424 syscallarg(const char *) path; 2425 syscallarg(struct stat *) ub; 2426 } */ 2427 struct stat sb; 2428 int error; 2429 2430 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2431 if (error) 2432 return error; 2433 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2434 } 2435 2436 /* 2437 * Get file status; this version does not follow links. 2438 */ 2439 /* ARGSUSED */ 2440 int 2441 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2442 { 2443 /* { 2444 syscallarg(const char *) path; 2445 syscallarg(struct stat *) ub; 2446 } */ 2447 struct stat sb; 2448 int error; 2449 2450 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2451 if (error) 2452 return error; 2453 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2454 } 2455 2456 /* 2457 * Get configurable pathname variables. 2458 */ 2459 /* ARGSUSED */ 2460 int 2461 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2462 { 2463 /* { 2464 syscallarg(const char *) path; 2465 syscallarg(int) name; 2466 } */ 2467 int error; 2468 struct nameidata nd; 2469 2470 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2471 SCARG(uap, path)); 2472 if ((error = namei(&nd)) != 0) 2473 return (error); 2474 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2475 vput(nd.ni_vp); 2476 return (error); 2477 } 2478 2479 /* 2480 * Return target name of a symbolic link. 2481 */ 2482 /* ARGSUSED */ 2483 int 2484 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2485 { 2486 /* { 2487 syscallarg(const char *) path; 2488 syscallarg(char *) buf; 2489 syscallarg(size_t) count; 2490 } */ 2491 struct vnode *vp; 2492 struct iovec aiov; 2493 struct uio auio; 2494 int error; 2495 struct nameidata nd; 2496 2497 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2498 SCARG(uap, path)); 2499 if ((error = namei(&nd)) != 0) 2500 return (error); 2501 vp = nd.ni_vp; 2502 if (vp->v_type != VLNK) 2503 error = EINVAL; 2504 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2505 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2506 aiov.iov_base = SCARG(uap, buf); 2507 aiov.iov_len = SCARG(uap, count); 2508 auio.uio_iov = &aiov; 2509 auio.uio_iovcnt = 1; 2510 auio.uio_offset = 0; 2511 auio.uio_rw = UIO_READ; 2512 KASSERT(l == curlwp); 2513 auio.uio_vmspace = l->l_proc->p_vmspace; 2514 auio.uio_resid = SCARG(uap, count); 2515 error = VOP_READLINK(vp, &auio, l->l_cred); 2516 } 2517 vput(vp); 2518 *retval = SCARG(uap, count) - auio.uio_resid; 2519 return (error); 2520 } 2521 2522 /* 2523 * Change flags of a file given a path name. 2524 */ 2525 /* ARGSUSED */ 2526 int 2527 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2528 { 2529 /* { 2530 syscallarg(const char *) path; 2531 syscallarg(u_long) flags; 2532 } */ 2533 struct vnode *vp; 2534 int error; 2535 2536 error = namei_simple_user(SCARG(uap, path), 2537 NSM_FOLLOW_TRYEMULROOT, &vp); 2538 if (error != 0) 2539 return (error); 2540 error = change_flags(vp, SCARG(uap, flags), l); 2541 vput(vp); 2542 return (error); 2543 } 2544 2545 /* 2546 * Change flags of a file given a file descriptor. 2547 */ 2548 /* ARGSUSED */ 2549 int 2550 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2551 { 2552 /* { 2553 syscallarg(int) fd; 2554 syscallarg(u_long) flags; 2555 } */ 2556 struct vnode *vp; 2557 file_t *fp; 2558 int error; 2559 2560 /* fd_getvnode() will use the descriptor for us */ 2561 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2562 return (error); 2563 vp = fp->f_data; 2564 error = change_flags(vp, SCARG(uap, flags), l); 2565 VOP_UNLOCK(vp, 0); 2566 fd_putfile(SCARG(uap, fd)); 2567 return (error); 2568 } 2569 2570 /* 2571 * Change flags of a file given a path name; this version does 2572 * not follow links. 2573 */ 2574 int 2575 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2576 { 2577 /* { 2578 syscallarg(const char *) path; 2579 syscallarg(u_long) flags; 2580 } */ 2581 struct vnode *vp; 2582 int error; 2583 2584 error = namei_simple_user(SCARG(uap, path), 2585 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2586 if (error != 0) 2587 return (error); 2588 error = change_flags(vp, SCARG(uap, flags), l); 2589 vput(vp); 2590 return (error); 2591 } 2592 2593 /* 2594 * Common routine to change flags of a file. 2595 */ 2596 int 2597 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2598 { 2599 struct vattr vattr; 2600 int error; 2601 2602 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2603 /* 2604 * Non-superusers cannot change the flags on devices, even if they 2605 * own them. 2606 */ 2607 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2608 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2609 goto out; 2610 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2611 error = EINVAL; 2612 goto out; 2613 } 2614 } 2615 vattr_null(&vattr); 2616 vattr.va_flags = flags; 2617 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2618 out: 2619 return (error); 2620 } 2621 2622 /* 2623 * Change mode of a file given path name; this version follows links. 2624 */ 2625 /* ARGSUSED */ 2626 int 2627 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2628 { 2629 /* { 2630 syscallarg(const char *) path; 2631 syscallarg(int) mode; 2632 } */ 2633 int error; 2634 struct vnode *vp; 2635 2636 error = namei_simple_user(SCARG(uap, path), 2637 NSM_FOLLOW_TRYEMULROOT, &vp); 2638 if (error != 0) 2639 return (error); 2640 2641 error = change_mode(vp, SCARG(uap, mode), l); 2642 2643 vrele(vp); 2644 return (error); 2645 } 2646 2647 /* 2648 * Change mode of a file given a file descriptor. 2649 */ 2650 /* ARGSUSED */ 2651 int 2652 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2653 { 2654 /* { 2655 syscallarg(int) fd; 2656 syscallarg(int) mode; 2657 } */ 2658 file_t *fp; 2659 int error; 2660 2661 /* fd_getvnode() will use the descriptor for us */ 2662 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2663 return (error); 2664 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2665 fd_putfile(SCARG(uap, fd)); 2666 return (error); 2667 } 2668 2669 /* 2670 * Change mode of a file given path name; this version does not follow links. 2671 */ 2672 /* ARGSUSED */ 2673 int 2674 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2675 { 2676 /* { 2677 syscallarg(const char *) path; 2678 syscallarg(int) mode; 2679 } */ 2680 int error; 2681 struct vnode *vp; 2682 2683 error = namei_simple_user(SCARG(uap, path), 2684 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2685 if (error != 0) 2686 return (error); 2687 2688 error = change_mode(vp, SCARG(uap, mode), l); 2689 2690 vrele(vp); 2691 return (error); 2692 } 2693 2694 /* 2695 * Common routine to set mode given a vnode. 2696 */ 2697 static int 2698 change_mode(struct vnode *vp, int mode, struct lwp *l) 2699 { 2700 struct vattr vattr; 2701 int error; 2702 2703 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2704 vattr_null(&vattr); 2705 vattr.va_mode = mode & ALLPERMS; 2706 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2707 VOP_UNLOCK(vp, 0); 2708 return (error); 2709 } 2710 2711 /* 2712 * Set ownership given a path name; this version follows links. 2713 */ 2714 /* ARGSUSED */ 2715 int 2716 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2717 { 2718 /* { 2719 syscallarg(const char *) path; 2720 syscallarg(uid_t) uid; 2721 syscallarg(gid_t) gid; 2722 } */ 2723 int error; 2724 struct vnode *vp; 2725 2726 error = namei_simple_user(SCARG(uap, path), 2727 NSM_FOLLOW_TRYEMULROOT, &vp); 2728 if (error != 0) 2729 return (error); 2730 2731 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2732 2733 vrele(vp); 2734 return (error); 2735 } 2736 2737 /* 2738 * Set ownership given a path name; this version follows links. 2739 * Provides POSIX semantics. 2740 */ 2741 /* ARGSUSED */ 2742 int 2743 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2744 { 2745 /* { 2746 syscallarg(const char *) path; 2747 syscallarg(uid_t) uid; 2748 syscallarg(gid_t) gid; 2749 } */ 2750 int error; 2751 struct vnode *vp; 2752 2753 error = namei_simple_user(SCARG(uap, path), 2754 NSM_FOLLOW_TRYEMULROOT, &vp); 2755 if (error != 0) 2756 return (error); 2757 2758 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2759 2760 vrele(vp); 2761 return (error); 2762 } 2763 2764 /* 2765 * Set ownership given a file descriptor. 2766 */ 2767 /* ARGSUSED */ 2768 int 2769 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2770 { 2771 /* { 2772 syscallarg(int) fd; 2773 syscallarg(uid_t) uid; 2774 syscallarg(gid_t) gid; 2775 } */ 2776 int error; 2777 file_t *fp; 2778 2779 /* fd_getvnode() will use the descriptor for us */ 2780 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2781 return (error); 2782 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2783 l, 0); 2784 fd_putfile(SCARG(uap, fd)); 2785 return (error); 2786 } 2787 2788 /* 2789 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2790 */ 2791 /* ARGSUSED */ 2792 int 2793 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2794 { 2795 /* { 2796 syscallarg(int) fd; 2797 syscallarg(uid_t) uid; 2798 syscallarg(gid_t) gid; 2799 } */ 2800 int error; 2801 file_t *fp; 2802 2803 /* fd_getvnode() will use the descriptor for us */ 2804 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2805 return (error); 2806 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2807 l, 1); 2808 fd_putfile(SCARG(uap, fd)); 2809 return (error); 2810 } 2811 2812 /* 2813 * Set ownership given a path name; this version does not follow links. 2814 */ 2815 /* ARGSUSED */ 2816 int 2817 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2818 { 2819 /* { 2820 syscallarg(const char *) path; 2821 syscallarg(uid_t) uid; 2822 syscallarg(gid_t) gid; 2823 } */ 2824 int error; 2825 struct vnode *vp; 2826 2827 error = namei_simple_user(SCARG(uap, path), 2828 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2829 if (error != 0) 2830 return (error); 2831 2832 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2833 2834 vrele(vp); 2835 return (error); 2836 } 2837 2838 /* 2839 * Set ownership given a path name; this version does not follow links. 2840 * Provides POSIX/XPG semantics. 2841 */ 2842 /* ARGSUSED */ 2843 int 2844 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2845 { 2846 /* { 2847 syscallarg(const char *) path; 2848 syscallarg(uid_t) uid; 2849 syscallarg(gid_t) gid; 2850 } */ 2851 int error; 2852 struct vnode *vp; 2853 2854 error = namei_simple_user(SCARG(uap, path), 2855 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2856 if (error != 0) 2857 return (error); 2858 2859 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2860 2861 vrele(vp); 2862 return (error); 2863 } 2864 2865 /* 2866 * Common routine to set ownership given a vnode. 2867 */ 2868 static int 2869 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2870 int posix_semantics) 2871 { 2872 struct vattr vattr; 2873 mode_t newmode; 2874 int error; 2875 2876 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2877 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2878 goto out; 2879 2880 #define CHANGED(x) ((int)(x) != -1) 2881 newmode = vattr.va_mode; 2882 if (posix_semantics) { 2883 /* 2884 * POSIX/XPG semantics: if the caller is not the super-user, 2885 * clear set-user-id and set-group-id bits. Both POSIX and 2886 * the XPG consider the behaviour for calls by the super-user 2887 * implementation-defined; we leave the set-user-id and set- 2888 * group-id settings intact in that case. 2889 */ 2890 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2891 NULL) != 0) 2892 newmode &= ~(S_ISUID | S_ISGID); 2893 } else { 2894 /* 2895 * NetBSD semantics: when changing owner and/or group, 2896 * clear the respective bit(s). 2897 */ 2898 if (CHANGED(uid)) 2899 newmode &= ~S_ISUID; 2900 if (CHANGED(gid)) 2901 newmode &= ~S_ISGID; 2902 } 2903 /* Update va_mode iff altered. */ 2904 if (vattr.va_mode == newmode) 2905 newmode = VNOVAL; 2906 2907 vattr_null(&vattr); 2908 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2909 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2910 vattr.va_mode = newmode; 2911 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2912 #undef CHANGED 2913 2914 out: 2915 VOP_UNLOCK(vp, 0); 2916 return (error); 2917 } 2918 2919 /* 2920 * Set the access and modification times given a path name; this 2921 * version follows links. 2922 */ 2923 /* ARGSUSED */ 2924 int 2925 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2926 register_t *retval) 2927 { 2928 /* { 2929 syscallarg(const char *) path; 2930 syscallarg(const struct timeval *) tptr; 2931 } */ 2932 2933 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2934 SCARG(uap, tptr), UIO_USERSPACE); 2935 } 2936 2937 /* 2938 * Set the access and modification times given a file descriptor. 2939 */ 2940 /* ARGSUSED */ 2941 int 2942 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2943 register_t *retval) 2944 { 2945 /* { 2946 syscallarg(int) fd; 2947 syscallarg(const struct timeval *) tptr; 2948 } */ 2949 int error; 2950 file_t *fp; 2951 2952 /* fd_getvnode() will use the descriptor for us */ 2953 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2954 return (error); 2955 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2956 UIO_USERSPACE); 2957 fd_putfile(SCARG(uap, fd)); 2958 return (error); 2959 } 2960 2961 /* 2962 * Set the access and modification times given a path name; this 2963 * version does not follow links. 2964 */ 2965 int 2966 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2967 register_t *retval) 2968 { 2969 /* { 2970 syscallarg(const char *) path; 2971 syscallarg(const struct timeval *) tptr; 2972 } */ 2973 2974 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2975 SCARG(uap, tptr), UIO_USERSPACE); 2976 } 2977 2978 /* 2979 * Common routine to set access and modification times given a vnode. 2980 */ 2981 int 2982 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2983 const struct timeval *tptr, enum uio_seg seg) 2984 { 2985 struct vattr vattr; 2986 int error, dorele = 0; 2987 namei_simple_flags_t sflags; 2988 2989 bool vanull, setbirthtime; 2990 struct timespec ts[2]; 2991 2992 /* 2993 * I have checked all callers and they pass either FOLLOW, 2994 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 2995 * is 0. More to the point, they don't pass anything else. 2996 * Let's keep it that way at least until the namei interfaces 2997 * are fully sanitized. 2998 */ 2999 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3000 sflags = (flag == FOLLOW) ? 3001 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3002 3003 if (tptr == NULL) { 3004 vanull = true; 3005 nanotime(&ts[0]); 3006 ts[1] = ts[0]; 3007 } else { 3008 struct timeval tv[2]; 3009 3010 vanull = false; 3011 if (seg != UIO_SYSSPACE) { 3012 error = copyin(tptr, tv, sizeof (tv)); 3013 if (error != 0) 3014 return error; 3015 tptr = tv; 3016 } 3017 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3018 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3019 } 3020 3021 if (vp == NULL) { 3022 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3023 error = namei_simple_user(path, sflags, &vp); 3024 if (error != 0) 3025 return error; 3026 dorele = 1; 3027 } 3028 3029 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3030 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3031 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3032 vattr_null(&vattr); 3033 vattr.va_atime = ts[0]; 3034 vattr.va_mtime = ts[1]; 3035 if (setbirthtime) 3036 vattr.va_birthtime = ts[1]; 3037 if (vanull) 3038 vattr.va_vaflags |= VA_UTIMES_NULL; 3039 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3040 VOP_UNLOCK(vp, 0); 3041 3042 if (dorele != 0) 3043 vrele(vp); 3044 3045 return error; 3046 } 3047 3048 /* 3049 * Truncate a file given its path name. 3050 */ 3051 /* ARGSUSED */ 3052 int 3053 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3054 { 3055 /* { 3056 syscallarg(const char *) path; 3057 syscallarg(int) pad; 3058 syscallarg(off_t) length; 3059 } */ 3060 struct vnode *vp; 3061 struct vattr vattr; 3062 int error; 3063 3064 error = namei_simple_user(SCARG(uap, path), 3065 NSM_FOLLOW_TRYEMULROOT, &vp); 3066 if (error != 0) 3067 return (error); 3068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3069 if (vp->v_type == VDIR) 3070 error = EISDIR; 3071 else if ((error = vn_writechk(vp)) == 0 && 3072 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3073 vattr_null(&vattr); 3074 vattr.va_size = SCARG(uap, length); 3075 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3076 } 3077 vput(vp); 3078 return (error); 3079 } 3080 3081 /* 3082 * Truncate a file given a file descriptor. 3083 */ 3084 /* ARGSUSED */ 3085 int 3086 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3087 { 3088 /* { 3089 syscallarg(int) fd; 3090 syscallarg(int) pad; 3091 syscallarg(off_t) length; 3092 } */ 3093 struct vattr vattr; 3094 struct vnode *vp; 3095 file_t *fp; 3096 int error; 3097 3098 /* fd_getvnode() will use the descriptor for us */ 3099 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3100 return (error); 3101 if ((fp->f_flag & FWRITE) == 0) { 3102 error = EINVAL; 3103 goto out; 3104 } 3105 vp = fp->f_data; 3106 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3107 if (vp->v_type == VDIR) 3108 error = EISDIR; 3109 else if ((error = vn_writechk(vp)) == 0) { 3110 vattr_null(&vattr); 3111 vattr.va_size = SCARG(uap, length); 3112 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3113 } 3114 VOP_UNLOCK(vp, 0); 3115 out: 3116 fd_putfile(SCARG(uap, fd)); 3117 return (error); 3118 } 3119 3120 /* 3121 * Sync an open file. 3122 */ 3123 /* ARGSUSED */ 3124 int 3125 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3126 { 3127 /* { 3128 syscallarg(int) fd; 3129 } */ 3130 struct vnode *vp; 3131 file_t *fp; 3132 int error; 3133 3134 /* fd_getvnode() will use the descriptor for us */ 3135 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3136 return (error); 3137 vp = fp->f_data; 3138 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3139 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3140 VOP_UNLOCK(vp, 0); 3141 fd_putfile(SCARG(uap, fd)); 3142 return (error); 3143 } 3144 3145 /* 3146 * Sync a range of file data. API modeled after that found in AIX. 3147 * 3148 * FDATASYNC indicates that we need only save enough metadata to be able 3149 * to re-read the written data. Note we duplicate AIX's requirement that 3150 * the file be open for writing. 3151 */ 3152 /* ARGSUSED */ 3153 int 3154 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3155 { 3156 /* { 3157 syscallarg(int) fd; 3158 syscallarg(int) flags; 3159 syscallarg(off_t) start; 3160 syscallarg(off_t) length; 3161 } */ 3162 struct vnode *vp; 3163 file_t *fp; 3164 int flags, nflags; 3165 off_t s, e, len; 3166 int error; 3167 3168 /* fd_getvnode() will use the descriptor for us */ 3169 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3170 return (error); 3171 3172 if ((fp->f_flag & FWRITE) == 0) { 3173 error = EBADF; 3174 goto out; 3175 } 3176 3177 flags = SCARG(uap, flags); 3178 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3179 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3180 error = EINVAL; 3181 goto out; 3182 } 3183 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3184 if (flags & FDATASYNC) 3185 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3186 else 3187 nflags = FSYNC_WAIT; 3188 if (flags & FDISKSYNC) 3189 nflags |= FSYNC_CACHE; 3190 3191 len = SCARG(uap, length); 3192 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3193 if (len) { 3194 s = SCARG(uap, start); 3195 e = s + len; 3196 if (e < s) { 3197 error = EINVAL; 3198 goto out; 3199 } 3200 } else { 3201 e = 0; 3202 s = 0; 3203 } 3204 3205 vp = fp->f_data; 3206 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3207 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3208 VOP_UNLOCK(vp, 0); 3209 out: 3210 fd_putfile(SCARG(uap, fd)); 3211 return (error); 3212 } 3213 3214 /* 3215 * Sync the data of an open file. 3216 */ 3217 /* ARGSUSED */ 3218 int 3219 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3220 { 3221 /* { 3222 syscallarg(int) fd; 3223 } */ 3224 struct vnode *vp; 3225 file_t *fp; 3226 int error; 3227 3228 /* fd_getvnode() will use the descriptor for us */ 3229 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3230 return (error); 3231 if ((fp->f_flag & FWRITE) == 0) { 3232 fd_putfile(SCARG(uap, fd)); 3233 return (EBADF); 3234 } 3235 vp = fp->f_data; 3236 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3237 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3238 VOP_UNLOCK(vp, 0); 3239 fd_putfile(SCARG(uap, fd)); 3240 return (error); 3241 } 3242 3243 /* 3244 * Rename files, (standard) BSD semantics frontend. 3245 */ 3246 /* ARGSUSED */ 3247 int 3248 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3249 { 3250 /* { 3251 syscallarg(const char *) from; 3252 syscallarg(const char *) to; 3253 } */ 3254 3255 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3256 } 3257 3258 /* 3259 * Rename files, POSIX semantics frontend. 3260 */ 3261 /* ARGSUSED */ 3262 int 3263 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3264 { 3265 /* { 3266 syscallarg(const char *) from; 3267 syscallarg(const char *) to; 3268 } */ 3269 3270 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3271 } 3272 3273 /* 3274 * Rename files. Source and destination must either both be directories, 3275 * or both not be directories. If target is a directory, it must be empty. 3276 * If `from' and `to' refer to the same object, the value of the `retain' 3277 * argument is used to determine whether `from' will be 3278 * 3279 * (retain == 0) deleted unless `from' and `to' refer to the same 3280 * object in the file system's name space (BSD). 3281 * (retain == 1) always retained (POSIX). 3282 */ 3283 int 3284 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3285 { 3286 struct vnode *tvp, *fvp, *tdvp; 3287 struct nameidata fromnd, tond; 3288 struct mount *fs; 3289 struct lwp *l = curlwp; 3290 struct proc *p; 3291 uint32_t saveflag; 3292 int error; 3293 3294 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME, 3295 seg, from); 3296 if ((error = namei(&fromnd)) != 0) 3297 return (error); 3298 if (fromnd.ni_dvp != fromnd.ni_vp) 3299 VOP_UNLOCK(fromnd.ni_dvp, 0); 3300 fvp = fromnd.ni_vp; 3301 3302 fs = fvp->v_mount; 3303 error = VFS_RENAMELOCK_ENTER(fs); 3304 if (error) { 3305 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3306 vrele(fromnd.ni_dvp); 3307 vrele(fvp); 3308 goto out1; 3309 } 3310 3311 /* 3312 * close, partially, yet another race - ideally we should only 3313 * go as far as getting fromnd.ni_dvp before getting the per-fs 3314 * lock, and then continue to get fromnd.ni_vp, but we can't do 3315 * that with namei as it stands. 3316 * 3317 * This still won't prevent rmdir from nuking fromnd.ni_vp 3318 * under us. The real fix is to get the locks in the right 3319 * order and do the lookups in the right places, but that's a 3320 * major rototill. 3321 * 3322 * Preserve the SAVESTART in cn_flags, because who knows what 3323 * might happen if we don't. 3324 * 3325 * Note: this logic (as well as this whole function) is cloned 3326 * in nfs_serv.c. Proceed accordingly. 3327 */ 3328 vrele(fvp); 3329 if ((fromnd.ni_cnd.cn_namelen == 1 && 3330 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3331 (fromnd.ni_cnd.cn_namelen == 2 && 3332 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3333 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3334 error = EINVAL; 3335 VFS_RENAMELOCK_EXIT(fs); 3336 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3337 vrele(fromnd.ni_dvp); 3338 goto out1; 3339 } 3340 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3341 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3342 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3343 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3344 fromnd.ni_cnd.cn_flags |= saveflag; 3345 if (error) { 3346 VOP_UNLOCK(fromnd.ni_dvp, 0); 3347 VFS_RENAMELOCK_EXIT(fs); 3348 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3349 vrele(fromnd.ni_dvp); 3350 goto out1; 3351 } 3352 VOP_UNLOCK(fromnd.ni_vp, 0); 3353 if (fromnd.ni_dvp != fromnd.ni_vp) 3354 VOP_UNLOCK(fromnd.ni_dvp, 0); 3355 fvp = fromnd.ni_vp; 3356 3357 NDINIT(&tond, RENAME, 3358 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3359 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3360 seg, to); 3361 if ((error = namei(&tond)) != 0) { 3362 VFS_RENAMELOCK_EXIT(fs); 3363 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3364 vrele(fromnd.ni_dvp); 3365 vrele(fvp); 3366 goto out1; 3367 } 3368 tdvp = tond.ni_dvp; 3369 tvp = tond.ni_vp; 3370 3371 if (tvp != NULL) { 3372 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3373 error = ENOTDIR; 3374 goto out; 3375 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3376 error = EISDIR; 3377 goto out; 3378 } 3379 } 3380 3381 if (fvp == tdvp) 3382 error = EINVAL; 3383 3384 /* 3385 * Source and destination refer to the same object. 3386 */ 3387 if (fvp == tvp) { 3388 if (retain) 3389 error = -1; 3390 else if (fromnd.ni_dvp == tdvp && 3391 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3392 !memcmp(fromnd.ni_cnd.cn_nameptr, 3393 tond.ni_cnd.cn_nameptr, 3394 fromnd.ni_cnd.cn_namelen)) 3395 error = -1; 3396 } 3397 3398 #if NVERIEXEC > 0 3399 if (!error) { 3400 char *f1, *f2; 3401 size_t f1_len; 3402 size_t f2_len; 3403 3404 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3405 f1 = kmem_alloc(f1_len, KM_SLEEP); 3406 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3407 3408 f2_len = tond.ni_cnd.cn_namelen + 1; 3409 f2 = kmem_alloc(f2_len, KM_SLEEP); 3410 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3411 3412 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3413 3414 kmem_free(f1, f1_len); 3415 kmem_free(f2, f2_len); 3416 } 3417 #endif /* NVERIEXEC > 0 */ 3418 3419 out: 3420 p = l->l_proc; 3421 if (!error) { 3422 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3423 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3424 VFS_RENAMELOCK_EXIT(fs); 3425 } else { 3426 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3427 if (tdvp == tvp) 3428 vrele(tdvp); 3429 else 3430 vput(tdvp); 3431 if (tvp) 3432 vput(tvp); 3433 VFS_RENAMELOCK_EXIT(fs); 3434 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3435 vrele(fromnd.ni_dvp); 3436 vrele(fvp); 3437 } 3438 vrele(tond.ni_startdir); 3439 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3440 out1: 3441 if (fromnd.ni_startdir) 3442 vrele(fromnd.ni_startdir); 3443 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3444 return (error == -1 ? 0 : error); 3445 } 3446 3447 /* 3448 * Make a directory file. 3449 */ 3450 /* ARGSUSED */ 3451 int 3452 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3453 { 3454 /* { 3455 syscallarg(const char *) path; 3456 syscallarg(int) mode; 3457 } */ 3458 3459 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3460 } 3461 3462 int 3463 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3464 { 3465 struct proc *p = curlwp->l_proc; 3466 struct vnode *vp; 3467 struct vattr vattr; 3468 int error; 3469 struct nameidata nd; 3470 3471 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, 3472 seg, path); 3473 if ((error = namei(&nd)) != 0) 3474 return (error); 3475 vp = nd.ni_vp; 3476 if (vp != NULL) { 3477 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3478 if (nd.ni_dvp == vp) 3479 vrele(nd.ni_dvp); 3480 else 3481 vput(nd.ni_dvp); 3482 vrele(vp); 3483 return (EEXIST); 3484 } 3485 vattr_null(&vattr); 3486 vattr.va_type = VDIR; 3487 /* We will read cwdi->cwdi_cmask unlocked. */ 3488 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3489 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3490 if (!error) 3491 vput(nd.ni_vp); 3492 return (error); 3493 } 3494 3495 /* 3496 * Remove a directory file. 3497 */ 3498 /* ARGSUSED */ 3499 int 3500 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3501 { 3502 /* { 3503 syscallarg(const char *) path; 3504 } */ 3505 struct vnode *vp; 3506 int error; 3507 struct nameidata nd; 3508 3509 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3510 SCARG(uap, path)); 3511 if ((error = namei(&nd)) != 0) 3512 return (error); 3513 vp = nd.ni_vp; 3514 if (vp->v_type != VDIR) { 3515 error = ENOTDIR; 3516 goto out; 3517 } 3518 /* 3519 * No rmdir "." please. 3520 */ 3521 if (nd.ni_dvp == vp) { 3522 error = EINVAL; 3523 goto out; 3524 } 3525 /* 3526 * The root of a mounted filesystem cannot be deleted. 3527 */ 3528 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3529 error = EBUSY; 3530 goto out; 3531 } 3532 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3533 return (error); 3534 3535 out: 3536 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3537 if (nd.ni_dvp == vp) 3538 vrele(nd.ni_dvp); 3539 else 3540 vput(nd.ni_dvp); 3541 vput(vp); 3542 return (error); 3543 } 3544 3545 /* 3546 * Read a block of directory entries in a file system independent format. 3547 */ 3548 int 3549 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3550 { 3551 /* { 3552 syscallarg(int) fd; 3553 syscallarg(char *) buf; 3554 syscallarg(size_t) count; 3555 } */ 3556 file_t *fp; 3557 int error, done; 3558 3559 /* fd_getvnode() will use the descriptor for us */ 3560 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3561 return (error); 3562 if ((fp->f_flag & FREAD) == 0) { 3563 error = EBADF; 3564 goto out; 3565 } 3566 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3567 SCARG(uap, count), &done, l, 0, 0); 3568 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3569 *retval = done; 3570 out: 3571 fd_putfile(SCARG(uap, fd)); 3572 return (error); 3573 } 3574 3575 /* 3576 * Set the mode mask for creation of filesystem nodes. 3577 */ 3578 int 3579 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3580 { 3581 /* { 3582 syscallarg(mode_t) newmask; 3583 } */ 3584 struct proc *p = l->l_proc; 3585 struct cwdinfo *cwdi; 3586 3587 /* 3588 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3589 * important is that we serialize changes to the mask. The 3590 * rw_exit() will issue a write memory barrier on our behalf, 3591 * and force the changes out to other CPUs (as it must use an 3592 * atomic operation, draining the local CPU's store buffers). 3593 */ 3594 cwdi = p->p_cwdi; 3595 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3596 *retval = cwdi->cwdi_cmask; 3597 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3598 rw_exit(&cwdi->cwdi_lock); 3599 3600 return (0); 3601 } 3602 3603 int 3604 dorevoke(struct vnode *vp, kauth_cred_t cred) 3605 { 3606 struct vattr vattr; 3607 int error; 3608 3609 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3610 return error; 3611 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3612 (error = kauth_authorize_generic(cred, 3613 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3614 VOP_REVOKE(vp, REVOKEALL); 3615 return (error); 3616 } 3617 3618 /* 3619 * Void all references to file by ripping underlying filesystem 3620 * away from vnode. 3621 */ 3622 /* ARGSUSED */ 3623 int 3624 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3625 { 3626 /* { 3627 syscallarg(const char *) path; 3628 } */ 3629 struct vnode *vp; 3630 int error; 3631 3632 error = namei_simple_user(SCARG(uap, path), 3633 NSM_FOLLOW_TRYEMULROOT, &vp); 3634 if (error != 0) 3635 return (error); 3636 error = dorevoke(vp, l->l_cred); 3637 vrele(vp); 3638 return (error); 3639 } 3640