1 /* $NetBSD: vfs_syscalls.c,v 1.383 2009/01/11 02:45:53 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 63 */ 64 65 #include <sys/cdefs.h> 66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.383 2009/01/11 02:45:53 christos Exp $"); 67 68 #ifdef _KERNEL_OPT 69 #include "opt_fileassoc.h" 70 #include "veriexec.h" 71 #endif 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/namei.h> 76 #include <sys/filedesc.h> 77 #include <sys/kernel.h> 78 #include <sys/file.h> 79 #include <sys/stat.h> 80 #include <sys/vnode.h> 81 #include <sys/mount.h> 82 #include <sys/proc.h> 83 #include <sys/uio.h> 84 #include <sys/malloc.h> 85 #include <sys/kmem.h> 86 #include <sys/dirent.h> 87 #include <sys/sysctl.h> 88 #include <sys/syscallargs.h> 89 #include <sys/vfs_syscalls.h> 90 #include <sys/ktrace.h> 91 #ifdef FILEASSOC 92 #include <sys/fileassoc.h> 93 #endif /* FILEASSOC */ 94 #include <sys/verified_exec.h> 95 #include <sys/kauth.h> 96 #include <sys/atomic.h> 97 #include <sys/module.h> 98 #include <sys/buf.h> 99 100 #include <miscfs/genfs/genfs.h> 101 #include <miscfs/syncfs/syncfs.h> 102 #include <miscfs/specfs/specdev.h> 103 104 #include <nfs/rpcv2.h> 105 #include <nfs/nfsproto.h> 106 #include <nfs/nfs.h> 107 #include <nfs/nfs_var.h> 108 109 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 110 111 static int change_dir(struct nameidata *, struct lwp *); 112 static int change_flags(struct vnode *, u_long, struct lwp *); 113 static int change_mode(struct vnode *, int, struct lwp *l); 114 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 115 116 void checkdirs(struct vnode *); 117 118 int dovfsusermount = 0; 119 120 /* 121 * Virtual File System System Calls 122 */ 123 124 /* 125 * Mount a file system. 126 */ 127 128 /* 129 * This table is used to maintain compatibility with 4.3BSD 130 * and NetBSD 0.9 mount syscalls - and possibly other systems. 131 * Note, the order is important! 132 * 133 * Do not modify this table. 
It should only contain filesystems 134 * supported by NetBSD 0.9 and 4.3BSD. 135 */ 136 const char * const mountcompatnames[] = { 137 NULL, /* 0 = MOUNT_NONE */ 138 MOUNT_FFS, /* 1 = MOUNT_UFS */ 139 MOUNT_NFS, /* 2 */ 140 MOUNT_MFS, /* 3 */ 141 MOUNT_MSDOS, /* 4 */ 142 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 143 MOUNT_FDESC, /* 6 */ 144 MOUNT_KERNFS, /* 7 */ 145 NULL, /* 8 = MOUNT_DEVFS */ 146 MOUNT_AFS, /* 9 */ 147 }; 148 const int nmountcompatnames = sizeof(mountcompatnames) / 149 sizeof(mountcompatnames[0]); 150 151 static int 152 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 153 void *data, size_t *data_len) 154 { 155 struct mount *mp; 156 int error = 0, saved_flags; 157 158 mp = vp->v_mount; 159 saved_flags = mp->mnt_flag; 160 161 /* We can operate only on VV_ROOT nodes. */ 162 if ((vp->v_vflag & VV_ROOT) == 0) { 163 error = EINVAL; 164 goto out; 165 } 166 167 /* 168 * We only allow the filesystem to be reloaded if it 169 * is currently mounted read-only. Additionally, we 170 * prevent read-write to read-only downgrades. 171 */ 172 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 173 (mp->mnt_flag & MNT_RDONLY) == 0) { 174 error = EOPNOTSUPP; /* Needs translation */ 175 goto out; 176 } 177 178 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 179 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 180 if (error) 181 goto out; 182 183 if (vfs_busy(mp, NULL)) { 184 error = EPERM; 185 goto out; 186 } 187 188 mutex_enter(&mp->mnt_updating); 189 190 mp->mnt_flag &= ~MNT_OP_FLAGS; 191 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 192 193 /* 194 * Set the mount level flags. 
195 */ 196 if (flags & MNT_RDONLY) 197 mp->mnt_flag |= MNT_RDONLY; 198 else if (mp->mnt_flag & MNT_RDONLY) 199 mp->mnt_iflag |= IMNT_WANTRDWR; 200 mp->mnt_flag &= 201 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 202 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 203 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 204 MNT_LOG); 205 mp->mnt_flag |= flags & 206 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 209 MNT_LOG | MNT_IGNORE); 210 211 error = VFS_MOUNT(mp, path, data, data_len); 212 213 if (error && data != NULL) { 214 int error2; 215 216 /* 217 * Update failed; let's try and see if it was an 218 * export request. For compat with 3.0 and earlier. 219 */ 220 error2 = vfs_hooks_reexport(mp, path, data); 221 222 /* 223 * Only update error code if the export request was 224 * understood but some problem occurred while 225 * processing it. 226 */ 227 if (error2 != EJUSTRETURN) 228 error = error2; 229 } 230 231 if (mp->mnt_iflag & IMNT_WANTRDWR) 232 mp->mnt_flag &= ~MNT_RDONLY; 233 if (error) 234 mp->mnt_flag = saved_flags; 235 mp->mnt_flag &= ~MNT_OP_FLAGS; 236 mp->mnt_iflag &= ~IMNT_WANTRDWR; 237 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 238 if (mp->mnt_syncer == NULL) 239 error = vfs_allocate_syncvnode(mp); 240 } else { 241 if (mp->mnt_syncer != NULL) 242 vfs_deallocate_syncvnode(mp); 243 } 244 mutex_exit(&mp->mnt_updating); 245 vfs_unbusy(mp, false, NULL); 246 247 out: 248 return (error); 249 } 250 251 static int 252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 253 { 254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 255 int error; 256 257 /* Copy file-system type from userspace. */ 258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 259 if (error) { 260 /* 261 * Historically, filesystem types were identified by numbers. 
262 * If we get an integer for the filesystem type instead of a 263 * string, we check to see if it matches one of the historic 264 * filesystem types. 265 */ 266 u_long fsindex = (u_long)fstype; 267 if (fsindex >= nmountcompatnames || 268 mountcompatnames[fsindex] == NULL) 269 return ENODEV; 270 strlcpy(fstypename, mountcompatnames[fsindex], 271 sizeof(fstypename)); 272 } 273 274 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 275 if (strcmp(fstypename, "ufs") == 0) 276 fstypename[0] = 'f'; 277 278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 279 return 0; 280 281 /* If we can autoload a vfs module, try again */ 282 mutex_enter(&module_lock); 283 (void)module_autoload(fstype, MODULE_CLASS_VFS); 284 mutex_exit(&module_lock); 285 286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 287 return 0; 288 289 return ENODEV; 290 } 291 292 static int 293 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 294 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 295 { 296 struct mount *mp; 297 struct vnode *vp = *vpp; 298 struct vattr va; 299 int error; 300 301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 303 if (error) 304 return error; 305 306 /* Can't make a non-dir a mount-point (from here anyway). */ 307 if (vp->v_type != VDIR) 308 return ENOTDIR; 309 310 /* 311 * If the user is not root, ensure that they own the directory 312 * onto which we are attempting to mount. 313 */ 314 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 315 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 316 (error = kauth_authorize_generic(l->l_cred, 317 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 318 return error; 319 } 320 321 if (flags & MNT_EXPORTED) 322 return EINVAL; 323 324 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 325 return error; 326 327 /* 328 * Check if a file-system is not already mounted on this vnode. 
329 */ 330 if (vp->v_mountedhere != NULL) 331 return EBUSY; 332 333 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 334 if (mp == NULL) 335 return ENOMEM; 336 337 mp->mnt_op = vfsops; 338 mp->mnt_refcnt = 1; 339 340 TAILQ_INIT(&mp->mnt_vnodelist); 341 rw_init(&mp->mnt_unmounting); 342 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 343 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 344 error = vfs_busy(mp, NULL); 345 KASSERT(error == 0); 346 mutex_enter(&mp->mnt_updating); 347 348 mp->mnt_vnodecovered = vp; 349 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 350 mount_initspecific(mp); 351 352 /* 353 * The underlying file system may refuse the mount for 354 * various reasons. Allow the user to force it to happen. 355 * 356 * Set the mount level flags. 357 */ 358 mp->mnt_flag = flags & 359 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 360 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 361 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 362 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 363 364 error = VFS_MOUNT(mp, path, data, data_len); 365 mp->mnt_flag &= ~MNT_OP_FLAGS; 366 367 /* 368 * Put the new filesystem on the mount list after root. 369 */ 370 cache_purge(vp); 371 if (error != 0) { 372 vp->v_mountedhere = NULL; 373 mutex_exit(&mp->mnt_updating); 374 vfs_unbusy(mp, false, NULL); 375 vfs_destroy(mp); 376 return error; 377 } 378 379 mp->mnt_iflag &= ~IMNT_WANTRDWR; 380 mutex_enter(&mountlist_lock); 381 vp->v_mountedhere = mp; 382 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 383 mutex_exit(&mountlist_lock); 384 vn_restorerecurse(vp, recurse); 385 VOP_UNLOCK(vp, 0); 386 checkdirs(vp); 387 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 388 error = vfs_allocate_syncvnode(mp); 389 /* Hold an additional reference to the mount across VFS_START(). 
*/ 390 mutex_exit(&mp->mnt_updating); 391 vfs_unbusy(mp, true, NULL); 392 (void) VFS_STATVFS(mp, &mp->mnt_stat); 393 error = VFS_START(mp, 0); 394 if (error) 395 vrele(vp); 396 /* Drop reference held for VFS_START(). */ 397 vfs_destroy(mp); 398 *vpp = NULL; 399 return error; 400 } 401 402 static int 403 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 404 void *data, size_t *data_len) 405 { 406 struct mount *mp; 407 int error; 408 409 /* If MNT_GETARGS is specified, it should be the only flag. */ 410 if (flags & ~MNT_GETARGS) 411 return EINVAL; 412 413 mp = vp->v_mount; 414 415 /* XXX: probably some notion of "can see" here if we want isolation. */ 416 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 417 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 418 if (error) 419 return error; 420 421 if ((vp->v_vflag & VV_ROOT) == 0) 422 return EINVAL; 423 424 if (vfs_busy(mp, NULL)) 425 return EPERM; 426 427 mutex_enter(&mp->mnt_updating); 428 mp->mnt_flag &= ~MNT_OP_FLAGS; 429 mp->mnt_flag |= MNT_GETARGS; 430 error = VFS_MOUNT(mp, path, data, data_len); 431 mp->mnt_flag &= ~MNT_OP_FLAGS; 432 mutex_exit(&mp->mnt_updating); 433 434 vfs_unbusy(mp, false, NULL); 435 return (error); 436 } 437 438 int 439 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 440 { 441 /* { 442 syscallarg(const char *) type; 443 syscallarg(const char *) path; 444 syscallarg(int) flags; 445 syscallarg(void *) data; 446 syscallarg(size_t) data_len; 447 } */ 448 449 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 450 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 451 SCARG(uap, data_len), retval); 452 } 453 454 int 455 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 456 const char *path, int flags, void *data, enum uio_seg data_seg, 457 size_t data_len, register_t *retval) 458 { 459 struct vnode *vp; 460 struct nameidata nd; 461 void *data_buf = data; 462 u_int recurse; 463 int error; 
464 465 /* 466 * Get vnode to be covered 467 */ 468 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 469 if ((error = namei(&nd)) != 0) 470 return (error); 471 vp = nd.ni_vp; 472 473 /* 474 * A lookup in VFS_MOUNT might result in an attempt to 475 * lock this vnode again, so make the lock recursive. 476 */ 477 if (vfsops == NULL) { 478 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 479 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 480 recurse = vn_setrecurse(vp); 481 vfsops = vp->v_mount->mnt_op; 482 } else { 483 /* 'type' is userspace */ 484 error = mount_get_vfsops(type, &vfsops); 485 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 486 recurse = vn_setrecurse(vp); 487 if (error != 0) 488 goto done; 489 } 490 } else { 491 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 492 recurse = vn_setrecurse(vp); 493 } 494 495 if (data != NULL && data_seg == UIO_USERSPACE) { 496 if (data_len == 0) { 497 /* No length supplied, use default for filesystem */ 498 data_len = vfsops->vfs_min_mount_data; 499 if (data_len > VFS_MAX_MOUNT_DATA) { 500 error = EINVAL; 501 goto done; 502 } 503 /* 504 * Hopefully a longer buffer won't make copyin() fail. 505 * For compatibility with 3.0 and earlier. 506 */ 507 if (flags & MNT_UPDATE 508 && data_len < sizeof (struct mnt_export_args30)) 509 data_len = sizeof (struct mnt_export_args30); 510 } 511 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 512 513 /* NFS needs the buffer even for mnt_getargs .... 
*/ 514 error = copyin(data, data_buf, data_len); 515 if (error != 0) 516 goto done; 517 } 518 519 if (flags & MNT_GETARGS) { 520 if (data_len == 0) { 521 error = EINVAL; 522 goto done; 523 } 524 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 525 if (error != 0) 526 goto done; 527 if (data_seg == UIO_USERSPACE) 528 error = copyout(data_buf, data, data_len); 529 *retval = data_len; 530 } else if (flags & MNT_UPDATE) { 531 error = mount_update(l, vp, path, flags, data_buf, &data_len); 532 } else { 533 /* Locking is handled internally in mount_domount(). */ 534 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 535 &data_len, recurse); 536 } 537 538 done: 539 if (vp != NULL) { 540 vn_restorerecurse(vp, recurse); 541 vput(vp); 542 } 543 if (data_buf != data) 544 free(data_buf, M_TEMP); 545 return (error); 546 } 547 548 /* 549 * Scan all active processes to see if any of them have a current 550 * or root directory onto which the new filesystem has just been 551 * mounted. If so, replace them with the new mount point. 552 */ 553 void 554 checkdirs(struct vnode *olddp) 555 { 556 struct cwdinfo *cwdi; 557 struct vnode *newdp, *rele1, *rele2; 558 struct proc *p; 559 bool retry; 560 561 if (olddp->v_usecount == 1) 562 return; 563 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 564 panic("mount: lost mount"); 565 566 do { 567 retry = false; 568 mutex_enter(proc_lock); 569 PROCLIST_FOREACH(p, &allproc) { 570 if ((p->p_flag & PK_MARKER) != 0) 571 continue; 572 if ((cwdi = p->p_cwdi) == NULL) 573 continue; 574 /* 575 * Can't change to the old directory any more, 576 * so even if we see a stale value it's not a 577 * problem. 
578 */ 579 if (cwdi->cwdi_cdir != olddp && 580 cwdi->cwdi_rdir != olddp) 581 continue; 582 retry = true; 583 rele1 = NULL; 584 rele2 = NULL; 585 atomic_inc_uint(&cwdi->cwdi_refcnt); 586 mutex_exit(proc_lock); 587 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 588 if (cwdi->cwdi_cdir == olddp) { 589 rele1 = cwdi->cwdi_cdir; 590 VREF(newdp); 591 cwdi->cwdi_cdir = newdp; 592 } 593 if (cwdi->cwdi_rdir == olddp) { 594 rele2 = cwdi->cwdi_rdir; 595 VREF(newdp); 596 cwdi->cwdi_rdir = newdp; 597 } 598 rw_exit(&cwdi->cwdi_lock); 599 cwdfree(cwdi); 600 if (rele1 != NULL) 601 vrele(rele1); 602 if (rele2 != NULL) 603 vrele(rele2); 604 mutex_enter(proc_lock); 605 break; 606 } 607 mutex_exit(proc_lock); 608 } while (retry); 609 610 if (rootvnode == olddp) { 611 vrele(rootvnode); 612 VREF(newdp); 613 rootvnode = newdp; 614 } 615 vput(newdp); 616 } 617 618 /* 619 * Unmount a file system. 620 * 621 * Note: unmount takes a path to the vnode mounted on as argument, 622 * not special file (as before). 623 */ 624 /* ARGSUSED */ 625 int 626 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 627 { 628 /* { 629 syscallarg(const char *) path; 630 syscallarg(int) flags; 631 } */ 632 struct vnode *vp; 633 struct mount *mp; 634 int error; 635 struct nameidata nd; 636 637 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 638 SCARG(uap, path)); 639 if ((error = namei(&nd)) != 0) 640 return (error); 641 vp = nd.ni_vp; 642 mp = vp->v_mount; 643 atomic_inc_uint(&mp->mnt_refcnt); 644 VOP_UNLOCK(vp, 0); 645 646 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 647 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 648 if (error) { 649 vrele(vp); 650 vfs_destroy(mp); 651 return (error); 652 } 653 654 /* 655 * Don't allow unmounting the root file system. 
656 */ 657 if (mp->mnt_flag & MNT_ROOTFS) { 658 vrele(vp); 659 vfs_destroy(mp); 660 return (EINVAL); 661 } 662 663 /* 664 * Must be the root of the filesystem 665 */ 666 if ((vp->v_vflag & VV_ROOT) == 0) { 667 vrele(vp); 668 vfs_destroy(mp); 669 return (EINVAL); 670 } 671 672 vrele(vp); 673 error = dounmount(mp, SCARG(uap, flags), l); 674 vfs_destroy(mp); 675 return error; 676 } 677 678 /* 679 * Do the actual file system unmount. File system is assumed to have 680 * been locked by the caller. 681 * 682 * => Caller hold reference to the mount, explicitly for dounmount(). 683 */ 684 int 685 dounmount(struct mount *mp, int flags, struct lwp *l) 686 { 687 struct vnode *coveredvp; 688 int error; 689 int async; 690 int used_syncer; 691 692 #if NVERIEXEC > 0 693 error = veriexec_unmountchk(mp); 694 if (error) 695 return (error); 696 #endif /* NVERIEXEC > 0 */ 697 698 /* 699 * XXX Freeze syncer. Must do this before locking the 700 * mount point. See dounmount() for details. 701 */ 702 mutex_enter(&syncer_mutex); 703 rw_enter(&mp->mnt_unmounting, RW_WRITER); 704 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 705 rw_exit(&mp->mnt_unmounting); 706 mutex_exit(&syncer_mutex); 707 return ENOENT; 708 } 709 710 used_syncer = (mp->mnt_syncer != NULL); 711 712 /* 713 * XXX Syncer must be frozen when we get here. This should really 714 * be done on a per-mountpoint basis, but especially the softdep 715 * code possibly called from the syncer doesn't exactly work on a 716 * per-mountpoint basis, so the softdep code would become a maze 717 * of vfs_busy() calls. 718 * 719 * The caller of dounmount() must acquire syncer_mutex because 720 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 721 * order, and we must preserve that order to avoid deadlock. 722 * 723 * So, if the file system did not use the syncer, now is 724 * the time to release the syncer_mutex. 
725 */ 726 if (used_syncer == 0) 727 mutex_exit(&syncer_mutex); 728 729 mp->mnt_iflag |= IMNT_UNMOUNT; 730 async = mp->mnt_flag & MNT_ASYNC; 731 mp->mnt_flag &= ~MNT_ASYNC; 732 cache_purgevfs(mp); /* remove cache entries for this file sys */ 733 if (mp->mnt_syncer != NULL) 734 vfs_deallocate_syncvnode(mp); 735 error = 0; 736 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 737 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 738 } 739 vfs_scrubvnlist(mp); 740 if (error == 0 || (flags & MNT_FORCE)) 741 error = VFS_UNMOUNT(mp, flags); 742 if (error) { 743 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 744 (void) vfs_allocate_syncvnode(mp); 745 mp->mnt_iflag &= ~IMNT_UNMOUNT; 746 mp->mnt_flag |= async; 747 rw_exit(&mp->mnt_unmounting); 748 if (used_syncer) 749 mutex_exit(&syncer_mutex); 750 return (error); 751 } 752 vfs_scrubvnlist(mp); 753 mutex_enter(&mountlist_lock); 754 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 755 coveredvp->v_mountedhere = NULL; 756 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 757 mp->mnt_iflag |= IMNT_GONE; 758 mutex_exit(&mountlist_lock); 759 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 760 panic("unmount: dangling vnode"); 761 if (used_syncer) 762 mutex_exit(&syncer_mutex); 763 vfs_hooks_unmount(mp); 764 rw_exit(&mp->mnt_unmounting); 765 vfs_destroy(mp); /* reference from mount() */ 766 if (coveredvp != NULLVP) 767 vrele(coveredvp); 768 return (0); 769 } 770 771 /* 772 * Sync each mounted filesystem. 
773 */ 774 #ifdef DEBUG 775 int syncprt = 0; 776 struct ctldebug debug0 = { "syncprt", &syncprt }; 777 #endif 778 779 /* ARGSUSED */ 780 int 781 sys_sync(struct lwp *l, const void *v, register_t *retval) 782 { 783 struct mount *mp, *nmp; 784 int asyncflag; 785 786 if (l == NULL) 787 l = &lwp0; 788 789 mutex_enter(&mountlist_lock); 790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 791 mp = nmp) { 792 if (vfs_busy(mp, &nmp)) { 793 continue; 794 } 795 mutex_enter(&mp->mnt_updating); 796 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 797 asyncflag = mp->mnt_flag & MNT_ASYNC; 798 mp->mnt_flag &= ~MNT_ASYNC; 799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 800 if (asyncflag) 801 mp->mnt_flag |= MNT_ASYNC; 802 } 803 mutex_exit(&mp->mnt_updating); 804 vfs_unbusy(mp, false, &nmp); 805 } 806 mutex_exit(&mountlist_lock); 807 #ifdef DEBUG 808 if (syncprt) 809 vfs_bufstats(); 810 #endif /* DEBUG */ 811 return (0); 812 } 813 814 /* 815 * Change filesystem quotas. 816 */ 817 /* ARGSUSED */ 818 int 819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 820 { 821 /* { 822 syscallarg(const char *) path; 823 syscallarg(int) cmd; 824 syscallarg(int) uid; 825 syscallarg(void *) arg; 826 } */ 827 struct mount *mp; 828 int error; 829 struct nameidata nd; 830 831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 832 SCARG(uap, path)); 833 if ((error = namei(&nd)) != 0) 834 return (error); 835 mp = nd.ni_vp->v_mount; 836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 837 SCARG(uap, arg)); 838 vrele(nd.ni_vp); 839 return (error); 840 } 841 842 int 843 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 844 int root) 845 { 846 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 847 int error = 0; 848 849 /* 850 * If MNT_NOWAIT or MNT_LAZY is specified, do not 851 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 852 * overrides MNT_NOWAIT. 
853 */ 854 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 855 (flags != MNT_WAIT && flags != 0)) { 856 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 857 goto done; 858 } 859 860 /* Get the filesystem stats now */ 861 memset(sp, 0, sizeof(*sp)); 862 if ((error = VFS_STATVFS(mp, sp)) != 0) { 863 return error; 864 } 865 866 if (cwdi->cwdi_rdir == NULL) 867 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 868 done: 869 if (cwdi->cwdi_rdir != NULL) { 870 size_t len; 871 char *bp; 872 char c; 873 char *path = PNBUF_GET(); 874 875 bp = path + MAXPATHLEN; 876 *--bp = '\0'; 877 rw_enter(&cwdi->cwdi_lock, RW_READER); 878 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 879 MAXPATHLEN / 2, 0, l); 880 rw_exit(&cwdi->cwdi_lock); 881 if (error) { 882 PNBUF_PUT(path); 883 return error; 884 } 885 len = strlen(bp); 886 /* 887 * for mount points that are below our root, we can see 888 * them, so we fix up the pathname and return them. The 889 * rest we cannot see, so we don't allow viewing the 890 * data. 891 */ 892 if (strncmp(bp, sp->f_mntonname, len) == 0 && 893 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 894 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 895 sizeof(sp->f_mntonname)); 896 if (sp->f_mntonname[0] == '\0') 897 (void)strlcpy(sp->f_mntonname, "/", 898 sizeof(sp->f_mntonname)); 899 } else { 900 if (root) 901 (void)strlcpy(sp->f_mntonname, "/", 902 sizeof(sp->f_mntonname)); 903 else 904 error = EPERM; 905 } 906 PNBUF_PUT(path); 907 } 908 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 909 return error; 910 } 911 912 /* 913 * Get filesystem statistics by path. 
914 */ 915 int 916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 917 { 918 struct mount *mp; 919 int error; 920 struct nameidata nd; 921 922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 923 if ((error = namei(&nd)) != 0) 924 return error; 925 mp = nd.ni_vp->v_mount; 926 error = dostatvfs(mp, sb, l, flags, 1); 927 vrele(nd.ni_vp); 928 return error; 929 } 930 931 /* ARGSUSED */ 932 int 933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 934 { 935 /* { 936 syscallarg(const char *) path; 937 syscallarg(struct statvfs *) buf; 938 syscallarg(int) flags; 939 } */ 940 struct statvfs *sb; 941 int error; 942 943 sb = STATVFSBUF_GET(); 944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 945 if (error == 0) 946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 947 STATVFSBUF_PUT(sb); 948 return error; 949 } 950 951 /* 952 * Get filesystem statistics by fd. 953 */ 954 int 955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 956 { 957 file_t *fp; 958 struct mount *mp; 959 int error; 960 961 /* fd_getvnode() will use the descriptor for us */ 962 if ((error = fd_getvnode(fd, &fp)) != 0) 963 return (error); 964 mp = ((struct vnode *)fp->f_data)->v_mount; 965 error = dostatvfs(mp, sb, curlwp, flags, 1); 966 fd_putfile(fd); 967 return error; 968 } 969 970 /* ARGSUSED */ 971 int 972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 973 { 974 /* { 975 syscallarg(int) fd; 976 syscallarg(struct statvfs *) buf; 977 syscallarg(int) flags; 978 } */ 979 struct statvfs *sb; 980 int error; 981 982 sb = STATVFSBUF_GET(); 983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 984 if (error == 0) 985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 986 STATVFSBUF_PUT(sb); 987 return error; 988 } 989 990 991 /* 992 * Get statistics on all filesystems. 
993 */ 994 int 995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 997 register_t *retval) 998 { 999 int root = 0; 1000 struct proc *p = l->l_proc; 1001 struct mount *mp, *nmp; 1002 struct statvfs *sb; 1003 size_t count, maxcount; 1004 int error = 0; 1005 1006 sb = STATVFSBUF_GET(); 1007 maxcount = bufsize / entry_sz; 1008 mutex_enter(&mountlist_lock); 1009 count = 0; 1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1011 mp = nmp) { 1012 if (vfs_busy(mp, &nmp)) { 1013 continue; 1014 } 1015 if (sfsp && count < maxcount) { 1016 error = dostatvfs(mp, sb, l, flags, 0); 1017 if (error) { 1018 vfs_unbusy(mp, false, &nmp); 1019 error = 0; 1020 continue; 1021 } 1022 error = copyfn(sb, sfsp, entry_sz); 1023 if (error) { 1024 vfs_unbusy(mp, false, NULL); 1025 goto out; 1026 } 1027 sfsp = (char *)sfsp + entry_sz; 1028 root |= strcmp(sb->f_mntonname, "/") == 0; 1029 } 1030 count++; 1031 vfs_unbusy(mp, false, &nmp); 1032 } 1033 mutex_exit(&mountlist_lock); 1034 1035 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1036 /* 1037 * fake a root entry 1038 */ 1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1040 sb, l, flags, 1); 1041 if (error != 0) 1042 goto out; 1043 if (sfsp) { 1044 error = copyfn(sb, sfsp, entry_sz); 1045 if (error != 0) 1046 goto out; 1047 } 1048 count++; 1049 } 1050 if (sfsp && count > maxcount) 1051 *retval = maxcount; 1052 else 1053 *retval = count; 1054 out: 1055 STATVFSBUF_PUT(sb); 1056 return error; 1057 } 1058 1059 int 1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1061 { 1062 /* { 1063 syscallarg(struct statvfs *) buf; 1064 syscallarg(size_t) bufsize; 1065 syscallarg(int) flags; 1066 } */ 1067 1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1070 } 1071 1072 /* 1073 * Change current working directory to a given 
file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct cwdinfo *cwdi;
	struct vnode *vp, *rootvp;
	struct mount *mp;
	file_t *fp;
	int error;
	int fd = SCARG(uap, fd);

	/* fd_getvnode() will use the descriptor for us */
	error = fd_getvnode(fd, &fp);
	if (error != 0)
		return (error);
	vp = fp->f_data;

	/* Take our own reference; it becomes the new cwd reference. */
	VREF(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = (vp->v_type != VDIR) ? ENOTDIR :
	    VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}

	/*
	 * If something is mounted on the directory, descend to the root
	 * of the mounted file system, repeating for stacked mounts.
	 */
	for (;;) {
		mp = vp->v_mountedhere;
		if (mp == NULL)
			break;
		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &rootvp);
		vfs_unbusy(mp, false, NULL);
		if (error)
			goto out;
		vp = rootvp;
	}
	VOP_UNLOCK(vp, 0);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = l->l_proc->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return (error);
}

/*
 * Change this process's notion of the root directory to a given file
 * descriptor.
1141 */ 1142 int 1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1144 { 1145 struct proc *p = l->l_proc; 1146 struct cwdinfo *cwdi; 1147 struct vnode *vp; 1148 file_t *fp; 1149 int error, fd = SCARG(uap, fd); 1150 1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1153 return error; 1154 /* fd_getvnode() will use the descriptor for us */ 1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1156 return error; 1157 vp = fp->f_data; 1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1159 if (vp->v_type != VDIR) 1160 error = ENOTDIR; 1161 else 1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1163 VOP_UNLOCK(vp, 0); 1164 if (error) 1165 goto out; 1166 VREF(vp); 1167 1168 /* 1169 * Prevent escaping from chroot by putting the root under 1170 * the working directory. Silently chdir to / if we aren't 1171 * already there. 1172 */ 1173 cwdi = p->p_cwdi; 1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1176 /* 1177 * XXX would be more failsafe to change directory to a 1178 * deadfs node here instead 1179 */ 1180 vrele(cwdi->cwdi_cdir); 1181 VREF(vp); 1182 cwdi->cwdi_cdir = vp; 1183 } 1184 1185 if (cwdi->cwdi_rdir != NULL) 1186 vrele(cwdi->cwdi_rdir); 1187 cwdi->cwdi_rdir = vp; 1188 rw_exit(&cwdi->cwdi_lock); 1189 1190 out: 1191 fd_putfile(fd); 1192 return (error); 1193 } 1194 1195 /* 1196 * Change current working directory (``.''). 
1197 */ 1198 /* ARGSUSED */ 1199 int 1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1201 { 1202 /* { 1203 syscallarg(const char *) path; 1204 } */ 1205 struct proc *p = l->l_proc; 1206 struct cwdinfo *cwdi; 1207 int error; 1208 struct nameidata nd; 1209 1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1211 SCARG(uap, path)); 1212 if ((error = change_dir(&nd, l)) != 0) 1213 return (error); 1214 cwdi = p->p_cwdi; 1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1216 vrele(cwdi->cwdi_cdir); 1217 cwdi->cwdi_cdir = nd.ni_vp; 1218 rw_exit(&cwdi->cwdi_lock); 1219 return (0); 1220 } 1221 1222 /* 1223 * Change notion of root (``/'') directory. 1224 */ 1225 /* ARGSUSED */ 1226 int 1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1228 { 1229 /* { 1230 syscallarg(const char *) path; 1231 } */ 1232 struct proc *p = l->l_proc; 1233 struct cwdinfo *cwdi; 1234 struct vnode *vp; 1235 int error; 1236 struct nameidata nd; 1237 1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1240 return (error); 1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1242 SCARG(uap, path)); 1243 if ((error = change_dir(&nd, l)) != 0) 1244 return (error); 1245 1246 cwdi = p->p_cwdi; 1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1248 if (cwdi->cwdi_rdir != NULL) 1249 vrele(cwdi->cwdi_rdir); 1250 vp = nd.ni_vp; 1251 cwdi->cwdi_rdir = vp; 1252 1253 /* 1254 * Prevent escaping from chroot by putting the root under 1255 * the working directory. Silently chdir to / if we aren't 1256 * already there. 
1257 */ 1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1259 /* 1260 * XXX would be more failsafe to change directory to a 1261 * deadfs node here instead 1262 */ 1263 vrele(cwdi->cwdi_cdir); 1264 VREF(vp); 1265 cwdi->cwdi_cdir = vp; 1266 } 1267 rw_exit(&cwdi->cwdi_lock); 1268 1269 return (0); 1270 } 1271 1272 /* 1273 * Common routine for chroot and chdir. 1274 */ 1275 static int 1276 change_dir(struct nameidata *ndp, struct lwp *l) 1277 { 1278 struct vnode *vp; 1279 int error; 1280 1281 if ((error = namei(ndp)) != 0) 1282 return (error); 1283 vp = ndp->ni_vp; 1284 if (vp->v_type != VDIR) 1285 error = ENOTDIR; 1286 else 1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1288 1289 if (error) 1290 vput(vp); 1291 else 1292 VOP_UNLOCK(vp, 0); 1293 return (error); 1294 } 1295 1296 /* 1297 * Check permissions, allocate an open file structure, 1298 * and call the device open routine if any. 1299 */ 1300 int 1301 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1302 { 1303 /* { 1304 syscallarg(const char *) path; 1305 syscallarg(int) flags; 1306 syscallarg(int) mode; 1307 } */ 1308 struct proc *p = l->l_proc; 1309 struct cwdinfo *cwdi = p->p_cwdi; 1310 file_t *fp; 1311 struct vnode *vp; 1312 int flags, cmode; 1313 int type, indx, error; 1314 struct flock lf; 1315 struct nameidata nd; 1316 1317 flags = FFLAGS(SCARG(uap, flags)); 1318 if ((flags & (FREAD | FWRITE)) == 0) 1319 return (EINVAL); 1320 if ((error = fd_allocfile(&fp, &indx)) != 0) 1321 return (error); 1322 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1323 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1324 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1325 SCARG(uap, path)); 1326 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1327 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1328 fd_abort(p, fp, indx); 1329 if ((error == EDUPFD || error == EMOVEFD) && 1330 l->l_dupfd >= 0 && /* XXX from fdopen */ 1331 (error = 1332 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1333 *retval = indx; 1334 return (0); 1335 } 1336 if (error == ERESTART) 1337 error = EINTR; 1338 return (error); 1339 } 1340 1341 l->l_dupfd = 0; 1342 vp = nd.ni_vp; 1343 fp->f_flag = flags & FMASK; 1344 fp->f_type = DTYPE_VNODE; 1345 fp->f_ops = &vnops; 1346 fp->f_data = vp; 1347 if (flags & (O_EXLOCK | O_SHLOCK)) { 1348 lf.l_whence = SEEK_SET; 1349 lf.l_start = 0; 1350 lf.l_len = 0; 1351 if (flags & O_EXLOCK) 1352 lf.l_type = F_WRLCK; 1353 else 1354 lf.l_type = F_RDLCK; 1355 type = F_FLOCK; 1356 if ((flags & FNONBLOCK) == 0) 1357 type |= F_WAIT; 1358 VOP_UNLOCK(vp, 0); 1359 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1360 if (error) { 1361 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1362 fd_abort(p, fp, indx); 1363 return (error); 1364 } 1365 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1366 atomic_or_uint(&fp->f_flag, FHASLOCK); 1367 } 1368 VOP_UNLOCK(vp, 0); 1369 *retval = indx; 1370 fd_affix(p, fp, indx); 1371 return (0); 1372 } 1373 1374 static void 1375 vfs__fhfree(fhandle_t *fhp) 1376 { 1377 size_t fhsize; 1378 1379 if (fhp == NULL) { 1380 return; 1381 } 1382 fhsize = FHANDLE_SIZE(fhp); 1383 kmem_free(fhp, fhsize); 1384 } 1385 1386 /* 1387 * vfs_composefh: compose a filehandle. 
1388 */ 1389 1390 int 1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1392 { 1393 struct mount *mp; 1394 struct fid *fidp; 1395 int error; 1396 size_t needfhsize; 1397 size_t fidsize; 1398 1399 mp = vp->v_mount; 1400 fidp = NULL; 1401 if (*fh_size < FHANDLE_SIZE_MIN) { 1402 fidsize = 0; 1403 } else { 1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1405 if (fhp != NULL) { 1406 memset(fhp, 0, *fh_size); 1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1408 fidp = &fhp->fh_fid; 1409 } 1410 } 1411 error = VFS_VPTOFH(vp, fidp, &fidsize); 1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1413 if (error == 0 && *fh_size < needfhsize) { 1414 error = E2BIG; 1415 } 1416 *fh_size = needfhsize; 1417 return error; 1418 } 1419 1420 int 1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1422 { 1423 struct mount *mp; 1424 fhandle_t *fhp; 1425 size_t fhsize; 1426 size_t fidsize; 1427 int error; 1428 1429 *fhpp = NULL; 1430 mp = vp->v_mount; 1431 fidsize = 0; 1432 error = VFS_VPTOFH(vp, NULL, &fidsize); 1433 KASSERT(error != 0); 1434 if (error != E2BIG) { 1435 goto out; 1436 } 1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1438 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1439 if (fhp == NULL) { 1440 error = ENOMEM; 1441 goto out; 1442 } 1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1445 if (error == 0) { 1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1447 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1448 *fhpp = fhp; 1449 } else { 1450 kmem_free(fhp, fhsize); 1451 } 1452 out: 1453 return error; 1454 } 1455 1456 void 1457 vfs_composefh_free(fhandle_t *fhp) 1458 { 1459 1460 vfs__fhfree(fhp); 1461 } 1462 1463 /* 1464 * vfs_fhtovp: lookup a vnode by a filehandle. 
1465 */ 1466 1467 int 1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1469 { 1470 struct mount *mp; 1471 int error; 1472 1473 *vpp = NULL; 1474 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1475 if (mp == NULL) { 1476 error = ESTALE; 1477 goto out; 1478 } 1479 if (mp->mnt_op->vfs_fhtovp == NULL) { 1480 error = EOPNOTSUPP; 1481 goto out; 1482 } 1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1484 out: 1485 return error; 1486 } 1487 1488 /* 1489 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1490 * the needed size. 1491 */ 1492 1493 int 1494 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1495 { 1496 fhandle_t *fhp; 1497 int error; 1498 1499 *fhpp = NULL; 1500 if (fhsize > FHANDLE_SIZE_MAX) { 1501 return EINVAL; 1502 } 1503 if (fhsize < FHANDLE_SIZE_MIN) { 1504 return EINVAL; 1505 } 1506 again: 1507 fhp = kmem_alloc(fhsize, KM_SLEEP); 1508 if (fhp == NULL) { 1509 return ENOMEM; 1510 } 1511 error = copyin(ufhp, fhp, fhsize); 1512 if (error == 0) { 1513 /* XXX this check shouldn't be here */ 1514 if (FHANDLE_SIZE(fhp) == fhsize) { 1515 *fhpp = fhp; 1516 return 0; 1517 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1518 /* 1519 * a kludge for nfsv2 padded handles. 1520 */ 1521 size_t sz; 1522 1523 sz = FHANDLE_SIZE(fhp); 1524 kmem_free(fhp, fhsize); 1525 fhsize = sz; 1526 goto again; 1527 } else { 1528 /* 1529 * userland told us wrong size. 
1530 */ 1531 error = EINVAL; 1532 } 1533 } 1534 kmem_free(fhp, fhsize); 1535 return error; 1536 } 1537 1538 void 1539 vfs_copyinfh_free(fhandle_t *fhp) 1540 { 1541 1542 vfs__fhfree(fhp); 1543 } 1544 1545 /* 1546 * Get file handle system call 1547 */ 1548 int 1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1550 { 1551 /* { 1552 syscallarg(char *) fname; 1553 syscallarg(fhandle_t *) fhp; 1554 syscallarg(size_t *) fh_size; 1555 } */ 1556 struct vnode *vp; 1557 fhandle_t *fh; 1558 int error; 1559 struct nameidata nd; 1560 size_t sz; 1561 size_t usz; 1562 1563 /* 1564 * Must be super user 1565 */ 1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1567 0, NULL, NULL, NULL); 1568 if (error) 1569 return (error); 1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1571 SCARG(uap, fname)); 1572 error = namei(&nd); 1573 if (error) 1574 return (error); 1575 vp = nd.ni_vp; 1576 error = vfs_composefh_alloc(vp, &fh); 1577 vput(vp); 1578 if (error != 0) { 1579 goto out; 1580 } 1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1582 if (error != 0) { 1583 goto out; 1584 } 1585 sz = FHANDLE_SIZE(fh); 1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1587 if (error != 0) { 1588 goto out; 1589 } 1590 if (usz >= sz) { 1591 error = copyout(fh, SCARG(uap, fhp), sz); 1592 } else { 1593 error = E2BIG; 1594 } 1595 out: 1596 vfs_composefh_free(fh); 1597 return (error); 1598 } 1599 1600 /* 1601 * Open a file given a file handle. 1602 * 1603 * Check permissions, allocate an open file structure, 1604 * and call the device open routine if any. 
1605 */ 1606 1607 int 1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1609 register_t *retval) 1610 { 1611 file_t *fp; 1612 struct vnode *vp = NULL; 1613 kauth_cred_t cred = l->l_cred; 1614 file_t *nfp; 1615 int type, indx, error=0; 1616 struct flock lf; 1617 struct vattr va; 1618 fhandle_t *fh; 1619 int flags; 1620 proc_t *p; 1621 1622 p = curproc; 1623 1624 /* 1625 * Must be super user 1626 */ 1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1628 0, NULL, NULL, NULL))) 1629 return (error); 1630 1631 flags = FFLAGS(oflags); 1632 if ((flags & (FREAD | FWRITE)) == 0) 1633 return (EINVAL); 1634 if ((flags & O_CREAT)) 1635 return (EINVAL); 1636 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1637 return (error); 1638 fp = nfp; 1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1640 if (error != 0) { 1641 goto bad; 1642 } 1643 error = vfs_fhtovp(fh, &vp); 1644 if (error != 0) { 1645 goto bad; 1646 } 1647 1648 /* Now do an effective vn_open */ 1649 1650 if (vp->v_type == VSOCK) { 1651 error = EOPNOTSUPP; 1652 goto bad; 1653 } 1654 error = vn_openchk(vp, cred, flags); 1655 if (error != 0) 1656 goto bad; 1657 if (flags & O_TRUNC) { 1658 VOP_UNLOCK(vp, 0); /* XXX */ 1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1660 VATTR_NULL(&va); 1661 va.va_size = 0; 1662 error = VOP_SETATTR(vp, &va, cred); 1663 if (error) 1664 goto bad; 1665 } 1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1667 goto bad; 1668 if (flags & FWRITE) { 1669 mutex_enter(&vp->v_interlock); 1670 vp->v_writecount++; 1671 mutex_exit(&vp->v_interlock); 1672 } 1673 1674 /* done with modified vn_open, now finish what sys_open does. 
*/ 1675 1676 fp->f_flag = flags & FMASK; 1677 fp->f_type = DTYPE_VNODE; 1678 fp->f_ops = &vnops; 1679 fp->f_data = vp; 1680 if (flags & (O_EXLOCK | O_SHLOCK)) { 1681 lf.l_whence = SEEK_SET; 1682 lf.l_start = 0; 1683 lf.l_len = 0; 1684 if (flags & O_EXLOCK) 1685 lf.l_type = F_WRLCK; 1686 else 1687 lf.l_type = F_RDLCK; 1688 type = F_FLOCK; 1689 if ((flags & FNONBLOCK) == 0) 1690 type |= F_WAIT; 1691 VOP_UNLOCK(vp, 0); 1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1693 if (error) { 1694 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1695 fd_abort(p, fp, indx); 1696 return (error); 1697 } 1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1699 atomic_or_uint(&fp->f_flag, FHASLOCK); 1700 } 1701 VOP_UNLOCK(vp, 0); 1702 *retval = indx; 1703 fd_affix(p, fp, indx); 1704 vfs_copyinfh_free(fh); 1705 return (0); 1706 1707 bad: 1708 fd_abort(p, fp, indx); 1709 if (vp != NULL) 1710 vput(vp); 1711 vfs_copyinfh_free(fh); 1712 return (error); 1713 } 1714 1715 int 1716 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1717 { 1718 /* { 1719 syscallarg(const void *) fhp; 1720 syscallarg(size_t) fh_size; 1721 syscallarg(int) flags; 1722 } */ 1723 1724 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1725 SCARG(uap, flags), retval); 1726 } 1727 1728 int 1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1730 { 1731 int error; 1732 fhandle_t *fh; 1733 struct vnode *vp; 1734 1735 /* 1736 * Must be super user 1737 */ 1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1739 0, NULL, NULL, NULL))) 1740 return (error); 1741 1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1743 if (error != 0) 1744 return error; 1745 1746 error = vfs_fhtovp(fh, &vp); 1747 vfs_copyinfh_free(fh); 1748 if (error != 0) 1749 return error; 1750 1751 error = vn_stat(vp, sb); 1752 vput(vp); 1753 return error; 1754 } 1755 1756 1757 /* ARGSUSED */ 1758 int 1759 sys___fhstat50(struct lwp *l, const struct 
sys___fhstat50_args *uap, register_t *retval) 1760 { 1761 /* { 1762 syscallarg(const void *) fhp; 1763 syscallarg(size_t) fh_size; 1764 syscallarg(struct stat *) sb; 1765 } */ 1766 struct stat sb; 1767 int error; 1768 1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1770 if (error) 1771 return error; 1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1773 } 1774 1775 int 1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1777 int flags) 1778 { 1779 fhandle_t *fh; 1780 struct mount *mp; 1781 struct vnode *vp; 1782 int error; 1783 1784 /* 1785 * Must be super user 1786 */ 1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1788 0, NULL, NULL, NULL))) 1789 return error; 1790 1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1792 if (error != 0) 1793 return error; 1794 1795 error = vfs_fhtovp(fh, &vp); 1796 vfs_copyinfh_free(fh); 1797 if (error != 0) 1798 return error; 1799 1800 mp = vp->v_mount; 1801 error = dostatvfs(mp, sb, l, flags, 1); 1802 vput(vp); 1803 return error; 1804 } 1805 1806 /* ARGSUSED */ 1807 int 1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1809 { 1810 /* { 1811 syscallarg(const void *) fhp; 1812 syscallarg(size_t) fh_size; 1813 syscallarg(struct statvfs *) buf; 1814 syscallarg(int) flags; 1815 } */ 1816 struct statvfs *sb = STATVFSBUF_GET(); 1817 int error; 1818 1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1820 SCARG(uap, flags)); 1821 if (error == 0) 1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1823 STATVFSBUF_PUT(sb); 1824 return error; 1825 } 1826 1827 /* 1828 * Create a special file. 
1829 */ 1830 /* ARGSUSED */ 1831 int 1832 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1833 register_t *retval) 1834 { 1835 /* { 1836 syscallarg(const char *) path; 1837 syscallarg(mode_t) mode; 1838 syscallarg(dev_t) dev; 1839 } */ 1840 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1841 SCARG(uap, dev), retval); 1842 } 1843 1844 int 1845 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1846 register_t *retval) 1847 { 1848 struct proc *p = l->l_proc; 1849 struct vnode *vp; 1850 struct vattr vattr; 1851 int error, optype; 1852 struct nameidata nd; 1853 char *path; 1854 const char *cpath; 1855 enum uio_seg seg = UIO_USERSPACE; 1856 1857 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1858 0, NULL, NULL, NULL)) != 0) 1859 return (error); 1860 1861 optype = VOP_MKNOD_DESCOFFSET; 1862 1863 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1864 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1865 1866 if ((error = namei(&nd)) != 0) 1867 goto out; 1868 vp = nd.ni_vp; 1869 if (vp != NULL) 1870 error = EEXIST; 1871 else { 1872 VATTR_NULL(&vattr); 1873 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 1874 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1875 vattr.va_rdev = dev; 1876 1877 switch (mode & S_IFMT) { 1878 case S_IFMT: /* used by badsect to flag bad sectors */ 1879 vattr.va_type = VBAD; 1880 break; 1881 case S_IFCHR: 1882 vattr.va_type = VCHR; 1883 break; 1884 case S_IFBLK: 1885 vattr.va_type = VBLK; 1886 break; 1887 case S_IFWHT: 1888 optype = VOP_WHITEOUT_DESCOFFSET; 1889 break; 1890 case S_IFREG: 1891 #if NVERIEXEC > 0 1892 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1893 O_CREAT); 1894 #endif /* NVERIEXEC > 0 */ 1895 vattr.va_type = VREG; 1896 vattr.va_rdev = VNOVAL; 1897 optype = VOP_CREATE_DESCOFFSET; 1898 break; 1899 default: 1900 error = EINVAL; 1901 break; 1902 } 1903 } 1904 if (!error) { 1905 switch (optype) { 1906 case VOP_WHITEOUT_DESCOFFSET: 1907 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1908 if (error) 1909 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1910 vput(nd.ni_dvp); 1911 break; 1912 1913 case VOP_MKNOD_DESCOFFSET: 1914 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1915 &nd.ni_cnd, &vattr); 1916 if (error == 0) 1917 vput(nd.ni_vp); 1918 break; 1919 1920 case VOP_CREATE_DESCOFFSET: 1921 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1922 &nd.ni_cnd, &vattr); 1923 if (error == 0) 1924 vput(nd.ni_vp); 1925 break; 1926 } 1927 } else { 1928 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1929 if (nd.ni_dvp == vp) 1930 vrele(nd.ni_dvp); 1931 else 1932 vput(nd.ni_dvp); 1933 if (vp) 1934 vrele(vp); 1935 } 1936 out: 1937 VERIEXEC_PATH_PUT(path); 1938 return (error); 1939 } 1940 1941 /* 1942 * Create a named pipe. 
1943 */ 1944 /* ARGSUSED */ 1945 int 1946 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1947 { 1948 /* { 1949 syscallarg(const char *) path; 1950 syscallarg(int) mode; 1951 } */ 1952 struct proc *p = l->l_proc; 1953 struct vattr vattr; 1954 int error; 1955 struct nameidata nd; 1956 1957 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1958 SCARG(uap, path)); 1959 if ((error = namei(&nd)) != 0) 1960 return (error); 1961 if (nd.ni_vp != NULL) { 1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1963 if (nd.ni_dvp == nd.ni_vp) 1964 vrele(nd.ni_dvp); 1965 else 1966 vput(nd.ni_dvp); 1967 vrele(nd.ni_vp); 1968 return (EEXIST); 1969 } 1970 VATTR_NULL(&vattr); 1971 vattr.va_type = VFIFO; 1972 /* We will read cwdi->cwdi_cmask unlocked. */ 1973 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1975 if (error == 0) 1976 vput(nd.ni_vp); 1977 return (error); 1978 } 1979 1980 /* 1981 * Make a hard file link. 1982 */ 1983 /* ARGSUSED */ 1984 int 1985 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1986 { 1987 /* { 1988 syscallarg(const char *) path; 1989 syscallarg(const char *) link; 1990 } */ 1991 struct vnode *vp; 1992 struct nameidata nd; 1993 int error; 1994 1995 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1996 SCARG(uap, path)); 1997 if ((error = namei(&nd)) != 0) 1998 return (error); 1999 vp = nd.ni_vp; 2000 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2001 SCARG(uap, link)); 2002 if ((error = namei(&nd)) != 0) 2003 goto out; 2004 if (nd.ni_vp) { 2005 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2006 if (nd.ni_dvp == nd.ni_vp) 2007 vrele(nd.ni_dvp); 2008 else 2009 vput(nd.ni_dvp); 2010 vrele(nd.ni_vp); 2011 error = EEXIST; 2012 goto out; 2013 } 2014 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2015 out: 2016 vrele(vp); 2017 return (error); 2018 } 2019 2020 /* 2021 * Make a symbolic link. 
2022 */ 2023 /* ARGSUSED */ 2024 int 2025 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2026 { 2027 /* { 2028 syscallarg(const char *) path; 2029 syscallarg(const char *) link; 2030 } */ 2031 struct proc *p = l->l_proc; 2032 struct vattr vattr; 2033 char *path; 2034 int error; 2035 struct nameidata nd; 2036 2037 path = PNBUF_GET(); 2038 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2039 if (error) 2040 goto out; 2041 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2042 SCARG(uap, link)); 2043 if ((error = namei(&nd)) != 0) 2044 goto out; 2045 if (nd.ni_vp) { 2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2047 if (nd.ni_dvp == nd.ni_vp) 2048 vrele(nd.ni_dvp); 2049 else 2050 vput(nd.ni_dvp); 2051 vrele(nd.ni_vp); 2052 error = EEXIST; 2053 goto out; 2054 } 2055 VATTR_NULL(&vattr); 2056 vattr.va_type = VLNK; 2057 /* We will read cwdi->cwdi_cmask unlocked. */ 2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2060 if (error == 0) 2061 vput(nd.ni_vp); 2062 out: 2063 PNBUF_PUT(path); 2064 return (error); 2065 } 2066 2067 /* 2068 * Delete a whiteout from the filesystem. 
2069 */ 2070 /* ARGSUSED */ 2071 int 2072 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const char *) path; 2076 } */ 2077 int error; 2078 struct nameidata nd; 2079 2080 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2081 UIO_USERSPACE, SCARG(uap, path)); 2082 error = namei(&nd); 2083 if (error) 2084 return (error); 2085 2086 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2087 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2088 if (nd.ni_dvp == nd.ni_vp) 2089 vrele(nd.ni_dvp); 2090 else 2091 vput(nd.ni_dvp); 2092 if (nd.ni_vp) 2093 vrele(nd.ni_vp); 2094 return (EEXIST); 2095 } 2096 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2097 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2098 vput(nd.ni_dvp); 2099 return (error); 2100 } 2101 2102 /* 2103 * Delete a name from the filesystem. 2104 */ 2105 /* ARGSUSED */ 2106 int 2107 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2108 { 2109 /* { 2110 syscallarg(const char *) path; 2111 } */ 2112 2113 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2114 } 2115 2116 int 2117 do_sys_unlink(const char *arg, enum uio_seg seg) 2118 { 2119 struct vnode *vp; 2120 int error; 2121 struct nameidata nd; 2122 kauth_cred_t cred; 2123 char *path; 2124 const char *cpath; 2125 2126 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2127 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2128 2129 if ((error = namei(&nd)) != 0) 2130 goto out; 2131 vp = nd.ni_vp; 2132 2133 /* 2134 * The root of a mounted filesystem cannot be deleted. 2135 */ 2136 if (vp->v_vflag & VV_ROOT) { 2137 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2138 if (nd.ni_dvp == vp) 2139 vrele(nd.ni_dvp); 2140 else 2141 vput(nd.ni_dvp); 2142 vput(vp); 2143 error = EBUSY; 2144 goto out; 2145 } 2146 2147 #if NVERIEXEC > 0 2148 /* Handle remove requests for veriexec entries. 
*/ 2149 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2150 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2151 if (nd.ni_dvp == vp) 2152 vrele(nd.ni_dvp); 2153 else 2154 vput(nd.ni_dvp); 2155 vput(vp); 2156 goto out; 2157 } 2158 #endif /* NVERIEXEC > 0 */ 2159 2160 cred = kauth_cred_get(); 2161 #ifdef FILEASSOC 2162 (void)fileassoc_file_delete(vp); 2163 #endif /* FILEASSOC */ 2164 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2165 out: 2166 VERIEXEC_PATH_PUT(path); 2167 return (error); 2168 } 2169 2170 /* 2171 * Reposition read/write file offset. 2172 */ 2173 int 2174 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2175 { 2176 /* { 2177 syscallarg(int) fd; 2178 syscallarg(int) pad; 2179 syscallarg(off_t) offset; 2180 syscallarg(int) whence; 2181 } */ 2182 kauth_cred_t cred = l->l_cred; 2183 file_t *fp; 2184 struct vnode *vp; 2185 struct vattr vattr; 2186 off_t newoff; 2187 int error, fd; 2188 2189 fd = SCARG(uap, fd); 2190 2191 if ((fp = fd_getfile(fd)) == NULL) 2192 return (EBADF); 2193 2194 vp = fp->f_data; 2195 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2196 error = ESPIPE; 2197 goto out; 2198 } 2199 2200 switch (SCARG(uap, whence)) { 2201 case SEEK_CUR: 2202 newoff = fp->f_offset + SCARG(uap, offset); 2203 break; 2204 case SEEK_END: 2205 error = VOP_GETATTR(vp, &vattr, cred); 2206 if (error) { 2207 goto out; 2208 } 2209 newoff = SCARG(uap, offset) + vattr.va_size; 2210 break; 2211 case SEEK_SET: 2212 newoff = SCARG(uap, offset); 2213 break; 2214 default: 2215 error = EINVAL; 2216 goto out; 2217 } 2218 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2219 *(off_t *)retval = fp->f_offset = newoff; 2220 } 2221 out: 2222 fd_putfile(fd); 2223 return (error); 2224 } 2225 2226 /* 2227 * Positional read system call. 
2228 */ 2229 int 2230 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2231 { 2232 /* { 2233 syscallarg(int) fd; 2234 syscallarg(void *) buf; 2235 syscallarg(size_t) nbyte; 2236 syscallarg(off_t) offset; 2237 } */ 2238 file_t *fp; 2239 struct vnode *vp; 2240 off_t offset; 2241 int error, fd = SCARG(uap, fd); 2242 2243 if ((fp = fd_getfile(fd)) == NULL) 2244 return (EBADF); 2245 2246 if ((fp->f_flag & FREAD) == 0) { 2247 fd_putfile(fd); 2248 return (EBADF); 2249 } 2250 2251 vp = fp->f_data; 2252 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2253 error = ESPIPE; 2254 goto out; 2255 } 2256 2257 offset = SCARG(uap, offset); 2258 2259 /* 2260 * XXX This works because no file systems actually 2261 * XXX take any action on the seek operation. 2262 */ 2263 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2264 goto out; 2265 2266 /* dofileread() will unuse the descriptor for us */ 2267 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2268 &offset, 0, retval)); 2269 2270 out: 2271 fd_putfile(fd); 2272 return (error); 2273 } 2274 2275 /* 2276 * Positional scatter read system call. 2277 */ 2278 int 2279 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2280 { 2281 /* { 2282 syscallarg(int) fd; 2283 syscallarg(const struct iovec *) iovp; 2284 syscallarg(int) iovcnt; 2285 syscallarg(off_t) offset; 2286 } */ 2287 off_t offset = SCARG(uap, offset); 2288 2289 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2290 SCARG(uap, iovcnt), &offset, 0, retval); 2291 } 2292 2293 /* 2294 * Positional write system call. 
2295 */ 2296 int 2297 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2298 { 2299 /* { 2300 syscallarg(int) fd; 2301 syscallarg(const void *) buf; 2302 syscallarg(size_t) nbyte; 2303 syscallarg(off_t) offset; 2304 } */ 2305 file_t *fp; 2306 struct vnode *vp; 2307 off_t offset; 2308 int error, fd = SCARG(uap, fd); 2309 2310 if ((fp = fd_getfile(fd)) == NULL) 2311 return (EBADF); 2312 2313 if ((fp->f_flag & FWRITE) == 0) { 2314 fd_putfile(fd); 2315 return (EBADF); 2316 } 2317 2318 vp = fp->f_data; 2319 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2320 error = ESPIPE; 2321 goto out; 2322 } 2323 2324 offset = SCARG(uap, offset); 2325 2326 /* 2327 * XXX This works because no file systems actually 2328 * XXX take any action on the seek operation. 2329 */ 2330 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2331 goto out; 2332 2333 /* dofilewrite() will unuse the descriptor for us */ 2334 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2335 &offset, 0, retval)); 2336 2337 out: 2338 fd_putfile(fd); 2339 return (error); 2340 } 2341 2342 /* 2343 * Positional gather write system call. 2344 */ 2345 int 2346 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2347 { 2348 /* { 2349 syscallarg(int) fd; 2350 syscallarg(const struct iovec *) iovp; 2351 syscallarg(int) iovcnt; 2352 syscallarg(off_t) offset; 2353 } */ 2354 off_t offset = SCARG(uap, offset); 2355 2356 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2357 SCARG(uap, iovcnt), &offset, 0, retval); 2358 } 2359 2360 /* 2361 * Check access permissions. 
2362 */ 2363 int 2364 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2365 { 2366 /* { 2367 syscallarg(const char *) path; 2368 syscallarg(int) flags; 2369 } */ 2370 kauth_cred_t cred; 2371 struct vnode *vp; 2372 int error, flags; 2373 struct nameidata nd; 2374 2375 cred = kauth_cred_dup(l->l_cred); 2376 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2377 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2378 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2379 SCARG(uap, path)); 2380 /* Override default credentials */ 2381 nd.ni_cnd.cn_cred = cred; 2382 if ((error = namei(&nd)) != 0) 2383 goto out; 2384 vp = nd.ni_vp; 2385 2386 /* Flags == 0 means only check for existence. */ 2387 if (SCARG(uap, flags)) { 2388 flags = 0; 2389 if (SCARG(uap, flags) & R_OK) 2390 flags |= VREAD; 2391 if (SCARG(uap, flags) & W_OK) 2392 flags |= VWRITE; 2393 if (SCARG(uap, flags) & X_OK) 2394 flags |= VEXEC; 2395 2396 error = VOP_ACCESS(vp, flags, cred); 2397 if (!error && (flags & VWRITE)) 2398 error = vn_writechk(vp); 2399 } 2400 vput(vp); 2401 out: 2402 kauth_cred_free(cred); 2403 return (error); 2404 } 2405 2406 /* 2407 * Common code for all sys_stat functions, including compat versions. 2408 */ 2409 int 2410 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2411 { 2412 int error; 2413 struct nameidata nd; 2414 2415 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2416 UIO_USERSPACE, path); 2417 error = namei(&nd); 2418 if (error != 0) 2419 return error; 2420 error = vn_stat(nd.ni_vp, sb); 2421 vput(nd.ni_vp); 2422 return error; 2423 } 2424 2425 /* 2426 * Get file status; this version follows links. 
2427 */ 2428 /* ARGSUSED */ 2429 int 2430 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2431 { 2432 /* { 2433 syscallarg(const char *) path; 2434 syscallarg(struct stat *) ub; 2435 } */ 2436 struct stat sb; 2437 int error; 2438 2439 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2440 if (error) 2441 return error; 2442 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2443 } 2444 2445 /* 2446 * Get file status; this version does not follow links. 2447 */ 2448 /* ARGSUSED */ 2449 int 2450 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2451 { 2452 /* { 2453 syscallarg(const char *) path; 2454 syscallarg(struct stat *) ub; 2455 } */ 2456 struct stat sb; 2457 int error; 2458 2459 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2460 if (error) 2461 return error; 2462 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2463 } 2464 2465 /* 2466 * Get configurable pathname variables. 2467 */ 2468 /* ARGSUSED */ 2469 int 2470 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2471 { 2472 /* { 2473 syscallarg(const char *) path; 2474 syscallarg(int) name; 2475 } */ 2476 int error; 2477 struct nameidata nd; 2478 2479 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2480 SCARG(uap, path)); 2481 if ((error = namei(&nd)) != 0) 2482 return (error); 2483 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2484 vput(nd.ni_vp); 2485 return (error); 2486 } 2487 2488 /* 2489 * Return target name of a symbolic link. 
2490 */ 2491 /* ARGSUSED */ 2492 int 2493 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2494 { 2495 /* { 2496 syscallarg(const char *) path; 2497 syscallarg(char *) buf; 2498 syscallarg(size_t) count; 2499 } */ 2500 struct vnode *vp; 2501 struct iovec aiov; 2502 struct uio auio; 2503 int error; 2504 struct nameidata nd; 2505 2506 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2507 SCARG(uap, path)); 2508 if ((error = namei(&nd)) != 0) 2509 return (error); 2510 vp = nd.ni_vp; 2511 if (vp->v_type != VLNK) 2512 error = EINVAL; 2513 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2514 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2515 aiov.iov_base = SCARG(uap, buf); 2516 aiov.iov_len = SCARG(uap, count); 2517 auio.uio_iov = &aiov; 2518 auio.uio_iovcnt = 1; 2519 auio.uio_offset = 0; 2520 auio.uio_rw = UIO_READ; 2521 KASSERT(l == curlwp); 2522 auio.uio_vmspace = l->l_proc->p_vmspace; 2523 auio.uio_resid = SCARG(uap, count); 2524 error = VOP_READLINK(vp, &auio, l->l_cred); 2525 } 2526 vput(vp); 2527 *retval = SCARG(uap, count) - auio.uio_resid; 2528 return (error); 2529 } 2530 2531 /* 2532 * Change flags of a file given a path name. 2533 */ 2534 /* ARGSUSED */ 2535 int 2536 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2537 { 2538 /* { 2539 syscallarg(const char *) path; 2540 syscallarg(u_long) flags; 2541 } */ 2542 struct vnode *vp; 2543 int error; 2544 struct nameidata nd; 2545 2546 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2547 SCARG(uap, path)); 2548 if ((error = namei(&nd)) != 0) 2549 return (error); 2550 vp = nd.ni_vp; 2551 error = change_flags(vp, SCARG(uap, flags), l); 2552 vput(vp); 2553 return (error); 2554 } 2555 2556 /* 2557 * Change flags of a file given a file descriptor. 
2558 */ 2559 /* ARGSUSED */ 2560 int 2561 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2562 { 2563 /* { 2564 syscallarg(int) fd; 2565 syscallarg(u_long) flags; 2566 } */ 2567 struct vnode *vp; 2568 file_t *fp; 2569 int error; 2570 2571 /* fd_getvnode() will use the descriptor for us */ 2572 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2573 return (error); 2574 vp = fp->f_data; 2575 error = change_flags(vp, SCARG(uap, flags), l); 2576 VOP_UNLOCK(vp, 0); 2577 fd_putfile(SCARG(uap, fd)); 2578 return (error); 2579 } 2580 2581 /* 2582 * Change flags of a file given a path name; this version does 2583 * not follow links. 2584 */ 2585 int 2586 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2587 { 2588 /* { 2589 syscallarg(const char *) path; 2590 syscallarg(u_long) flags; 2591 } */ 2592 struct vnode *vp; 2593 int error; 2594 struct nameidata nd; 2595 2596 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2597 SCARG(uap, path)); 2598 if ((error = namei(&nd)) != 0) 2599 return (error); 2600 vp = nd.ni_vp; 2601 error = change_flags(vp, SCARG(uap, flags), l); 2602 vput(vp); 2603 return (error); 2604 } 2605 2606 /* 2607 * Common routine to change flags of a file. 2608 */ 2609 int 2610 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2611 { 2612 struct vattr vattr; 2613 int error; 2614 2615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2616 /* 2617 * Non-superusers cannot change the flags on devices, even if they 2618 * own them. 
2619 */ 2620 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2621 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2622 goto out; 2623 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2624 error = EINVAL; 2625 goto out; 2626 } 2627 } 2628 VATTR_NULL(&vattr); 2629 vattr.va_flags = flags; 2630 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2631 out: 2632 return (error); 2633 } 2634 2635 /* 2636 * Change mode of a file given path name; this version follows links. 2637 */ 2638 /* ARGSUSED */ 2639 int 2640 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2641 { 2642 /* { 2643 syscallarg(const char *) path; 2644 syscallarg(int) mode; 2645 } */ 2646 int error; 2647 struct nameidata nd; 2648 2649 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2650 SCARG(uap, path)); 2651 if ((error = namei(&nd)) != 0) 2652 return (error); 2653 2654 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2655 2656 vrele(nd.ni_vp); 2657 return (error); 2658 } 2659 2660 /* 2661 * Change mode of a file given a file descriptor. 2662 */ 2663 /* ARGSUSED */ 2664 int 2665 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2666 { 2667 /* { 2668 syscallarg(int) fd; 2669 syscallarg(int) mode; 2670 } */ 2671 file_t *fp; 2672 int error; 2673 2674 /* fd_getvnode() will use the descriptor for us */ 2675 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2676 return (error); 2677 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2678 fd_putfile(SCARG(uap, fd)); 2679 return (error); 2680 } 2681 2682 /* 2683 * Change mode of a file given path name; this version does not follow links. 
2684 */ 2685 /* ARGSUSED */ 2686 int 2687 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2688 { 2689 /* { 2690 syscallarg(const char *) path; 2691 syscallarg(int) mode; 2692 } */ 2693 int error; 2694 struct nameidata nd; 2695 2696 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2697 SCARG(uap, path)); 2698 if ((error = namei(&nd)) != 0) 2699 return (error); 2700 2701 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2702 2703 vrele(nd.ni_vp); 2704 return (error); 2705 } 2706 2707 /* 2708 * Common routine to set mode given a vnode. 2709 */ 2710 static int 2711 change_mode(struct vnode *vp, int mode, struct lwp *l) 2712 { 2713 struct vattr vattr; 2714 int error; 2715 2716 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2717 VATTR_NULL(&vattr); 2718 vattr.va_mode = mode & ALLPERMS; 2719 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2720 VOP_UNLOCK(vp, 0); 2721 return (error); 2722 } 2723 2724 /* 2725 * Set ownership given a path name; this version follows links. 2726 */ 2727 /* ARGSUSED */ 2728 int 2729 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2730 { 2731 /* { 2732 syscallarg(const char *) path; 2733 syscallarg(uid_t) uid; 2734 syscallarg(gid_t) gid; 2735 } */ 2736 int error; 2737 struct nameidata nd; 2738 2739 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2740 SCARG(uap, path)); 2741 if ((error = namei(&nd)) != 0) 2742 return (error); 2743 2744 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2745 2746 vrele(nd.ni_vp); 2747 return (error); 2748 } 2749 2750 /* 2751 * Set ownership given a path name; this version follows links. 2752 * Provides POSIX semantics. 
2753 */ 2754 /* ARGSUSED */ 2755 int 2756 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2757 { 2758 /* { 2759 syscallarg(const char *) path; 2760 syscallarg(uid_t) uid; 2761 syscallarg(gid_t) gid; 2762 } */ 2763 int error; 2764 struct nameidata nd; 2765 2766 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2767 SCARG(uap, path)); 2768 if ((error = namei(&nd)) != 0) 2769 return (error); 2770 2771 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2772 2773 vrele(nd.ni_vp); 2774 return (error); 2775 } 2776 2777 /* 2778 * Set ownership given a file descriptor. 2779 */ 2780 /* ARGSUSED */ 2781 int 2782 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2783 { 2784 /* { 2785 syscallarg(int) fd; 2786 syscallarg(uid_t) uid; 2787 syscallarg(gid_t) gid; 2788 } */ 2789 int error; 2790 file_t *fp; 2791 2792 /* fd_getvnode() will use the descriptor for us */ 2793 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2794 return (error); 2795 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2796 l, 0); 2797 fd_putfile(SCARG(uap, fd)); 2798 return (error); 2799 } 2800 2801 /* 2802 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2803 */ 2804 /* ARGSUSED */ 2805 int 2806 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2807 { 2808 /* { 2809 syscallarg(int) fd; 2810 syscallarg(uid_t) uid; 2811 syscallarg(gid_t) gid; 2812 } */ 2813 int error; 2814 file_t *fp; 2815 2816 /* fd_getvnode() will use the descriptor for us */ 2817 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2818 return (error); 2819 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2820 l, 1); 2821 fd_putfile(SCARG(uap, fd)); 2822 return (error); 2823 } 2824 2825 /* 2826 * Set ownership given a path name; this version does not follow links. 
2827 */ 2828 /* ARGSUSED */ 2829 int 2830 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2831 { 2832 /* { 2833 syscallarg(const char *) path; 2834 syscallarg(uid_t) uid; 2835 syscallarg(gid_t) gid; 2836 } */ 2837 int error; 2838 struct nameidata nd; 2839 2840 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2841 SCARG(uap, path)); 2842 if ((error = namei(&nd)) != 0) 2843 return (error); 2844 2845 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2846 2847 vrele(nd.ni_vp); 2848 return (error); 2849 } 2850 2851 /* 2852 * Set ownership given a path name; this version does not follow links. 2853 * Provides POSIX/XPG semantics. 2854 */ 2855 /* ARGSUSED */ 2856 int 2857 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2858 { 2859 /* { 2860 syscallarg(const char *) path; 2861 syscallarg(uid_t) uid; 2862 syscallarg(gid_t) gid; 2863 } */ 2864 int error; 2865 struct nameidata nd; 2866 2867 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2868 SCARG(uap, path)); 2869 if ((error = namei(&nd)) != 0) 2870 return (error); 2871 2872 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2873 2874 vrele(nd.ni_vp); 2875 return (error); 2876 } 2877 2878 /* 2879 * Common routine to set ownership given a vnode. 2880 */ 2881 static int 2882 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2883 int posix_semantics) 2884 { 2885 struct vattr vattr; 2886 mode_t newmode; 2887 int error; 2888 2889 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2890 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2891 goto out; 2892 2893 #define CHANGED(x) ((int)(x) != -1) 2894 newmode = vattr.va_mode; 2895 if (posix_semantics) { 2896 /* 2897 * POSIX/XPG semantics: if the caller is not the super-user, 2898 * clear set-user-id and set-group-id bits. 
Both POSIX and 2899 * the XPG consider the behaviour for calls by the super-user 2900 * implementation-defined; we leave the set-user-id and set- 2901 * group-id settings intact in that case. 2902 */ 2903 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2904 NULL) != 0) 2905 newmode &= ~(S_ISUID | S_ISGID); 2906 } else { 2907 /* 2908 * NetBSD semantics: when changing owner and/or group, 2909 * clear the respective bit(s). 2910 */ 2911 if (CHANGED(uid)) 2912 newmode &= ~S_ISUID; 2913 if (CHANGED(gid)) 2914 newmode &= ~S_ISGID; 2915 } 2916 /* Update va_mode iff altered. */ 2917 if (vattr.va_mode == newmode) 2918 newmode = VNOVAL; 2919 2920 VATTR_NULL(&vattr); 2921 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2922 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2923 vattr.va_mode = newmode; 2924 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2925 #undef CHANGED 2926 2927 out: 2928 VOP_UNLOCK(vp, 0); 2929 return (error); 2930 } 2931 2932 /* 2933 * Set the access and modification times given a path name; this 2934 * version follows links. 2935 */ 2936 /* ARGSUSED */ 2937 int 2938 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2939 register_t *retval) 2940 { 2941 /* { 2942 syscallarg(const char *) path; 2943 syscallarg(const struct timeval *) tptr; 2944 } */ 2945 2946 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2947 SCARG(uap, tptr), UIO_USERSPACE); 2948 } 2949 2950 /* 2951 * Set the access and modification times given a file descriptor. 
2952 */ 2953 /* ARGSUSED */ 2954 int 2955 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2956 register_t *retval) 2957 { 2958 /* { 2959 syscallarg(int) fd; 2960 syscallarg(const struct timeval *) tptr; 2961 } */ 2962 int error; 2963 file_t *fp; 2964 2965 /* fd_getvnode() will use the descriptor for us */ 2966 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2967 return (error); 2968 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2969 UIO_USERSPACE); 2970 fd_putfile(SCARG(uap, fd)); 2971 return (error); 2972 } 2973 2974 /* 2975 * Set the access and modification times given a path name; this 2976 * version does not follow links. 2977 */ 2978 int 2979 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2980 register_t *retval) 2981 { 2982 /* { 2983 syscallarg(const char *) path; 2984 syscallarg(const struct timeval *) tptr; 2985 } */ 2986 2987 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2988 SCARG(uap, tptr), UIO_USERSPACE); 2989 } 2990 2991 /* 2992 * Common routine to set access and modification times given a vnode. 
2993 */ 2994 int 2995 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2996 const struct timeval *tptr, enum uio_seg seg) 2997 { 2998 struct vattr vattr; 2999 struct nameidata nd; 3000 int error; 3001 bool vanull, setbirthtime; 3002 struct timespec ts[2]; 3003 3004 if (tptr == NULL) { 3005 vanull = true; 3006 nanotime(&ts[0]); 3007 ts[1] = ts[0]; 3008 } else { 3009 struct timeval tv[2]; 3010 3011 vanull = false; 3012 if (seg != UIO_SYSSPACE) { 3013 error = copyin(tptr, tv, sizeof (tv)); 3014 if (error != 0) 3015 return error; 3016 tptr = tv; 3017 } 3018 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3019 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3020 } 3021 3022 if (vp == NULL) { 3023 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3024 if ((error = namei(&nd)) != 0) 3025 return error; 3026 vp = nd.ni_vp; 3027 } else 3028 nd.ni_vp = NULL; 3029 3030 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3031 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3032 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3033 VATTR_NULL(&vattr); 3034 vattr.va_atime = ts[0]; 3035 vattr.va_mtime = ts[1]; 3036 if (setbirthtime) 3037 vattr.va_birthtime = ts[1]; 3038 if (vanull) 3039 vattr.va_flags |= VA_UTIMES_NULL; 3040 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3041 VOP_UNLOCK(vp, 0); 3042 3043 if (nd.ni_vp != NULL) 3044 vrele(nd.ni_vp); 3045 3046 return error; 3047 } 3048 3049 /* 3050 * Truncate a file given its path name. 
3051 */ 3052 /* ARGSUSED */ 3053 int 3054 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3055 { 3056 /* { 3057 syscallarg(const char *) path; 3058 syscallarg(int) pad; 3059 syscallarg(off_t) length; 3060 } */ 3061 struct vnode *vp; 3062 struct vattr vattr; 3063 int error; 3064 struct nameidata nd; 3065 3066 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3067 SCARG(uap, path)); 3068 if ((error = namei(&nd)) != 0) 3069 return (error); 3070 vp = nd.ni_vp; 3071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3072 if (vp->v_type == VDIR) 3073 error = EISDIR; 3074 else if ((error = vn_writechk(vp)) == 0 && 3075 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3076 VATTR_NULL(&vattr); 3077 vattr.va_size = SCARG(uap, length); 3078 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3079 } 3080 vput(vp); 3081 return (error); 3082 } 3083 3084 /* 3085 * Truncate a file given a file descriptor. 3086 */ 3087 /* ARGSUSED */ 3088 int 3089 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3090 { 3091 /* { 3092 syscallarg(int) fd; 3093 syscallarg(int) pad; 3094 syscallarg(off_t) length; 3095 } */ 3096 struct vattr vattr; 3097 struct vnode *vp; 3098 file_t *fp; 3099 int error; 3100 3101 /* fd_getvnode() will use the descriptor for us */ 3102 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3103 return (error); 3104 if ((fp->f_flag & FWRITE) == 0) { 3105 error = EINVAL; 3106 goto out; 3107 } 3108 vp = fp->f_data; 3109 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3110 if (vp->v_type == VDIR) 3111 error = EISDIR; 3112 else if ((error = vn_writechk(vp)) == 0) { 3113 VATTR_NULL(&vattr); 3114 vattr.va_size = SCARG(uap, length); 3115 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3116 } 3117 VOP_UNLOCK(vp, 0); 3118 out: 3119 fd_putfile(SCARG(uap, fd)); 3120 return (error); 3121 } 3122 3123 /* 3124 * Sync an open file. 
3125 */ 3126 /* ARGSUSED */ 3127 int 3128 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3129 { 3130 /* { 3131 syscallarg(int) fd; 3132 } */ 3133 struct vnode *vp; 3134 file_t *fp; 3135 int error; 3136 3137 /* fd_getvnode() will use the descriptor for us */ 3138 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3139 return (error); 3140 vp = fp->f_data; 3141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3142 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3143 if (error == 0 && bioopsp != NULL && 3144 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3145 (*bioopsp->io_fsync)(vp, 0); 3146 VOP_UNLOCK(vp, 0); 3147 fd_putfile(SCARG(uap, fd)); 3148 return (error); 3149 } 3150 3151 /* 3152 * Sync a range of file data. API modeled after that found in AIX. 3153 * 3154 * FDATASYNC indicates that we need only save enough metadata to be able 3155 * to re-read the written data. Note we duplicate AIX's requirement that 3156 * the file be open for writing. 3157 */ 3158 /* ARGSUSED */ 3159 int 3160 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3161 { 3162 /* { 3163 syscallarg(int) fd; 3164 syscallarg(int) flags; 3165 syscallarg(off_t) start; 3166 syscallarg(off_t) length; 3167 } */ 3168 struct vnode *vp; 3169 file_t *fp; 3170 int flags, nflags; 3171 off_t s, e, len; 3172 int error; 3173 3174 /* fd_getvnode() will use the descriptor for us */ 3175 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3176 return (error); 3177 3178 if ((fp->f_flag & FWRITE) == 0) { 3179 error = EBADF; 3180 goto out; 3181 } 3182 3183 flags = SCARG(uap, flags); 3184 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3185 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3186 error = EINVAL; 3187 goto out; 3188 } 3189 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3190 if (flags & FDATASYNC) 3191 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3192 else 3193 nflags = FSYNC_WAIT; 3194 if (flags & FDISKSYNC) 3195 nflags |= 
FSYNC_CACHE; 3196 3197 len = SCARG(uap, length); 3198 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3199 if (len) { 3200 s = SCARG(uap, start); 3201 e = s + len; 3202 if (e < s) { 3203 error = EINVAL; 3204 goto out; 3205 } 3206 } else { 3207 e = 0; 3208 s = 0; 3209 } 3210 3211 vp = fp->f_data; 3212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3213 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3214 3215 if (error == 0 && bioopsp != NULL && 3216 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3217 (*bioopsp->io_fsync)(vp, nflags); 3218 3219 VOP_UNLOCK(vp, 0); 3220 out: 3221 fd_putfile(SCARG(uap, fd)); 3222 return (error); 3223 } 3224 3225 /* 3226 * Sync the data of an open file. 3227 */ 3228 /* ARGSUSED */ 3229 int 3230 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3231 { 3232 /* { 3233 syscallarg(int) fd; 3234 } */ 3235 struct vnode *vp; 3236 file_t *fp; 3237 int error; 3238 3239 /* fd_getvnode() will use the descriptor for us */ 3240 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3241 return (error); 3242 if ((fp->f_flag & FWRITE) == 0) { 3243 fd_putfile(SCARG(uap, fd)); 3244 return (EBADF); 3245 } 3246 vp = fp->f_data; 3247 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3248 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3249 VOP_UNLOCK(vp, 0); 3250 fd_putfile(SCARG(uap, fd)); 3251 return (error); 3252 } 3253 3254 /* 3255 * Rename files, (standard) BSD semantics frontend. 3256 */ 3257 /* ARGSUSED */ 3258 int 3259 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3260 { 3261 /* { 3262 syscallarg(const char *) from; 3263 syscallarg(const char *) to; 3264 } */ 3265 3266 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3267 } 3268 3269 /* 3270 * Rename files, POSIX semantics frontend. 
3271 */ 3272 /* ARGSUSED */ 3273 int 3274 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3275 { 3276 /* { 3277 syscallarg(const char *) from; 3278 syscallarg(const char *) to; 3279 } */ 3280 3281 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3282 } 3283 3284 /* 3285 * Rename files. Source and destination must either both be directories, 3286 * or both not be directories. If target is a directory, it must be empty. 3287 * If `from' and `to' refer to the same object, the value of the `retain' 3288 * argument is used to determine whether `from' will be 3289 * 3290 * (retain == 0) deleted unless `from' and `to' refer to the same 3291 * object in the file system's name space (BSD). 3292 * (retain == 1) always retained (POSIX). 3293 */ 3294 int 3295 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3296 { 3297 struct vnode *tvp, *fvp, *tdvp; 3298 struct nameidata fromnd, tond; 3299 struct mount *fs; 3300 struct lwp *l = curlwp; 3301 struct proc *p; 3302 uint32_t saveflag; 3303 int error; 3304 3305 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3306 seg, from); 3307 if ((error = namei(&fromnd)) != 0) 3308 return (error); 3309 if (fromnd.ni_dvp != fromnd.ni_vp) 3310 VOP_UNLOCK(fromnd.ni_dvp, 0); 3311 fvp = fromnd.ni_vp; 3312 3313 fs = fvp->v_mount; 3314 error = VFS_RENAMELOCK_ENTER(fs); 3315 if (error) { 3316 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3317 vrele(fromnd.ni_dvp); 3318 vrele(fvp); 3319 goto out1; 3320 } 3321 3322 /* 3323 * close, partially, yet another race - ideally we should only 3324 * go as far as getting fromnd.ni_dvp before getting the per-fs 3325 * lock, and then continue to get fromnd.ni_vp, but we can't do 3326 * that with namei as it stands. 3327 * 3328 * This still won't prevent rmdir from nuking fromnd.ni_vp 3329 * under us. 
The real fix is to get the locks in the right 3330 * order and do the lookups in the right places, but that's a 3331 * major rototill. 3332 * 3333 * Preserve the SAVESTART in cn_flags, because who knows what 3334 * might happen if we don't. 3335 * 3336 * Note: this logic (as well as this whole function) is cloned 3337 * in nfs_serv.c. Proceed accordingly. 3338 */ 3339 vrele(fvp); 3340 if ((fromnd.ni_cnd.cn_namelen == 1 && 3341 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3342 (fromnd.ni_cnd.cn_namelen == 2 && 3343 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3344 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3345 error = EINVAL; 3346 VFS_RENAMELOCK_EXIT(fs); 3347 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3348 vrele(fromnd.ni_dvp); 3349 goto out1; 3350 } 3351 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3352 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3353 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3354 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3355 fromnd.ni_cnd.cn_flags |= saveflag; 3356 if (error) { 3357 VOP_UNLOCK(fromnd.ni_dvp, 0); 3358 VFS_RENAMELOCK_EXIT(fs); 3359 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3360 vrele(fromnd.ni_dvp); 3361 goto out1; 3362 } 3363 VOP_UNLOCK(fromnd.ni_vp, 0); 3364 if (fromnd.ni_dvp != fromnd.ni_vp) 3365 VOP_UNLOCK(fromnd.ni_dvp, 0); 3366 fvp = fromnd.ni_vp; 3367 3368 NDINIT(&tond, RENAME, 3369 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3370 | (fvp->v_type == VDIR ? 
CREATEDIR : 0), 3371 seg, to); 3372 if ((error = namei(&tond)) != 0) { 3373 VFS_RENAMELOCK_EXIT(fs); 3374 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3375 vrele(fromnd.ni_dvp); 3376 vrele(fvp); 3377 goto out1; 3378 } 3379 tdvp = tond.ni_dvp; 3380 tvp = tond.ni_vp; 3381 3382 if (tvp != NULL) { 3383 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3384 error = ENOTDIR; 3385 goto out; 3386 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3387 error = EISDIR; 3388 goto out; 3389 } 3390 } 3391 3392 if (fvp == tdvp) 3393 error = EINVAL; 3394 3395 /* 3396 * Source and destination refer to the same object. 3397 */ 3398 if (fvp == tvp) { 3399 if (retain) 3400 error = -1; 3401 else if (fromnd.ni_dvp == tdvp && 3402 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3403 !memcmp(fromnd.ni_cnd.cn_nameptr, 3404 tond.ni_cnd.cn_nameptr, 3405 fromnd.ni_cnd.cn_namelen)) 3406 error = -1; 3407 } 3408 3409 #if NVERIEXEC > 0 3410 if (!error) { 3411 char *f1, *f2; 3412 3413 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3414 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen + 1); 3415 3416 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3417 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen + 1); 3418 3419 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3420 3421 free(f1, M_TEMP); 3422 free(f2, M_TEMP); 3423 } 3424 #endif /* NVERIEXEC > 0 */ 3425 3426 out: 3427 p = l->l_proc; 3428 if (!error) { 3429 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3430 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3431 VFS_RENAMELOCK_EXIT(fs); 3432 } else { 3433 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3434 if (tdvp == tvp) 3435 vrele(tdvp); 3436 else 3437 vput(tdvp); 3438 if (tvp) 3439 vput(tvp); 3440 VFS_RENAMELOCK_EXIT(fs); 3441 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3442 vrele(fromnd.ni_dvp); 3443 vrele(fvp); 3444 } 3445 vrele(tond.ni_startdir); 3446 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3447 out1: 3448 if (fromnd.ni_startdir) 3449 
vrele(fromnd.ni_startdir); 3450 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3451 return (error == -1 ? 0 : error); 3452 } 3453 3454 /* 3455 * Make a directory file. 3456 */ 3457 /* ARGSUSED */ 3458 int 3459 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3460 { 3461 /* { 3462 syscallarg(const char *) path; 3463 syscallarg(int) mode; 3464 } */ 3465 struct proc *p = l->l_proc; 3466 struct vnode *vp; 3467 struct vattr vattr; 3468 int error; 3469 struct nameidata nd; 3470 3471 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3472 SCARG(uap, path)); 3473 if ((error = namei(&nd)) != 0) 3474 return (error); 3475 vp = nd.ni_vp; 3476 if (vp != NULL) { 3477 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3478 if (nd.ni_dvp == vp) 3479 vrele(nd.ni_dvp); 3480 else 3481 vput(nd.ni_dvp); 3482 vrele(vp); 3483 return (EEXIST); 3484 } 3485 VATTR_NULL(&vattr); 3486 vattr.va_type = VDIR; 3487 /* We will read cwdi->cwdi_cmask unlocked. */ 3488 vattr.va_mode = 3489 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3490 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3491 if (!error) 3492 vput(nd.ni_vp); 3493 return (error); 3494 } 3495 3496 /* 3497 * Remove a directory file. 3498 */ 3499 /* ARGSUSED */ 3500 int 3501 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3502 { 3503 /* { 3504 syscallarg(const char *) path; 3505 } */ 3506 struct vnode *vp; 3507 int error; 3508 struct nameidata nd; 3509 3510 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3511 SCARG(uap, path)); 3512 if ((error = namei(&nd)) != 0) 3513 return (error); 3514 vp = nd.ni_vp; 3515 if (vp->v_type != VDIR) { 3516 error = ENOTDIR; 3517 goto out; 3518 } 3519 /* 3520 * No rmdir "." please. 3521 */ 3522 if (nd.ni_dvp == vp) { 3523 error = EINVAL; 3524 goto out; 3525 } 3526 /* 3527 * The root of a mounted filesystem cannot be deleted. 
3528 */ 3529 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3530 error = EBUSY; 3531 goto out; 3532 } 3533 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3534 return (error); 3535 3536 out: 3537 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3538 if (nd.ni_dvp == vp) 3539 vrele(nd.ni_dvp); 3540 else 3541 vput(nd.ni_dvp); 3542 vput(vp); 3543 return (error); 3544 } 3545 3546 /* 3547 * Read a block of directory entries in a file system independent format. 3548 */ 3549 int 3550 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3551 { 3552 /* { 3553 syscallarg(int) fd; 3554 syscallarg(char *) buf; 3555 syscallarg(size_t) count; 3556 } */ 3557 file_t *fp; 3558 int error, done; 3559 3560 /* fd_getvnode() will use the descriptor for us */ 3561 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3562 return (error); 3563 if ((fp->f_flag & FREAD) == 0) { 3564 error = EBADF; 3565 goto out; 3566 } 3567 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3568 SCARG(uap, count), &done, l, 0, 0); 3569 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3570 *retval = done; 3571 out: 3572 fd_putfile(SCARG(uap, fd)); 3573 return (error); 3574 } 3575 3576 /* 3577 * Set the mode mask for creation of filesystem nodes. 3578 */ 3579 int 3580 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3581 { 3582 /* { 3583 syscallarg(mode_t) newmask; 3584 } */ 3585 struct proc *p = l->l_proc; 3586 struct cwdinfo *cwdi; 3587 3588 /* 3589 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3590 * important is that we serialize changes to the mask. The 3591 * rw_exit() will issue a write memory barrier on our behalf, 3592 * and force the changes out to other CPUs (as it must use an 3593 * atomic operation, draining the local CPU's store buffers). 
3594 */ 3595 cwdi = p->p_cwdi; 3596 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3597 *retval = cwdi->cwdi_cmask; 3598 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3599 rw_exit(&cwdi->cwdi_lock); 3600 3601 return (0); 3602 } 3603 3604 int 3605 dorevoke(struct vnode *vp, kauth_cred_t cred) 3606 { 3607 struct vattr vattr; 3608 int error; 3609 3610 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3611 return error; 3612 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3613 (error = kauth_authorize_generic(cred, 3614 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3615 VOP_REVOKE(vp, REVOKEALL); 3616 return (error); 3617 } 3618 3619 /* 3620 * Void all references to file by ripping underlying filesystem 3621 * away from vnode. 3622 */ 3623 /* ARGSUSED */ 3624 int 3625 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3626 { 3627 /* { 3628 syscallarg(const char *) path; 3629 } */ 3630 struct vnode *vp; 3631 int error; 3632 struct nameidata nd; 3633 3634 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3635 SCARG(uap, path)); 3636 if ((error = namei(&nd)) != 0) 3637 return (error); 3638 vp = nd.ni_vp; 3639 error = dorevoke(vp, l->l_cred); 3640 vrele(vp); 3641 return (error); 3642 } 3643