1 /* $NetBSD: vfs_syscalls.c,v 1.391 2009/03/13 11:05:26 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.391 2009/03/13 11:05:26 yamt Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_dir(struct nameidata *, struct lwp *); 114 static int change_flags(struct vnode *, u_long, struct lwp *); 115 static int change_mode(struct vnode *, int, struct lwp *l); 116 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 117 118 void checkdirs(struct vnode *); 119 120 int dovfsusermount = 0; 121 122 /* 123 * Virtual File System System Calls 124 */ 125 126 /* 127 * Mount a file system. 128 */ 129 130 /* 131 * This table is used to maintain compatibility with 4.3BSD 132 * and NetBSD 0.9 mount syscalls - and possibly other systems. 133 * Note, the order is important! 134 * 135 * Do not modify this table. It should only contain filesystems 136 * supported by NetBSD 0.9 and 4.3BSD. 137 */ 138 const char * const mountcompatnames[] = { 139 NULL, /* 0 = MOUNT_NONE */ 140 MOUNT_FFS, /* 1 = MOUNT_UFS */ 141 MOUNT_NFS, /* 2 */ 142 MOUNT_MFS, /* 3 */ 143 MOUNT_MSDOS, /* 4 */ 144 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 145 MOUNT_FDESC, /* 6 */ 146 MOUNT_KERNFS, /* 7 */ 147 NULL, /* 8 = MOUNT_DEVFS */ 148 MOUNT_AFS, /* 9 */ 149 }; 150 const int nmountcompatnames = sizeof(mountcompatnames) / 151 sizeof(mountcompatnames[0]); 152 153 static int 154 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 155 void *data, size_t *data_len) 156 { 157 struct mount *mp; 158 int error = 0, saved_flags; 159 160 mp = vp->v_mount; 161 saved_flags = mp->mnt_flag; 162 163 /* We can operate only on VV_ROOT nodes. */ 164 if ((vp->v_vflag & VV_ROOT) == 0) { 165 error = EINVAL; 166 goto out; 167 } 168 169 /* 170 * We only allow the filesystem to be reloaded if it 171 * is currently mounted read-only. Additionally, we 172 * prevent read-write to read-only downgrades. 173 */ 174 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 175 (mp->mnt_flag & MNT_RDONLY) == 0) { 176 error = EOPNOTSUPP; /* Needs translation */ 177 goto out; 178 } 179 180 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 181 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 182 if (error) 183 goto out; 184 185 if (vfs_busy(mp, NULL)) { 186 error = EPERM; 187 goto out; 188 } 189 190 mutex_enter(&mp->mnt_updating); 191 192 mp->mnt_flag &= ~MNT_OP_FLAGS; 193 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 194 195 /* 196 * Set the mount level flags. 197 */ 198 if (flags & MNT_RDONLY) 199 mp->mnt_flag |= MNT_RDONLY; 200 else if (mp->mnt_flag & MNT_RDONLY) 201 mp->mnt_iflag |= IMNT_WANTRDWR; 202 mp->mnt_flag &= 203 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 204 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 205 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 206 MNT_LOG); 207 mp->mnt_flag |= flags & 208 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 209 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 210 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 211 MNT_LOG | MNT_IGNORE); 212 213 error = VFS_MOUNT(mp, path, data, data_len); 214 215 if (error && data != NULL) { 216 int error2; 217 218 /* 219 * Update failed; let's try and see if it was an 220 * export request. For compat with 3.0 and earlier. 221 */ 222 error2 = vfs_hooks_reexport(mp, path, data); 223 224 /* 225 * Only update error code if the export request was 226 * understood but some problem occurred while 227 * processing it. 228 */ 229 if (error2 != EJUSTRETURN) 230 error = error2; 231 } 232 233 if (mp->mnt_iflag & IMNT_WANTRDWR) 234 mp->mnt_flag &= ~MNT_RDONLY; 235 if (error) 236 mp->mnt_flag = saved_flags; 237 mp->mnt_flag &= ~MNT_OP_FLAGS; 238 mp->mnt_iflag &= ~IMNT_WANTRDWR; 239 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 240 if (mp->mnt_syncer == NULL) 241 error = vfs_allocate_syncvnode(mp); 242 } else { 243 if (mp->mnt_syncer != NULL) 244 vfs_deallocate_syncvnode(mp); 245 } 246 mutex_exit(&mp->mnt_updating); 247 vfs_unbusy(mp, false, NULL); 248 249 out: 250 return (error); 251 } 252 253 static int 254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 255 { 256 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 257 int error; 258 259 /* Copy file-system type from userspace. */ 260 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 261 if (error) { 262 /* 263 * Historically, filesystem types were identified by numbers. 264 * If we get an integer for the filesystem type instead of a 265 * string, we check to see if it matches one of the historic 266 * filesystem types. 267 */ 268 u_long fsindex = (u_long)fstype; 269 if (fsindex >= nmountcompatnames || 270 mountcompatnames[fsindex] == NULL) 271 return ENODEV; 272 strlcpy(fstypename, mountcompatnames[fsindex], 273 sizeof(fstypename)); 274 } 275 276 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 277 if (strcmp(fstypename, "ufs") == 0) 278 fstypename[0] = 'f'; 279 280 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 281 return 0; 282 283 /* If we can autoload a vfs module, try again */ 284 mutex_enter(&module_lock); 285 (void)module_autoload(fstype, MODULE_CLASS_VFS); 286 mutex_exit(&module_lock); 287 288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 289 return 0; 290 291 return ENODEV; 292 } 293 294 static int 295 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 296 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 297 { 298 struct mount *mp; 299 struct vnode *vp = *vpp; 300 struct vattr va; 301 int error; 302 303 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 304 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 305 if (error) 306 return error; 307 308 /* Can't make a non-dir a mount-point (from here anyway). */ 309 if (vp->v_type != VDIR) 310 return ENOTDIR; 311 312 /* 313 * If the user is not root, ensure that they own the directory 314 * onto which we are attempting to mount. 315 */ 316 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 317 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 318 (error = kauth_authorize_generic(l->l_cred, 319 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 320 return error; 321 } 322 323 if (flags & MNT_EXPORTED) 324 return EINVAL; 325 326 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 327 return error; 328 329 /* 330 * Check if a file-system is not already mounted on this vnode. 331 */ 332 if (vp->v_mountedhere != NULL) 333 return EBUSY; 334 335 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 336 if (mp == NULL) 337 return ENOMEM; 338 339 mp->mnt_op = vfsops; 340 mp->mnt_refcnt = 1; 341 342 TAILQ_INIT(&mp->mnt_vnodelist); 343 rw_init(&mp->mnt_unmounting); 344 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 345 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 346 error = vfs_busy(mp, NULL); 347 KASSERT(error == 0); 348 mutex_enter(&mp->mnt_updating); 349 350 mp->mnt_vnodecovered = vp; 351 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 352 mount_initspecific(mp); 353 354 /* 355 * The underlying file system may refuse the mount for 356 * various reasons. Allow the user to force it to happen. 357 * 358 * Set the mount level flags. 359 */ 360 mp->mnt_flag = flags & 361 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 362 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 363 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 364 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 365 366 error = VFS_MOUNT(mp, path, data, data_len); 367 mp->mnt_flag &= ~MNT_OP_FLAGS; 368 369 /* 370 * Put the new filesystem on the mount list after root. 371 */ 372 cache_purge(vp); 373 if (error != 0) { 374 vp->v_mountedhere = NULL; 375 mutex_exit(&mp->mnt_updating); 376 vfs_unbusy(mp, false, NULL); 377 vfs_destroy(mp); 378 return error; 379 } 380 381 mp->mnt_iflag &= ~IMNT_WANTRDWR; 382 mutex_enter(&mountlist_lock); 383 vp->v_mountedhere = mp; 384 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 385 mutex_exit(&mountlist_lock); 386 vn_restorerecurse(vp, recurse); 387 VOP_UNLOCK(vp, 0); 388 checkdirs(vp); 389 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 390 error = vfs_allocate_syncvnode(mp); 391 /* Hold an additional reference to the mount across VFS_START(). */ 392 mutex_exit(&mp->mnt_updating); 393 vfs_unbusy(mp, true, NULL); 394 (void) VFS_STATVFS(mp, &mp->mnt_stat); 395 error = VFS_START(mp, 0); 396 if (error) 397 vrele(vp); 398 /* Drop reference held for VFS_START(). */ 399 vfs_destroy(mp); 400 *vpp = NULL; 401 return error; 402 } 403 404 static int 405 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 406 void *data, size_t *data_len) 407 { 408 struct mount *mp; 409 int error; 410 411 /* If MNT_GETARGS is specified, it should be the only flag. */ 412 if (flags & ~MNT_GETARGS) 413 return EINVAL; 414 415 mp = vp->v_mount; 416 417 /* XXX: probably some notion of "can see" here if we want isolation. */ 418 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 419 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 420 if (error) 421 return error; 422 423 if ((vp->v_vflag & VV_ROOT) == 0) 424 return EINVAL; 425 426 if (vfs_busy(mp, NULL)) 427 return EPERM; 428 429 mutex_enter(&mp->mnt_updating); 430 mp->mnt_flag &= ~MNT_OP_FLAGS; 431 mp->mnt_flag |= MNT_GETARGS; 432 error = VFS_MOUNT(mp, path, data, data_len); 433 mp->mnt_flag &= ~MNT_OP_FLAGS; 434 mutex_exit(&mp->mnt_updating); 435 436 vfs_unbusy(mp, false, NULL); 437 return (error); 438 } 439 440 int 441 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 442 { 443 /* { 444 syscallarg(const char *) type; 445 syscallarg(const char *) path; 446 syscallarg(int) flags; 447 syscallarg(void *) data; 448 syscallarg(size_t) data_len; 449 } */ 450 451 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 452 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 453 SCARG(uap, data_len), retval); 454 } 455 456 int 457 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 458 const char *path, int flags, void *data, enum uio_seg data_seg, 459 size_t data_len, register_t *retval) 460 { 461 struct vnode *vp; 462 struct nameidata nd; 463 void *data_buf = data; 464 u_int recurse; 465 int error; 466 467 /* 468 * Get vnode to be covered 469 */ 470 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 471 if ((error = namei(&nd)) != 0) 472 return (error); 473 vp = nd.ni_vp; 474 475 /* 476 * A lookup in VFS_MOUNT might result in an attempt to 477 * lock this vnode again, so make the lock recursive. 478 */ 479 if (vfsops == NULL) { 480 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 481 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 482 recurse = vn_setrecurse(vp); 483 vfsops = vp->v_mount->mnt_op; 484 } else { 485 /* 'type' is userspace */ 486 error = mount_get_vfsops(type, &vfsops); 487 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 488 recurse = vn_setrecurse(vp); 489 if (error != 0) 490 goto done; 491 } 492 } else { 493 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 494 recurse = vn_setrecurse(vp); 495 } 496 497 if (data != NULL && data_seg == UIO_USERSPACE) { 498 if (data_len == 0) { 499 /* No length supplied, use default for filesystem */ 500 data_len = vfsops->vfs_min_mount_data; 501 if (data_len > VFS_MAX_MOUNT_DATA) { 502 error = EINVAL; 503 goto done; 504 } 505 /* 506 * Hopefully a longer buffer won't make copyin() fail. 507 * For compatibility with 3.0 and earlier. 508 */ 509 if (flags & MNT_UPDATE 510 && data_len < sizeof (struct mnt_export_args30)) 511 data_len = sizeof (struct mnt_export_args30); 512 } 513 data_buf = kmem_alloc(data_len, KM_SLEEP); 514 515 /* NFS needs the buffer even for mnt_getargs .... */ 516 error = copyin(data, data_buf, data_len); 517 if (error != 0) 518 goto done; 519 } 520 521 if (flags & MNT_GETARGS) { 522 if (data_len == 0) { 523 error = EINVAL; 524 goto done; 525 } 526 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 527 if (error != 0) 528 goto done; 529 if (data_seg == UIO_USERSPACE) 530 error = copyout(data_buf, data, data_len); 531 *retval = data_len; 532 } else if (flags & MNT_UPDATE) { 533 error = mount_update(l, vp, path, flags, data_buf, &data_len); 534 } else { 535 /* Locking is handled internally in mount_domount(). */ 536 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 537 &data_len, recurse); 538 } 539 540 done: 541 if (vp != NULL) { 542 vn_restorerecurse(vp, recurse); 543 vput(vp); 544 } 545 if (data_buf != data) 546 kmem_free(data_buf, data_len); 547 return (error); 548 } 549 550 /* 551 * Scan all active processes to see if any of them have a current 552 * or root directory onto which the new filesystem has just been 553 * mounted. If so, replace them with the new mount point. 554 */ 555 void 556 checkdirs(struct vnode *olddp) 557 { 558 struct cwdinfo *cwdi; 559 struct vnode *newdp, *rele1, *rele2; 560 struct proc *p; 561 bool retry; 562 563 if (olddp->v_usecount == 1) 564 return; 565 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 566 panic("mount: lost mount"); 567 568 do { 569 retry = false; 570 mutex_enter(proc_lock); 571 PROCLIST_FOREACH(p, &allproc) { 572 if ((p->p_flag & PK_MARKER) != 0) 573 continue; 574 if ((cwdi = p->p_cwdi) == NULL) 575 continue; 576 /* 577 * Can't change to the old directory any more, 578 * so even if we see a stale value it's not a 579 * problem. 580 */ 581 if (cwdi->cwdi_cdir != olddp && 582 cwdi->cwdi_rdir != olddp) 583 continue; 584 retry = true; 585 rele1 = NULL; 586 rele2 = NULL; 587 atomic_inc_uint(&cwdi->cwdi_refcnt); 588 mutex_exit(proc_lock); 589 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 590 if (cwdi->cwdi_cdir == olddp) { 591 rele1 = cwdi->cwdi_cdir; 592 VREF(newdp); 593 cwdi->cwdi_cdir = newdp; 594 } 595 if (cwdi->cwdi_rdir == olddp) { 596 rele2 = cwdi->cwdi_rdir; 597 VREF(newdp); 598 cwdi->cwdi_rdir = newdp; 599 } 600 rw_exit(&cwdi->cwdi_lock); 601 cwdfree(cwdi); 602 if (rele1 != NULL) 603 vrele(rele1); 604 if (rele2 != NULL) 605 vrele(rele2); 606 mutex_enter(proc_lock); 607 break; 608 } 609 mutex_exit(proc_lock); 610 } while (retry); 611 612 if (rootvnode == olddp) { 613 vrele(rootvnode); 614 VREF(newdp); 615 rootvnode = newdp; 616 } 617 vput(newdp); 618 } 619 620 /* 621 * Unmount a file system. 622 * 623 * Note: unmount takes a path to the vnode mounted on as argument, 624 * not special file (as before). 625 */ 626 /* ARGSUSED */ 627 int 628 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 629 { 630 /* { 631 syscallarg(const char *) path; 632 syscallarg(int) flags; 633 } */ 634 struct vnode *vp; 635 struct mount *mp; 636 int error; 637 struct nameidata nd; 638 639 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 640 SCARG(uap, path)); 641 if ((error = namei(&nd)) != 0) 642 return (error); 643 vp = nd.ni_vp; 644 mp = vp->v_mount; 645 atomic_inc_uint(&mp->mnt_refcnt); 646 VOP_UNLOCK(vp, 0); 647 648 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 649 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 650 if (error) { 651 vrele(vp); 652 vfs_destroy(mp); 653 return (error); 654 } 655 656 /* 657 * Don't allow unmounting the root file system. 658 */ 659 if (mp->mnt_flag & MNT_ROOTFS) { 660 vrele(vp); 661 vfs_destroy(mp); 662 return (EINVAL); 663 } 664 665 /* 666 * Must be the root of the filesystem 667 */ 668 if ((vp->v_vflag & VV_ROOT) == 0) { 669 vrele(vp); 670 vfs_destroy(mp); 671 return (EINVAL); 672 } 673 674 vrele(vp); 675 error = dounmount(mp, SCARG(uap, flags), l); 676 vfs_destroy(mp); 677 return error; 678 } 679 680 /* 681 * Do the actual file system unmount. File system is assumed to have 682 * been locked by the caller. 683 * 684 * => Caller hold reference to the mount, explicitly for dounmount(). 685 */ 686 int 687 dounmount(struct mount *mp, int flags, struct lwp *l) 688 { 689 struct vnode *coveredvp; 690 int error; 691 int async; 692 int used_syncer; 693 694 #if NVERIEXEC > 0 695 error = veriexec_unmountchk(mp); 696 if (error) 697 return (error); 698 #endif /* NVERIEXEC > 0 */ 699 700 /* 701 * XXX Freeze syncer. Must do this before locking the 702 * mount point. See dounmount() for details. 703 */ 704 mutex_enter(&syncer_mutex); 705 rw_enter(&mp->mnt_unmounting, RW_WRITER); 706 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 707 rw_exit(&mp->mnt_unmounting); 708 mutex_exit(&syncer_mutex); 709 return ENOENT; 710 } 711 712 used_syncer = (mp->mnt_syncer != NULL); 713 714 /* 715 * XXX Syncer must be frozen when we get here. This should really 716 * be done on a per-mountpoint basis, but the syncer doesn't work 717 * like that. 718 * 719 * The caller of dounmount() must acquire syncer_mutex because 720 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 721 * order, and we must preserve that order to avoid deadlock. 722 * 723 * So, if the file system did not use the syncer, now is 724 * the time to release the syncer_mutex. 725 */ 726 if (used_syncer == 0) 727 mutex_exit(&syncer_mutex); 728 729 mp->mnt_iflag |= IMNT_UNMOUNT; 730 async = mp->mnt_flag & MNT_ASYNC; 731 mp->mnt_flag &= ~MNT_ASYNC; 732 cache_purgevfs(mp); /* remove cache entries for this file sys */ 733 if (mp->mnt_syncer != NULL) 734 vfs_deallocate_syncvnode(mp); 735 error = 0; 736 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 737 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 738 } 739 vfs_scrubvnlist(mp); 740 if (error == 0 || (flags & MNT_FORCE)) 741 error = VFS_UNMOUNT(mp, flags); 742 if (error) { 743 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 744 (void) vfs_allocate_syncvnode(mp); 745 mp->mnt_iflag &= ~IMNT_UNMOUNT; 746 mp->mnt_flag |= async; 747 rw_exit(&mp->mnt_unmounting); 748 if (used_syncer) 749 mutex_exit(&syncer_mutex); 750 return (error); 751 } 752 vfs_scrubvnlist(mp); 753 mutex_enter(&mountlist_lock); 754 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 755 coveredvp->v_mountedhere = NULL; 756 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 757 mp->mnt_iflag |= IMNT_GONE; 758 mutex_exit(&mountlist_lock); 759 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 760 panic("unmount: dangling vnode"); 761 if (used_syncer) 762 mutex_exit(&syncer_mutex); 763 vfs_hooks_unmount(mp); 764 rw_exit(&mp->mnt_unmounting); 765 vfs_destroy(mp); /* reference from mount() */ 766 if (coveredvp != NULLVP) 767 vrele(coveredvp); 768 return (0); 769 } 770 771 /* 772 * Sync each mounted filesystem. 773 */ 774 #ifdef DEBUG 775 int syncprt = 0; 776 struct ctldebug debug0 = { "syncprt", &syncprt }; 777 #endif 778 779 /* ARGSUSED */ 780 int 781 sys_sync(struct lwp *l, const void *v, register_t *retval) 782 { 783 struct mount *mp, *nmp; 784 int asyncflag; 785 786 if (l == NULL) 787 l = &lwp0; 788 789 mutex_enter(&mountlist_lock); 790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 791 mp = nmp) { 792 if (vfs_busy(mp, &nmp)) { 793 continue; 794 } 795 mutex_enter(&mp->mnt_updating); 796 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 797 asyncflag = mp->mnt_flag & MNT_ASYNC; 798 mp->mnt_flag &= ~MNT_ASYNC; 799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 800 if (asyncflag) 801 mp->mnt_flag |= MNT_ASYNC; 802 } 803 mutex_exit(&mp->mnt_updating); 804 vfs_unbusy(mp, false, &nmp); 805 } 806 mutex_exit(&mountlist_lock); 807 #ifdef DEBUG 808 if (syncprt) 809 vfs_bufstats(); 810 #endif /* DEBUG */ 811 return (0); 812 } 813 814 /* 815 * Change filesystem quotas. 816 */ 817 /* ARGSUSED */ 818 int 819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 820 { 821 /* { 822 syscallarg(const char *) path; 823 syscallarg(int) cmd; 824 syscallarg(int) uid; 825 syscallarg(void *) arg; 826 } */ 827 struct mount *mp; 828 int error; 829 struct nameidata nd; 830 831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 832 SCARG(uap, path)); 833 if ((error = namei(&nd)) != 0) 834 return (error); 835 mp = nd.ni_vp->v_mount; 836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 837 SCARG(uap, arg)); 838 vrele(nd.ni_vp); 839 return (error); 840 } 841 842 int 843 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 844 int root) 845 { 846 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 847 int error = 0; 848 849 /* 850 * If MNT_NOWAIT or MNT_LAZY is specified, do not 851 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 852 * overrides MNT_NOWAIT. 853 */ 854 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 855 (flags != MNT_WAIT && flags != 0)) { 856 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 857 goto done; 858 } 859 860 /* Get the filesystem stats now */ 861 memset(sp, 0, sizeof(*sp)); 862 if ((error = VFS_STATVFS(mp, sp)) != 0) { 863 return error; 864 } 865 866 if (cwdi->cwdi_rdir == NULL) 867 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 868 done: 869 if (cwdi->cwdi_rdir != NULL) { 870 size_t len; 871 char *bp; 872 char c; 873 char *path = PNBUF_GET(); 874 875 bp = path + MAXPATHLEN; 876 *--bp = '\0'; 877 rw_enter(&cwdi->cwdi_lock, RW_READER); 878 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 879 MAXPATHLEN / 2, 0, l); 880 rw_exit(&cwdi->cwdi_lock); 881 if (error) { 882 PNBUF_PUT(path); 883 return error; 884 } 885 len = strlen(bp); 886 if (len != 1) { 887 /* 888 * for mount points that are below our root, we can see 889 * them, so we fix up the pathname and return them. The 890 * rest we cannot see, so we don't allow viewing the 891 * data. 892 */ 893 if (strncmp(bp, sp->f_mntonname, len) == 0 && 894 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 895 (void)strlcpy(sp->f_mntonname, 896 c == '\0' ? "/" : &sp->f_mntonname[len], 897 sizeof(sp->f_mntonname)); 898 } else { 899 if (root) 900 (void)strlcpy(sp->f_mntonname, "/", 901 sizeof(sp->f_mntonname)); 902 else 903 error = EPERM; 904 } 905 } 906 PNBUF_PUT(path); 907 } 908 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 909 return error; 910 } 911 912 /* 913 * Get filesystem statistics by path. 914 */ 915 int 916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 917 { 918 struct mount *mp; 919 int error; 920 struct nameidata nd; 921 922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 923 if ((error = namei(&nd)) != 0) 924 return error; 925 mp = nd.ni_vp->v_mount; 926 error = dostatvfs(mp, sb, l, flags, 1); 927 vrele(nd.ni_vp); 928 return error; 929 } 930 931 /* ARGSUSED */ 932 int 933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 934 { 935 /* { 936 syscallarg(const char *) path; 937 syscallarg(struct statvfs *) buf; 938 syscallarg(int) flags; 939 } */ 940 struct statvfs *sb; 941 int error; 942 943 sb = STATVFSBUF_GET(); 944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 945 if (error == 0) 946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 947 STATVFSBUF_PUT(sb); 948 return error; 949 } 950 951 /* 952 * Get filesystem statistics by fd. 953 */ 954 int 955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 956 { 957 file_t *fp; 958 struct mount *mp; 959 int error; 960 961 /* fd_getvnode() will use the descriptor for us */ 962 if ((error = fd_getvnode(fd, &fp)) != 0) 963 return (error); 964 mp = ((struct vnode *)fp->f_data)->v_mount; 965 error = dostatvfs(mp, sb, curlwp, flags, 1); 966 fd_putfile(fd); 967 return error; 968 } 969 970 /* ARGSUSED */ 971 int 972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 973 { 974 /* { 975 syscallarg(int) fd; 976 syscallarg(struct statvfs *) buf; 977 syscallarg(int) flags; 978 } */ 979 struct statvfs *sb; 980 int error; 981 982 sb = STATVFSBUF_GET(); 983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 984 if (error == 0) 985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 986 STATVFSBUF_PUT(sb); 987 return error; 988 } 989 990 991 /* 992 * Get statistics on all filesystems. 993 */ 994 int 995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 997 register_t *retval) 998 { 999 int root = 0; 1000 struct proc *p = l->l_proc; 1001 struct mount *mp, *nmp; 1002 struct statvfs *sb; 1003 size_t count, maxcount; 1004 int error = 0; 1005 1006 sb = STATVFSBUF_GET(); 1007 maxcount = bufsize / entry_sz; 1008 mutex_enter(&mountlist_lock); 1009 count = 0; 1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1011 mp = nmp) { 1012 if (vfs_busy(mp, &nmp)) { 1013 continue; 1014 } 1015 if (sfsp && count < maxcount) { 1016 error = dostatvfs(mp, sb, l, flags, 0); 1017 if (error) { 1018 vfs_unbusy(mp, false, &nmp); 1019 error = 0; 1020 continue; 1021 } 1022 error = copyfn(sb, sfsp, entry_sz); 1023 if (error) { 1024 vfs_unbusy(mp, false, NULL); 1025 goto out; 1026 } 1027 sfsp = (char *)sfsp + entry_sz; 1028 root |= strcmp(sb->f_mntonname, "/") == 0; 1029 } 1030 count++; 1031 vfs_unbusy(mp, false, &nmp); 1032 } 1033 mutex_exit(&mountlist_lock); 1034 1035 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1036 /* 1037 * fake a root entry 1038 */ 1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1040 sb, l, flags, 1); 1041 if (error != 0) 1042 goto out; 1043 if (sfsp) { 1044 error = copyfn(sb, sfsp, entry_sz); 1045 if (error != 0) 1046 goto out; 1047 } 1048 count++; 1049 } 1050 if (sfsp && count > maxcount) 1051 *retval = maxcount; 1052 else 1053 *retval = count; 1054 out: 1055 STATVFSBUF_PUT(sb); 1056 return error; 1057 } 1058 1059 int 1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1061 { 1062 /* { 1063 syscallarg(struct statvfs *) buf; 1064 syscallarg(size_t) bufsize; 1065 syscallarg(int) flags; 1066 } */ 1067 1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1070 } 1071 1072 /* 1073 * Change current working directory to a given file descriptor. 1074 */ 1075 /* ARGSUSED */ 1076 int 1077 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1078 { 1079 /* { 1080 syscallarg(int) fd; 1081 } */ 1082 struct proc *p = l->l_proc; 1083 struct cwdinfo *cwdi; 1084 struct vnode *vp, *tdp; 1085 struct mount *mp; 1086 file_t *fp; 1087 int error, fd; 1088 1089 /* fd_getvnode() will use the descriptor for us */ 1090 fd = SCARG(uap, fd); 1091 if ((error = fd_getvnode(fd, &fp)) != 0) 1092 return (error); 1093 vp = fp->f_data; 1094 1095 VREF(vp); 1096 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1097 if (vp->v_type != VDIR) 1098 error = ENOTDIR; 1099 else 1100 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1101 if (error) { 1102 vput(vp); 1103 goto out; 1104 } 1105 while ((mp = vp->v_mountedhere) != NULL) { 1106 error = vfs_busy(mp, NULL); 1107 vput(vp); 1108 if (error != 0) 1109 goto out; 1110 error = VFS_ROOT(mp, &tdp); 1111 vfs_unbusy(mp, false, NULL); 1112 if (error) 1113 goto out; 1114 vp = tdp; 1115 } 1116 VOP_UNLOCK(vp, 0); 1117 1118 /* 1119 * Disallow changing to a directory not under the process's 1120 * current root directory (if there is one). 1121 */ 1122 cwdi = p->p_cwdi; 1123 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1124 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1125 vrele(vp); 1126 error = EPERM; /* operation not permitted */ 1127 } else { 1128 vrele(cwdi->cwdi_cdir); 1129 cwdi->cwdi_cdir = vp; 1130 } 1131 rw_exit(&cwdi->cwdi_lock); 1132 1133 out: 1134 fd_putfile(fd); 1135 return (error); 1136 } 1137 1138 /* 1139 * Change this process's notion of the root directory to a given file 1140 * descriptor. 1141 */ 1142 int 1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1144 { 1145 struct proc *p = l->l_proc; 1146 struct cwdinfo *cwdi; 1147 struct vnode *vp; 1148 file_t *fp; 1149 int error, fd = SCARG(uap, fd); 1150 1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1153 return error; 1154 /* fd_getvnode() will use the descriptor for us */ 1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1156 return error; 1157 vp = fp->f_data; 1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1159 if (vp->v_type != VDIR) 1160 error = ENOTDIR; 1161 else 1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1163 VOP_UNLOCK(vp, 0); 1164 if (error) 1165 goto out; 1166 VREF(vp); 1167 1168 /* 1169 * Prevent escaping from chroot by putting the root under 1170 * the working directory. Silently chdir to / if we aren't 1171 * already there. 1172 */ 1173 cwdi = p->p_cwdi; 1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1176 /* 1177 * XXX would be more failsafe to change directory to a 1178 * deadfs node here instead 1179 */ 1180 vrele(cwdi->cwdi_cdir); 1181 VREF(vp); 1182 cwdi->cwdi_cdir = vp; 1183 } 1184 1185 if (cwdi->cwdi_rdir != NULL) 1186 vrele(cwdi->cwdi_rdir); 1187 cwdi->cwdi_rdir = vp; 1188 rw_exit(&cwdi->cwdi_lock); 1189 1190 out: 1191 fd_putfile(fd); 1192 return (error); 1193 } 1194 1195 /* 1196 * Change current working directory (``.''). 1197 */ 1198 /* ARGSUSED */ 1199 int 1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1201 { 1202 /* { 1203 syscallarg(const char *) path; 1204 } */ 1205 struct proc *p = l->l_proc; 1206 struct cwdinfo *cwdi; 1207 int error; 1208 struct nameidata nd; 1209 1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1211 SCARG(uap, path)); 1212 if ((error = change_dir(&nd, l)) != 0) 1213 return (error); 1214 cwdi = p->p_cwdi; 1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1216 vrele(cwdi->cwdi_cdir); 1217 cwdi->cwdi_cdir = nd.ni_vp; 1218 rw_exit(&cwdi->cwdi_lock); 1219 return (0); 1220 } 1221 1222 /* 1223 * Change notion of root (``/'') directory. 1224 */ 1225 /* ARGSUSED */ 1226 int 1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1228 { 1229 /* { 1230 syscallarg(const char *) path; 1231 } */ 1232 struct proc *p = l->l_proc; 1233 struct cwdinfo *cwdi; 1234 struct vnode *vp; 1235 int error; 1236 struct nameidata nd; 1237 1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1240 return (error); 1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1242 SCARG(uap, path)); 1243 if ((error = change_dir(&nd, l)) != 0) 1244 return (error); 1245 1246 cwdi = p->p_cwdi; 1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1248 if (cwdi->cwdi_rdir != NULL) 1249 vrele(cwdi->cwdi_rdir); 1250 vp = nd.ni_vp; 1251 cwdi->cwdi_rdir = vp; 1252 1253 /* 1254 * Prevent escaping from chroot by putting the root under 1255 * the working directory. Silently chdir to / if we aren't 1256 * already there. 1257 */ 1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1259 /* 1260 * XXX would be more failsafe to change directory to a 1261 * deadfs node here instead 1262 */ 1263 vrele(cwdi->cwdi_cdir); 1264 VREF(vp); 1265 cwdi->cwdi_cdir = vp; 1266 } 1267 rw_exit(&cwdi->cwdi_lock); 1268 1269 return (0); 1270 } 1271 1272 /* 1273 * Common routine for chroot and chdir. 1274 */ 1275 static int 1276 change_dir(struct nameidata *ndp, struct lwp *l) 1277 { 1278 struct vnode *vp; 1279 int error; 1280 1281 if ((error = namei(ndp)) != 0) 1282 return (error); 1283 vp = ndp->ni_vp; 1284 if (vp->v_type != VDIR) 1285 error = ENOTDIR; 1286 else 1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1288 1289 if (error) 1290 vput(vp); 1291 else 1292 VOP_UNLOCK(vp, 0); 1293 return (error); 1294 } 1295 1296 /* 1297 * Check permissions, allocate an open file structure, 1298 * and call the device open routine if any. 1299 */ 1300 int 1301 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1302 { 1303 /* { 1304 syscallarg(const char *) path; 1305 syscallarg(int) flags; 1306 syscallarg(int) mode; 1307 } */ 1308 struct proc *p = l->l_proc; 1309 struct cwdinfo *cwdi = p->p_cwdi; 1310 file_t *fp; 1311 struct vnode *vp; 1312 int flags, cmode; 1313 int type, indx, error; 1314 struct flock lf; 1315 struct nameidata nd; 1316 1317 flags = FFLAGS(SCARG(uap, flags)); 1318 if ((flags & (FREAD | FWRITE)) == 0) 1319 return (EINVAL); 1320 if ((error = fd_allocfile(&fp, &indx)) != 0) 1321 return (error); 1322 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1323 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1324 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1325 SCARG(uap, path)); 1326 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1327 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1328 fd_abort(p, fp, indx); 1329 if ((error == EDUPFD || error == EMOVEFD) && 1330 l->l_dupfd >= 0 && /* XXX from fdopen */ 1331 (error = 1332 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1333 *retval = indx; 1334 return (0); 1335 } 1336 if (error == ERESTART) 1337 error = EINTR; 1338 return (error); 1339 } 1340 1341 l->l_dupfd = 0; 1342 vp = nd.ni_vp; 1343 fp->f_flag = flags & FMASK; 1344 fp->f_type = DTYPE_VNODE; 1345 fp->f_ops = &vnops; 1346 fp->f_data = vp; 1347 if (flags & (O_EXLOCK | O_SHLOCK)) { 1348 lf.l_whence = SEEK_SET; 1349 lf.l_start = 0; 1350 lf.l_len = 0; 1351 if (flags & O_EXLOCK) 1352 lf.l_type = F_WRLCK; 1353 else 1354 lf.l_type = F_RDLCK; 1355 type = F_FLOCK; 1356 if ((flags & FNONBLOCK) == 0) 1357 type |= F_WAIT; 1358 VOP_UNLOCK(vp, 0); 1359 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1360 if (error) { 1361 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1362 fd_abort(p, fp, indx); 1363 return (error); 1364 } 1365 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1366 atomic_or_uint(&fp->f_flag, FHASLOCK); 1367 } 1368 VOP_UNLOCK(vp, 0); 1369 *retval = indx; 1370 fd_affix(p, fp, indx); 1371 return (0); 1372 } 1373 1374 static void 1375 vfs__fhfree(fhandle_t *fhp) 1376 { 1377 size_t fhsize; 1378 1379 if (fhp == NULL) { 1380 return; 1381 } 1382 fhsize = FHANDLE_SIZE(fhp); 1383 kmem_free(fhp, fhsize); 1384 } 1385 1386 /* 1387 * vfs_composefh: compose a filehandle. 1388 */ 1389 1390 int 1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1392 { 1393 struct mount *mp; 1394 struct fid *fidp; 1395 int error; 1396 size_t needfhsize; 1397 size_t fidsize; 1398 1399 mp = vp->v_mount; 1400 fidp = NULL; 1401 if (*fh_size < FHANDLE_SIZE_MIN) { 1402 fidsize = 0; 1403 } else { 1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1405 if (fhp != NULL) { 1406 memset(fhp, 0, *fh_size); 1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1408 fidp = &fhp->fh_fid; 1409 } 1410 } 1411 error = VFS_VPTOFH(vp, fidp, &fidsize); 1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1413 if (error == 0 && *fh_size < needfhsize) { 1414 error = E2BIG; 1415 } 1416 *fh_size = needfhsize; 1417 return error; 1418 } 1419 1420 int 1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1422 { 1423 struct mount *mp; 1424 fhandle_t *fhp; 1425 size_t fhsize; 1426 size_t fidsize; 1427 int error; 1428 1429 *fhpp = NULL; 1430 mp = vp->v_mount; 1431 fidsize = 0; 1432 error = VFS_VPTOFH(vp, NULL, &fidsize); 1433 KASSERT(error != 0); 1434 if (error != E2BIG) { 1435 goto out; 1436 } 1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1438 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1439 if (fhp == NULL) { 1440 error = ENOMEM; 1441 goto out; 1442 } 1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1445 if (error == 0) { 1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1447 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1448 *fhpp = fhp; 1449 } else { 1450 kmem_free(fhp, fhsize); 1451 } 1452 out: 1453 return error; 1454 } 1455 1456 void 1457 vfs_composefh_free(fhandle_t *fhp) 1458 { 1459 1460 vfs__fhfree(fhp); 1461 } 1462 1463 /* 1464 * vfs_fhtovp: lookup a vnode by a filehandle. 1465 */ 1466 1467 int 1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1469 { 1470 struct mount *mp; 1471 int error; 1472 1473 *vpp = NULL; 1474 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1475 if (mp == NULL) { 1476 error = ESTALE; 1477 goto out; 1478 } 1479 if (mp->mnt_op->vfs_fhtovp == NULL) { 1480 error = EOPNOTSUPP; 1481 goto out; 1482 } 1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1484 out: 1485 return error; 1486 } 1487 1488 /* 1489 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1490 * the needed size. 1491 */ 1492 1493 int 1494 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1495 { 1496 fhandle_t *fhp; 1497 int error; 1498 1499 *fhpp = NULL; 1500 if (fhsize > FHANDLE_SIZE_MAX) { 1501 return EINVAL; 1502 } 1503 if (fhsize < FHANDLE_SIZE_MIN) { 1504 return EINVAL; 1505 } 1506 again: 1507 fhp = kmem_alloc(fhsize, KM_SLEEP); 1508 if (fhp == NULL) { 1509 return ENOMEM; 1510 } 1511 error = copyin(ufhp, fhp, fhsize); 1512 if (error == 0) { 1513 /* XXX this check shouldn't be here */ 1514 if (FHANDLE_SIZE(fhp) == fhsize) { 1515 *fhpp = fhp; 1516 return 0; 1517 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1518 /* 1519 * a kludge for nfsv2 padded handles. 1520 */ 1521 size_t sz; 1522 1523 sz = FHANDLE_SIZE(fhp); 1524 kmem_free(fhp, fhsize); 1525 fhsize = sz; 1526 goto again; 1527 } else { 1528 /* 1529 * userland told us wrong size. 1530 */ 1531 error = EINVAL; 1532 } 1533 } 1534 kmem_free(fhp, fhsize); 1535 return error; 1536 } 1537 1538 void 1539 vfs_copyinfh_free(fhandle_t *fhp) 1540 { 1541 1542 vfs__fhfree(fhp); 1543 } 1544 1545 /* 1546 * Get file handle system call 1547 */ 1548 int 1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1550 { 1551 /* { 1552 syscallarg(char *) fname; 1553 syscallarg(fhandle_t *) fhp; 1554 syscallarg(size_t *) fh_size; 1555 } */ 1556 struct vnode *vp; 1557 fhandle_t *fh; 1558 int error; 1559 struct nameidata nd; 1560 size_t sz; 1561 size_t usz; 1562 1563 /* 1564 * Must be super user 1565 */ 1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1567 0, NULL, NULL, NULL); 1568 if (error) 1569 return (error); 1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1571 SCARG(uap, fname)); 1572 error = namei(&nd); 1573 if (error) 1574 return (error); 1575 vp = nd.ni_vp; 1576 error = vfs_composefh_alloc(vp, &fh); 1577 vput(vp); 1578 if (error != 0) { 1579 goto out; 1580 } 1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1582 if (error != 0) { 1583 goto out; 1584 } 1585 sz = FHANDLE_SIZE(fh); 1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1587 if (error != 0) { 1588 goto out; 1589 } 1590 if (usz >= sz) { 1591 error = copyout(fh, SCARG(uap, fhp), sz); 1592 } else { 1593 error = E2BIG; 1594 } 1595 out: 1596 vfs_composefh_free(fh); 1597 return (error); 1598 } 1599 1600 /* 1601 * Open a file given a file handle. 1602 * 1603 * Check permissions, allocate an open file structure, 1604 * and call the device open routine if any. 1605 */ 1606 1607 int 1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1609 register_t *retval) 1610 { 1611 file_t *fp; 1612 struct vnode *vp = NULL; 1613 kauth_cred_t cred = l->l_cred; 1614 file_t *nfp; 1615 int type, indx, error=0; 1616 struct flock lf; 1617 struct vattr va; 1618 fhandle_t *fh; 1619 int flags; 1620 proc_t *p; 1621 1622 p = curproc; 1623 1624 /* 1625 * Must be super user 1626 */ 1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1628 0, NULL, NULL, NULL))) 1629 return (error); 1630 1631 flags = FFLAGS(oflags); 1632 if ((flags & (FREAD | FWRITE)) == 0) 1633 return (EINVAL); 1634 if ((flags & O_CREAT)) 1635 return (EINVAL); 1636 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1637 return (error); 1638 fp = nfp; 1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1640 if (error != 0) { 1641 goto bad; 1642 } 1643 error = vfs_fhtovp(fh, &vp); 1644 if (error != 0) { 1645 goto bad; 1646 } 1647 1648 /* Now do an effective vn_open */ 1649 1650 if (vp->v_type == VSOCK) { 1651 error = EOPNOTSUPP; 1652 goto bad; 1653 } 1654 error = vn_openchk(vp, cred, flags); 1655 if (error != 0) 1656 goto bad; 1657 if (flags & O_TRUNC) { 1658 VOP_UNLOCK(vp, 0); /* XXX */ 1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1660 VATTR_NULL(&va); 1661 va.va_size = 0; 1662 error = VOP_SETATTR(vp, &va, cred); 1663 if (error) 1664 goto bad; 1665 } 1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1667 goto bad; 1668 if (flags & FWRITE) { 1669 mutex_enter(&vp->v_interlock); 1670 vp->v_writecount++; 1671 mutex_exit(&vp->v_interlock); 1672 } 1673 1674 /* done with modified vn_open, now finish what sys_open does. */ 1675 1676 fp->f_flag = flags & FMASK; 1677 fp->f_type = DTYPE_VNODE; 1678 fp->f_ops = &vnops; 1679 fp->f_data = vp; 1680 if (flags & (O_EXLOCK | O_SHLOCK)) { 1681 lf.l_whence = SEEK_SET; 1682 lf.l_start = 0; 1683 lf.l_len = 0; 1684 if (flags & O_EXLOCK) 1685 lf.l_type = F_WRLCK; 1686 else 1687 lf.l_type = F_RDLCK; 1688 type = F_FLOCK; 1689 if ((flags & FNONBLOCK) == 0) 1690 type |= F_WAIT; 1691 VOP_UNLOCK(vp, 0); 1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1693 if (error) { 1694 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1695 fd_abort(p, fp, indx); 1696 return (error); 1697 } 1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1699 atomic_or_uint(&fp->f_flag, FHASLOCK); 1700 } 1701 VOP_UNLOCK(vp, 0); 1702 *retval = indx; 1703 fd_affix(p, fp, indx); 1704 vfs_copyinfh_free(fh); 1705 return (0); 1706 1707 bad: 1708 fd_abort(p, fp, indx); 1709 if (vp != NULL) 1710 vput(vp); 1711 vfs_copyinfh_free(fh); 1712 return (error); 1713 } 1714 1715 int 1716 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1717 { 1718 /* { 1719 syscallarg(const void *) fhp; 1720 syscallarg(size_t) fh_size; 1721 syscallarg(int) flags; 1722 } */ 1723 1724 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1725 SCARG(uap, flags), retval); 1726 } 1727 1728 int 1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1730 { 1731 int error; 1732 fhandle_t *fh; 1733 struct vnode *vp; 1734 1735 /* 1736 * Must be super user 1737 */ 1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1739 0, NULL, NULL, NULL))) 1740 return (error); 1741 1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1743 if (error != 0) 1744 return error; 1745 1746 error = vfs_fhtovp(fh, &vp); 1747 vfs_copyinfh_free(fh); 1748 if (error != 0) 1749 return error; 1750 1751 error = vn_stat(vp, sb); 1752 vput(vp); 1753 return error; 1754 } 1755 1756 1757 /* ARGSUSED */ 1758 int 1759 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1760 { 1761 /* { 1762 syscallarg(const void *) fhp; 1763 syscallarg(size_t) fh_size; 1764 syscallarg(struct stat *) sb; 1765 } */ 1766 struct stat sb; 1767 int error; 1768 1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1770 if (error) 1771 return error; 1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1773 } 1774 1775 int 1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1777 int flags) 1778 { 1779 fhandle_t *fh; 1780 struct mount *mp; 1781 struct vnode *vp; 1782 int error; 1783 1784 /* 1785 * Must be super user 1786 */ 1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1788 0, NULL, NULL, NULL))) 1789 return error; 1790 1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1792 if (error != 0) 1793 return error; 1794 1795 error = vfs_fhtovp(fh, &vp); 1796 vfs_copyinfh_free(fh); 1797 if (error != 0) 1798 return error; 1799 1800 mp = vp->v_mount; 1801 error = dostatvfs(mp, sb, l, flags, 1); 1802 vput(vp); 1803 return error; 1804 } 1805 1806 /* ARGSUSED */ 1807 int 1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1809 { 1810 /* { 1811 syscallarg(const void *) fhp; 1812 syscallarg(size_t) fh_size; 1813 syscallarg(struct statvfs *) buf; 1814 syscallarg(int) flags; 1815 } */ 1816 struct statvfs *sb = STATVFSBUF_GET(); 1817 int error; 1818 1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1820 SCARG(uap, flags)); 1821 if (error == 0) 1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1823 STATVFSBUF_PUT(sb); 1824 return error; 1825 } 1826 1827 /* 1828 * Create a special file. 1829 */ 1830 /* ARGSUSED */ 1831 int 1832 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1833 register_t *retval) 1834 { 1835 /* { 1836 syscallarg(const char *) path; 1837 syscallarg(mode_t) mode; 1838 syscallarg(dev_t) dev; 1839 } */ 1840 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1841 SCARG(uap, dev), retval); 1842 } 1843 1844 int 1845 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1846 register_t *retval) 1847 { 1848 struct proc *p = l->l_proc; 1849 struct vnode *vp; 1850 struct vattr vattr; 1851 int error, optype; 1852 struct nameidata nd; 1853 char *path; 1854 const char *cpath; 1855 enum uio_seg seg = UIO_USERSPACE; 1856 1857 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1858 0, NULL, NULL, NULL)) != 0) 1859 return (error); 1860 1861 optype = VOP_MKNOD_DESCOFFSET; 1862 1863 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1864 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1865 1866 if ((error = namei(&nd)) != 0) 1867 goto out; 1868 vp = nd.ni_vp; 1869 if (vp != NULL) 1870 error = EEXIST; 1871 else { 1872 VATTR_NULL(&vattr); 1873 /* We will read cwdi->cwdi_cmask unlocked. */ 1874 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1875 vattr.va_rdev = dev; 1876 1877 switch (mode & S_IFMT) { 1878 case S_IFMT: /* used by badsect to flag bad sectors */ 1879 vattr.va_type = VBAD; 1880 break; 1881 case S_IFCHR: 1882 vattr.va_type = VCHR; 1883 break; 1884 case S_IFBLK: 1885 vattr.va_type = VBLK; 1886 break; 1887 case S_IFWHT: 1888 optype = VOP_WHITEOUT_DESCOFFSET; 1889 break; 1890 case S_IFREG: 1891 #if NVERIEXEC > 0 1892 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1893 O_CREAT); 1894 #endif /* NVERIEXEC > 0 */ 1895 vattr.va_type = VREG; 1896 vattr.va_rdev = VNOVAL; 1897 optype = VOP_CREATE_DESCOFFSET; 1898 break; 1899 default: 1900 error = EINVAL; 1901 break; 1902 } 1903 } 1904 if (!error) { 1905 switch (optype) { 1906 case VOP_WHITEOUT_DESCOFFSET: 1907 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1908 if (error) 1909 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1910 vput(nd.ni_dvp); 1911 break; 1912 1913 case VOP_MKNOD_DESCOFFSET: 1914 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1915 &nd.ni_cnd, &vattr); 1916 if (error == 0) 1917 vput(nd.ni_vp); 1918 break; 1919 1920 case VOP_CREATE_DESCOFFSET: 1921 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1922 &nd.ni_cnd, &vattr); 1923 if (error == 0) 1924 vput(nd.ni_vp); 1925 break; 1926 } 1927 } else { 1928 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1929 if (nd.ni_dvp == vp) 1930 vrele(nd.ni_dvp); 1931 else 1932 vput(nd.ni_dvp); 1933 if (vp) 1934 vrele(vp); 1935 } 1936 out: 1937 VERIEXEC_PATH_PUT(path); 1938 return (error); 1939 } 1940 1941 /* 1942 * Create a named pipe. 1943 */ 1944 /* ARGSUSED */ 1945 int 1946 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1947 { 1948 /* { 1949 syscallarg(const char *) path; 1950 syscallarg(int) mode; 1951 } */ 1952 struct proc *p = l->l_proc; 1953 struct vattr vattr; 1954 int error; 1955 struct nameidata nd; 1956 1957 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1958 SCARG(uap, path)); 1959 if ((error = namei(&nd)) != 0) 1960 return (error); 1961 if (nd.ni_vp != NULL) { 1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1963 if (nd.ni_dvp == nd.ni_vp) 1964 vrele(nd.ni_dvp); 1965 else 1966 vput(nd.ni_dvp); 1967 vrele(nd.ni_vp); 1968 return (EEXIST); 1969 } 1970 VATTR_NULL(&vattr); 1971 vattr.va_type = VFIFO; 1972 /* We will read cwdi->cwdi_cmask unlocked. */ 1973 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1975 if (error == 0) 1976 vput(nd.ni_vp); 1977 return (error); 1978 } 1979 1980 /* 1981 * Make a hard file link. 1982 */ 1983 /* ARGSUSED */ 1984 int 1985 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1986 { 1987 /* { 1988 syscallarg(const char *) path; 1989 syscallarg(const char *) link; 1990 } */ 1991 struct vnode *vp; 1992 struct nameidata nd; 1993 int error; 1994 1995 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1996 SCARG(uap, path)); 1997 if ((error = namei(&nd)) != 0) 1998 return (error); 1999 vp = nd.ni_vp; 2000 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2001 SCARG(uap, link)); 2002 if ((error = namei(&nd)) != 0) 2003 goto out; 2004 if (nd.ni_vp) { 2005 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2006 if (nd.ni_dvp == nd.ni_vp) 2007 vrele(nd.ni_dvp); 2008 else 2009 vput(nd.ni_dvp); 2010 vrele(nd.ni_vp); 2011 error = EEXIST; 2012 goto out; 2013 } 2014 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2015 out: 2016 vrele(vp); 2017 return (error); 2018 } 2019 2020 /* 2021 * Make a symbolic link. 2022 */ 2023 /* ARGSUSED */ 2024 int 2025 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2026 { 2027 /* { 2028 syscallarg(const char *) path; 2029 syscallarg(const char *) link; 2030 } */ 2031 struct proc *p = l->l_proc; 2032 struct vattr vattr; 2033 char *path; 2034 int error; 2035 struct nameidata nd; 2036 2037 path = PNBUF_GET(); 2038 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2039 if (error) 2040 goto out; 2041 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2042 SCARG(uap, link)); 2043 if ((error = namei(&nd)) != 0) 2044 goto out; 2045 if (nd.ni_vp) { 2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2047 if (nd.ni_dvp == nd.ni_vp) 2048 vrele(nd.ni_dvp); 2049 else 2050 vput(nd.ni_dvp); 2051 vrele(nd.ni_vp); 2052 error = EEXIST; 2053 goto out; 2054 } 2055 VATTR_NULL(&vattr); 2056 vattr.va_type = VLNK; 2057 /* We will read cwdi->cwdi_cmask unlocked. */ 2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2060 if (error == 0) 2061 vput(nd.ni_vp); 2062 out: 2063 PNBUF_PUT(path); 2064 return (error); 2065 } 2066 2067 /* 2068 * Delete a whiteout from the filesystem. 2069 */ 2070 /* ARGSUSED */ 2071 int 2072 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const char *) path; 2076 } */ 2077 int error; 2078 struct nameidata nd; 2079 2080 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2081 UIO_USERSPACE, SCARG(uap, path)); 2082 error = namei(&nd); 2083 if (error) 2084 return (error); 2085 2086 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2087 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2088 if (nd.ni_dvp == nd.ni_vp) 2089 vrele(nd.ni_dvp); 2090 else 2091 vput(nd.ni_dvp); 2092 if (nd.ni_vp) 2093 vrele(nd.ni_vp); 2094 return (EEXIST); 2095 } 2096 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2097 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2098 vput(nd.ni_dvp); 2099 return (error); 2100 } 2101 2102 /* 2103 * Delete a name from the filesystem. 2104 */ 2105 /* ARGSUSED */ 2106 int 2107 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2108 { 2109 /* { 2110 syscallarg(const char *) path; 2111 } */ 2112 2113 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2114 } 2115 2116 int 2117 do_sys_unlink(const char *arg, enum uio_seg seg) 2118 { 2119 struct vnode *vp; 2120 int error; 2121 struct nameidata nd; 2122 char *path; 2123 const char *cpath; 2124 2125 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2126 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2127 2128 if ((error = namei(&nd)) != 0) 2129 goto out; 2130 vp = nd.ni_vp; 2131 2132 /* 2133 * The root of a mounted filesystem cannot be deleted. 2134 */ 2135 if (vp->v_vflag & VV_ROOT) { 2136 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2137 if (nd.ni_dvp == vp) 2138 vrele(nd.ni_dvp); 2139 else 2140 vput(nd.ni_dvp); 2141 vput(vp); 2142 error = EBUSY; 2143 goto out; 2144 } 2145 2146 #if NVERIEXEC > 0 2147 /* Handle remove requests for veriexec entries. */ 2148 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2149 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2150 if (nd.ni_dvp == vp) 2151 vrele(nd.ni_dvp); 2152 else 2153 vput(nd.ni_dvp); 2154 vput(vp); 2155 goto out; 2156 } 2157 #endif /* NVERIEXEC > 0 */ 2158 2159 #ifdef FILEASSOC 2160 (void)fileassoc_file_delete(vp); 2161 #endif /* FILEASSOC */ 2162 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2163 out: 2164 VERIEXEC_PATH_PUT(path); 2165 return (error); 2166 } 2167 2168 /* 2169 * Reposition read/write file offset. 2170 */ 2171 int 2172 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2173 { 2174 /* { 2175 syscallarg(int) fd; 2176 syscallarg(int) pad; 2177 syscallarg(off_t) offset; 2178 syscallarg(int) whence; 2179 } */ 2180 kauth_cred_t cred = l->l_cred; 2181 file_t *fp; 2182 struct vnode *vp; 2183 struct vattr vattr; 2184 off_t newoff; 2185 int error, fd; 2186 2187 fd = SCARG(uap, fd); 2188 2189 if ((fp = fd_getfile(fd)) == NULL) 2190 return (EBADF); 2191 2192 vp = fp->f_data; 2193 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2194 error = ESPIPE; 2195 goto out; 2196 } 2197 2198 switch (SCARG(uap, whence)) { 2199 case SEEK_CUR: 2200 newoff = fp->f_offset + SCARG(uap, offset); 2201 break; 2202 case SEEK_END: 2203 error = VOP_GETATTR(vp, &vattr, cred); 2204 if (error) { 2205 goto out; 2206 } 2207 newoff = SCARG(uap, offset) + vattr.va_size; 2208 break; 2209 case SEEK_SET: 2210 newoff = SCARG(uap, offset); 2211 break; 2212 default: 2213 error = EINVAL; 2214 goto out; 2215 } 2216 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2217 *(off_t *)retval = fp->f_offset = newoff; 2218 } 2219 out: 2220 fd_putfile(fd); 2221 return (error); 2222 } 2223 2224 /* 2225 * Positional read system call. 2226 */ 2227 int 2228 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2229 { 2230 /* { 2231 syscallarg(int) fd; 2232 syscallarg(void *) buf; 2233 syscallarg(size_t) nbyte; 2234 syscallarg(off_t) offset; 2235 } */ 2236 file_t *fp; 2237 struct vnode *vp; 2238 off_t offset; 2239 int error, fd = SCARG(uap, fd); 2240 2241 if ((fp = fd_getfile(fd)) == NULL) 2242 return (EBADF); 2243 2244 if ((fp->f_flag & FREAD) == 0) { 2245 fd_putfile(fd); 2246 return (EBADF); 2247 } 2248 2249 vp = fp->f_data; 2250 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2251 error = ESPIPE; 2252 goto out; 2253 } 2254 2255 offset = SCARG(uap, offset); 2256 2257 /* 2258 * XXX This works because no file systems actually 2259 * XXX take any action on the seek operation. 2260 */ 2261 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2262 goto out; 2263 2264 /* dofileread() will unuse the descriptor for us */ 2265 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2266 &offset, 0, retval)); 2267 2268 out: 2269 fd_putfile(fd); 2270 return (error); 2271 } 2272 2273 /* 2274 * Positional scatter read system call. 2275 */ 2276 int 2277 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2278 { 2279 /* { 2280 syscallarg(int) fd; 2281 syscallarg(const struct iovec *) iovp; 2282 syscallarg(int) iovcnt; 2283 syscallarg(off_t) offset; 2284 } */ 2285 off_t offset = SCARG(uap, offset); 2286 2287 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2288 SCARG(uap, iovcnt), &offset, 0, retval); 2289 } 2290 2291 /* 2292 * Positional write system call. 2293 */ 2294 int 2295 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2296 { 2297 /* { 2298 syscallarg(int) fd; 2299 syscallarg(const void *) buf; 2300 syscallarg(size_t) nbyte; 2301 syscallarg(off_t) offset; 2302 } */ 2303 file_t *fp; 2304 struct vnode *vp; 2305 off_t offset; 2306 int error, fd = SCARG(uap, fd); 2307 2308 if ((fp = fd_getfile(fd)) == NULL) 2309 return (EBADF); 2310 2311 if ((fp->f_flag & FWRITE) == 0) { 2312 fd_putfile(fd); 2313 return (EBADF); 2314 } 2315 2316 vp = fp->f_data; 2317 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2318 error = ESPIPE; 2319 goto out; 2320 } 2321 2322 offset = SCARG(uap, offset); 2323 2324 /* 2325 * XXX This works because no file systems actually 2326 * XXX take any action on the seek operation. 2327 */ 2328 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2329 goto out; 2330 2331 /* dofilewrite() will unuse the descriptor for us */ 2332 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2333 &offset, 0, retval)); 2334 2335 out: 2336 fd_putfile(fd); 2337 return (error); 2338 } 2339 2340 /* 2341 * Positional gather write system call. 2342 */ 2343 int 2344 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2345 { 2346 /* { 2347 syscallarg(int) fd; 2348 syscallarg(const struct iovec *) iovp; 2349 syscallarg(int) iovcnt; 2350 syscallarg(off_t) offset; 2351 } */ 2352 off_t offset = SCARG(uap, offset); 2353 2354 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2355 SCARG(uap, iovcnt), &offset, 0, retval); 2356 } 2357 2358 /* 2359 * Check access permissions. 2360 */ 2361 int 2362 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2363 { 2364 /* { 2365 syscallarg(const char *) path; 2366 syscallarg(int) flags; 2367 } */ 2368 kauth_cred_t cred; 2369 struct vnode *vp; 2370 int error, flags; 2371 struct nameidata nd; 2372 2373 cred = kauth_cred_dup(l->l_cred); 2374 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2375 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2376 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2377 SCARG(uap, path)); 2378 /* Override default credentials */ 2379 nd.ni_cnd.cn_cred = cred; 2380 if ((error = namei(&nd)) != 0) 2381 goto out; 2382 vp = nd.ni_vp; 2383 2384 /* Flags == 0 means only check for existence. */ 2385 if (SCARG(uap, flags)) { 2386 flags = 0; 2387 if (SCARG(uap, flags) & R_OK) 2388 flags |= VREAD; 2389 if (SCARG(uap, flags) & W_OK) 2390 flags |= VWRITE; 2391 if (SCARG(uap, flags) & X_OK) 2392 flags |= VEXEC; 2393 2394 error = VOP_ACCESS(vp, flags, cred); 2395 if (!error && (flags & VWRITE)) 2396 error = vn_writechk(vp); 2397 } 2398 vput(vp); 2399 out: 2400 kauth_cred_free(cred); 2401 return (error); 2402 } 2403 2404 /* 2405 * Common code for all sys_stat functions, including compat versions. 2406 */ 2407 int 2408 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2409 { 2410 int error; 2411 struct nameidata nd; 2412 2413 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2414 UIO_USERSPACE, path); 2415 error = namei(&nd); 2416 if (error != 0) 2417 return error; 2418 error = vn_stat(nd.ni_vp, sb); 2419 vput(nd.ni_vp); 2420 return error; 2421 } 2422 2423 /* 2424 * Get file status; this version follows links. 2425 */ 2426 /* ARGSUSED */ 2427 int 2428 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2429 { 2430 /* { 2431 syscallarg(const char *) path; 2432 syscallarg(struct stat *) ub; 2433 } */ 2434 struct stat sb; 2435 int error; 2436 2437 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2438 if (error) 2439 return error; 2440 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2441 } 2442 2443 /* 2444 * Get file status; this version does not follow links. 2445 */ 2446 /* ARGSUSED */ 2447 int 2448 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2449 { 2450 /* { 2451 syscallarg(const char *) path; 2452 syscallarg(struct stat *) ub; 2453 } */ 2454 struct stat sb; 2455 int error; 2456 2457 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2458 if (error) 2459 return error; 2460 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2461 } 2462 2463 /* 2464 * Get configurable pathname variables. 2465 */ 2466 /* ARGSUSED */ 2467 int 2468 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2469 { 2470 /* { 2471 syscallarg(const char *) path; 2472 syscallarg(int) name; 2473 } */ 2474 int error; 2475 struct nameidata nd; 2476 2477 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2478 SCARG(uap, path)); 2479 if ((error = namei(&nd)) != 0) 2480 return (error); 2481 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2482 vput(nd.ni_vp); 2483 return (error); 2484 } 2485 2486 /* 2487 * Return target name of a symbolic link. 2488 */ 2489 /* ARGSUSED */ 2490 int 2491 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2492 { 2493 /* { 2494 syscallarg(const char *) path; 2495 syscallarg(char *) buf; 2496 syscallarg(size_t) count; 2497 } */ 2498 struct vnode *vp; 2499 struct iovec aiov; 2500 struct uio auio; 2501 int error; 2502 struct nameidata nd; 2503 2504 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2505 SCARG(uap, path)); 2506 if ((error = namei(&nd)) != 0) 2507 return (error); 2508 vp = nd.ni_vp; 2509 if (vp->v_type != VLNK) 2510 error = EINVAL; 2511 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2512 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2513 aiov.iov_base = SCARG(uap, buf); 2514 aiov.iov_len = SCARG(uap, count); 2515 auio.uio_iov = &aiov; 2516 auio.uio_iovcnt = 1; 2517 auio.uio_offset = 0; 2518 auio.uio_rw = UIO_READ; 2519 KASSERT(l == curlwp); 2520 auio.uio_vmspace = l->l_proc->p_vmspace; 2521 auio.uio_resid = SCARG(uap, count); 2522 error = VOP_READLINK(vp, &auio, l->l_cred); 2523 } 2524 vput(vp); 2525 *retval = SCARG(uap, count) - auio.uio_resid; 2526 return (error); 2527 } 2528 2529 /* 2530 * Change flags of a file given a path name. 2531 */ 2532 /* ARGSUSED */ 2533 int 2534 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2535 { 2536 /* { 2537 syscallarg(const char *) path; 2538 syscallarg(u_long) flags; 2539 } */ 2540 struct vnode *vp; 2541 int error; 2542 struct nameidata nd; 2543 2544 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2545 SCARG(uap, path)); 2546 if ((error = namei(&nd)) != 0) 2547 return (error); 2548 vp = nd.ni_vp; 2549 error = change_flags(vp, SCARG(uap, flags), l); 2550 vput(vp); 2551 return (error); 2552 } 2553 2554 /* 2555 * Change flags of a file given a file descriptor. 2556 */ 2557 /* ARGSUSED */ 2558 int 2559 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2560 { 2561 /* { 2562 syscallarg(int) fd; 2563 syscallarg(u_long) flags; 2564 } */ 2565 struct vnode *vp; 2566 file_t *fp; 2567 int error; 2568 2569 /* fd_getvnode() will use the descriptor for us */ 2570 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2571 return (error); 2572 vp = fp->f_data; 2573 error = change_flags(vp, SCARG(uap, flags), l); 2574 VOP_UNLOCK(vp, 0); 2575 fd_putfile(SCARG(uap, fd)); 2576 return (error); 2577 } 2578 2579 /* 2580 * Change flags of a file given a path name; this version does 2581 * not follow links. 2582 */ 2583 int 2584 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2585 { 2586 /* { 2587 syscallarg(const char *) path; 2588 syscallarg(u_long) flags; 2589 } */ 2590 struct vnode *vp; 2591 int error; 2592 struct nameidata nd; 2593 2594 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2595 SCARG(uap, path)); 2596 if ((error = namei(&nd)) != 0) 2597 return (error); 2598 vp = nd.ni_vp; 2599 error = change_flags(vp, SCARG(uap, flags), l); 2600 vput(vp); 2601 return (error); 2602 } 2603 2604 /* 2605 * Common routine to change flags of a file. 2606 */ 2607 int 2608 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2609 { 2610 struct vattr vattr; 2611 int error; 2612 2613 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2614 /* 2615 * Non-superusers cannot change the flags on devices, even if they 2616 * own them. 2617 */ 2618 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2619 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2620 goto out; 2621 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2622 error = EINVAL; 2623 goto out; 2624 } 2625 } 2626 VATTR_NULL(&vattr); 2627 vattr.va_flags = flags; 2628 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2629 out: 2630 return (error); 2631 } 2632 2633 /* 2634 * Change mode of a file given path name; this version follows links. 2635 */ 2636 /* ARGSUSED */ 2637 int 2638 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2639 { 2640 /* { 2641 syscallarg(const char *) path; 2642 syscallarg(int) mode; 2643 } */ 2644 int error; 2645 struct nameidata nd; 2646 2647 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2648 SCARG(uap, path)); 2649 if ((error = namei(&nd)) != 0) 2650 return (error); 2651 2652 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2653 2654 vrele(nd.ni_vp); 2655 return (error); 2656 } 2657 2658 /* 2659 * Change mode of a file given a file descriptor. 2660 */ 2661 /* ARGSUSED */ 2662 int 2663 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2664 { 2665 /* { 2666 syscallarg(int) fd; 2667 syscallarg(int) mode; 2668 } */ 2669 file_t *fp; 2670 int error; 2671 2672 /* fd_getvnode() will use the descriptor for us */ 2673 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2674 return (error); 2675 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2676 fd_putfile(SCARG(uap, fd)); 2677 return (error); 2678 } 2679 2680 /* 2681 * Change mode of a file given path name; this version does not follow links. 2682 */ 2683 /* ARGSUSED */ 2684 int 2685 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2686 { 2687 /* { 2688 syscallarg(const char *) path; 2689 syscallarg(int) mode; 2690 } */ 2691 int error; 2692 struct nameidata nd; 2693 2694 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2695 SCARG(uap, path)); 2696 if ((error = namei(&nd)) != 0) 2697 return (error); 2698 2699 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2700 2701 vrele(nd.ni_vp); 2702 return (error); 2703 } 2704 2705 /* 2706 * Common routine to set mode given a vnode. 2707 */ 2708 static int 2709 change_mode(struct vnode *vp, int mode, struct lwp *l) 2710 { 2711 struct vattr vattr; 2712 int error; 2713 2714 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2715 VATTR_NULL(&vattr); 2716 vattr.va_mode = mode & ALLPERMS; 2717 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2718 VOP_UNLOCK(vp, 0); 2719 return (error); 2720 } 2721 2722 /* 2723 * Set ownership given a path name; this version follows links. 2724 */ 2725 /* ARGSUSED */ 2726 int 2727 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2728 { 2729 /* { 2730 syscallarg(const char *) path; 2731 syscallarg(uid_t) uid; 2732 syscallarg(gid_t) gid; 2733 } */ 2734 int error; 2735 struct nameidata nd; 2736 2737 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2738 SCARG(uap, path)); 2739 if ((error = namei(&nd)) != 0) 2740 return (error); 2741 2742 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2743 2744 vrele(nd.ni_vp); 2745 return (error); 2746 } 2747 2748 /* 2749 * Set ownership given a path name; this version follows links. 2750 * Provides POSIX semantics. 2751 */ 2752 /* ARGSUSED */ 2753 int 2754 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2755 { 2756 /* { 2757 syscallarg(const char *) path; 2758 syscallarg(uid_t) uid; 2759 syscallarg(gid_t) gid; 2760 } */ 2761 int error; 2762 struct nameidata nd; 2763 2764 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2765 SCARG(uap, path)); 2766 if ((error = namei(&nd)) != 0) 2767 return (error); 2768 2769 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2770 2771 vrele(nd.ni_vp); 2772 return (error); 2773 } 2774 2775 /* 2776 * Set ownership given a file descriptor. 2777 */ 2778 /* ARGSUSED */ 2779 int 2780 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2781 { 2782 /* { 2783 syscallarg(int) fd; 2784 syscallarg(uid_t) uid; 2785 syscallarg(gid_t) gid; 2786 } */ 2787 int error; 2788 file_t *fp; 2789 2790 /* fd_getvnode() will use the descriptor for us */ 2791 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2792 return (error); 2793 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2794 l, 0); 2795 fd_putfile(SCARG(uap, fd)); 2796 return (error); 2797 } 2798 2799 /* 2800 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2801 */ 2802 /* ARGSUSED */ 2803 int 2804 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2805 { 2806 /* { 2807 syscallarg(int) fd; 2808 syscallarg(uid_t) uid; 2809 syscallarg(gid_t) gid; 2810 } */ 2811 int error; 2812 file_t *fp; 2813 2814 /* fd_getvnode() will use the descriptor for us */ 2815 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2816 return (error); 2817 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2818 l, 1); 2819 fd_putfile(SCARG(uap, fd)); 2820 return (error); 2821 } 2822 2823 /* 2824 * Set ownership given a path name; this version does not follow links. 2825 */ 2826 /* ARGSUSED */ 2827 int 2828 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2829 { 2830 /* { 2831 syscallarg(const char *) path; 2832 syscallarg(uid_t) uid; 2833 syscallarg(gid_t) gid; 2834 } */ 2835 int error; 2836 struct nameidata nd; 2837 2838 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2839 SCARG(uap, path)); 2840 if ((error = namei(&nd)) != 0) 2841 return (error); 2842 2843 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2844 2845 vrele(nd.ni_vp); 2846 return (error); 2847 } 2848 2849 /* 2850 * Set ownership given a path name; this version does not follow links. 2851 * Provides POSIX/XPG semantics. 2852 */ 2853 /* ARGSUSED */ 2854 int 2855 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2856 { 2857 /* { 2858 syscallarg(const char *) path; 2859 syscallarg(uid_t) uid; 2860 syscallarg(gid_t) gid; 2861 } */ 2862 int error; 2863 struct nameidata nd; 2864 2865 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2866 SCARG(uap, path)); 2867 if ((error = namei(&nd)) != 0) 2868 return (error); 2869 2870 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2871 2872 vrele(nd.ni_vp); 2873 return (error); 2874 } 2875 2876 /* 2877 * Common routine to set ownership given a vnode. 2878 */ 2879 static int 2880 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2881 int posix_semantics) 2882 { 2883 struct vattr vattr; 2884 mode_t newmode; 2885 int error; 2886 2887 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2888 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2889 goto out; 2890 2891 #define CHANGED(x) ((int)(x) != -1) 2892 newmode = vattr.va_mode; 2893 if (posix_semantics) { 2894 /* 2895 * POSIX/XPG semantics: if the caller is not the super-user, 2896 * clear set-user-id and set-group-id bits. Both POSIX and 2897 * the XPG consider the behaviour for calls by the super-user 2898 * implementation-defined; we leave the set-user-id and set- 2899 * group-id settings intact in that case. 2900 */ 2901 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2902 NULL) != 0) 2903 newmode &= ~(S_ISUID | S_ISGID); 2904 } else { 2905 /* 2906 * NetBSD semantics: when changing owner and/or group, 2907 * clear the respective bit(s). 2908 */ 2909 if (CHANGED(uid)) 2910 newmode &= ~S_ISUID; 2911 if (CHANGED(gid)) 2912 newmode &= ~S_ISGID; 2913 } 2914 /* Update va_mode iff altered. */ 2915 if (vattr.va_mode == newmode) 2916 newmode = VNOVAL; 2917 2918 VATTR_NULL(&vattr); 2919 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2920 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2921 vattr.va_mode = newmode; 2922 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2923 #undef CHANGED 2924 2925 out: 2926 VOP_UNLOCK(vp, 0); 2927 return (error); 2928 } 2929 2930 /* 2931 * Set the access and modification times given a path name; this 2932 * version follows links. 2933 */ 2934 /* ARGSUSED */ 2935 int 2936 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2937 register_t *retval) 2938 { 2939 /* { 2940 syscallarg(const char *) path; 2941 syscallarg(const struct timeval *) tptr; 2942 } */ 2943 2944 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2945 SCARG(uap, tptr), UIO_USERSPACE); 2946 } 2947 2948 /* 2949 * Set the access and modification times given a file descriptor. 2950 */ 2951 /* ARGSUSED */ 2952 int 2953 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2954 register_t *retval) 2955 { 2956 /* { 2957 syscallarg(int) fd; 2958 syscallarg(const struct timeval *) tptr; 2959 } */ 2960 int error; 2961 file_t *fp; 2962 2963 /* fd_getvnode() will use the descriptor for us */ 2964 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2965 return (error); 2966 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2967 UIO_USERSPACE); 2968 fd_putfile(SCARG(uap, fd)); 2969 return (error); 2970 } 2971 2972 /* 2973 * Set the access and modification times given a path name; this 2974 * version does not follow links. 2975 */ 2976 int 2977 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2978 register_t *retval) 2979 { 2980 /* { 2981 syscallarg(const char *) path; 2982 syscallarg(const struct timeval *) tptr; 2983 } */ 2984 2985 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2986 SCARG(uap, tptr), UIO_USERSPACE); 2987 } 2988 2989 /* 2990 * Common routine to set access and modification times given a vnode. 2991 */ 2992 int 2993 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2994 const struct timeval *tptr, enum uio_seg seg) 2995 { 2996 struct vattr vattr; 2997 struct nameidata nd; 2998 int error; 2999 bool vanull, setbirthtime; 3000 struct timespec ts[2]; 3001 3002 if (tptr == NULL) { 3003 vanull = true; 3004 nanotime(&ts[0]); 3005 ts[1] = ts[0]; 3006 } else { 3007 struct timeval tv[2]; 3008 3009 vanull = false; 3010 if (seg != UIO_SYSSPACE) { 3011 error = copyin(tptr, tv, sizeof (tv)); 3012 if (error != 0) 3013 return error; 3014 tptr = tv; 3015 } 3016 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3017 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3018 } 3019 3020 if (vp == NULL) { 3021 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3022 if ((error = namei(&nd)) != 0) 3023 return error; 3024 vp = nd.ni_vp; 3025 } else 3026 nd.ni_vp = NULL; 3027 3028 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3029 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3030 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3031 VATTR_NULL(&vattr); 3032 vattr.va_atime = ts[0]; 3033 vattr.va_mtime = ts[1]; 3034 if (setbirthtime) 3035 vattr.va_birthtime = ts[1]; 3036 if (vanull) 3037 vattr.va_flags |= VA_UTIMES_NULL; 3038 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3039 VOP_UNLOCK(vp, 0); 3040 3041 if (nd.ni_vp != NULL) 3042 vrele(nd.ni_vp); 3043 3044 return error; 3045 } 3046 3047 /* 3048 * Truncate a file given its path name. 3049 */ 3050 /* ARGSUSED */ 3051 int 3052 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3053 { 3054 /* { 3055 syscallarg(const char *) path; 3056 syscallarg(int) pad; 3057 syscallarg(off_t) length; 3058 } */ 3059 struct vnode *vp; 3060 struct vattr vattr; 3061 int error; 3062 struct nameidata nd; 3063 3064 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3065 SCARG(uap, path)); 3066 if ((error = namei(&nd)) != 0) 3067 return (error); 3068 vp = nd.ni_vp; 3069 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3070 if (vp->v_type == VDIR) 3071 error = EISDIR; 3072 else if ((error = vn_writechk(vp)) == 0 && 3073 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3074 VATTR_NULL(&vattr); 3075 vattr.va_size = SCARG(uap, length); 3076 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3077 } 3078 vput(vp); 3079 return (error); 3080 } 3081 3082 /* 3083 * Truncate a file given a file descriptor. 3084 */ 3085 /* ARGSUSED */ 3086 int 3087 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3088 { 3089 /* { 3090 syscallarg(int) fd; 3091 syscallarg(int) pad; 3092 syscallarg(off_t) length; 3093 } */ 3094 struct vattr vattr; 3095 struct vnode *vp; 3096 file_t *fp; 3097 int error; 3098 3099 /* fd_getvnode() will use the descriptor for us */ 3100 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3101 return (error); 3102 if ((fp->f_flag & FWRITE) == 0) { 3103 error = EINVAL; 3104 goto out; 3105 } 3106 vp = fp->f_data; 3107 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3108 if (vp->v_type == VDIR) 3109 error = EISDIR; 3110 else if ((error = vn_writechk(vp)) == 0) { 3111 VATTR_NULL(&vattr); 3112 vattr.va_size = SCARG(uap, length); 3113 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3114 } 3115 VOP_UNLOCK(vp, 0); 3116 out: 3117 fd_putfile(SCARG(uap, fd)); 3118 return (error); 3119 } 3120 3121 /* 3122 * Sync an open file. 3123 */ 3124 /* ARGSUSED */ 3125 int 3126 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3127 { 3128 /* { 3129 syscallarg(int) fd; 3130 } */ 3131 struct vnode *vp; 3132 file_t *fp; 3133 int error; 3134 3135 /* fd_getvnode() will use the descriptor for us */ 3136 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3137 return (error); 3138 vp = fp->f_data; 3139 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3140 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3141 VOP_UNLOCK(vp, 0); 3142 fd_putfile(SCARG(uap, fd)); 3143 return (error); 3144 } 3145 3146 /* 3147 * Sync a range of file data. API modeled after that found in AIX. 3148 * 3149 * FDATASYNC indicates that we need only save enough metadata to be able 3150 * to re-read the written data. Note we duplicate AIX's requirement that 3151 * the file be open for writing. 3152 */ 3153 /* ARGSUSED */ 3154 int 3155 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3156 { 3157 /* { 3158 syscallarg(int) fd; 3159 syscallarg(int) flags; 3160 syscallarg(off_t) start; 3161 syscallarg(off_t) length; 3162 } */ 3163 struct vnode *vp; 3164 file_t *fp; 3165 int flags, nflags; 3166 off_t s, e, len; 3167 int error; 3168 3169 /* fd_getvnode() will use the descriptor for us */ 3170 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3171 return (error); 3172 3173 if ((fp->f_flag & FWRITE) == 0) { 3174 error = EBADF; 3175 goto out; 3176 } 3177 3178 flags = SCARG(uap, flags); 3179 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3180 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3181 error = EINVAL; 3182 goto out; 3183 } 3184 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3185 if (flags & FDATASYNC) 3186 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3187 else 3188 nflags = FSYNC_WAIT; 3189 if (flags & FDISKSYNC) 3190 nflags |= FSYNC_CACHE; 3191 3192 len = SCARG(uap, length); 3193 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3194 if (len) { 3195 s = SCARG(uap, start); 3196 e = s + len; 3197 if (e < s) { 3198 error = EINVAL; 3199 goto out; 3200 } 3201 } else { 3202 e = 0; 3203 s = 0; 3204 } 3205 3206 vp = fp->f_data; 3207 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3208 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3209 VOP_UNLOCK(vp, 0); 3210 out: 3211 fd_putfile(SCARG(uap, fd)); 3212 return (error); 3213 } 3214 3215 /* 3216 * Sync the data of an open file. 3217 */ 3218 /* ARGSUSED */ 3219 int 3220 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3221 { 3222 /* { 3223 syscallarg(int) fd; 3224 } */ 3225 struct vnode *vp; 3226 file_t *fp; 3227 int error; 3228 3229 /* fd_getvnode() will use the descriptor for us */ 3230 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3231 return (error); 3232 if ((fp->f_flag & FWRITE) == 0) { 3233 fd_putfile(SCARG(uap, fd)); 3234 return (EBADF); 3235 } 3236 vp = fp->f_data; 3237 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3238 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3239 VOP_UNLOCK(vp, 0); 3240 fd_putfile(SCARG(uap, fd)); 3241 return (error); 3242 } 3243 3244 /* 3245 * Rename files, (standard) BSD semantics frontend. 3246 */ 3247 /* ARGSUSED */ 3248 int 3249 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3250 { 3251 /* { 3252 syscallarg(const char *) from; 3253 syscallarg(const char *) to; 3254 } */ 3255 3256 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3257 } 3258 3259 /* 3260 * Rename files, POSIX semantics frontend. 3261 */ 3262 /* ARGSUSED */ 3263 int 3264 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3265 { 3266 /* { 3267 syscallarg(const char *) from; 3268 syscallarg(const char *) to; 3269 } */ 3270 3271 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3272 } 3273 3274 /* 3275 * Rename files. Source and destination must either both be directories, 3276 * or both not be directories. If target is a directory, it must be empty. 3277 * If `from' and `to' refer to the same object, the value of the `retain' 3278 * argument is used to determine whether `from' will be 3279 * 3280 * (retain == 0) deleted unless `from' and `to' refer to the same 3281 * object in the file system's name space (BSD). 3282 * (retain == 1) always retained (POSIX). 3283 */ 3284 int 3285 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3286 { 3287 struct vnode *tvp, *fvp, *tdvp; 3288 struct nameidata fromnd, tond; 3289 struct mount *fs; 3290 struct lwp *l = curlwp; 3291 struct proc *p; 3292 uint32_t saveflag; 3293 int error; 3294 3295 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3296 seg, from); 3297 if ((error = namei(&fromnd)) != 0) 3298 return (error); 3299 if (fromnd.ni_dvp != fromnd.ni_vp) 3300 VOP_UNLOCK(fromnd.ni_dvp, 0); 3301 fvp = fromnd.ni_vp; 3302 3303 fs = fvp->v_mount; 3304 error = VFS_RENAMELOCK_ENTER(fs); 3305 if (error) { 3306 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3307 vrele(fromnd.ni_dvp); 3308 vrele(fvp); 3309 goto out1; 3310 } 3311 3312 /* 3313 * close, partially, yet another race - ideally we should only 3314 * go as far as getting fromnd.ni_dvp before getting the per-fs 3315 * lock, and then continue to get fromnd.ni_vp, but we can't do 3316 * that with namei as it stands. 3317 * 3318 * This still won't prevent rmdir from nuking fromnd.ni_vp 3319 * under us. The real fix is to get the locks in the right 3320 * order and do the lookups in the right places, but that's a 3321 * major rototill. 3322 * 3323 * Preserve the SAVESTART in cn_flags, because who knows what 3324 * might happen if we don't. 3325 * 3326 * Note: this logic (as well as this whole function) is cloned 3327 * in nfs_serv.c. Proceed accordingly. 3328 */ 3329 vrele(fvp); 3330 if ((fromnd.ni_cnd.cn_namelen == 1 && 3331 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3332 (fromnd.ni_cnd.cn_namelen == 2 && 3333 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3334 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3335 error = EINVAL; 3336 VFS_RENAMELOCK_EXIT(fs); 3337 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3338 vrele(fromnd.ni_dvp); 3339 goto out1; 3340 } 3341 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3342 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3343 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3344 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3345 fromnd.ni_cnd.cn_flags |= saveflag; 3346 if (error) { 3347 VOP_UNLOCK(fromnd.ni_dvp, 0); 3348 VFS_RENAMELOCK_EXIT(fs); 3349 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3350 vrele(fromnd.ni_dvp); 3351 goto out1; 3352 } 3353 VOP_UNLOCK(fromnd.ni_vp, 0); 3354 if (fromnd.ni_dvp != fromnd.ni_vp) 3355 VOP_UNLOCK(fromnd.ni_dvp, 0); 3356 fvp = fromnd.ni_vp; 3357 3358 NDINIT(&tond, RENAME, 3359 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3360 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3361 seg, to); 3362 if ((error = namei(&tond)) != 0) { 3363 VFS_RENAMELOCK_EXIT(fs); 3364 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3365 vrele(fromnd.ni_dvp); 3366 vrele(fvp); 3367 goto out1; 3368 } 3369 tdvp = tond.ni_dvp; 3370 tvp = tond.ni_vp; 3371 3372 if (tvp != NULL) { 3373 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3374 error = ENOTDIR; 3375 goto out; 3376 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3377 error = EISDIR; 3378 goto out; 3379 } 3380 } 3381 3382 if (fvp == tdvp) 3383 error = EINVAL; 3384 3385 /* 3386 * Source and destination refer to the same object. 3387 */ 3388 if (fvp == tvp) { 3389 if (retain) 3390 error = -1; 3391 else if (fromnd.ni_dvp == tdvp && 3392 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3393 !memcmp(fromnd.ni_cnd.cn_nameptr, 3394 tond.ni_cnd.cn_nameptr, 3395 fromnd.ni_cnd.cn_namelen)) 3396 error = -1; 3397 } 3398 3399 #if NVERIEXEC > 0 3400 if (!error) { 3401 char *f1, *f2; 3402 size_t f1_len; 3403 size_t f2_len; 3404 3405 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3406 f1 = kmem_alloc(f1_len, KM_SLEEP); 3407 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3408 3409 f2_len = tond.ni_cnd.cn_namelen + 1; 3410 f2 = kmem_alloc(f2_len, KM_SLEEP); 3411 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3412 3413 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3414 3415 kmem_free(f1, f1_len); 3416 kmem_free(f2, f2_len); 3417 } 3418 #endif /* NVERIEXEC > 0 */ 3419 3420 out: 3421 p = l->l_proc; 3422 if (!error) { 3423 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3424 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3425 VFS_RENAMELOCK_EXIT(fs); 3426 } else { 3427 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3428 if (tdvp == tvp) 3429 vrele(tdvp); 3430 else 3431 vput(tdvp); 3432 if (tvp) 3433 vput(tvp); 3434 VFS_RENAMELOCK_EXIT(fs); 3435 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3436 vrele(fromnd.ni_dvp); 3437 vrele(fvp); 3438 } 3439 vrele(tond.ni_startdir); 3440 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3441 out1: 3442 if (fromnd.ni_startdir) 3443 vrele(fromnd.ni_startdir); 3444 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3445 return (error == -1 ? 0 : error); 3446 } 3447 3448 /* 3449 * Make a directory file. 3450 */ 3451 /* ARGSUSED */ 3452 int 3453 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3454 { 3455 /* { 3456 syscallarg(const char *) path; 3457 syscallarg(int) mode; 3458 } */ 3459 struct proc *p = l->l_proc; 3460 struct vnode *vp; 3461 struct vattr vattr; 3462 int error; 3463 struct nameidata nd; 3464 3465 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3466 SCARG(uap, path)); 3467 if ((error = namei(&nd)) != 0) 3468 return (error); 3469 vp = nd.ni_vp; 3470 if (vp != NULL) { 3471 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3472 if (nd.ni_dvp == vp) 3473 vrele(nd.ni_dvp); 3474 else 3475 vput(nd.ni_dvp); 3476 vrele(vp); 3477 return (EEXIST); 3478 } 3479 VATTR_NULL(&vattr); 3480 vattr.va_type = VDIR; 3481 /* We will read cwdi->cwdi_cmask unlocked. */ 3482 vattr.va_mode = 3483 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3484 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3485 if (!error) 3486 vput(nd.ni_vp); 3487 return (error); 3488 } 3489 3490 /* 3491 * Remove a directory file. 3492 */ 3493 /* ARGSUSED */ 3494 int 3495 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3496 { 3497 /* { 3498 syscallarg(const char *) path; 3499 } */ 3500 struct vnode *vp; 3501 int error; 3502 struct nameidata nd; 3503 3504 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3505 SCARG(uap, path)); 3506 if ((error = namei(&nd)) != 0) 3507 return (error); 3508 vp = nd.ni_vp; 3509 if (vp->v_type != VDIR) { 3510 error = ENOTDIR; 3511 goto out; 3512 } 3513 /* 3514 * No rmdir "." please. 3515 */ 3516 if (nd.ni_dvp == vp) { 3517 error = EINVAL; 3518 goto out; 3519 } 3520 /* 3521 * The root of a mounted filesystem cannot be deleted. 3522 */ 3523 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3524 error = EBUSY; 3525 goto out; 3526 } 3527 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3528 return (error); 3529 3530 out: 3531 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3532 if (nd.ni_dvp == vp) 3533 vrele(nd.ni_dvp); 3534 else 3535 vput(nd.ni_dvp); 3536 vput(vp); 3537 return (error); 3538 } 3539 3540 /* 3541 * Read a block of directory entries in a file system independent format. 3542 */ 3543 int 3544 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3545 { 3546 /* { 3547 syscallarg(int) fd; 3548 syscallarg(char *) buf; 3549 syscallarg(size_t) count; 3550 } */ 3551 file_t *fp; 3552 int error, done; 3553 3554 /* fd_getvnode() will use the descriptor for us */ 3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3556 return (error); 3557 if ((fp->f_flag & FREAD) == 0) { 3558 error = EBADF; 3559 goto out; 3560 } 3561 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3562 SCARG(uap, count), &done, l, 0, 0); 3563 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3564 *retval = done; 3565 out: 3566 fd_putfile(SCARG(uap, fd)); 3567 return (error); 3568 } 3569 3570 /* 3571 * Set the mode mask for creation of filesystem nodes. 3572 */ 3573 int 3574 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3575 { 3576 /* { 3577 syscallarg(mode_t) newmask; 3578 } */ 3579 struct proc *p = l->l_proc; 3580 struct cwdinfo *cwdi; 3581 3582 /* 3583 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3584 * important is that we serialize changes to the mask. The 3585 * rw_exit() will issue a write memory barrier on our behalf, 3586 * and force the changes out to other CPUs (as it must use an 3587 * atomic operation, draining the local CPU's store buffers). 3588 */ 3589 cwdi = p->p_cwdi; 3590 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3591 *retval = cwdi->cwdi_cmask; 3592 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3593 rw_exit(&cwdi->cwdi_lock); 3594 3595 return (0); 3596 } 3597 3598 int 3599 dorevoke(struct vnode *vp, kauth_cred_t cred) 3600 { 3601 struct vattr vattr; 3602 int error; 3603 3604 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3605 return error; 3606 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3607 (error = kauth_authorize_generic(cred, 3608 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3609 VOP_REVOKE(vp, REVOKEALL); 3610 return (error); 3611 } 3612 3613 /* 3614 * Void all references to file by ripping underlying filesystem 3615 * away from vnode. 3616 */ 3617 /* ARGSUSED */ 3618 int 3619 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3620 { 3621 /* { 3622 syscallarg(const char *) path; 3623 } */ 3624 struct vnode *vp; 3625 int error; 3626 struct nameidata nd; 3627 3628 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3629 SCARG(uap, path)); 3630 if ((error = namei(&nd)) != 0) 3631 return (error); 3632 vp = nd.ni_vp; 3633 error = dorevoke(vp, l->l_cred); 3634 vrele(vp); 3635 return (error); 3636 } 3637