1 /* $NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 63 */ 64 65 #include <sys/cdefs.h> 66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.369 2008/06/24 11:21:46 ad Exp $"); 67 68 #include "opt_compat_netbsd.h" 69 #include "opt_compat_43.h" 70 #include "opt_fileassoc.h" 71 #include "fss.h" 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/namei.h> 77 #include <sys/filedesc.h> 78 #include <sys/kernel.h> 79 #include <sys/file.h> 80 #include <sys/stat.h> 81 #include <sys/vnode.h> 82 #include <sys/mount.h> 83 #include <sys/proc.h> 84 #include <sys/uio.h> 85 #include <sys/malloc.h> 86 #include <sys/kmem.h> 87 #include <sys/dirent.h> 88 #include <sys/sysctl.h> 89 #include <sys/syscallargs.h> 90 #include <sys/vfs_syscalls.h> 91 #include <sys/ktrace.h> 92 #ifdef FILEASSOC 93 #include <sys/fileassoc.h> 94 #endif /* FILEASSOC */ 95 #include <sys/verified_exec.h> 96 #include <sys/kauth.h> 97 #include <sys/atomic.h> 98 #include <sys/module.h> 99 100 #include <miscfs/genfs/genfs.h> 101 #include <miscfs/syncfs/syncfs.h> 102 #include <miscfs/specfs/specdev.h> 103 104 #ifdef COMPAT_30 105 #include "opt_nfsserver.h" 106 #include <nfs/rpcv2.h> 107 #endif 108 #include <nfs/nfsproto.h> 109 #ifdef COMPAT_30 110 #include <nfs/nfs.h> 111 #include <nfs/nfs_var.h> 112 #endif 113 114 #if NFSS > 0 115 #include <dev/fssvar.h> 116 #endif 117 118 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 119 120 static int change_dir(struct nameidata *, struct lwp *); 121 static int change_flags(struct vnode *, u_long, struct lwp *); 122 static int change_mode(struct vnode *, int, struct lwp *l); 123 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 124 125 void checkdirs(struct vnode *); 126 127 int dovfsusermount = 0; 128 129 /* 130 * Virtual File System System Calls 131 */ 132 133 /* 134 * Mount a file system. 135 */ 136 137 #if defined(COMPAT_09) || defined(COMPAT_43) 138 /* 139 * This table is used to maintain compatibility with 4.3BSD 140 * and NetBSD 0.9 mount syscalls. Note, the order is important! 141 * 142 * Do not modify this table. It should only contain filesystems 143 * supported by NetBSD 0.9 and 4.3BSD. 144 */ 145 const char * const mountcompatnames[] = { 146 NULL, /* 0 = MOUNT_NONE */ 147 MOUNT_FFS, /* 1 = MOUNT_UFS */ 148 MOUNT_NFS, /* 2 */ 149 MOUNT_MFS, /* 3 */ 150 MOUNT_MSDOS, /* 4 */ 151 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 152 MOUNT_FDESC, /* 6 */ 153 MOUNT_KERNFS, /* 7 */ 154 NULL, /* 8 = MOUNT_DEVFS */ 155 MOUNT_AFS, /* 9 */ 156 }; 157 const int nmountcompatnames = sizeof(mountcompatnames) / 158 sizeof(mountcompatnames[0]); 159 #endif /* COMPAT_09 || COMPAT_43 */ 160 161 static int 162 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 163 void *data, size_t *data_len) 164 { 165 struct mount *mp; 166 int error = 0, saved_flags; 167 168 mp = vp->v_mount; 169 saved_flags = mp->mnt_flag; 170 171 /* We can operate only on VV_ROOT nodes. */ 172 if ((vp->v_vflag & VV_ROOT) == 0) { 173 error = EINVAL; 174 goto out; 175 } 176 177 /* 178 * We only allow the filesystem to be reloaded if it 179 * is currently mounted read-only. 180 */ 181 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 182 error = EOPNOTSUPP; /* Needs translation */ 183 goto out; 184 } 185 186 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 187 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 188 if (error) 189 goto out; 190 191 if (vfs_busy(mp, NULL)) { 192 error = EPERM; 193 goto out; 194 } 195 196 mutex_enter(&mp->mnt_updating); 197 198 mp->mnt_flag &= ~MNT_OP_FLAGS; 199 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 200 201 /* 202 * Set the mount level flags. 203 */ 204 if (flags & MNT_RDONLY) 205 mp->mnt_flag |= MNT_RDONLY; 206 else if (mp->mnt_flag & MNT_RDONLY) 207 mp->mnt_iflag |= IMNT_WANTRDWR; 208 mp->mnt_flag &= 209 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 210 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 211 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 212 mp->mnt_flag |= flags & 213 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 214 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 215 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 216 MNT_IGNORE); 217 218 error = VFS_MOUNT(mp, path, data, data_len); 219 220 #if defined(COMPAT_30) && defined(NFSSERVER) 221 if (error && data != NULL) { 222 int error2; 223 224 /* Update failed; let's try and see if it was an 225 * export request. */ 226 error2 = nfs_update_exports_30(mp, path, data, l); 227 228 /* Only update error code if the export request was 229 * understood but some problem occurred while 230 * processing it. */ 231 if (error2 != EJUSTRETURN) 232 error = error2; 233 } 234 #endif 235 if (mp->mnt_iflag & IMNT_WANTRDWR) 236 mp->mnt_flag &= ~MNT_RDONLY; 237 if (error) 238 mp->mnt_flag = saved_flags; 239 mp->mnt_flag &= ~MNT_OP_FLAGS; 240 mp->mnt_iflag &= ~IMNT_WANTRDWR; 241 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 242 if (mp->mnt_syncer == NULL) 243 error = vfs_allocate_syncvnode(mp); 244 } else { 245 if (mp->mnt_syncer != NULL) 246 vfs_deallocate_syncvnode(mp); 247 } 248 mutex_exit(&mp->mnt_updating); 249 vfs_unbusy(mp, false, NULL); 250 251 out: 252 return (error); 253 } 254 255 static int 256 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 257 { 258 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 259 int error; 260 261 /* Copy file-system type from userspace. */ 262 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 263 if (error) { 264 #if defined(COMPAT_09) || defined(COMPAT_43) 265 /* 266 * Historically, filesystem types were identified by numbers. 267 * If we get an integer for the filesystem type instead of a 268 * string, we check to see if it matches one of the historic 269 * filesystem types. 270 */ 271 u_long fsindex = (u_long)fstype; 272 if (fsindex >= nmountcompatnames || 273 mountcompatnames[fsindex] == NULL) 274 return ENODEV; 275 strlcpy(fstypename, mountcompatnames[fsindex], 276 sizeof(fstypename)); 277 #else 278 return error; 279 #endif 280 } 281 282 #ifdef COMPAT_10 283 /* Accept `ufs' as an alias for `ffs'. */ 284 if (strcmp(fstypename, "ufs") == 0) 285 fstypename[0] = 'f'; 286 #endif 287 288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 289 return 0; 290 291 /* If we can autoload a vfs module, try again */ 292 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true); 293 294 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 295 return 0; 296 297 return ENODEV; 298 } 299 300 static int 301 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 302 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 303 { 304 struct mount *mp; 305 struct vnode *vp = *vpp; 306 struct vattr va; 307 int error; 308 309 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 310 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 311 if (error) 312 return error; 313 314 /* Can't make a non-dir a mount-point (from here anyway). */ 315 if (vp->v_type != VDIR) 316 return ENOTDIR; 317 318 /* 319 * If the user is not root, ensure that they own the directory 320 * onto which we are attempting to mount. 321 */ 322 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 323 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 324 (error = kauth_authorize_generic(l->l_cred, 325 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 326 return error; 327 } 328 329 if (flags & MNT_EXPORTED) 330 return EINVAL; 331 332 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 333 return error; 334 335 /* 336 * Check if a file-system is not already mounted on this vnode. 337 */ 338 if (vp->v_mountedhere != NULL) 339 return EBUSY; 340 341 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 342 if (mp == NULL) 343 return ENOMEM; 344 345 mp->mnt_op = vfsops; 346 mp->mnt_refcnt = 1; 347 348 TAILQ_INIT(&mp->mnt_vnodelist); 349 rw_init(&mp->mnt_unmounting); 350 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 351 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 352 error = vfs_busy(mp, NULL); 353 KASSERT(error == 0); 354 mutex_enter(&mp->mnt_updating); 355 356 mp->mnt_vnodecovered = vp; 357 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 358 mount_initspecific(mp); 359 360 /* 361 * The underlying file system may refuse the mount for 362 * various reasons. Allow the user to force it to happen. 363 * 364 * Set the mount level flags. 365 */ 366 mp->mnt_flag = flags & 367 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 368 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 369 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 370 MNT_IGNORE | MNT_RDONLY); 371 372 error = VFS_MOUNT(mp, path, data, data_len); 373 mp->mnt_flag &= ~MNT_OP_FLAGS; 374 375 /* 376 * Put the new filesystem on the mount list after root. 377 */ 378 cache_purge(vp); 379 if (error != 0) { 380 vp->v_mountedhere = NULL; 381 mutex_exit(&mp->mnt_updating); 382 vfs_unbusy(mp, false, NULL); 383 vfs_destroy(mp); 384 return error; 385 } 386 387 mp->mnt_iflag &= ~IMNT_WANTRDWR; 388 mutex_enter(&mountlist_lock); 389 vp->v_mountedhere = mp; 390 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 391 mutex_exit(&mountlist_lock); 392 vn_restorerecurse(vp, recurse); 393 VOP_UNLOCK(vp, 0); 394 checkdirs(vp); 395 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 396 error = vfs_allocate_syncvnode(mp); 397 /* Hold an additional reference to the mount across VFS_START(). */ 398 mutex_exit(&mp->mnt_updating); 399 vfs_unbusy(mp, true, NULL); 400 (void) VFS_STATVFS(mp, &mp->mnt_stat); 401 error = VFS_START(mp, 0); 402 if (error) { 403 vrele(vp); 404 vfs_destroy(mp); 405 } 406 /* Drop reference held for VFS_START(). */ 407 vfs_destroy(mp); 408 *vpp = NULL; 409 return error; 410 } 411 412 static int 413 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 414 void *data, size_t *data_len) 415 { 416 struct mount *mp; 417 int error; 418 419 /* If MNT_GETARGS is specified, it should be the only flag. */ 420 if (flags & ~MNT_GETARGS) 421 return EINVAL; 422 423 mp = vp->v_mount; 424 425 /* XXX: probably some notion of "can see" here if we want isolation. */ 426 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 427 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 428 if (error) 429 return error; 430 431 if ((vp->v_vflag & VV_ROOT) == 0) 432 return EINVAL; 433 434 if (vfs_busy(mp, NULL)) 435 return EPERM; 436 437 mutex_enter(&mp->mnt_updating); 438 mp->mnt_flag &= ~MNT_OP_FLAGS; 439 mp->mnt_flag |= MNT_GETARGS; 440 error = VFS_MOUNT(mp, path, data, data_len); 441 mp->mnt_flag &= ~MNT_OP_FLAGS; 442 mutex_exit(&mp->mnt_updating); 443 444 vfs_unbusy(mp, false, NULL); 445 return (error); 446 } 447 448 #ifdef COMPAT_40 449 /* ARGSUSED */ 450 int 451 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval) 452 { 453 /* { 454 syscallarg(const char *) type; 455 syscallarg(const char *) path; 456 syscallarg(int) flags; 457 syscallarg(void *) data; 458 } */ 459 register_t dummy; 460 461 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 462 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 463 } 464 #endif 465 466 int 467 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 468 { 469 /* { 470 syscallarg(const char *) type; 471 syscallarg(const char *) path; 472 syscallarg(int) flags; 473 syscallarg(void *) data; 474 syscallarg(size_t) data_len; 475 } */ 476 477 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 478 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 479 SCARG(uap, data_len), retval); 480 } 481 482 int 483 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 484 const char *path, int flags, void *data, enum uio_seg data_seg, 485 size_t data_len, register_t *retval) 486 { 487 struct vnode *vp; 488 struct nameidata nd; 489 void *data_buf = data; 490 u_int recurse; 491 int error; 492 493 /* 494 * Get vnode to be covered 495 */ 496 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 497 if ((error = namei(&nd)) != 0) 498 return (error); 499 vp = nd.ni_vp; 500 501 /* 502 * A lookup in VFS_MOUNT might result in an attempt to 503 * lock this vnode again, so make the lock recursive. 504 */ 505 if (vfsops == NULL) { 506 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 507 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 508 recurse = vn_setrecurse(vp); 509 vfsops = vp->v_mount->mnt_op; 510 } else { 511 /* 'type' is userspace */ 512 error = mount_get_vfsops(type, &vfsops); 513 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 514 recurse = vn_setrecurse(vp); 515 if (error != 0) 516 goto done; 517 } 518 } else { 519 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 520 recurse = vn_setrecurse(vp); 521 } 522 523 if (data != NULL && data_seg == UIO_USERSPACE) { 524 if (data_len == 0) { 525 /* No length supplied, use default for filesystem */ 526 data_len = vfsops->vfs_min_mount_data; 527 if (data_len > VFS_MAX_MOUNT_DATA) { 528 /* maybe a force loaded old LKM */ 529 error = EINVAL; 530 goto done; 531 } 532 #ifdef COMPAT_30 533 /* Hopefully a longer buffer won't make copyin() fail */ 534 if (flags & MNT_UPDATE 535 && data_len < sizeof (struct mnt_export_args30)) 536 data_len = sizeof (struct mnt_export_args30); 537 #endif 538 } 539 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 540 541 /* NFS needs the buffer even for mnt_getargs .... */ 542 error = copyin(data, data_buf, data_len); 543 if (error != 0) 544 goto done; 545 } 546 547 if (flags & MNT_GETARGS) { 548 if (data_len == 0) { 549 error = EINVAL; 550 goto done; 551 } 552 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 553 if (error != 0) 554 goto done; 555 if (data_seg == UIO_USERSPACE) 556 error = copyout(data_buf, data, data_len); 557 *retval = data_len; 558 } else if (flags & MNT_UPDATE) { 559 error = mount_update(l, vp, path, flags, data_buf, &data_len); 560 } else { 561 /* Locking is handled internally in mount_domount(). */ 562 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 563 &data_len, recurse); 564 } 565 566 done: 567 if (vp != NULL) { 568 vn_restorerecurse(vp, recurse); 569 vput(vp); 570 } 571 if (data_buf != data) 572 free(data_buf, M_TEMP); 573 return (error); 574 } 575 576 /* 577 * Scan all active processes to see if any of them have a current 578 * or root directory onto which the new filesystem has just been 579 * mounted. If so, replace them with the new mount point. 580 */ 581 void 582 checkdirs(struct vnode *olddp) 583 { 584 struct cwdinfo *cwdi; 585 struct vnode *newdp, *rele1, *rele2; 586 struct proc *p; 587 bool retry; 588 589 if (olddp->v_usecount == 1) 590 return; 591 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 592 panic("mount: lost mount"); 593 594 do { 595 retry = false; 596 mutex_enter(proc_lock); 597 PROCLIST_FOREACH(p, &allproc) { 598 if ((p->p_flag & PK_MARKER) != 0) 599 continue; 600 if ((cwdi = p->p_cwdi) == NULL) 601 continue; 602 /* 603 * Can't change to the old directory any more, 604 * so even if we see a stale value it's not a 605 * problem. 606 */ 607 if (cwdi->cwdi_cdir != olddp && 608 cwdi->cwdi_rdir != olddp) 609 continue; 610 retry = true; 611 rele1 = NULL; 612 rele2 = NULL; 613 atomic_inc_uint(&cwdi->cwdi_refcnt); 614 mutex_exit(proc_lock); 615 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 616 if (cwdi->cwdi_cdir == olddp) { 617 rele1 = cwdi->cwdi_cdir; 618 VREF(newdp); 619 cwdi->cwdi_cdir = newdp; 620 } 621 if (cwdi->cwdi_rdir == olddp) { 622 rele2 = cwdi->cwdi_rdir; 623 VREF(newdp); 624 cwdi->cwdi_rdir = newdp; 625 } 626 rw_exit(&cwdi->cwdi_lock); 627 cwdfree(cwdi); 628 if (rele1 != NULL) 629 vrele(rele1); 630 if (rele2 != NULL) 631 vrele(rele2); 632 mutex_enter(proc_lock); 633 break; 634 } 635 mutex_exit(proc_lock); 636 } while (retry); 637 638 if (rootvnode == olddp) { 639 vrele(rootvnode); 640 VREF(newdp); 641 rootvnode = newdp; 642 } 643 vput(newdp); 644 } 645 646 /* 647 * Unmount a file system. 648 * 649 * Note: unmount takes a path to the vnode mounted on as argument, 650 * not special file (as before). 651 */ 652 /* ARGSUSED */ 653 int 654 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 655 { 656 /* { 657 syscallarg(const char *) path; 658 syscallarg(int) flags; 659 } */ 660 struct vnode *vp; 661 struct mount *mp; 662 int error; 663 struct nameidata nd; 664 665 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 666 SCARG(uap, path)); 667 if ((error = namei(&nd)) != 0) 668 return (error); 669 vp = nd.ni_vp; 670 mp = vp->v_mount; 671 atomic_inc_uint(&mp->mnt_refcnt); 672 VOP_UNLOCK(vp, 0); 673 674 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 675 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 676 if (error) { 677 vrele(vp); 678 vfs_destroy(mp); 679 return (error); 680 } 681 682 /* 683 * Don't allow unmounting the root file system. 684 */ 685 if (mp->mnt_flag & MNT_ROOTFS) { 686 vrele(vp); 687 vfs_destroy(mp); 688 return (EINVAL); 689 } 690 691 /* 692 * Must be the root of the filesystem 693 */ 694 if ((vp->v_vflag & VV_ROOT) == 0) { 695 vrele(vp); 696 vfs_destroy(mp); 697 return (EINVAL); 698 } 699 700 vrele(vp); 701 error = dounmount(mp, SCARG(uap, flags), l); 702 return error; 703 } 704 705 /* 706 * Do the actual file system unmount. File system is assumed to have 707 * been locked by the caller. 708 * 709 * => Caller gain reference to the mount, explicility for unmount. 710 * => Reference will be dropped in all cases. 711 */ 712 int 713 dounmount(struct mount *mp, int flags, struct lwp *l) 714 { 715 struct vnode *coveredvp; 716 int error; 717 int async; 718 int used_syncer; 719 720 #if NVERIEXEC > 0 721 error = veriexec_unmountchk(mp); 722 if (error) 723 return (error); 724 #endif /* NVERIEXEC > 0 */ 725 726 /* 727 * XXX Freeze syncer. Must do this before locking the 728 * mount point. See dounmount() for details. 729 */ 730 mutex_enter(&syncer_mutex); 731 rw_enter(&mp->mnt_unmounting, RW_WRITER); 732 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 733 rw_exit(&mp->mnt_unmounting); 734 mutex_exit(&syncer_mutex); 735 vfs_destroy(mp); 736 return ENOENT; 737 } 738 739 used_syncer = (mp->mnt_syncer != NULL); 740 741 /* 742 * XXX Syncer must be frozen when we get here. This should really 743 * be done on a per-mountpoint basis, but especially the softdep 744 * code possibly called from the syncer doesn't exactly work on a 745 * per-mountpoint basis, so the softdep code would become a maze 746 * of vfs_busy() calls. 747 * 748 * The caller of dounmount() must acquire syncer_mutex because 749 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 750 * order, and we must preserve that order to avoid deadlock. 751 * 752 * So, if the file system did not use the syncer, now is 753 * the time to release the syncer_mutex. 754 */ 755 if (used_syncer == 0) 756 mutex_exit(&syncer_mutex); 757 758 mp->mnt_iflag |= IMNT_UNMOUNT; 759 async = mp->mnt_flag & MNT_ASYNC; 760 mp->mnt_flag &= ~MNT_ASYNC; 761 cache_purgevfs(mp); /* remove cache entries for this file sys */ 762 if (mp->mnt_syncer != NULL) 763 vfs_deallocate_syncvnode(mp); 764 error = 0; 765 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 766 #if NFSS > 0 767 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 768 #endif 769 if (error == 0) 770 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 771 } 772 vfs_scrubvnlist(mp); 773 if (error == 0 || (flags & MNT_FORCE)) 774 error = VFS_UNMOUNT(mp, flags); 775 if (error) { 776 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 777 (void) vfs_allocate_syncvnode(mp); 778 mp->mnt_iflag &= ~IMNT_UNMOUNT; 779 mp->mnt_flag |= async; 780 rw_exit(&mp->mnt_unmounting); 781 if (used_syncer) 782 mutex_exit(&syncer_mutex); 783 return (error); 784 } 785 vfs_scrubvnlist(mp); 786 mutex_enter(&mountlist_lock); 787 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 788 coveredvp->v_mountedhere = NULL; 789 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 790 mp->mnt_iflag |= IMNT_GONE; 791 mutex_exit(&mountlist_lock); 792 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 793 panic("unmount: dangling vnode"); 794 if (used_syncer) 795 mutex_exit(&syncer_mutex); 796 vfs_hooks_unmount(mp); 797 rw_exit(&mp->mnt_unmounting); 798 vfs_destroy(mp); /* caller provided reference */ 799 vfs_destroy(mp); /* from mount(), final nail in coffin */ 800 if (coveredvp != NULLVP) 801 vrele(coveredvp); 802 return (0); 803 } 804 805 /* 806 * Sync each mounted filesystem. 807 */ 808 #ifdef DEBUG 809 int syncprt = 0; 810 struct ctldebug debug0 = { "syncprt", &syncprt }; 811 #endif 812 813 /* ARGSUSED */ 814 int 815 sys_sync(struct lwp *l, const void *v, register_t *retval) 816 { 817 struct mount *mp, *nmp; 818 int asyncflag; 819 820 if (l == NULL) 821 l = &lwp0; 822 823 mutex_enter(&mountlist_lock); 824 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 825 mp = nmp) { 826 if (vfs_busy(mp, &nmp)) { 827 continue; 828 } 829 mutex_enter(&mp->mnt_updating); 830 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 831 asyncflag = mp->mnt_flag & MNT_ASYNC; 832 mp->mnt_flag &= ~MNT_ASYNC; 833 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 834 if (asyncflag) 835 mp->mnt_flag |= MNT_ASYNC; 836 } 837 mutex_exit(&mp->mnt_updating); 838 vfs_unbusy(mp, false, &nmp); 839 } 840 mutex_exit(&mountlist_lock); 841 #ifdef DEBUG 842 if (syncprt) 843 vfs_bufstats(); 844 #endif /* DEBUG */ 845 return (0); 846 } 847 848 /* 849 * Change filesystem quotas. 850 */ 851 /* ARGSUSED */ 852 int 853 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 854 { 855 /* { 856 syscallarg(const char *) path; 857 syscallarg(int) cmd; 858 syscallarg(int) uid; 859 syscallarg(void *) arg; 860 } */ 861 struct mount *mp; 862 int error; 863 struct nameidata nd; 864 865 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 866 SCARG(uap, path)); 867 if ((error = namei(&nd)) != 0) 868 return (error); 869 mp = nd.ni_vp->v_mount; 870 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 871 SCARG(uap, arg)); 872 vrele(nd.ni_vp); 873 return (error); 874 } 875 876 int 877 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 878 int root) 879 { 880 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 881 int error = 0; 882 883 /* 884 * If MNT_NOWAIT or MNT_LAZY is specified, do not 885 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 886 * overrides MNT_NOWAIT. 887 */ 888 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 889 (flags != MNT_WAIT && flags != 0)) { 890 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 891 goto done; 892 } 893 894 /* Get the filesystem stats now */ 895 memset(sp, 0, sizeof(*sp)); 896 if ((error = VFS_STATVFS(mp, sp)) != 0) { 897 return error; 898 } 899 900 if (cwdi->cwdi_rdir == NULL) 901 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 902 done: 903 if (cwdi->cwdi_rdir != NULL) { 904 size_t len; 905 char *bp; 906 char c; 907 char *path = PNBUF_GET(); 908 909 bp = path + MAXPATHLEN; 910 *--bp = '\0'; 911 rw_enter(&cwdi->cwdi_lock, RW_READER); 912 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 913 MAXPATHLEN / 2, 0, l); 914 rw_exit(&cwdi->cwdi_lock); 915 if (error) { 916 PNBUF_PUT(path); 917 return error; 918 } 919 len = strlen(bp); 920 /* 921 * for mount points that are below our root, we can see 922 * them, so we fix up the pathname and return them. The 923 * rest we cannot see, so we don't allow viewing the 924 * data. 925 */ 926 if (strncmp(bp, sp->f_mntonname, len) == 0 && 927 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 928 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 929 sizeof(sp->f_mntonname)); 930 if (sp->f_mntonname[0] == '\0') 931 (void)strlcpy(sp->f_mntonname, "/", 932 sizeof(sp->f_mntonname)); 933 } else { 934 if (root) 935 (void)strlcpy(sp->f_mntonname, "/", 936 sizeof(sp->f_mntonname)); 937 else 938 error = EPERM; 939 } 940 PNBUF_PUT(path); 941 } 942 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 943 return error; 944 } 945 946 /* 947 * Get filesystem statistics by path. 948 */ 949 int 950 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 951 { 952 struct mount *mp; 953 int error; 954 struct nameidata nd; 955 956 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 957 if ((error = namei(&nd)) != 0) 958 return error; 959 mp = nd.ni_vp->v_mount; 960 error = dostatvfs(mp, sb, l, flags, 1); 961 vrele(nd.ni_vp); 962 return error; 963 } 964 965 /* ARGSUSED */ 966 int 967 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 968 { 969 /* { 970 syscallarg(const char *) path; 971 syscallarg(struct statvfs *) buf; 972 syscallarg(int) flags; 973 } */ 974 struct statvfs *sb; 975 int error; 976 977 sb = STATVFSBUF_GET(); 978 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 979 if (error == 0) 980 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 981 STATVFSBUF_PUT(sb); 982 return error; 983 } 984 985 /* 986 * Get filesystem statistics by fd. 987 */ 988 int 989 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 990 { 991 file_t *fp; 992 struct mount *mp; 993 int error; 994 995 /* fd_getvnode() will use the descriptor for us */ 996 if ((error = fd_getvnode(fd, &fp)) != 0) 997 return (error); 998 mp = ((struct vnode *)fp->f_data)->v_mount; 999 error = dostatvfs(mp, sb, curlwp, flags, 1); 1000 fd_putfile(fd); 1001 return error; 1002 } 1003 1004 /* ARGSUSED */ 1005 int 1006 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1007 { 1008 /* { 1009 syscallarg(int) fd; 1010 syscallarg(struct statvfs *) buf; 1011 syscallarg(int) flags; 1012 } */ 1013 struct statvfs *sb; 1014 int error; 1015 1016 sb = STATVFSBUF_GET(); 1017 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1018 if (error == 0) 1019 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1020 STATVFSBUF_PUT(sb); 1021 return error; 1022 } 1023 1024 1025 /* 1026 * Get statistics on all filesystems. 1027 */ 1028 int 1029 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1030 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1031 register_t *retval) 1032 { 1033 int root = 0; 1034 struct proc *p = l->l_proc; 1035 struct mount *mp, *nmp; 1036 struct statvfs *sb; 1037 size_t count, maxcount; 1038 int error = 0; 1039 1040 sb = STATVFSBUF_GET(); 1041 maxcount = bufsize / entry_sz; 1042 mutex_enter(&mountlist_lock); 1043 count = 0; 1044 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1045 mp = nmp) { 1046 if (vfs_busy(mp, &nmp)) { 1047 continue; 1048 } 1049 if (sfsp && count < maxcount) { 1050 error = dostatvfs(mp, sb, l, flags, 0); 1051 if (error) { 1052 vfs_unbusy(mp, false, &nmp); 1053 error = 0; 1054 continue; 1055 } 1056 error = copyfn(sb, sfsp, entry_sz); 1057 if (error) { 1058 vfs_unbusy(mp, false, NULL); 1059 goto out; 1060 } 1061 sfsp = (char *)sfsp + entry_sz; 1062 root |= strcmp(sb->f_mntonname, "/") == 0; 1063 } 1064 count++; 1065 vfs_unbusy(mp, false, &nmp); 1066 } 1067 mutex_exit(&mountlist_lock); 1068 1069 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1070 /* 1071 * fake a root entry 1072 */ 1073 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1074 sb, l, flags, 1); 1075 if (error != 0) 1076 goto out; 1077 if (sfsp) { 1078 error = copyfn(sb, sfsp, entry_sz); 1079 if (error != 0) 1080 goto out; 1081 } 1082 count++; 1083 } 1084 if (sfsp && count > maxcount) 1085 *retval = maxcount; 1086 else 1087 *retval = count; 1088 out: 1089 STATVFSBUF_PUT(sb); 1090 return error; 1091 } 1092 1093 int 1094 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1095 { 1096 /* { 1097 syscallarg(struct statvfs *) buf; 1098 syscallarg(size_t) bufsize; 1099 syscallarg(int) flags; 1100 } */ 1101 1102 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1103 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1104 } 1105 1106 /* 1107 * Change current working directory to a given file descriptor. 1108 */ 1109 /* ARGSUSED */ 1110 int 1111 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1112 { 1113 /* { 1114 syscallarg(int) fd; 1115 } */ 1116 struct proc *p = l->l_proc; 1117 struct cwdinfo *cwdi; 1118 struct vnode *vp, *tdp; 1119 struct mount *mp; 1120 file_t *fp; 1121 int error, fd; 1122 1123 /* fd_getvnode() will use the descriptor for us */ 1124 fd = SCARG(uap, fd); 1125 if ((error = fd_getvnode(fd, &fp)) != 0) 1126 return (error); 1127 vp = fp->f_data; 1128 1129 VREF(vp); 1130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1131 if (vp->v_type != VDIR) 1132 error = ENOTDIR; 1133 else 1134 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1135 if (error) { 1136 vput(vp); 1137 goto out; 1138 } 1139 while ((mp = vp->v_mountedhere) != NULL) { 1140 error = vfs_busy(mp, NULL); 1141 vput(vp); 1142 if (error != 0) 1143 goto out; 1144 error = VFS_ROOT(mp, &tdp); 1145 vfs_unbusy(mp, false, NULL); 1146 if (error) 1147 goto out; 1148 vp = tdp; 1149 } 1150 VOP_UNLOCK(vp, 0); 1151 1152 /* 1153 * Disallow changing to a directory not under the process's 1154 * current root directory (if there is one). 1155 */ 1156 cwdi = p->p_cwdi; 1157 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1158 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1159 vrele(vp); 1160 error = EPERM; /* operation not permitted */ 1161 } else { 1162 vrele(cwdi->cwdi_cdir); 1163 cwdi->cwdi_cdir = vp; 1164 } 1165 rw_exit(&cwdi->cwdi_lock); 1166 1167 out: 1168 fd_putfile(fd); 1169 return (error); 1170 } 1171 1172 /* 1173 * Change this process's notion of the root directory to a given file 1174 * descriptor. 1175 */ 1176 int 1177 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1178 { 1179 struct proc *p = l->l_proc; 1180 struct cwdinfo *cwdi; 1181 struct vnode *vp; 1182 file_t *fp; 1183 int error, fd = SCARG(uap, fd); 1184 1185 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1186 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1187 return error; 1188 /* fd_getvnode() will use the descriptor for us */ 1189 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1190 return error; 1191 vp = fp->f_data; 1192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1193 if (vp->v_type != VDIR) 1194 error = ENOTDIR; 1195 else 1196 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1197 VOP_UNLOCK(vp, 0); 1198 if (error) 1199 goto out; 1200 VREF(vp); 1201 1202 /* 1203 * Prevent escaping from chroot by putting the root under 1204 * the working directory. Silently chdir to / if we aren't 1205 * already there. 1206 */ 1207 cwdi = p->p_cwdi; 1208 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1209 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1210 /* 1211 * XXX would be more failsafe to change directory to a 1212 * deadfs node here instead 1213 */ 1214 vrele(cwdi->cwdi_cdir); 1215 VREF(vp); 1216 cwdi->cwdi_cdir = vp; 1217 } 1218 1219 if (cwdi->cwdi_rdir != NULL) 1220 vrele(cwdi->cwdi_rdir); 1221 cwdi->cwdi_rdir = vp; 1222 rw_exit(&cwdi->cwdi_lock); 1223 1224 out: 1225 fd_putfile(fd); 1226 return (error); 1227 } 1228 1229 /* 1230 * Change current working directory (``.''). 1231 */ 1232 /* ARGSUSED */ 1233 int 1234 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1235 { 1236 /* { 1237 syscallarg(const char *) path; 1238 } */ 1239 struct proc *p = l->l_proc; 1240 struct cwdinfo *cwdi; 1241 int error; 1242 struct nameidata nd; 1243 1244 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1245 SCARG(uap, path)); 1246 if ((error = change_dir(&nd, l)) != 0) 1247 return (error); 1248 cwdi = p->p_cwdi; 1249 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1250 vrele(cwdi->cwdi_cdir); 1251 cwdi->cwdi_cdir = nd.ni_vp; 1252 rw_exit(&cwdi->cwdi_lock); 1253 return (0); 1254 } 1255 1256 /* 1257 * Change notion of root (``/'') directory. 1258 */ 1259 /* ARGSUSED */ 1260 int 1261 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1262 { 1263 /* { 1264 syscallarg(const char *) path; 1265 } */ 1266 struct proc *p = l->l_proc; 1267 struct cwdinfo *cwdi; 1268 struct vnode *vp; 1269 int error; 1270 struct nameidata nd; 1271 1272 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1273 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1274 return (error); 1275 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1276 SCARG(uap, path)); 1277 if ((error = change_dir(&nd, l)) != 0) 1278 return (error); 1279 1280 cwdi = p->p_cwdi; 1281 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1282 if (cwdi->cwdi_rdir != NULL) 1283 vrele(cwdi->cwdi_rdir); 1284 vp = nd.ni_vp; 1285 cwdi->cwdi_rdir = vp; 1286 1287 /* 1288 * Prevent escaping from chroot by putting the root under 1289 * the working directory. Silently chdir to / if we aren't 1290 * already there. 1291 */ 1292 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1293 /* 1294 * XXX would be more failsafe to change directory to a 1295 * deadfs node here instead 1296 */ 1297 vrele(cwdi->cwdi_cdir); 1298 VREF(vp); 1299 cwdi->cwdi_cdir = vp; 1300 } 1301 rw_exit(&cwdi->cwdi_lock); 1302 1303 return (0); 1304 } 1305 1306 /* 1307 * Common routine for chroot and chdir. 1308 */ 1309 static int 1310 change_dir(struct nameidata *ndp, struct lwp *l) 1311 { 1312 struct vnode *vp; 1313 int error; 1314 1315 if ((error = namei(ndp)) != 0) 1316 return (error); 1317 vp = ndp->ni_vp; 1318 if (vp->v_type != VDIR) 1319 error = ENOTDIR; 1320 else 1321 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1322 1323 if (error) 1324 vput(vp); 1325 else 1326 VOP_UNLOCK(vp, 0); 1327 return (error); 1328 } 1329 1330 /* 1331 * Check permissions, allocate an open file structure, 1332 * and call the device open routine if any. 1333 */ 1334 int 1335 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1336 { 1337 /* { 1338 syscallarg(const char *) path; 1339 syscallarg(int) flags; 1340 syscallarg(int) mode; 1341 } */ 1342 struct proc *p = l->l_proc; 1343 struct cwdinfo *cwdi = p->p_cwdi; 1344 file_t *fp; 1345 struct vnode *vp; 1346 int flags, cmode; 1347 int type, indx, error; 1348 struct flock lf; 1349 struct nameidata nd; 1350 1351 flags = FFLAGS(SCARG(uap, flags)); 1352 if ((flags & (FREAD | FWRITE)) == 0) 1353 return (EINVAL); 1354 if ((error = fd_allocfile(&fp, &indx)) != 0) 1355 return (error); 1356 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1357 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1358 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1359 SCARG(uap, path)); 1360 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1361 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1362 fd_abort(p, fp, indx); 1363 if ((error == EDUPFD || error == EMOVEFD) && 1364 l->l_dupfd >= 0 && /* XXX from fdopen */ 1365 (error = 1366 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1367 *retval = indx; 1368 return (0); 1369 } 1370 if (error == ERESTART) 1371 error = EINTR; 1372 return (error); 1373 } 1374 1375 l->l_dupfd = 0; 1376 vp = nd.ni_vp; 1377 fp->f_flag = flags & FMASK; 1378 fp->f_type = DTYPE_VNODE; 1379 fp->f_ops = &vnops; 1380 fp->f_data = vp; 1381 if (flags & (O_EXLOCK | O_SHLOCK)) { 1382 lf.l_whence = SEEK_SET; 1383 lf.l_start = 0; 1384 lf.l_len = 0; 1385 if (flags & O_EXLOCK) 1386 lf.l_type = F_WRLCK; 1387 else 1388 lf.l_type = F_RDLCK; 1389 type = F_FLOCK; 1390 if ((flags & FNONBLOCK) == 0) 1391 type |= F_WAIT; 1392 VOP_UNLOCK(vp, 0); 1393 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1394 if (error) { 1395 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1396 fd_abort(p, fp, indx); 1397 return (error); 1398 } 1399 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1400 atomic_or_uint(&fp->f_flag, FHASLOCK); 1401 } 1402 VOP_UNLOCK(vp, 0); 1403 *retval = indx; 1404 fd_affix(p, fp, indx); 1405 return (0); 1406 } 1407 1408 static void 1409 vfs__fhfree(fhandle_t *fhp) 1410 { 1411 size_t fhsize; 1412 1413 if (fhp == NULL) { 1414 return; 1415 } 1416 fhsize = FHANDLE_SIZE(fhp); 1417 kmem_free(fhp, fhsize); 1418 } 1419 1420 /* 1421 * vfs_composefh: compose a filehandle. 1422 */ 1423 1424 int 1425 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1426 { 1427 struct mount *mp; 1428 struct fid *fidp; 1429 int error; 1430 size_t needfhsize; 1431 size_t fidsize; 1432 1433 mp = vp->v_mount; 1434 fidp = NULL; 1435 if (*fh_size < FHANDLE_SIZE_MIN) { 1436 fidsize = 0; 1437 } else { 1438 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1439 if (fhp != NULL) { 1440 memset(fhp, 0, *fh_size); 1441 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1442 fidp = &fhp->fh_fid; 1443 } 1444 } 1445 error = VFS_VPTOFH(vp, fidp, &fidsize); 1446 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1447 if (error == 0 && *fh_size < needfhsize) { 1448 error = E2BIG; 1449 } 1450 *fh_size = needfhsize; 1451 return error; 1452 } 1453 1454 int 1455 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1456 { 1457 struct mount *mp; 1458 fhandle_t *fhp; 1459 size_t fhsize; 1460 size_t fidsize; 1461 int error; 1462 1463 *fhpp = NULL; 1464 mp = vp->v_mount; 1465 fidsize = 0; 1466 error = VFS_VPTOFH(vp, NULL, &fidsize); 1467 KASSERT(error != 0); 1468 if (error != E2BIG) { 1469 goto out; 1470 } 1471 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1472 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1473 if (fhp == NULL) { 1474 error = ENOMEM; 1475 goto out; 1476 } 1477 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1478 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1479 if (error == 0) { 1480 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1481 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1482 *fhpp = fhp; 1483 } else { 1484 kmem_free(fhp, fhsize); 1485 } 1486 out: 1487 return error; 1488 } 1489 1490 void 1491 vfs_composefh_free(fhandle_t *fhp) 1492 { 1493 1494 vfs__fhfree(fhp); 1495 } 1496 1497 /* 1498 * vfs_fhtovp: lookup a vnode by a filehandle. 1499 */ 1500 1501 int 1502 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1503 { 1504 struct mount *mp; 1505 int error; 1506 1507 *vpp = NULL; 1508 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1509 if (mp == NULL) { 1510 error = ESTALE; 1511 goto out; 1512 } 1513 if (mp->mnt_op->vfs_fhtovp == NULL) { 1514 error = EOPNOTSUPP; 1515 goto out; 1516 } 1517 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1518 out: 1519 return error; 1520 } 1521 1522 /* 1523 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1524 * the needed size. 1525 */ 1526 1527 int 1528 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1529 { 1530 fhandle_t *fhp; 1531 int error; 1532 1533 *fhpp = NULL; 1534 if (fhsize > FHANDLE_SIZE_MAX) { 1535 return EINVAL; 1536 } 1537 if (fhsize < FHANDLE_SIZE_MIN) { 1538 return EINVAL; 1539 } 1540 again: 1541 fhp = kmem_alloc(fhsize, KM_SLEEP); 1542 if (fhp == NULL) { 1543 return ENOMEM; 1544 } 1545 error = copyin(ufhp, fhp, fhsize); 1546 if (error == 0) { 1547 /* XXX this check shouldn't be here */ 1548 if (FHANDLE_SIZE(fhp) == fhsize) { 1549 *fhpp = fhp; 1550 return 0; 1551 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1552 /* 1553 * a kludge for nfsv2 padded handles. 1554 */ 1555 size_t sz; 1556 1557 sz = FHANDLE_SIZE(fhp); 1558 kmem_free(fhp, fhsize); 1559 fhsize = sz; 1560 goto again; 1561 } else { 1562 /* 1563 * userland told us wrong size. 1564 */ 1565 error = EINVAL; 1566 } 1567 } 1568 kmem_free(fhp, fhsize); 1569 return error; 1570 } 1571 1572 void 1573 vfs_copyinfh_free(fhandle_t *fhp) 1574 { 1575 1576 vfs__fhfree(fhp); 1577 } 1578 1579 /* 1580 * Get file handle system call 1581 */ 1582 int 1583 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1584 { 1585 /* { 1586 syscallarg(char *) fname; 1587 syscallarg(fhandle_t *) fhp; 1588 syscallarg(size_t *) fh_size; 1589 } */ 1590 struct vnode *vp; 1591 fhandle_t *fh; 1592 int error; 1593 struct nameidata nd; 1594 size_t sz; 1595 size_t usz; 1596 1597 /* 1598 * Must be super user 1599 */ 1600 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1601 0, NULL, NULL, NULL); 1602 if (error) 1603 return (error); 1604 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1605 SCARG(uap, fname)); 1606 error = namei(&nd); 1607 if (error) 1608 return (error); 1609 vp = nd.ni_vp; 1610 error = vfs_composefh_alloc(vp, &fh); 1611 vput(vp); 1612 if (error != 0) { 1613 goto out; 1614 } 1615 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1616 if (error != 0) { 1617 goto out; 1618 } 1619 sz = FHANDLE_SIZE(fh); 1620 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1621 if (error != 0) { 1622 goto out; 1623 } 1624 if (usz >= sz) { 1625 error = copyout(fh, SCARG(uap, fhp), sz); 1626 } else { 1627 error = E2BIG; 1628 } 1629 out: 1630 vfs_composefh_free(fh); 1631 return (error); 1632 } 1633 1634 /* 1635 * Open a file given a file handle. 1636 * 1637 * Check permissions, allocate an open file structure, 1638 * and call the device open routine if any. 1639 */ 1640 1641 int 1642 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1643 register_t *retval) 1644 { 1645 file_t *fp; 1646 struct vnode *vp = NULL; 1647 kauth_cred_t cred = l->l_cred; 1648 file_t *nfp; 1649 int type, indx, error=0; 1650 struct flock lf; 1651 struct vattr va; 1652 fhandle_t *fh; 1653 int flags; 1654 proc_t *p; 1655 1656 p = curproc; 1657 1658 /* 1659 * Must be super user 1660 */ 1661 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1662 0, NULL, NULL, NULL))) 1663 return (error); 1664 1665 flags = FFLAGS(oflags); 1666 if ((flags & (FREAD | FWRITE)) == 0) 1667 return (EINVAL); 1668 if ((flags & O_CREAT)) 1669 return (EINVAL); 1670 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1671 return (error); 1672 fp = nfp; 1673 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1674 if (error != 0) { 1675 goto bad; 1676 } 1677 error = vfs_fhtovp(fh, &vp); 1678 if (error != 0) { 1679 goto bad; 1680 } 1681 1682 /* Now do an effective vn_open */ 1683 1684 if (vp->v_type == VSOCK) { 1685 error = EOPNOTSUPP; 1686 goto bad; 1687 } 1688 error = vn_openchk(vp, cred, flags); 1689 if (error != 0) 1690 goto bad; 1691 if (flags & O_TRUNC) { 1692 VOP_UNLOCK(vp, 0); /* XXX */ 1693 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1694 VATTR_NULL(&va); 1695 va.va_size = 0; 1696 error = VOP_SETATTR(vp, &va, cred); 1697 if (error) 1698 goto bad; 1699 } 1700 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1701 goto bad; 1702 if (flags & FWRITE) { 1703 mutex_enter(&vp->v_interlock); 1704 vp->v_writecount++; 1705 mutex_exit(&vp->v_interlock); 1706 } 1707 1708 /* done with modified vn_open, now finish what sys_open does. */ 1709 1710 fp->f_flag = flags & FMASK; 1711 fp->f_type = DTYPE_VNODE; 1712 fp->f_ops = &vnops; 1713 fp->f_data = vp; 1714 if (flags & (O_EXLOCK | O_SHLOCK)) { 1715 lf.l_whence = SEEK_SET; 1716 lf.l_start = 0; 1717 lf.l_len = 0; 1718 if (flags & O_EXLOCK) 1719 lf.l_type = F_WRLCK; 1720 else 1721 lf.l_type = F_RDLCK; 1722 type = F_FLOCK; 1723 if ((flags & FNONBLOCK) == 0) 1724 type |= F_WAIT; 1725 VOP_UNLOCK(vp, 0); 1726 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1727 if (error) { 1728 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1729 fd_abort(p, fp, indx); 1730 return (error); 1731 } 1732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1733 atomic_or_uint(&fp->f_flag, FHASLOCK); 1734 } 1735 VOP_UNLOCK(vp, 0); 1736 *retval = indx; 1737 fd_affix(p, fp, indx); 1738 vfs_copyinfh_free(fh); 1739 return (0); 1740 1741 bad: 1742 fd_abort(p, fp, indx); 1743 if (vp != NULL) 1744 vput(vp); 1745 vfs_copyinfh_free(fh); 1746 return (error); 1747 } 1748 1749 int 1750 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1751 { 1752 /* { 1753 syscallarg(const void *) fhp; 1754 syscallarg(size_t) fh_size; 1755 syscallarg(int) flags; 1756 } */ 1757 1758 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1759 SCARG(uap, flags), retval); 1760 } 1761 1762 int 1763 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1764 { 1765 int error; 1766 fhandle_t *fh; 1767 struct vnode *vp; 1768 1769 /* 1770 * Must be super user 1771 */ 1772 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1773 0, NULL, NULL, NULL))) 1774 return (error); 1775 1776 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1777 if (error != 0) 1778 return error; 1779 1780 error = vfs_fhtovp(fh, &vp); 1781 vfs_copyinfh_free(fh); 1782 if (error != 0) 1783 return error; 1784 1785 error = vn_stat(vp, sb); 1786 vput(vp); 1787 return error; 1788 } 1789 1790 1791 /* ARGSUSED */ 1792 int 1793 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval) 1794 { 1795 /* { 1796 syscallarg(const void *) fhp; 1797 syscallarg(size_t) fh_size; 1798 syscallarg(struct stat *) sb; 1799 } */ 1800 struct stat sb; 1801 int error; 1802 1803 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1804 if (error) 1805 return error; 1806 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1807 } 1808 1809 int 1810 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1811 int flags) 1812 { 1813 fhandle_t *fh; 1814 struct mount *mp; 1815 struct vnode *vp; 1816 int error; 1817 1818 /* 1819 * Must be super user 1820 */ 1821 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1822 0, NULL, NULL, NULL))) 1823 return error; 1824 1825 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1826 if (error != 0) 1827 return error; 1828 1829 error = vfs_fhtovp(fh, &vp); 1830 vfs_copyinfh_free(fh); 1831 if (error != 0) 1832 return error; 1833 1834 mp = vp->v_mount; 1835 error = dostatvfs(mp, sb, l, flags, 1); 1836 vput(vp); 1837 return error; 1838 } 1839 1840 /* ARGSUSED */ 1841 int 1842 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1843 { 1844 /* { 1845 syscallarg(const void *) fhp; 1846 syscallarg(size_t) fh_size; 1847 syscallarg(struct statvfs *) buf; 1848 syscallarg(int) flags; 1849 } */ 1850 struct statvfs *sb = STATVFSBUF_GET(); 1851 int error; 1852 1853 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1854 SCARG(uap, flags)); 1855 if (error == 0) 1856 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1857 STATVFSBUF_PUT(sb); 1858 return error; 1859 } 1860 1861 /* 1862 * Create a special file. 1863 */ 1864 /* ARGSUSED */ 1865 int 1866 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval) 1867 { 1868 /* { 1869 syscallarg(const char *) path; 1870 syscallarg(int) mode; 1871 syscallarg(int) dev; 1872 } */ 1873 struct proc *p = l->l_proc; 1874 struct vnode *vp; 1875 struct vattr vattr; 1876 int error, optype; 1877 struct nameidata nd; 1878 char *path; 1879 const char *cpath; 1880 enum uio_seg seg = UIO_USERSPACE; 1881 1882 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1883 0, NULL, NULL, NULL)) != 0) 1884 return (error); 1885 1886 optype = VOP_MKNOD_DESCOFFSET; 1887 1888 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1889 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1890 1891 if ((error = namei(&nd)) != 0) 1892 goto out; 1893 vp = nd.ni_vp; 1894 if (vp != NULL) 1895 error = EEXIST; 1896 else { 1897 VATTR_NULL(&vattr); 1898 /* We will read cwdi->cwdi_cmask unlocked. */ 1899 vattr.va_mode = 1900 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1901 vattr.va_rdev = SCARG(uap, dev); 1902 1903 switch (SCARG(uap, mode) & S_IFMT) { 1904 case S_IFMT: /* used by badsect to flag bad sectors */ 1905 vattr.va_type = VBAD; 1906 break; 1907 case S_IFCHR: 1908 vattr.va_type = VCHR; 1909 break; 1910 case S_IFBLK: 1911 vattr.va_type = VBLK; 1912 break; 1913 case S_IFWHT: 1914 optype = VOP_WHITEOUT_DESCOFFSET; 1915 break; 1916 case S_IFREG: 1917 #if NVERIEXEC > 0 1918 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1919 O_CREAT); 1920 #endif /* NVERIEXEC > 0 */ 1921 vattr.va_type = VREG; 1922 vattr.va_rdev = VNOVAL; 1923 optype = VOP_CREATE_DESCOFFSET; 1924 break; 1925 default: 1926 error = EINVAL; 1927 break; 1928 } 1929 } 1930 if (!error) { 1931 switch (optype) { 1932 case VOP_WHITEOUT_DESCOFFSET: 1933 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1934 if (error) 1935 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1936 vput(nd.ni_dvp); 1937 break; 1938 1939 case VOP_MKNOD_DESCOFFSET: 1940 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1941 &nd.ni_cnd, &vattr); 1942 if (error == 0) 1943 vput(nd.ni_vp); 1944 break; 1945 1946 case VOP_CREATE_DESCOFFSET: 1947 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1948 &nd.ni_cnd, &vattr); 1949 if (error == 0) 1950 vput(nd.ni_vp); 1951 break; 1952 } 1953 } else { 1954 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1955 if (nd.ni_dvp == vp) 1956 vrele(nd.ni_dvp); 1957 else 1958 vput(nd.ni_dvp); 1959 if (vp) 1960 vrele(vp); 1961 } 1962 out: 1963 VERIEXEC_PATH_PUT(path); 1964 return (error); 1965 } 1966 1967 /* 1968 * Create a named pipe. 1969 */ 1970 /* ARGSUSED */ 1971 int 1972 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1973 { 1974 /* { 1975 syscallarg(const char *) path; 1976 syscallarg(int) mode; 1977 } */ 1978 struct proc *p = l->l_proc; 1979 struct vattr vattr; 1980 int error; 1981 struct nameidata nd; 1982 1983 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1984 SCARG(uap, path)); 1985 if ((error = namei(&nd)) != 0) 1986 return (error); 1987 if (nd.ni_vp != NULL) { 1988 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1989 if (nd.ni_dvp == nd.ni_vp) 1990 vrele(nd.ni_dvp); 1991 else 1992 vput(nd.ni_dvp); 1993 vrele(nd.ni_vp); 1994 return (EEXIST); 1995 } 1996 VATTR_NULL(&vattr); 1997 vattr.va_type = VFIFO; 1998 /* We will read cwdi->cwdi_cmask unlocked. */ 1999 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2000 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2001 if (error == 0) 2002 vput(nd.ni_vp); 2003 return (error); 2004 } 2005 2006 /* 2007 * Make a hard file link. 2008 */ 2009 /* ARGSUSED */ 2010 int 2011 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2012 { 2013 /* { 2014 syscallarg(const char *) path; 2015 syscallarg(const char *) link; 2016 } */ 2017 struct vnode *vp; 2018 struct nameidata nd; 2019 int error; 2020 2021 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2022 SCARG(uap, path)); 2023 if ((error = namei(&nd)) != 0) 2024 return (error); 2025 vp = nd.ni_vp; 2026 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2027 SCARG(uap, link)); 2028 if ((error = namei(&nd)) != 0) 2029 goto out; 2030 if (nd.ni_vp) { 2031 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2032 if (nd.ni_dvp == nd.ni_vp) 2033 vrele(nd.ni_dvp); 2034 else 2035 vput(nd.ni_dvp); 2036 vrele(nd.ni_vp); 2037 error = EEXIST; 2038 goto out; 2039 } 2040 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2041 out: 2042 vrele(vp); 2043 return (error); 2044 } 2045 2046 /* 2047 * Make a symbolic link. 2048 */ 2049 /* ARGSUSED */ 2050 int 2051 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2052 { 2053 /* { 2054 syscallarg(const char *) path; 2055 syscallarg(const char *) link; 2056 } */ 2057 struct proc *p = l->l_proc; 2058 struct vattr vattr; 2059 char *path; 2060 int error; 2061 struct nameidata nd; 2062 2063 path = PNBUF_GET(); 2064 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2065 if (error) 2066 goto out; 2067 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2068 SCARG(uap, link)); 2069 if ((error = namei(&nd)) != 0) 2070 goto out; 2071 if (nd.ni_vp) { 2072 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2073 if (nd.ni_dvp == nd.ni_vp) 2074 vrele(nd.ni_dvp); 2075 else 2076 vput(nd.ni_dvp); 2077 vrele(nd.ni_vp); 2078 error = EEXIST; 2079 goto out; 2080 } 2081 VATTR_NULL(&vattr); 2082 vattr.va_type = VLNK; 2083 /* We will read cwdi->cwdi_cmask unlocked. */ 2084 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2085 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2086 if (error == 0) 2087 vput(nd.ni_vp); 2088 out: 2089 PNBUF_PUT(path); 2090 return (error); 2091 } 2092 2093 /* 2094 * Delete a whiteout from the filesystem. 2095 */ 2096 /* ARGSUSED */ 2097 int 2098 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2099 { 2100 /* { 2101 syscallarg(const char *) path; 2102 } */ 2103 int error; 2104 struct nameidata nd; 2105 2106 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2107 UIO_USERSPACE, SCARG(uap, path)); 2108 error = namei(&nd); 2109 if (error) 2110 return (error); 2111 2112 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2114 if (nd.ni_dvp == nd.ni_vp) 2115 vrele(nd.ni_dvp); 2116 else 2117 vput(nd.ni_dvp); 2118 if (nd.ni_vp) 2119 vrele(nd.ni_vp); 2120 return (EEXIST); 2121 } 2122 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2123 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2124 vput(nd.ni_dvp); 2125 return (error); 2126 } 2127 2128 /* 2129 * Delete a name from the filesystem. 2130 */ 2131 /* ARGSUSED */ 2132 int 2133 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2134 { 2135 /* { 2136 syscallarg(const char *) path; 2137 } */ 2138 2139 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2140 } 2141 2142 int 2143 do_sys_unlink(const char *arg, enum uio_seg seg) 2144 { 2145 struct vnode *vp; 2146 int error; 2147 struct nameidata nd; 2148 kauth_cred_t cred; 2149 char *path; 2150 const char *cpath; 2151 2152 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2153 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2154 2155 if ((error = namei(&nd)) != 0) 2156 goto out; 2157 vp = nd.ni_vp; 2158 2159 /* 2160 * The root of a mounted filesystem cannot be deleted. 2161 */ 2162 if (vp->v_vflag & VV_ROOT) { 2163 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2164 if (nd.ni_dvp == vp) 2165 vrele(nd.ni_dvp); 2166 else 2167 vput(nd.ni_dvp); 2168 vput(vp); 2169 error = EBUSY; 2170 goto out; 2171 } 2172 2173 #if NVERIEXEC > 0 2174 /* Handle remove requests for veriexec entries. */ 2175 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2176 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2177 if (nd.ni_dvp == vp) 2178 vrele(nd.ni_dvp); 2179 else 2180 vput(nd.ni_dvp); 2181 vput(vp); 2182 goto out; 2183 } 2184 #endif /* NVERIEXEC > 0 */ 2185 2186 cred = kauth_cred_get(); 2187 #ifdef FILEASSOC 2188 (void)fileassoc_file_delete(vp); 2189 #endif /* FILEASSOC */ 2190 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2191 out: 2192 VERIEXEC_PATH_PUT(path); 2193 return (error); 2194 } 2195 2196 /* 2197 * Reposition read/write file offset. 2198 */ 2199 int 2200 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2201 { 2202 /* { 2203 syscallarg(int) fd; 2204 syscallarg(int) pad; 2205 syscallarg(off_t) offset; 2206 syscallarg(int) whence; 2207 } */ 2208 kauth_cred_t cred = l->l_cred; 2209 file_t *fp; 2210 struct vnode *vp; 2211 struct vattr vattr; 2212 off_t newoff; 2213 int error, fd; 2214 2215 fd = SCARG(uap, fd); 2216 2217 if ((fp = fd_getfile(fd)) == NULL) 2218 return (EBADF); 2219 2220 vp = fp->f_data; 2221 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2222 error = ESPIPE; 2223 goto out; 2224 } 2225 2226 switch (SCARG(uap, whence)) { 2227 case SEEK_CUR: 2228 newoff = fp->f_offset + SCARG(uap, offset); 2229 break; 2230 case SEEK_END: 2231 error = VOP_GETATTR(vp, &vattr, cred); 2232 if (error) { 2233 goto out; 2234 } 2235 newoff = SCARG(uap, offset) + vattr.va_size; 2236 break; 2237 case SEEK_SET: 2238 newoff = SCARG(uap, offset); 2239 break; 2240 default: 2241 error = EINVAL; 2242 goto out; 2243 } 2244 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2245 *(off_t *)retval = fp->f_offset = newoff; 2246 } 2247 out: 2248 fd_putfile(fd); 2249 return (error); 2250 } 2251 2252 /* 2253 * Positional read system call. 2254 */ 2255 int 2256 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2257 { 2258 /* { 2259 syscallarg(int) fd; 2260 syscallarg(void *) buf; 2261 syscallarg(size_t) nbyte; 2262 syscallarg(off_t) offset; 2263 } */ 2264 file_t *fp; 2265 struct vnode *vp; 2266 off_t offset; 2267 int error, fd = SCARG(uap, fd); 2268 2269 if ((fp = fd_getfile(fd)) == NULL) 2270 return (EBADF); 2271 2272 if ((fp->f_flag & FREAD) == 0) { 2273 fd_putfile(fd); 2274 return (EBADF); 2275 } 2276 2277 vp = fp->f_data; 2278 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2279 error = ESPIPE; 2280 goto out; 2281 } 2282 2283 offset = SCARG(uap, offset); 2284 2285 /* 2286 * XXX This works because no file systems actually 2287 * XXX take any action on the seek operation. 2288 */ 2289 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2290 goto out; 2291 2292 /* dofileread() will unuse the descriptor for us */ 2293 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2294 &offset, 0, retval)); 2295 2296 out: 2297 fd_putfile(fd); 2298 return (error); 2299 } 2300 2301 /* 2302 * Positional scatter read system call. 2303 */ 2304 int 2305 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2306 { 2307 /* { 2308 syscallarg(int) fd; 2309 syscallarg(const struct iovec *) iovp; 2310 syscallarg(int) iovcnt; 2311 syscallarg(off_t) offset; 2312 } */ 2313 off_t offset = SCARG(uap, offset); 2314 2315 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2316 SCARG(uap, iovcnt), &offset, 0, retval); 2317 } 2318 2319 /* 2320 * Positional write system call. 2321 */ 2322 int 2323 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2324 { 2325 /* { 2326 syscallarg(int) fd; 2327 syscallarg(const void *) buf; 2328 syscallarg(size_t) nbyte; 2329 syscallarg(off_t) offset; 2330 } */ 2331 file_t *fp; 2332 struct vnode *vp; 2333 off_t offset; 2334 int error, fd = SCARG(uap, fd); 2335 2336 if ((fp = fd_getfile(fd)) == NULL) 2337 return (EBADF); 2338 2339 if ((fp->f_flag & FWRITE) == 0) { 2340 fd_putfile(fd); 2341 return (EBADF); 2342 } 2343 2344 vp = fp->f_data; 2345 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2346 error = ESPIPE; 2347 goto out; 2348 } 2349 2350 offset = SCARG(uap, offset); 2351 2352 /* 2353 * XXX This works because no file systems actually 2354 * XXX take any action on the seek operation. 2355 */ 2356 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2357 goto out; 2358 2359 /* dofilewrite() will unuse the descriptor for us */ 2360 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2361 &offset, 0, retval)); 2362 2363 out: 2364 fd_putfile(fd); 2365 return (error); 2366 } 2367 2368 /* 2369 * Positional gather write system call. 2370 */ 2371 int 2372 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2373 { 2374 /* { 2375 syscallarg(int) fd; 2376 syscallarg(const struct iovec *) iovp; 2377 syscallarg(int) iovcnt; 2378 syscallarg(off_t) offset; 2379 } */ 2380 off_t offset = SCARG(uap, offset); 2381 2382 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2383 SCARG(uap, iovcnt), &offset, 0, retval); 2384 } 2385 2386 /* 2387 * Check access permissions. 2388 */ 2389 int 2390 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2391 { 2392 /* { 2393 syscallarg(const char *) path; 2394 syscallarg(int) flags; 2395 } */ 2396 kauth_cred_t cred; 2397 struct vnode *vp; 2398 int error, flags; 2399 struct nameidata nd; 2400 2401 cred = kauth_cred_dup(l->l_cred); 2402 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2403 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2404 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2405 SCARG(uap, path)); 2406 /* Override default credentials */ 2407 nd.ni_cnd.cn_cred = cred; 2408 if ((error = namei(&nd)) != 0) 2409 goto out; 2410 vp = nd.ni_vp; 2411 2412 /* Flags == 0 means only check for existence. */ 2413 if (SCARG(uap, flags)) { 2414 flags = 0; 2415 if (SCARG(uap, flags) & R_OK) 2416 flags |= VREAD; 2417 if (SCARG(uap, flags) & W_OK) 2418 flags |= VWRITE; 2419 if (SCARG(uap, flags) & X_OK) 2420 flags |= VEXEC; 2421 2422 error = VOP_ACCESS(vp, flags, cred); 2423 if (!error && (flags & VWRITE)) 2424 error = vn_writechk(vp); 2425 } 2426 vput(vp); 2427 out: 2428 kauth_cred_free(cred); 2429 return (error); 2430 } 2431 2432 /* 2433 * Common code for all sys_stat functions, including compat versions. 2434 */ 2435 int 2436 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2437 { 2438 int error; 2439 struct nameidata nd; 2440 2441 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2442 UIO_USERSPACE, path); 2443 error = namei(&nd); 2444 if (error != 0) 2445 return error; 2446 error = vn_stat(nd.ni_vp, sb); 2447 vput(nd.ni_vp); 2448 return error; 2449 } 2450 2451 /* 2452 * Get file status; this version follows links. 2453 */ 2454 /* ARGSUSED */ 2455 int 2456 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval) 2457 { 2458 /* { 2459 syscallarg(const char *) path; 2460 syscallarg(struct stat *) ub; 2461 } */ 2462 struct stat sb; 2463 int error; 2464 2465 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2466 if (error) 2467 return error; 2468 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2469 } 2470 2471 /* 2472 * Get file status; this version does not follow links. 2473 */ 2474 /* ARGSUSED */ 2475 int 2476 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval) 2477 { 2478 /* { 2479 syscallarg(const char *) path; 2480 syscallarg(struct stat *) ub; 2481 } */ 2482 struct stat sb; 2483 int error; 2484 2485 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2486 if (error) 2487 return error; 2488 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2489 } 2490 2491 /* 2492 * Get configurable pathname variables. 2493 */ 2494 /* ARGSUSED */ 2495 int 2496 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2497 { 2498 /* { 2499 syscallarg(const char *) path; 2500 syscallarg(int) name; 2501 } */ 2502 int error; 2503 struct nameidata nd; 2504 2505 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2506 SCARG(uap, path)); 2507 if ((error = namei(&nd)) != 0) 2508 return (error); 2509 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2510 vput(nd.ni_vp); 2511 return (error); 2512 } 2513 2514 /* 2515 * Return target name of a symbolic link. 2516 */ 2517 /* ARGSUSED */ 2518 int 2519 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2520 { 2521 /* { 2522 syscallarg(const char *) path; 2523 syscallarg(char *) buf; 2524 syscallarg(size_t) count; 2525 } */ 2526 struct vnode *vp; 2527 struct iovec aiov; 2528 struct uio auio; 2529 int error; 2530 struct nameidata nd; 2531 2532 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2533 SCARG(uap, path)); 2534 if ((error = namei(&nd)) != 0) 2535 return (error); 2536 vp = nd.ni_vp; 2537 if (vp->v_type != VLNK) 2538 error = EINVAL; 2539 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2540 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2541 aiov.iov_base = SCARG(uap, buf); 2542 aiov.iov_len = SCARG(uap, count); 2543 auio.uio_iov = &aiov; 2544 auio.uio_iovcnt = 1; 2545 auio.uio_offset = 0; 2546 auio.uio_rw = UIO_READ; 2547 KASSERT(l == curlwp); 2548 auio.uio_vmspace = l->l_proc->p_vmspace; 2549 auio.uio_resid = SCARG(uap, count); 2550 error = VOP_READLINK(vp, &auio, l->l_cred); 2551 } 2552 vput(vp); 2553 *retval = SCARG(uap, count) - auio.uio_resid; 2554 return (error); 2555 } 2556 2557 /* 2558 * Change flags of a file given a path name. 2559 */ 2560 /* ARGSUSED */ 2561 int 2562 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2563 { 2564 /* { 2565 syscallarg(const char *) path; 2566 syscallarg(u_long) flags; 2567 } */ 2568 struct vnode *vp; 2569 int error; 2570 struct nameidata nd; 2571 2572 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2573 SCARG(uap, path)); 2574 if ((error = namei(&nd)) != 0) 2575 return (error); 2576 vp = nd.ni_vp; 2577 error = change_flags(vp, SCARG(uap, flags), l); 2578 vput(vp); 2579 return (error); 2580 } 2581 2582 /* 2583 * Change flags of a file given a file descriptor. 2584 */ 2585 /* ARGSUSED */ 2586 int 2587 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2588 { 2589 /* { 2590 syscallarg(int) fd; 2591 syscallarg(u_long) flags; 2592 } */ 2593 struct vnode *vp; 2594 file_t *fp; 2595 int error; 2596 2597 /* fd_getvnode() will use the descriptor for us */ 2598 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2599 return (error); 2600 vp = fp->f_data; 2601 error = change_flags(vp, SCARG(uap, flags), l); 2602 VOP_UNLOCK(vp, 0); 2603 fd_putfile(SCARG(uap, fd)); 2604 return (error); 2605 } 2606 2607 /* 2608 * Change flags of a file given a path name; this version does 2609 * not follow links. 2610 */ 2611 int 2612 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2613 { 2614 /* { 2615 syscallarg(const char *) path; 2616 syscallarg(u_long) flags; 2617 } */ 2618 struct vnode *vp; 2619 int error; 2620 struct nameidata nd; 2621 2622 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2623 SCARG(uap, path)); 2624 if ((error = namei(&nd)) != 0) 2625 return (error); 2626 vp = nd.ni_vp; 2627 error = change_flags(vp, SCARG(uap, flags), l); 2628 vput(vp); 2629 return (error); 2630 } 2631 2632 /* 2633 * Common routine to change flags of a file. 2634 */ 2635 int 2636 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2637 { 2638 struct vattr vattr; 2639 int error; 2640 2641 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2642 /* 2643 * Non-superusers cannot change the flags on devices, even if they 2644 * own them. 2645 */ 2646 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2647 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2648 goto out; 2649 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2650 error = EINVAL; 2651 goto out; 2652 } 2653 } 2654 VATTR_NULL(&vattr); 2655 vattr.va_flags = flags; 2656 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2657 out: 2658 return (error); 2659 } 2660 2661 /* 2662 * Change mode of a file given path name; this version follows links. 2663 */ 2664 /* ARGSUSED */ 2665 int 2666 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2667 { 2668 /* { 2669 syscallarg(const char *) path; 2670 syscallarg(int) mode; 2671 } */ 2672 int error; 2673 struct nameidata nd; 2674 2675 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2676 SCARG(uap, path)); 2677 if ((error = namei(&nd)) != 0) 2678 return (error); 2679 2680 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2681 2682 vrele(nd.ni_vp); 2683 return (error); 2684 } 2685 2686 /* 2687 * Change mode of a file given a file descriptor. 2688 */ 2689 /* ARGSUSED */ 2690 int 2691 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2692 { 2693 /* { 2694 syscallarg(int) fd; 2695 syscallarg(int) mode; 2696 } */ 2697 file_t *fp; 2698 int error; 2699 2700 /* fd_getvnode() will use the descriptor for us */ 2701 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2702 return (error); 2703 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2704 fd_putfile(SCARG(uap, fd)); 2705 return (error); 2706 } 2707 2708 /* 2709 * Change mode of a file given path name; this version does not follow links. 2710 */ 2711 /* ARGSUSED */ 2712 int 2713 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2714 { 2715 /* { 2716 syscallarg(const char *) path; 2717 syscallarg(int) mode; 2718 } */ 2719 int error; 2720 struct nameidata nd; 2721 2722 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2723 SCARG(uap, path)); 2724 if ((error = namei(&nd)) != 0) 2725 return (error); 2726 2727 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2728 2729 vrele(nd.ni_vp); 2730 return (error); 2731 } 2732 2733 /* 2734 * Common routine to set mode given a vnode. 2735 */ 2736 static int 2737 change_mode(struct vnode *vp, int mode, struct lwp *l) 2738 { 2739 struct vattr vattr; 2740 int error; 2741 2742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2743 VATTR_NULL(&vattr); 2744 vattr.va_mode = mode & ALLPERMS; 2745 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2746 VOP_UNLOCK(vp, 0); 2747 return (error); 2748 } 2749 2750 /* 2751 * Set ownership given a path name; this version follows links. 2752 */ 2753 /* ARGSUSED */ 2754 int 2755 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2756 { 2757 /* { 2758 syscallarg(const char *) path; 2759 syscallarg(uid_t) uid; 2760 syscallarg(gid_t) gid; 2761 } */ 2762 int error; 2763 struct nameidata nd; 2764 2765 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2766 SCARG(uap, path)); 2767 if ((error = namei(&nd)) != 0) 2768 return (error); 2769 2770 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2771 2772 vrele(nd.ni_vp); 2773 return (error); 2774 } 2775 2776 /* 2777 * Set ownership given a path name; this version follows links. 2778 * Provides POSIX semantics. 2779 */ 2780 /* ARGSUSED */ 2781 int 2782 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2783 { 2784 /* { 2785 syscallarg(const char *) path; 2786 syscallarg(uid_t) uid; 2787 syscallarg(gid_t) gid; 2788 } */ 2789 int error; 2790 struct nameidata nd; 2791 2792 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2793 SCARG(uap, path)); 2794 if ((error = namei(&nd)) != 0) 2795 return (error); 2796 2797 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2798 2799 vrele(nd.ni_vp); 2800 return (error); 2801 } 2802 2803 /* 2804 * Set ownership given a file descriptor. 2805 */ 2806 /* ARGSUSED */ 2807 int 2808 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2809 { 2810 /* { 2811 syscallarg(int) fd; 2812 syscallarg(uid_t) uid; 2813 syscallarg(gid_t) gid; 2814 } */ 2815 int error; 2816 file_t *fp; 2817 2818 /* fd_getvnode() will use the descriptor for us */ 2819 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2820 return (error); 2821 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2822 l, 0); 2823 fd_putfile(SCARG(uap, fd)); 2824 return (error); 2825 } 2826 2827 /* 2828 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2829 */ 2830 /* ARGSUSED */ 2831 int 2832 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2833 { 2834 /* { 2835 syscallarg(int) fd; 2836 syscallarg(uid_t) uid; 2837 syscallarg(gid_t) gid; 2838 } */ 2839 int error; 2840 file_t *fp; 2841 2842 /* fd_getvnode() will use the descriptor for us */ 2843 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2844 return (error); 2845 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2846 l, 1); 2847 fd_putfile(SCARG(uap, fd)); 2848 return (error); 2849 } 2850 2851 /* 2852 * Set ownership given a path name; this version does not follow links. 2853 */ 2854 /* ARGSUSED */ 2855 int 2856 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2857 { 2858 /* { 2859 syscallarg(const char *) path; 2860 syscallarg(uid_t) uid; 2861 syscallarg(gid_t) gid; 2862 } */ 2863 int error; 2864 struct nameidata nd; 2865 2866 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2867 SCARG(uap, path)); 2868 if ((error = namei(&nd)) != 0) 2869 return (error); 2870 2871 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2872 2873 vrele(nd.ni_vp); 2874 return (error); 2875 } 2876 2877 /* 2878 * Set ownership given a path name; this version does not follow links. 2879 * Provides POSIX/XPG semantics. 2880 */ 2881 /* ARGSUSED */ 2882 int 2883 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2884 { 2885 /* { 2886 syscallarg(const char *) path; 2887 syscallarg(uid_t) uid; 2888 syscallarg(gid_t) gid; 2889 } */ 2890 int error; 2891 struct nameidata nd; 2892 2893 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2894 SCARG(uap, path)); 2895 if ((error = namei(&nd)) != 0) 2896 return (error); 2897 2898 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2899 2900 vrele(nd.ni_vp); 2901 return (error); 2902 } 2903 2904 /* 2905 * Common routine to set ownership given a vnode. 2906 */ 2907 static int 2908 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2909 int posix_semantics) 2910 { 2911 struct vattr vattr; 2912 mode_t newmode; 2913 int error; 2914 2915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2916 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2917 goto out; 2918 2919 #define CHANGED(x) ((int)(x) != -1) 2920 newmode = vattr.va_mode; 2921 if (posix_semantics) { 2922 /* 2923 * POSIX/XPG semantics: if the caller is not the super-user, 2924 * clear set-user-id and set-group-id bits. Both POSIX and 2925 * the XPG consider the behaviour for calls by the super-user 2926 * implementation-defined; we leave the set-user-id and set- 2927 * group-id settings intact in that case. 2928 */ 2929 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2930 NULL) != 0) 2931 newmode &= ~(S_ISUID | S_ISGID); 2932 } else { 2933 /* 2934 * NetBSD semantics: when changing owner and/or group, 2935 * clear the respective bit(s). 2936 */ 2937 if (CHANGED(uid)) 2938 newmode &= ~S_ISUID; 2939 if (CHANGED(gid)) 2940 newmode &= ~S_ISGID; 2941 } 2942 /* Update va_mode iff altered. */ 2943 if (vattr.va_mode == newmode) 2944 newmode = VNOVAL; 2945 2946 VATTR_NULL(&vattr); 2947 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2948 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2949 vattr.va_mode = newmode; 2950 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2951 #undef CHANGED 2952 2953 out: 2954 VOP_UNLOCK(vp, 0); 2955 return (error); 2956 } 2957 2958 /* 2959 * Set the access and modification times given a path name; this 2960 * version follows links. 2961 */ 2962 /* ARGSUSED */ 2963 int 2964 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval) 2965 { 2966 /* { 2967 syscallarg(const char *) path; 2968 syscallarg(const struct timeval *) tptr; 2969 } */ 2970 2971 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2972 SCARG(uap, tptr), UIO_USERSPACE); 2973 } 2974 2975 /* 2976 * Set the access and modification times given a file descriptor. 2977 */ 2978 /* ARGSUSED */ 2979 int 2980 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval) 2981 { 2982 /* { 2983 syscallarg(int) fd; 2984 syscallarg(const struct timeval *) tptr; 2985 } */ 2986 int error; 2987 file_t *fp; 2988 2989 /* fd_getvnode() will use the descriptor for us */ 2990 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2991 return (error); 2992 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2993 UIO_USERSPACE); 2994 fd_putfile(SCARG(uap, fd)); 2995 return (error); 2996 } 2997 2998 /* 2999 * Set the access and modification times given a path name; this 3000 * version does not follow links. 3001 */ 3002 int 3003 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval) 3004 { 3005 /* { 3006 syscallarg(const char *) path; 3007 syscallarg(const struct timeval *) tptr; 3008 } */ 3009 3010 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3011 SCARG(uap, tptr), UIO_USERSPACE); 3012 } 3013 3014 /* 3015 * Common routine to set access and modification times given a vnode. 3016 */ 3017 int 3018 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3019 const struct timeval *tptr, enum uio_seg seg) 3020 { 3021 struct vattr vattr; 3022 struct nameidata nd; 3023 int error; 3024 bool vanull, setbirthtime; 3025 struct timespec ts[2]; 3026 3027 if (tptr == NULL) { 3028 vanull = true; 3029 nanotime(&ts[0]); 3030 ts[1] = ts[0]; 3031 } else { 3032 struct timeval tv[2]; 3033 3034 vanull = false; 3035 if (seg != UIO_SYSSPACE) { 3036 error = copyin(tptr, &tv, sizeof (tv)); 3037 if (error != 0) 3038 return error; 3039 tptr = tv; 3040 } 3041 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3042 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3043 } 3044 3045 if (vp == NULL) { 3046 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3047 if ((error = namei(&nd)) != 0) 3048 return error; 3049 vp = nd.ni_vp; 3050 } else 3051 nd.ni_vp = NULL; 3052 3053 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3054 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3055 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3056 VATTR_NULL(&vattr); 3057 vattr.va_atime = ts[0]; 3058 vattr.va_mtime = ts[1]; 3059 if (setbirthtime) 3060 vattr.va_birthtime = ts[1]; 3061 if (vanull) 3062 vattr.va_flags |= VA_UTIMES_NULL; 3063 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3064 VOP_UNLOCK(vp, 0); 3065 3066 if (nd.ni_vp != NULL) 3067 vrele(nd.ni_vp); 3068 3069 return error; 3070 } 3071 3072 /* 3073 * Truncate a file given its path name. 3074 */ 3075 /* ARGSUSED */ 3076 int 3077 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3078 { 3079 /* { 3080 syscallarg(const char *) path; 3081 syscallarg(int) pad; 3082 syscallarg(off_t) length; 3083 } */ 3084 struct vnode *vp; 3085 struct vattr vattr; 3086 int error; 3087 struct nameidata nd; 3088 3089 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3090 SCARG(uap, path)); 3091 if ((error = namei(&nd)) != 0) 3092 return (error); 3093 vp = nd.ni_vp; 3094 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3095 if (vp->v_type == VDIR) 3096 error = EISDIR; 3097 else if ((error = vn_writechk(vp)) == 0 && 3098 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3099 VATTR_NULL(&vattr); 3100 vattr.va_size = SCARG(uap, length); 3101 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3102 } 3103 vput(vp); 3104 return (error); 3105 } 3106 3107 /* 3108 * Truncate a file given a file descriptor. 3109 */ 3110 /* ARGSUSED */ 3111 int 3112 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3113 { 3114 /* { 3115 syscallarg(int) fd; 3116 syscallarg(int) pad; 3117 syscallarg(off_t) length; 3118 } */ 3119 struct vattr vattr; 3120 struct vnode *vp; 3121 file_t *fp; 3122 int error; 3123 3124 /* fd_getvnode() will use the descriptor for us */ 3125 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3126 return (error); 3127 if ((fp->f_flag & FWRITE) == 0) { 3128 error = EINVAL; 3129 goto out; 3130 } 3131 vp = fp->f_data; 3132 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3133 if (vp->v_type == VDIR) 3134 error = EISDIR; 3135 else if ((error = vn_writechk(vp)) == 0) { 3136 VATTR_NULL(&vattr); 3137 vattr.va_size = SCARG(uap, length); 3138 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3139 } 3140 VOP_UNLOCK(vp, 0); 3141 out: 3142 fd_putfile(SCARG(uap, fd)); 3143 return (error); 3144 } 3145 3146 /* 3147 * Sync an open file. 3148 */ 3149 /* ARGSUSED */ 3150 int 3151 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3152 { 3153 /* { 3154 syscallarg(int) fd; 3155 } */ 3156 struct vnode *vp; 3157 file_t *fp; 3158 int error; 3159 3160 /* fd_getvnode() will use the descriptor for us */ 3161 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3162 return (error); 3163 vp = fp->f_data; 3164 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3165 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3166 if (error == 0 && bioopsp != NULL && 3167 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3168 (*bioopsp->io_fsync)(vp, 0); 3169 VOP_UNLOCK(vp, 0); 3170 fd_putfile(SCARG(uap, fd)); 3171 return (error); 3172 } 3173 3174 /* 3175 * Sync a range of file data. API modeled after that found in AIX. 3176 * 3177 * FDATASYNC indicates that we need only save enough metadata to be able 3178 * to re-read the written data. Note we duplicate AIX's requirement that 3179 * the file be open for writing. 3180 */ 3181 /* ARGSUSED */ 3182 int 3183 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3184 { 3185 /* { 3186 syscallarg(int) fd; 3187 syscallarg(int) flags; 3188 syscallarg(off_t) start; 3189 syscallarg(off_t) length; 3190 } */ 3191 struct vnode *vp; 3192 file_t *fp; 3193 int flags, nflags; 3194 off_t s, e, len; 3195 int error; 3196 3197 /* fd_getvnode() will use the descriptor for us */ 3198 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3199 return (error); 3200 3201 if ((fp->f_flag & FWRITE) == 0) { 3202 error = EBADF; 3203 goto out; 3204 } 3205 3206 flags = SCARG(uap, flags); 3207 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3208 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3209 error = EINVAL; 3210 goto out; 3211 } 3212 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3213 if (flags & FDATASYNC) 3214 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3215 else 3216 nflags = FSYNC_WAIT; 3217 if (flags & FDISKSYNC) 3218 nflags |= FSYNC_CACHE; 3219 3220 len = SCARG(uap, length); 3221 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3222 if (len) { 3223 s = SCARG(uap, start); 3224 e = s + len; 3225 if (e < s) { 3226 error = EINVAL; 3227 goto out; 3228 } 3229 } else { 3230 e = 0; 3231 s = 0; 3232 } 3233 3234 vp = fp->f_data; 3235 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3236 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3237 3238 if (error == 0 && bioopsp != NULL && 3239 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3240 (*bioopsp->io_fsync)(vp, nflags); 3241 3242 VOP_UNLOCK(vp, 0); 3243 out: 3244 fd_putfile(SCARG(uap, fd)); 3245 return (error); 3246 } 3247 3248 /* 3249 * Sync the data of an open file. 3250 */ 3251 /* ARGSUSED */ 3252 int 3253 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3254 { 3255 /* { 3256 syscallarg(int) fd; 3257 } */ 3258 struct vnode *vp; 3259 file_t *fp; 3260 int error; 3261 3262 /* fd_getvnode() will use the descriptor for us */ 3263 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3264 return (error); 3265 if ((fp->f_flag & FWRITE) == 0) { 3266 fd_putfile(SCARG(uap, fd)); 3267 return (EBADF); 3268 } 3269 vp = fp->f_data; 3270 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3271 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3272 VOP_UNLOCK(vp, 0); 3273 fd_putfile(SCARG(uap, fd)); 3274 return (error); 3275 } 3276 3277 /* 3278 * Rename files, (standard) BSD semantics frontend. 3279 */ 3280 /* ARGSUSED */ 3281 int 3282 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3283 { 3284 /* { 3285 syscallarg(const char *) from; 3286 syscallarg(const char *) to; 3287 } */ 3288 3289 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3290 } 3291 3292 /* 3293 * Rename files, POSIX semantics frontend. 3294 */ 3295 /* ARGSUSED */ 3296 int 3297 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3298 { 3299 /* { 3300 syscallarg(const char *) from; 3301 syscallarg(const char *) to; 3302 } */ 3303 3304 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3305 } 3306 3307 /* 3308 * Rename files. Source and destination must either both be directories, 3309 * or both not be directories. If target is a directory, it must be empty. 3310 * If `from' and `to' refer to the same object, the value of the `retain' 3311 * argument is used to determine whether `from' will be 3312 * 3313 * (retain == 0) deleted unless `from' and `to' refer to the same 3314 * object in the file system's name space (BSD). 3315 * (retain == 1) always retained (POSIX). 3316 */ 3317 int 3318 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3319 { 3320 struct vnode *tvp, *fvp, *tdvp; 3321 struct nameidata fromnd, tond; 3322 struct mount *fs; 3323 struct lwp *l = curlwp; 3324 struct proc *p; 3325 uint32_t saveflag; 3326 int error; 3327 3328 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3329 seg, from); 3330 if ((error = namei(&fromnd)) != 0) 3331 return (error); 3332 if (fromnd.ni_dvp != fromnd.ni_vp) 3333 VOP_UNLOCK(fromnd.ni_dvp, 0); 3334 fvp = fromnd.ni_vp; 3335 3336 fs = fvp->v_mount; 3337 error = VFS_RENAMELOCK_ENTER(fs); 3338 if (error) { 3339 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3340 vrele(fromnd.ni_dvp); 3341 vrele(fvp); 3342 goto out1; 3343 } 3344 3345 /* 3346 * close, partially, yet another race - ideally we should only 3347 * go as far as getting fromnd.ni_dvp before getting the per-fs 3348 * lock, and then continue to get fromnd.ni_vp, but we can't do 3349 * that with namei as it stands. 3350 * 3351 * This still won't prevent rmdir from nuking fromnd.ni_vp 3352 * under us. The real fix is to get the locks in the right 3353 * order and do the lookups in the right places, but that's a 3354 * major rototill. 3355 * 3356 * Preserve the SAVESTART in cn_flags, because who knows what 3357 * might happen if we don't. 3358 * 3359 * Note: this logic (as well as this whole function) is cloned 3360 * in nfs_serv.c. Proceed accordingly. 3361 */ 3362 vrele(fvp); 3363 if ((fromnd.ni_cnd.cn_namelen == 1 && 3364 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3365 (fromnd.ni_cnd.cn_namelen == 2 && 3366 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3367 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3368 error = EINVAL; 3369 VFS_RENAMELOCK_EXIT(fs); 3370 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3371 vrele(fromnd.ni_dvp); 3372 goto out1; 3373 } 3374 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3375 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3376 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3377 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3378 fromnd.ni_cnd.cn_flags |= saveflag; 3379 if (error) { 3380 VOP_UNLOCK(fromnd.ni_dvp, 0); 3381 VFS_RENAMELOCK_EXIT(fs); 3382 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3383 vrele(fromnd.ni_dvp); 3384 goto out1; 3385 } 3386 VOP_UNLOCK(fromnd.ni_vp, 0); 3387 if (fromnd.ni_dvp != fromnd.ni_vp) 3388 VOP_UNLOCK(fromnd.ni_dvp, 0); 3389 fvp = fromnd.ni_vp; 3390 3391 NDINIT(&tond, RENAME, 3392 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3393 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3394 seg, to); 3395 if ((error = namei(&tond)) != 0) { 3396 VFS_RENAMELOCK_EXIT(fs); 3397 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3398 vrele(fromnd.ni_dvp); 3399 vrele(fvp); 3400 goto out1; 3401 } 3402 tdvp = tond.ni_dvp; 3403 tvp = tond.ni_vp; 3404 3405 if (tvp != NULL) { 3406 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3407 error = ENOTDIR; 3408 goto out; 3409 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3410 error = EISDIR; 3411 goto out; 3412 } 3413 } 3414 3415 if (fvp == tdvp) 3416 error = EINVAL; 3417 3418 /* 3419 * Source and destination refer to the same object. 3420 */ 3421 if (fvp == tvp) { 3422 if (retain) 3423 error = -1; 3424 else if (fromnd.ni_dvp == tdvp && 3425 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3426 !memcmp(fromnd.ni_cnd.cn_nameptr, 3427 tond.ni_cnd.cn_nameptr, 3428 fromnd.ni_cnd.cn_namelen)) 3429 error = -1; 3430 } 3431 3432 #if NVERIEXEC > 0 3433 if (!error) { 3434 char *f1, *f2; 3435 3436 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3437 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3438 3439 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3440 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3441 3442 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3443 3444 free(f1, M_TEMP); 3445 free(f2, M_TEMP); 3446 } 3447 #endif /* NVERIEXEC > 0 */ 3448 3449 out: 3450 p = l->l_proc; 3451 if (!error) { 3452 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3453 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3454 VFS_RENAMELOCK_EXIT(fs); 3455 } else { 3456 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3457 if (tdvp == tvp) 3458 vrele(tdvp); 3459 else 3460 vput(tdvp); 3461 if (tvp) 3462 vput(tvp); 3463 VFS_RENAMELOCK_EXIT(fs); 3464 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3465 vrele(fromnd.ni_dvp); 3466 vrele(fvp); 3467 } 3468 vrele(tond.ni_startdir); 3469 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3470 out1: 3471 if (fromnd.ni_startdir) 3472 vrele(fromnd.ni_startdir); 3473 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3474 return (error == -1 ? 0 : error); 3475 } 3476 3477 /* 3478 * Make a directory file. 3479 */ 3480 /* ARGSUSED */ 3481 int 3482 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3483 { 3484 /* { 3485 syscallarg(const char *) path; 3486 syscallarg(int) mode; 3487 } */ 3488 struct proc *p = l->l_proc; 3489 struct vnode *vp; 3490 struct vattr vattr; 3491 int error; 3492 struct nameidata nd; 3493 3494 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3495 SCARG(uap, path)); 3496 if ((error = namei(&nd)) != 0) 3497 return (error); 3498 vp = nd.ni_vp; 3499 if (vp != NULL) { 3500 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3501 if (nd.ni_dvp == vp) 3502 vrele(nd.ni_dvp); 3503 else 3504 vput(nd.ni_dvp); 3505 vrele(vp); 3506 return (EEXIST); 3507 } 3508 VATTR_NULL(&vattr); 3509 vattr.va_type = VDIR; 3510 /* We will read cwdi->cwdi_cmask unlocked. */ 3511 vattr.va_mode = 3512 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3513 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3514 if (!error) 3515 vput(nd.ni_vp); 3516 return (error); 3517 } 3518 3519 /* 3520 * Remove a directory file. 3521 */ 3522 /* ARGSUSED */ 3523 int 3524 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3525 { 3526 /* { 3527 syscallarg(const char *) path; 3528 } */ 3529 struct vnode *vp; 3530 int error; 3531 struct nameidata nd; 3532 3533 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3534 SCARG(uap, path)); 3535 if ((error = namei(&nd)) != 0) 3536 return (error); 3537 vp = nd.ni_vp; 3538 if (vp->v_type != VDIR) { 3539 error = ENOTDIR; 3540 goto out; 3541 } 3542 /* 3543 * No rmdir "." please. 3544 */ 3545 if (nd.ni_dvp == vp) { 3546 error = EINVAL; 3547 goto out; 3548 } 3549 /* 3550 * The root of a mounted filesystem cannot be deleted. 3551 */ 3552 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3553 error = EBUSY; 3554 goto out; 3555 } 3556 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3557 return (error); 3558 3559 out: 3560 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3561 if (nd.ni_dvp == vp) 3562 vrele(nd.ni_dvp); 3563 else 3564 vput(nd.ni_dvp); 3565 vput(vp); 3566 return (error); 3567 } 3568 3569 /* 3570 * Read a block of directory entries in a file system independent format. 3571 */ 3572 int 3573 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3574 { 3575 /* { 3576 syscallarg(int) fd; 3577 syscallarg(char *) buf; 3578 syscallarg(size_t) count; 3579 } */ 3580 file_t *fp; 3581 int error, done; 3582 3583 /* fd_getvnode() will use the descriptor for us */ 3584 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3585 return (error); 3586 if ((fp->f_flag & FREAD) == 0) { 3587 error = EBADF; 3588 goto out; 3589 } 3590 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3591 SCARG(uap, count), &done, l, 0, 0); 3592 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3593 *retval = done; 3594 out: 3595 fd_putfile(SCARG(uap, fd)); 3596 return (error); 3597 } 3598 3599 /* 3600 * Set the mode mask for creation of filesystem nodes. 3601 */ 3602 int 3603 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3604 { 3605 /* { 3606 syscallarg(mode_t) newmask; 3607 } */ 3608 struct proc *p = l->l_proc; 3609 struct cwdinfo *cwdi; 3610 3611 /* 3612 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3613 * important is that we serialize changes to the mask. The 3614 * rw_exit() will issue a write memory barrier on our behalf, 3615 * and force the changes out to other CPUs (as it must use an 3616 * atomic operation, draining the local CPU's store buffers). 3617 */ 3618 cwdi = p->p_cwdi; 3619 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3620 *retval = cwdi->cwdi_cmask; 3621 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3622 rw_exit(&cwdi->cwdi_lock); 3623 3624 return (0); 3625 } 3626 3627 int 3628 dorevoke(struct vnode *vp, kauth_cred_t cred) 3629 { 3630 struct vattr vattr; 3631 int error; 3632 3633 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3634 return error; 3635 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3636 (error = kauth_authorize_generic(cred, 3637 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3638 VOP_REVOKE(vp, REVOKEALL); 3639 return (error); 3640 } 3641 3642 /* 3643 * Void all references to file by ripping underlying filesystem 3644 * away from vnode. 3645 */ 3646 /* ARGSUSED */ 3647 int 3648 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3649 { 3650 /* { 3651 syscallarg(const char *) path; 3652 } */ 3653 struct vnode *vp; 3654 int error; 3655 struct nameidata nd; 3656 3657 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3658 SCARG(uap, path)); 3659 if ((error = namei(&nd)) != 0) 3660 return (error); 3661 vp = nd.ni_vp; 3662 error = dorevoke(vp, l->l_cred); 3663 vrele(vp); 3664 return (error); 3665 } 3666