1 /* $NetBSD: vfs_syscalls.c,v 1.363 2008/05/20 19:30:03 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 63 */ 64 65 #include <sys/cdefs.h> 66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.363 2008/05/20 19:30:03 ad Exp $"); 67 68 #include "opt_compat_netbsd.h" 69 #include "opt_compat_43.h" 70 #include "opt_fileassoc.h" 71 #include "fss.h" 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/namei.h> 77 #include <sys/filedesc.h> 78 #include <sys/kernel.h> 79 #include <sys/file.h> 80 #include <sys/stat.h> 81 #include <sys/vnode.h> 82 #include <sys/mount.h> 83 #include <sys/proc.h> 84 #include <sys/uio.h> 85 #include <sys/malloc.h> 86 #include <sys/kmem.h> 87 #include <sys/dirent.h> 88 #include <sys/sysctl.h> 89 #include <sys/syscallargs.h> 90 #include <sys/vfs_syscalls.h> 91 #include <sys/ktrace.h> 92 #ifdef FILEASSOC 93 #include <sys/fileassoc.h> 94 #endif /* FILEASSOC */ 95 #include <sys/verified_exec.h> 96 #include <sys/kauth.h> 97 #include <sys/atomic.h> 98 #include <sys/module.h> 99 100 #include <miscfs/genfs/genfs.h> 101 #include <miscfs/syncfs/syncfs.h> 102 #include <miscfs/specfs/specdev.h> 103 104 #ifdef COMPAT_30 105 #include "opt_nfsserver.h" 106 #include <nfs/rpcv2.h> 107 #endif 108 #include <nfs/nfsproto.h> 109 #ifdef COMPAT_30 110 #include <nfs/nfs.h> 111 #include <nfs/nfs_var.h> 112 #endif 113 114 #if NFSS > 0 115 #include <dev/fssvar.h> 116 #endif 117 118 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 119 120 static int change_dir(struct nameidata *, struct lwp *); 121 static int change_flags(struct vnode *, u_long, struct lwp *); 122 static int change_mode(struct vnode *, int, struct lwp *l); 123 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 124 125 void checkdirs(struct vnode *); 126 127 int dovfsusermount = 0; 128 129 /* 130 * Virtual File System System Calls 131 */ 132 133 /* 134 * Mount a file system. 135 */ 136 137 #if defined(COMPAT_09) || defined(COMPAT_43) 138 /* 139 * This table is used to maintain compatibility with 4.3BSD 140 * and NetBSD 0.9 mount syscalls. Note, the order is important! 141 * 142 * Do not modify this table. It should only contain filesystems 143 * supported by NetBSD 0.9 and 4.3BSD. 144 */ 145 const char * const mountcompatnames[] = { 146 NULL, /* 0 = MOUNT_NONE */ 147 MOUNT_FFS, /* 1 = MOUNT_UFS */ 148 MOUNT_NFS, /* 2 */ 149 MOUNT_MFS, /* 3 */ 150 MOUNT_MSDOS, /* 4 */ 151 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 152 MOUNT_FDESC, /* 6 */ 153 MOUNT_KERNFS, /* 7 */ 154 NULL, /* 8 = MOUNT_DEVFS */ 155 MOUNT_AFS, /* 9 */ 156 }; 157 const int nmountcompatnames = sizeof(mountcompatnames) / 158 sizeof(mountcompatnames[0]); 159 #endif /* COMPAT_09 || COMPAT_43 */ 160 161 static int 162 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 163 void *data, size_t *data_len) 164 { 165 struct mount *mp; 166 int error = 0, saved_flags; 167 168 mp = vp->v_mount; 169 saved_flags = mp->mnt_flag; 170 171 /* We can operate only on VV_ROOT nodes. */ 172 if ((vp->v_vflag & VV_ROOT) == 0) { 173 error = EINVAL; 174 goto out; 175 } 176 177 /* 178 * We only allow the filesystem to be reloaded if it 179 * is currently mounted read-only. 180 */ 181 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 182 error = EOPNOTSUPP; /* Needs translation */ 183 goto out; 184 } 185 186 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 187 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 188 if (error) 189 goto out; 190 191 if (vfs_busy(mp, NULL)) { 192 error = EPERM; 193 goto out; 194 } 195 196 mutex_enter(&mp->mnt_updating); 197 198 mp->mnt_flag &= ~MNT_OP_FLAGS; 199 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 200 201 /* 202 * Set the mount level flags. 203 */ 204 if (flags & MNT_RDONLY) 205 mp->mnt_flag |= MNT_RDONLY; 206 else if (mp->mnt_flag & MNT_RDONLY) 207 mp->mnt_iflag |= IMNT_WANTRDWR; 208 mp->mnt_flag &= 209 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 210 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 211 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 212 mp->mnt_flag |= flags & 213 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 214 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 215 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 216 MNT_IGNORE); 217 218 error = VFS_MOUNT(mp, path, data, data_len); 219 220 #if defined(COMPAT_30) && defined(NFSSERVER) 221 if (error && data != NULL) { 222 int error2; 223 224 /* Update failed; let's try and see if it was an 225 * export request. */ 226 error2 = nfs_update_exports_30(mp, path, data, l); 227 228 /* Only update error code if the export request was 229 * understood but some problem occurred while 230 * processing it. */ 231 if (error2 != EJUSTRETURN) 232 error = error2; 233 } 234 #endif 235 if (mp->mnt_iflag & IMNT_WANTRDWR) 236 mp->mnt_flag &= ~MNT_RDONLY; 237 if (error) 238 mp->mnt_flag = saved_flags; 239 mp->mnt_flag &= ~MNT_OP_FLAGS; 240 mp->mnt_iflag &= ~IMNT_WANTRDWR; 241 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 242 if (mp->mnt_syncer == NULL) 243 error = vfs_allocate_syncvnode(mp); 244 } else { 245 if (mp->mnt_syncer != NULL) 246 vfs_deallocate_syncvnode(mp); 247 } 248 mutex_exit(&mp->mnt_updating); 249 vfs_unbusy(mp, false, NULL); 250 251 out: 252 return (error); 253 } 254 255 static int 256 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 257 { 258 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 259 int error; 260 261 /* Copy file-system type from userspace. */ 262 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 263 if (error) { 264 #if defined(COMPAT_09) || defined(COMPAT_43) 265 /* 266 * Historically, filesystem types were identified by numbers. 267 * If we get an integer for the filesystem type instead of a 268 * string, we check to see if it matches one of the historic 269 * filesystem types. 270 */ 271 u_long fsindex = (u_long)fstype; 272 if (fsindex >= nmountcompatnames || 273 mountcompatnames[fsindex] == NULL) 274 return ENODEV; 275 strlcpy(fstypename, mountcompatnames[fsindex], 276 sizeof(fstypename)); 277 #else 278 return error; 279 #endif 280 } 281 282 #ifdef COMPAT_10 283 /* Accept `ufs' as an alias for `ffs'. */ 284 if (strcmp(fstypename, "ufs") == 0) 285 fstypename[0] = 'f'; 286 #endif 287 288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 289 return 0; 290 291 /* If we can autoload a vfs module, try again */ 292 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true); 293 294 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 295 return 0; 296 297 return ENODEV; 298 } 299 300 static int 301 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 302 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 303 { 304 struct mount *mp = NULL; 305 struct vnode *vp = *vpp; 306 struct vattr va; 307 int error; 308 309 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 310 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 311 if (error) 312 return error; 313 314 /* Can't make a non-dir a mount-point (from here anyway). */ 315 if (vp->v_type != VDIR) 316 return ENOTDIR; 317 318 /* 319 * If the user is not root, ensure that they own the directory 320 * onto which we are attempting to mount. 321 */ 322 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 323 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 324 (error = kauth_authorize_generic(l->l_cred, 325 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 326 return error; 327 } 328 329 if (flags & MNT_EXPORTED) 330 return EINVAL; 331 332 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 333 return error; 334 335 /* 336 * Check if a file-system is not already mounted on this vnode. 337 */ 338 if (vp->v_mountedhere != NULL) 339 return EBUSY; 340 341 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 342 if (mp == NULL) 343 return ENOMEM; 344 345 mp->mnt_op = vfsops; 346 mp->mnt_refcnt = 1; 347 348 TAILQ_INIT(&mp->mnt_vnodelist); 349 rw_init(&mp->mnt_unmounting); 350 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 351 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 352 error = vfs_busy(mp, NULL); 353 KASSERT(error == 0); 354 mutex_enter(&mp->mnt_updating); 355 356 mp->mnt_vnodecovered = vp; 357 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 358 mount_initspecific(mp); 359 360 /* 361 * The underlying file system may refuse the mount for 362 * various reasons. Allow the user to force it to happen. 363 * 364 * Set the mount level flags. 365 */ 366 mp->mnt_flag = flags & 367 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 368 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 369 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 370 MNT_IGNORE | MNT_RDONLY); 371 372 error = VFS_MOUNT(mp, path, data, data_len); 373 mp->mnt_flag &= ~MNT_OP_FLAGS; 374 375 /* 376 * Put the new filesystem on the mount list after root. 377 */ 378 cache_purge(vp); 379 if (error != 0) { 380 vp->v_mountedhere = NULL; 381 mutex_exit(&mp->mnt_updating); 382 vfs_unbusy(mp, false, NULL); 383 vfs_destroy(mp); 384 return error; 385 } 386 387 mp->mnt_iflag &= ~IMNT_WANTRDWR; 388 mutex_enter(&mountlist_lock); 389 vp->v_mountedhere = mp; 390 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 391 mutex_exit(&mountlist_lock); 392 vn_restorerecurse(vp, recurse); 393 VOP_UNLOCK(vp, 0); 394 checkdirs(vp); 395 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 396 error = vfs_allocate_syncvnode(mp); 397 /* Hold an additional reference to the mount across VFS_START(). */ 398 mutex_exit(&mp->mnt_updating); 399 vfs_unbusy(mp, true, NULL); 400 (void) VFS_STATVFS(mp, &mp->mnt_stat); 401 error = VFS_START(mp, 0); 402 if (error) { 403 vrele(vp); 404 vfs_destroy(mp); 405 } 406 /* Drop reference held for VFS_START(). */ 407 vfs_destroy(mp); 408 *vpp = NULL; 409 return error; 410 } 411 412 static int 413 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 414 void *data, size_t *data_len) 415 { 416 struct mount *mp; 417 int error; 418 419 /* If MNT_GETARGS is specified, it should be the only flag. */ 420 if (flags & ~MNT_GETARGS) 421 return EINVAL; 422 423 mp = vp->v_mount; 424 425 /* XXX: probably some notion of "can see" here if we want isolation. */ 426 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 427 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 428 if (error) 429 return error; 430 431 if ((vp->v_vflag & VV_ROOT) == 0) 432 return EINVAL; 433 434 if (vfs_busy(mp, NULL)) 435 return EPERM; 436 437 mutex_enter(&mp->mnt_updating); 438 mp->mnt_flag &= ~MNT_OP_FLAGS; 439 mp->mnt_flag |= MNT_GETARGS; 440 error = VFS_MOUNT(mp, path, data, data_len); 441 mp->mnt_flag &= ~MNT_OP_FLAGS; 442 mutex_exit(&mp->mnt_updating); 443 444 vfs_unbusy(mp, false, NULL); 445 return (error); 446 } 447 448 #ifdef COMPAT_40 449 /* ARGSUSED */ 450 int 451 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval) 452 { 453 /* { 454 syscallarg(const char *) type; 455 syscallarg(const char *) path; 456 syscallarg(int) flags; 457 syscallarg(void *) data; 458 } */ 459 register_t dummy; 460 461 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 462 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 463 } 464 #endif 465 466 int 467 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 468 { 469 /* { 470 syscallarg(const char *) type; 471 syscallarg(const char *) path; 472 syscallarg(int) flags; 473 syscallarg(void *) data; 474 syscallarg(size_t) data_len; 475 } */ 476 477 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 478 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 479 SCARG(uap, data_len), retval); 480 } 481 482 int 483 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 484 const char *path, int flags, void *data, enum uio_seg data_seg, 485 size_t data_len, register_t *retval) 486 { 487 struct vnode *vp; 488 struct nameidata nd; 489 void *data_buf = data; 490 u_int recurse; 491 int error; 492 493 /* 494 * Get vnode to be covered 495 */ 496 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 497 if ((error = namei(&nd)) != 0) 498 return (error); 499 vp = nd.ni_vp; 500 501 /* 502 * A lookup in VFS_MOUNT might result in an attempt to 503 * lock this vnode again, so make the lock recursive. 504 */ 505 if (vfsops == NULL) { 506 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 507 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 508 recurse = vn_setrecurse(vp); 509 vfsops = vp->v_mount->mnt_op; 510 } else { 511 /* 'type' is userspace */ 512 error = mount_get_vfsops(type, &vfsops); 513 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 514 recurse = vn_setrecurse(vp); 515 if (error != 0) 516 goto done; 517 } 518 } else { 519 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 520 recurse = vn_setrecurse(vp); 521 } 522 523 if (data != NULL && data_seg == UIO_USERSPACE) { 524 if (data_len == 0) { 525 /* No length supplied, use default for filesystem */ 526 data_len = vfsops->vfs_min_mount_data; 527 if (data_len > VFS_MAX_MOUNT_DATA) { 528 /* maybe a force loaded old LKM */ 529 error = EINVAL; 530 goto done; 531 } 532 #ifdef COMPAT_30 533 /* Hopefully a longer buffer won't make copyin() fail */ 534 if (flags & MNT_UPDATE 535 && data_len < sizeof (struct mnt_export_args30)) 536 data_len = sizeof (struct mnt_export_args30); 537 #endif 538 } 539 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 540 541 /* NFS needs the buffer even for mnt_getargs .... */ 542 error = copyin(data, data_buf, data_len); 543 if (error != 0) 544 goto done; 545 } 546 547 if (flags & MNT_GETARGS) { 548 if (data_len == 0) { 549 error = EINVAL; 550 goto done; 551 } 552 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 553 if (error != 0) 554 goto done; 555 if (data_seg == UIO_USERSPACE) 556 error = copyout(data_buf, data, data_len); 557 *retval = data_len; 558 } else if (flags & MNT_UPDATE) { 559 error = mount_update(l, vp, path, flags, data_buf, &data_len); 560 } else { 561 /* Locking is handled internally in mount_domount(). */ 562 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 563 &data_len, recurse); 564 } 565 566 done: 567 if (vp != NULL) { 568 vn_restorerecurse(vp, recurse); 569 vput(vp); 570 } 571 if (data_buf != data) 572 free(data_buf, M_TEMP); 573 return (error); 574 } 575 576 /* 577 * Scan all active processes to see if any of them have a current 578 * or root directory onto which the new filesystem has just been 579 * mounted. If so, replace them with the new mount point. 580 */ 581 void 582 checkdirs(struct vnode *olddp) 583 { 584 struct cwdinfo *cwdi; 585 struct vnode *newdp, *rele1, *rele2; 586 struct proc *p; 587 bool retry; 588 589 if (olddp->v_usecount == 1) 590 return; 591 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 592 panic("mount: lost mount"); 593 594 do { 595 retry = false; 596 mutex_enter(proc_lock); 597 PROCLIST_FOREACH(p, &allproc) { 598 if ((p->p_flag & PK_MARKER) != 0) 599 continue; 600 if ((cwdi = p->p_cwdi) == NULL) 601 continue; 602 /* 603 * Can't change to the old directory any more, 604 * so even if we see a stale value it's not a 605 * problem. 606 */ 607 if (cwdi->cwdi_cdir != olddp && 608 cwdi->cwdi_rdir != olddp) 609 continue; 610 retry = true; 611 rele1 = NULL; 612 rele2 = NULL; 613 atomic_inc_uint(&cwdi->cwdi_refcnt); 614 mutex_exit(proc_lock); 615 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 616 if (cwdi->cwdi_cdir == olddp) { 617 rele1 = cwdi->cwdi_cdir; 618 VREF(newdp); 619 cwdi->cwdi_cdir = newdp; 620 } 621 if (cwdi->cwdi_rdir == olddp) { 622 rele2 = cwdi->cwdi_rdir; 623 VREF(newdp); 624 cwdi->cwdi_rdir = newdp; 625 } 626 rw_exit(&cwdi->cwdi_lock); 627 cwdfree(cwdi); 628 if (rele1 != NULL) 629 vrele(rele1); 630 if (rele2 != NULL) 631 vrele(rele2); 632 mutex_enter(proc_lock); 633 break; 634 } 635 mutex_exit(proc_lock); 636 } while (retry); 637 638 if (rootvnode == olddp) { 639 vrele(rootvnode); 640 VREF(newdp); 641 rootvnode = newdp; 642 } 643 vput(newdp); 644 } 645 646 /* 647 * Unmount a file system. 648 * 649 * Note: unmount takes a path to the vnode mounted on as argument, 650 * not special file (as before). 651 */ 652 /* ARGSUSED */ 653 int 654 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 655 { 656 /* { 657 syscallarg(const char *) path; 658 syscallarg(int) flags; 659 } */ 660 struct vnode *vp; 661 struct mount *mp; 662 int error; 663 struct nameidata nd; 664 665 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 666 SCARG(uap, path)); 667 if ((error = namei(&nd)) != 0) 668 return (error); 669 vp = nd.ni_vp; 670 mp = vp->v_mount; 671 atomic_inc_uint(&mp->mnt_refcnt); 672 VOP_UNLOCK(vp, 0); 673 674 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 675 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 676 if (error) { 677 vrele(vp); 678 vfs_destroy(mp); 679 return (error); 680 } 681 682 /* 683 * Don't allow unmounting the root file system. 684 */ 685 if (mp->mnt_flag & MNT_ROOTFS) { 686 vrele(vp); 687 vfs_destroy(mp); 688 return (EINVAL); 689 } 690 691 /* 692 * Must be the root of the filesystem 693 */ 694 if ((vp->v_vflag & VV_ROOT) == 0) { 695 vrele(vp); 696 vfs_destroy(mp); 697 return (EINVAL); 698 } 699 700 vrele(vp); 701 error = dounmount(mp, SCARG(uap, flags), l); 702 return error; 703 } 704 705 /* 706 * Do the actual file system unmount. File system is assumed to have 707 * been locked by the caller. 708 * 709 * => Caller gain reference to the mount, explicility for unmount. 710 * => Reference will be dropped in all cases. 711 */ 712 int 713 dounmount(struct mount *mp, int flags, struct lwp *l) 714 { 715 struct vnode *coveredvp; 716 int error; 717 int async; 718 int used_syncer; 719 720 #if NVERIEXEC > 0 721 error = veriexec_unmountchk(mp); 722 if (error) 723 return (error); 724 #endif /* NVERIEXEC > 0 */ 725 726 /* 727 * XXX Freeze syncer. Must do this before locking the 728 * mount point. See dounmount() for details. 729 */ 730 mutex_enter(&syncer_mutex); 731 rw_enter(&mp->mnt_unmounting, RW_WRITER); 732 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 733 rw_exit(&mp->mnt_unmounting); 734 mutex_exit(&syncer_mutex); 735 vfs_destroy(mp); 736 return ENOENT; 737 } 738 739 used_syncer = (mp->mnt_syncer != NULL); 740 741 /* 742 * XXX Syncer must be frozen when we get here. This should really 743 * be done on a per-mountpoint basis, but especially the softdep 744 * code possibly called from the syncer doesn't exactly work on a 745 * per-mountpoint basis, so the softdep code would become a maze 746 * of vfs_busy() calls. 747 * 748 * The caller of dounmount() must acquire syncer_mutex because 749 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 750 * order, and we must preserve that order to avoid deadlock. 751 * 752 * So, if the file system did not use the syncer, now is 753 * the time to release the syncer_mutex. 754 */ 755 if (used_syncer == 0) 756 mutex_exit(&syncer_mutex); 757 758 mp->mnt_iflag |= IMNT_UNMOUNT; 759 async = mp->mnt_flag & MNT_ASYNC; 760 mp->mnt_flag &= ~MNT_ASYNC; 761 cache_purgevfs(mp); /* remove cache entries for this file sys */ 762 if (mp->mnt_syncer != NULL) 763 vfs_deallocate_syncvnode(mp); 764 error = 0; 765 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 766 #if NFSS > 0 767 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 768 #endif 769 if (error == 0) 770 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 771 } 772 vfs_scrubvnlist(mp); 773 if (error == 0 || (flags & MNT_FORCE)) 774 error = VFS_UNMOUNT(mp, flags); 775 if (error) { 776 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 777 (void) vfs_allocate_syncvnode(mp); 778 mp->mnt_iflag &= ~IMNT_UNMOUNT; 779 mp->mnt_flag |= async; 780 rw_exit(&mp->mnt_unmounting); 781 if (used_syncer) 782 mutex_exit(&syncer_mutex); 783 return (error); 784 } 785 vfs_scrubvnlist(mp); 786 mutex_enter(&mountlist_lock); 787 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 788 coveredvp->v_mountedhere = NULL; 789 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 790 mp->mnt_iflag |= IMNT_GONE; 791 mutex_exit(&mountlist_lock); 792 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 793 panic("unmount: dangling vnode"); 794 if (used_syncer) 795 mutex_exit(&syncer_mutex); 796 vfs_hooks_unmount(mp); 797 rw_exit(&mp->mnt_unmounting); 798 vfs_destroy(mp); /* caller provided reference */ 799 vfs_destroy(mp); /* from mount(), final nail in coffin */ 800 if (coveredvp != NULLVP) 801 vrele(coveredvp); 802 return (0); 803 } 804 805 /* 806 * Sync each mounted filesystem. 807 */ 808 #ifdef DEBUG 809 int syncprt = 0; 810 struct ctldebug debug0 = { "syncprt", &syncprt }; 811 #endif 812 813 /* ARGSUSED */ 814 int 815 sys_sync(struct lwp *l, const void *v, register_t *retval) 816 { 817 struct mount *mp, *nmp; 818 int asyncflag; 819 820 if (l == NULL) 821 l = &lwp0; 822 823 mutex_enter(&mountlist_lock); 824 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 825 mp = nmp) { 826 if (vfs_busy(mp, &nmp)) { 827 continue; 828 } 829 mutex_enter(&mp->mnt_updating); 830 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 831 asyncflag = mp->mnt_flag & MNT_ASYNC; 832 mp->mnt_flag &= ~MNT_ASYNC; 833 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 834 if (asyncflag) 835 mp->mnt_flag |= MNT_ASYNC; 836 } 837 mutex_exit(&mp->mnt_updating); 838 vfs_unbusy(mp, false, &nmp); 839 } 840 mutex_exit(&mountlist_lock); 841 #ifdef DEBUG 842 if (syncprt) 843 vfs_bufstats(); 844 #endif /* DEBUG */ 845 return (0); 846 } 847 848 /* 849 * Change filesystem quotas. 850 */ 851 /* ARGSUSED */ 852 int 853 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 854 { 855 /* { 856 syscallarg(const char *) path; 857 syscallarg(int) cmd; 858 syscallarg(int) uid; 859 syscallarg(void *) arg; 860 } */ 861 struct mount *mp; 862 int error; 863 struct nameidata nd; 864 865 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 866 SCARG(uap, path)); 867 if ((error = namei(&nd)) != 0) 868 return (error); 869 mp = nd.ni_vp->v_mount; 870 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 871 SCARG(uap, arg)); 872 vrele(nd.ni_vp); 873 return (error); 874 } 875 876 int 877 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 878 int root) 879 { 880 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 881 int error = 0; 882 883 /* 884 * If MNT_NOWAIT or MNT_LAZY is specified, do not 885 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 886 * overrides MNT_NOWAIT. 887 */ 888 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 889 (flags != MNT_WAIT && flags != 0)) { 890 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 891 goto done; 892 } 893 894 /* Get the filesystem stats now */ 895 memset(sp, 0, sizeof(*sp)); 896 if ((error = VFS_STATVFS(mp, sp)) != 0) { 897 return error; 898 } 899 900 if (cwdi->cwdi_rdir == NULL) 901 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 902 done: 903 if (cwdi->cwdi_rdir != NULL) { 904 size_t len; 905 char *bp; 906 char *path = PNBUF_GET(); 907 908 bp = path + MAXPATHLEN; 909 *--bp = '\0'; 910 rw_enter(&cwdi->cwdi_lock, RW_READER); 911 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 912 MAXPATHLEN / 2, 0, l); 913 rw_exit(&cwdi->cwdi_lock); 914 if (error) { 915 PNBUF_PUT(path); 916 return error; 917 } 918 len = strlen(bp); 919 /* 920 * for mount points that are below our root, we can see 921 * them, so we fix up the pathname and return them. The 922 * rest we cannot see, so we don't allow viewing the 923 * data. 924 */ 925 if (strncmp(bp, sp->f_mntonname, len) == 0) { 926 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 927 sizeof(sp->f_mntonname)); 928 if (sp->f_mntonname[0] == '\0') 929 (void)strlcpy(sp->f_mntonname, "/", 930 sizeof(sp->f_mntonname)); 931 } else { 932 if (root) 933 (void)strlcpy(sp->f_mntonname, "/", 934 sizeof(sp->f_mntonname)); 935 else 936 error = EPERM; 937 } 938 PNBUF_PUT(path); 939 } 940 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 941 return error; 942 } 943 944 /* 945 * Get filesystem statistics by path. 946 */ 947 int 948 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 949 { 950 struct mount *mp; 951 int error; 952 struct nameidata nd; 953 954 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 955 if ((error = namei(&nd)) != 0) 956 return error; 957 mp = nd.ni_vp->v_mount; 958 error = dostatvfs(mp, sb, l, flags, 1); 959 vrele(nd.ni_vp); 960 return error; 961 } 962 963 /* ARGSUSED */ 964 int 965 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 966 { 967 /* { 968 syscallarg(const char *) path; 969 syscallarg(struct statvfs *) buf; 970 syscallarg(int) flags; 971 } */ 972 struct statvfs *sb; 973 int error; 974 975 sb = STATVFSBUF_GET(); 976 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 977 if (error == 0) 978 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 979 STATVFSBUF_PUT(sb); 980 return error; 981 } 982 983 /* 984 * Get filesystem statistics by fd. 985 */ 986 int 987 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 988 { 989 file_t *fp; 990 struct mount *mp; 991 int error; 992 993 /* fd_getvnode() will use the descriptor for us */ 994 if ((error = fd_getvnode(fd, &fp)) != 0) 995 return (error); 996 mp = ((struct vnode *)fp->f_data)->v_mount; 997 error = dostatvfs(mp, sb, curlwp, flags, 1); 998 fd_putfile(fd); 999 return error; 1000 } 1001 1002 /* ARGSUSED */ 1003 int 1004 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1005 { 1006 /* { 1007 syscallarg(int) fd; 1008 syscallarg(struct statvfs *) buf; 1009 syscallarg(int) flags; 1010 } */ 1011 struct statvfs *sb; 1012 int error; 1013 1014 sb = STATVFSBUF_GET(); 1015 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1016 if (error == 0) 1017 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1018 STATVFSBUF_PUT(sb); 1019 return error; 1020 } 1021 1022 1023 /* 1024 * Get statistics on all filesystems. 1025 */ 1026 int 1027 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1028 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1029 register_t *retval) 1030 { 1031 int root = 0; 1032 struct proc *p = l->l_proc; 1033 struct mount *mp, *nmp; 1034 struct statvfs *sb; 1035 size_t count, maxcount; 1036 int error = 0; 1037 1038 sb = STATVFSBUF_GET(); 1039 maxcount = bufsize / entry_sz; 1040 mutex_enter(&mountlist_lock); 1041 count = 0; 1042 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1043 mp = nmp) { 1044 if (vfs_busy(mp, &nmp)) { 1045 continue; 1046 } 1047 if (sfsp && count < maxcount) { 1048 error = dostatvfs(mp, sb, l, flags, 0); 1049 if (error) { 1050 vfs_unbusy(mp, false, &nmp); 1051 continue; 1052 } 1053 error = copyfn(sb, sfsp, entry_sz); 1054 if (error) { 1055 vfs_unbusy(mp, false, NULL); 1056 goto out; 1057 } 1058 sfsp = (char *)sfsp + entry_sz; 1059 root |= strcmp(sb->f_mntonname, "/") == 0; 1060 } 1061 count++; 1062 vfs_unbusy(mp, false, &nmp); 1063 } 1064 mutex_exit(&mountlist_lock); 1065 1066 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1067 /* 1068 * fake a root entry 1069 */ 1070 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1071 sb, l, flags, 1); 1072 if (error != 0) 1073 goto out; 1074 if (sfsp) 1075 error = copyfn(sb, sfsp, entry_sz); 1076 count++; 1077 } 1078 if (sfsp && count > maxcount) 1079 *retval = maxcount; 1080 else 1081 *retval = count; 1082 out: 1083 STATVFSBUF_PUT(sb); 1084 return error; 1085 } 1086 1087 int 1088 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1089 { 1090 /* { 1091 syscallarg(struct statvfs *) buf; 1092 syscallarg(size_t) bufsize; 1093 syscallarg(int) flags; 1094 } */ 1095 1096 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1097 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1098 } 1099 1100 /* 1101 * Change current working directory to a given file descriptor. 1102 */ 1103 /* ARGSUSED */ 1104 int 1105 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1106 { 1107 /* { 1108 syscallarg(int) fd; 1109 } */ 1110 struct proc *p = l->l_proc; 1111 struct cwdinfo *cwdi; 1112 struct vnode *vp, *tdp; 1113 struct mount *mp; 1114 file_t *fp; 1115 int error, fd; 1116 1117 /* fd_getvnode() will use the descriptor for us */ 1118 fd = SCARG(uap, fd); 1119 if ((error = fd_getvnode(fd, &fp)) != 0) 1120 return (error); 1121 vp = fp->f_data; 1122 1123 VREF(vp); 1124 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1125 if (vp->v_type != VDIR) 1126 error = ENOTDIR; 1127 else 1128 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1129 if (error) { 1130 vput(vp); 1131 goto out; 1132 } 1133 while ((mp = vp->v_mountedhere) != NULL) { 1134 error = vfs_busy(mp, NULL); 1135 vput(vp); 1136 if (error != 0) 1137 goto out; 1138 error = VFS_ROOT(mp, &tdp); 1139 vfs_unbusy(mp, false, NULL); 1140 if (error) 1141 goto out; 1142 vp = tdp; 1143 } 1144 VOP_UNLOCK(vp, 0); 1145 1146 /* 1147 * Disallow changing to a directory not under the process's 1148 * current root directory (if there is one). 1149 */ 1150 cwdi = p->p_cwdi; 1151 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1152 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1153 vrele(vp); 1154 error = EPERM; /* operation not permitted */ 1155 } else { 1156 vrele(cwdi->cwdi_cdir); 1157 cwdi->cwdi_cdir = vp; 1158 } 1159 rw_exit(&cwdi->cwdi_lock); 1160 1161 out: 1162 fd_putfile(fd); 1163 return (error); 1164 } 1165 1166 /* 1167 * Change this process's notion of the root directory to a given file 1168 * descriptor. 1169 */ 1170 int 1171 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1172 { 1173 struct proc *p = l->l_proc; 1174 struct cwdinfo *cwdi; 1175 struct vnode *vp; 1176 file_t *fp; 1177 int error, fd = SCARG(uap, fd); 1178 1179 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1180 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1181 return error; 1182 /* fd_getvnode() will use the descriptor for us */ 1183 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1184 return error; 1185 vp = fp->f_data; 1186 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1187 if (vp->v_type != VDIR) 1188 error = ENOTDIR; 1189 else 1190 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1191 VOP_UNLOCK(vp, 0); 1192 if (error) 1193 goto out; 1194 VREF(vp); 1195 1196 /* 1197 * Prevent escaping from chroot by putting the root under 1198 * the working directory. Silently chdir to / if we aren't 1199 * already there. 1200 */ 1201 cwdi = p->p_cwdi; 1202 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1203 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1204 /* 1205 * XXX would be more failsafe to change directory to a 1206 * deadfs node here instead 1207 */ 1208 vrele(cwdi->cwdi_cdir); 1209 VREF(vp); 1210 cwdi->cwdi_cdir = vp; 1211 } 1212 1213 if (cwdi->cwdi_rdir != NULL) 1214 vrele(cwdi->cwdi_rdir); 1215 cwdi->cwdi_rdir = vp; 1216 rw_exit(&cwdi->cwdi_lock); 1217 1218 out: 1219 fd_putfile(fd); 1220 return (error); 1221 } 1222 1223 /* 1224 * Change current working directory (``.''). 1225 */ 1226 /* ARGSUSED */ 1227 int 1228 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1229 { 1230 /* { 1231 syscallarg(const char *) path; 1232 } */ 1233 struct proc *p = l->l_proc; 1234 struct cwdinfo *cwdi; 1235 int error; 1236 struct nameidata nd; 1237 1238 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1239 SCARG(uap, path)); 1240 if ((error = change_dir(&nd, l)) != 0) 1241 return (error); 1242 cwdi = p->p_cwdi; 1243 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1244 vrele(cwdi->cwdi_cdir); 1245 cwdi->cwdi_cdir = nd.ni_vp; 1246 rw_exit(&cwdi->cwdi_lock); 1247 return (0); 1248 } 1249 1250 /* 1251 * Change notion of root (``/'') directory. 1252 */ 1253 /* ARGSUSED */ 1254 int 1255 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1256 { 1257 /* { 1258 syscallarg(const char *) path; 1259 } */ 1260 struct proc *p = l->l_proc; 1261 struct cwdinfo *cwdi; 1262 struct vnode *vp; 1263 int error; 1264 struct nameidata nd; 1265 1266 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1267 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1268 return (error); 1269 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1270 SCARG(uap, path)); 1271 if ((error = change_dir(&nd, l)) != 0) 1272 return (error); 1273 1274 cwdi = p->p_cwdi; 1275 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1276 if (cwdi->cwdi_rdir != NULL) 1277 vrele(cwdi->cwdi_rdir); 1278 vp = nd.ni_vp; 1279 cwdi->cwdi_rdir = vp; 1280 1281 /* 1282 * Prevent escaping from chroot by putting the root under 1283 * the working directory. Silently chdir to / if we aren't 1284 * already there. 1285 */ 1286 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1287 /* 1288 * XXX would be more failsafe to change directory to a 1289 * deadfs node here instead 1290 */ 1291 vrele(cwdi->cwdi_cdir); 1292 VREF(vp); 1293 cwdi->cwdi_cdir = vp; 1294 } 1295 rw_exit(&cwdi->cwdi_lock); 1296 1297 return (0); 1298 } 1299 1300 /* 1301 * Common routine for chroot and chdir. 1302 */ 1303 static int 1304 change_dir(struct nameidata *ndp, struct lwp *l) 1305 { 1306 struct vnode *vp; 1307 int error; 1308 1309 if ((error = namei(ndp)) != 0) 1310 return (error); 1311 vp = ndp->ni_vp; 1312 if (vp->v_type != VDIR) 1313 error = ENOTDIR; 1314 else 1315 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1316 1317 if (error) 1318 vput(vp); 1319 else 1320 VOP_UNLOCK(vp, 0); 1321 return (error); 1322 } 1323 1324 /* 1325 * Check permissions, allocate an open file structure, 1326 * and call the device open routine if any. 1327 */ 1328 int 1329 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1330 { 1331 /* { 1332 syscallarg(const char *) path; 1333 syscallarg(int) flags; 1334 syscallarg(int) mode; 1335 } */ 1336 struct proc *p = l->l_proc; 1337 struct cwdinfo *cwdi = p->p_cwdi; 1338 file_t *fp; 1339 struct vnode *vp; 1340 int flags, cmode; 1341 int type, indx, error; 1342 struct flock lf; 1343 struct nameidata nd; 1344 1345 flags = FFLAGS(SCARG(uap, flags)); 1346 if ((flags & (FREAD | FWRITE)) == 0) 1347 return (EINVAL); 1348 if ((error = fd_allocfile(&fp, &indx)) != 0) 1349 return (error); 1350 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1351 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1352 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1353 SCARG(uap, path)); 1354 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1355 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1356 fd_abort(p, fp, indx); 1357 if ((error == EDUPFD || error == EMOVEFD) && 1358 l->l_dupfd >= 0 && /* XXX from fdopen */ 1359 (error = 1360 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1361 *retval = indx; 1362 return (0); 1363 } 1364 if (error == ERESTART) 1365 error = EINTR; 1366 return (error); 1367 } 1368 1369 l->l_dupfd = 0; 1370 vp = nd.ni_vp; 1371 fp->f_flag = flags & FMASK; 1372 fp->f_type = DTYPE_VNODE; 1373 fp->f_ops = &vnops; 1374 fp->f_data = vp; 1375 if (flags & (O_EXLOCK | O_SHLOCK)) { 1376 lf.l_whence = SEEK_SET; 1377 lf.l_start = 0; 1378 lf.l_len = 0; 1379 if (flags & O_EXLOCK) 1380 lf.l_type = F_WRLCK; 1381 else 1382 lf.l_type = F_RDLCK; 1383 type = F_FLOCK; 1384 if ((flags & FNONBLOCK) == 0) 1385 type |= F_WAIT; 1386 VOP_UNLOCK(vp, 0); 1387 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1388 if (error) { 1389 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1390 fd_abort(p, fp, indx); 1391 return (error); 1392 } 1393 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1394 atomic_or_uint(&fp->f_flag, FHASLOCK); 1395 } 1396 VOP_UNLOCK(vp, 0); 1397 *retval = indx; 1398 fd_affix(p, fp, indx); 1399 return (0); 1400 } 1401 1402 static void 1403 vfs__fhfree(fhandle_t *fhp) 1404 { 1405 size_t fhsize; 1406 1407 if (fhp == NULL) { 1408 return; 1409 } 1410 fhsize = FHANDLE_SIZE(fhp); 1411 kmem_free(fhp, fhsize); 1412 } 1413 1414 /* 1415 * vfs_composefh: compose a filehandle. 1416 */ 1417 1418 int 1419 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1420 { 1421 struct mount *mp; 1422 struct fid *fidp; 1423 int error; 1424 size_t needfhsize; 1425 size_t fidsize; 1426 1427 mp = vp->v_mount; 1428 fidp = NULL; 1429 if (*fh_size < FHANDLE_SIZE_MIN) { 1430 fidsize = 0; 1431 } else { 1432 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1433 if (fhp != NULL) { 1434 memset(fhp, 0, *fh_size); 1435 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1436 fidp = &fhp->fh_fid; 1437 } 1438 } 1439 error = VFS_VPTOFH(vp, fidp, &fidsize); 1440 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1441 if (error == 0 && *fh_size < needfhsize) { 1442 error = E2BIG; 1443 } 1444 *fh_size = needfhsize; 1445 return error; 1446 } 1447 1448 int 1449 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1450 { 1451 struct mount *mp; 1452 fhandle_t *fhp; 1453 size_t fhsize; 1454 size_t fidsize; 1455 int error; 1456 1457 *fhpp = NULL; 1458 mp = vp->v_mount; 1459 fidsize = 0; 1460 error = VFS_VPTOFH(vp, NULL, &fidsize); 1461 KASSERT(error != 0); 1462 if (error != E2BIG) { 1463 goto out; 1464 } 1465 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1466 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1467 if (fhp == NULL) { 1468 error = ENOMEM; 1469 goto out; 1470 } 1471 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1472 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1473 if (error == 0) { 1474 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1475 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1476 *fhpp = fhp; 1477 } else { 1478 kmem_free(fhp, fhsize); 1479 } 1480 out: 1481 return error; 1482 } 1483 1484 void 1485 vfs_composefh_free(fhandle_t *fhp) 1486 { 1487 1488 vfs__fhfree(fhp); 1489 } 1490 1491 /* 1492 * vfs_fhtovp: lookup a vnode by a filehandle. 1493 */ 1494 1495 int 1496 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1497 { 1498 struct mount *mp; 1499 int error; 1500 1501 *vpp = NULL; 1502 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1503 if (mp == NULL) { 1504 error = ESTALE; 1505 goto out; 1506 } 1507 if (mp->mnt_op->vfs_fhtovp == NULL) { 1508 error = EOPNOTSUPP; 1509 goto out; 1510 } 1511 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1512 out: 1513 return error; 1514 } 1515 1516 /* 1517 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1518 * the needed size. 1519 */ 1520 1521 int 1522 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1523 { 1524 fhandle_t *fhp; 1525 int error; 1526 1527 *fhpp = NULL; 1528 if (fhsize > FHANDLE_SIZE_MAX) { 1529 return EINVAL; 1530 } 1531 if (fhsize < FHANDLE_SIZE_MIN) { 1532 return EINVAL; 1533 } 1534 again: 1535 fhp = kmem_alloc(fhsize, KM_SLEEP); 1536 if (fhp == NULL) { 1537 return ENOMEM; 1538 } 1539 error = copyin(ufhp, fhp, fhsize); 1540 if (error == 0) { 1541 /* XXX this check shouldn't be here */ 1542 if (FHANDLE_SIZE(fhp) == fhsize) { 1543 *fhpp = fhp; 1544 return 0; 1545 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1546 /* 1547 * a kludge for nfsv2 padded handles. 1548 */ 1549 size_t sz; 1550 1551 sz = FHANDLE_SIZE(fhp); 1552 kmem_free(fhp, fhsize); 1553 fhsize = sz; 1554 goto again; 1555 } else { 1556 /* 1557 * userland told us wrong size. 1558 */ 1559 error = EINVAL; 1560 } 1561 } 1562 kmem_free(fhp, fhsize); 1563 return error; 1564 } 1565 1566 void 1567 vfs_copyinfh_free(fhandle_t *fhp) 1568 { 1569 1570 vfs__fhfree(fhp); 1571 } 1572 1573 /* 1574 * Get file handle system call 1575 */ 1576 int 1577 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1578 { 1579 /* { 1580 syscallarg(char *) fname; 1581 syscallarg(fhandle_t *) fhp; 1582 syscallarg(size_t *) fh_size; 1583 } */ 1584 struct vnode *vp; 1585 fhandle_t *fh; 1586 int error; 1587 struct nameidata nd; 1588 size_t sz; 1589 size_t usz; 1590 1591 /* 1592 * Must be super user 1593 */ 1594 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1595 0, NULL, NULL, NULL); 1596 if (error) 1597 return (error); 1598 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1599 SCARG(uap, fname)); 1600 error = namei(&nd); 1601 if (error) 1602 return (error); 1603 vp = nd.ni_vp; 1604 error = vfs_composefh_alloc(vp, &fh); 1605 vput(vp); 1606 if (error != 0) { 1607 goto out; 1608 } 1609 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1610 if (error != 0) { 1611 goto out; 1612 } 1613 sz = FHANDLE_SIZE(fh); 1614 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1615 if (error != 0) { 1616 goto out; 1617 } 1618 if (usz >= sz) { 1619 error = copyout(fh, SCARG(uap, fhp), sz); 1620 } else { 1621 error = E2BIG; 1622 } 1623 out: 1624 vfs_composefh_free(fh); 1625 return (error); 1626 } 1627 1628 /* 1629 * Open a file given a file handle. 1630 * 1631 * Check permissions, allocate an open file structure, 1632 * and call the device open routine if any. 1633 */ 1634 1635 int 1636 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1637 register_t *retval) 1638 { 1639 file_t *fp; 1640 struct vnode *vp = NULL; 1641 kauth_cred_t cred = l->l_cred; 1642 file_t *nfp; 1643 int type, indx, error=0; 1644 struct flock lf; 1645 struct vattr va; 1646 fhandle_t *fh; 1647 int flags; 1648 proc_t *p; 1649 1650 p = curproc; 1651 1652 /* 1653 * Must be super user 1654 */ 1655 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1656 0, NULL, NULL, NULL))) 1657 return (error); 1658 1659 flags = FFLAGS(oflags); 1660 if ((flags & (FREAD | FWRITE)) == 0) 1661 return (EINVAL); 1662 if ((flags & O_CREAT)) 1663 return (EINVAL); 1664 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1665 return (error); 1666 fp = nfp; 1667 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1668 if (error != 0) { 1669 goto bad; 1670 } 1671 error = vfs_fhtovp(fh, &vp); 1672 if (error != 0) { 1673 goto bad; 1674 } 1675 1676 /* Now do an effective vn_open */ 1677 1678 if (vp->v_type == VSOCK) { 1679 error = EOPNOTSUPP; 1680 goto bad; 1681 } 1682 error = vn_openchk(vp, cred, flags); 1683 if (error != 0) 1684 goto bad; 1685 if (flags & O_TRUNC) { 1686 VOP_UNLOCK(vp, 0); /* XXX */ 1687 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1688 VATTR_NULL(&va); 1689 va.va_size = 0; 1690 error = VOP_SETATTR(vp, &va, cred); 1691 if (error) 1692 goto bad; 1693 } 1694 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1695 goto bad; 1696 if (flags & FWRITE) { 1697 mutex_enter(&vp->v_interlock); 1698 vp->v_writecount++; 1699 mutex_exit(&vp->v_interlock); 1700 } 1701 1702 /* done with modified vn_open, now finish what sys_open does. */ 1703 1704 fp->f_flag = flags & FMASK; 1705 fp->f_type = DTYPE_VNODE; 1706 fp->f_ops = &vnops; 1707 fp->f_data = vp; 1708 if (flags & (O_EXLOCK | O_SHLOCK)) { 1709 lf.l_whence = SEEK_SET; 1710 lf.l_start = 0; 1711 lf.l_len = 0; 1712 if (flags & O_EXLOCK) 1713 lf.l_type = F_WRLCK; 1714 else 1715 lf.l_type = F_RDLCK; 1716 type = F_FLOCK; 1717 if ((flags & FNONBLOCK) == 0) 1718 type |= F_WAIT; 1719 VOP_UNLOCK(vp, 0); 1720 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1721 if (error) { 1722 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1723 fd_abort(p, fp, indx); 1724 return (error); 1725 } 1726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1727 atomic_or_uint(&fp->f_flag, FHASLOCK); 1728 } 1729 VOP_UNLOCK(vp, 0); 1730 *retval = indx; 1731 fd_affix(p, fp, indx); 1732 vfs_copyinfh_free(fh); 1733 return (0); 1734 1735 bad: 1736 fd_abort(p, fp, indx); 1737 if (vp != NULL) 1738 vput(vp); 1739 vfs_copyinfh_free(fh); 1740 return (error); 1741 } 1742 1743 int 1744 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1745 { 1746 /* { 1747 syscallarg(const void *) fhp; 1748 syscallarg(size_t) fh_size; 1749 syscallarg(int) flags; 1750 } */ 1751 1752 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1753 SCARG(uap, flags), retval); 1754 } 1755 1756 int 1757 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1758 { 1759 int error; 1760 fhandle_t *fh; 1761 struct vnode *vp; 1762 1763 /* 1764 * Must be super user 1765 */ 1766 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1767 0, NULL, NULL, NULL))) 1768 return (error); 1769 1770 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1771 if (error != 0) 1772 return error; 1773 1774 error = vfs_fhtovp(fh, &vp); 1775 vfs_copyinfh_free(fh); 1776 if (error != 0) 1777 return error; 1778 1779 error = vn_stat(vp, sb); 1780 vput(vp); 1781 return error; 1782 } 1783 1784 1785 /* ARGSUSED */ 1786 int 1787 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval) 1788 { 1789 /* { 1790 syscallarg(const void *) fhp; 1791 syscallarg(size_t) fh_size; 1792 syscallarg(struct stat *) sb; 1793 } */ 1794 struct stat sb; 1795 int error; 1796 1797 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1798 if (error) 1799 return error; 1800 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1801 } 1802 1803 int 1804 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1805 int flags) 1806 { 1807 fhandle_t *fh; 1808 struct mount *mp; 1809 struct vnode *vp; 1810 int error; 1811 1812 /* 1813 * Must be super user 1814 */ 1815 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1816 0, NULL, NULL, NULL))) 1817 return error; 1818 1819 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1820 if (error != 0) 1821 return error; 1822 1823 error = vfs_fhtovp(fh, &vp); 1824 vfs_copyinfh_free(fh); 1825 if (error != 0) 1826 return error; 1827 1828 mp = vp->v_mount; 1829 error = dostatvfs(mp, sb, l, flags, 1); 1830 vput(vp); 1831 return error; 1832 } 1833 1834 /* ARGSUSED */ 1835 int 1836 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1837 { 1838 /* { 1839 syscallarg(const void *) fhp; 1840 syscallarg(size_t) fh_size; 1841 syscallarg(struct statvfs *) buf; 1842 syscallarg(int) flags; 1843 } */ 1844 struct statvfs *sb = STATVFSBUF_GET(); 1845 int error; 1846 1847 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1848 SCARG(uap, flags)); 1849 if (error == 0) 1850 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1851 STATVFSBUF_PUT(sb); 1852 return error; 1853 } 1854 1855 /* 1856 * Create a special file. 1857 */ 1858 /* ARGSUSED */ 1859 int 1860 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval) 1861 { 1862 /* { 1863 syscallarg(const char *) path; 1864 syscallarg(int) mode; 1865 syscallarg(int) dev; 1866 } */ 1867 struct proc *p = l->l_proc; 1868 struct vnode *vp; 1869 struct vattr vattr; 1870 int error, optype; 1871 struct nameidata nd; 1872 char *path; 1873 const char *cpath; 1874 enum uio_seg seg = UIO_USERSPACE; 1875 1876 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1877 0, NULL, NULL, NULL)) != 0) 1878 return (error); 1879 1880 optype = VOP_MKNOD_DESCOFFSET; 1881 1882 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1883 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1884 1885 if ((error = namei(&nd)) != 0) 1886 goto out; 1887 vp = nd.ni_vp; 1888 if (vp != NULL) 1889 error = EEXIST; 1890 else { 1891 VATTR_NULL(&vattr); 1892 /* We will read cwdi->cwdi_cmask unlocked. */ 1893 vattr.va_mode = 1894 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1895 vattr.va_rdev = SCARG(uap, dev); 1896 1897 switch (SCARG(uap, mode) & S_IFMT) { 1898 case S_IFMT: /* used by badsect to flag bad sectors */ 1899 vattr.va_type = VBAD; 1900 break; 1901 case S_IFCHR: 1902 vattr.va_type = VCHR; 1903 break; 1904 case S_IFBLK: 1905 vattr.va_type = VBLK; 1906 break; 1907 case S_IFWHT: 1908 optype = VOP_WHITEOUT_DESCOFFSET; 1909 break; 1910 case S_IFREG: 1911 #if NVERIEXEC > 0 1912 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1913 O_CREAT); 1914 #endif /* NVERIEXEC > 0 */ 1915 vattr.va_type = VREG; 1916 vattr.va_rdev = VNOVAL; 1917 optype = VOP_CREATE_DESCOFFSET; 1918 break; 1919 default: 1920 error = EINVAL; 1921 break; 1922 } 1923 } 1924 if (!error) { 1925 switch (optype) { 1926 case VOP_WHITEOUT_DESCOFFSET: 1927 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1928 if (error) 1929 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1930 vput(nd.ni_dvp); 1931 break; 1932 1933 case VOP_MKNOD_DESCOFFSET: 1934 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1935 &nd.ni_cnd, &vattr); 1936 if (error == 0) 1937 vput(nd.ni_vp); 1938 break; 1939 1940 case VOP_CREATE_DESCOFFSET: 1941 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1942 &nd.ni_cnd, &vattr); 1943 if (error == 0) 1944 vput(nd.ni_vp); 1945 break; 1946 } 1947 } else { 1948 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1949 if (nd.ni_dvp == vp) 1950 vrele(nd.ni_dvp); 1951 else 1952 vput(nd.ni_dvp); 1953 if (vp) 1954 vrele(vp); 1955 } 1956 out: 1957 VERIEXEC_PATH_PUT(path); 1958 return (error); 1959 } 1960 1961 /* 1962 * Create a named pipe. 1963 */ 1964 /* ARGSUSED */ 1965 int 1966 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1967 { 1968 /* { 1969 syscallarg(const char *) path; 1970 syscallarg(int) mode; 1971 } */ 1972 struct proc *p = l->l_proc; 1973 struct vattr vattr; 1974 int error; 1975 struct nameidata nd; 1976 1977 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1978 SCARG(uap, path)); 1979 if ((error = namei(&nd)) != 0) 1980 return (error); 1981 if (nd.ni_vp != NULL) { 1982 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1983 if (nd.ni_dvp == nd.ni_vp) 1984 vrele(nd.ni_dvp); 1985 else 1986 vput(nd.ni_dvp); 1987 vrele(nd.ni_vp); 1988 return (EEXIST); 1989 } 1990 VATTR_NULL(&vattr); 1991 vattr.va_type = VFIFO; 1992 /* We will read cwdi->cwdi_cmask unlocked. */ 1993 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1994 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1995 if (error == 0) 1996 vput(nd.ni_vp); 1997 return (error); 1998 } 1999 2000 /* 2001 * Make a hard file link. 2002 */ 2003 /* ARGSUSED */ 2004 int 2005 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2006 { 2007 /* { 2008 syscallarg(const char *) path; 2009 syscallarg(const char *) link; 2010 } */ 2011 struct vnode *vp; 2012 struct nameidata nd; 2013 int error; 2014 2015 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2016 SCARG(uap, path)); 2017 if ((error = namei(&nd)) != 0) 2018 return (error); 2019 vp = nd.ni_vp; 2020 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2021 SCARG(uap, link)); 2022 if ((error = namei(&nd)) != 0) 2023 goto out; 2024 if (nd.ni_vp) { 2025 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2026 if (nd.ni_dvp == nd.ni_vp) 2027 vrele(nd.ni_dvp); 2028 else 2029 vput(nd.ni_dvp); 2030 vrele(nd.ni_vp); 2031 error = EEXIST; 2032 goto out; 2033 } 2034 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2035 out: 2036 vrele(vp); 2037 return (error); 2038 } 2039 2040 /* 2041 * Make a symbolic link. 2042 */ 2043 /* ARGSUSED */ 2044 int 2045 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2046 { 2047 /* { 2048 syscallarg(const char *) path; 2049 syscallarg(const char *) link; 2050 } */ 2051 struct proc *p = l->l_proc; 2052 struct vattr vattr; 2053 char *path; 2054 int error; 2055 struct nameidata nd; 2056 2057 path = PNBUF_GET(); 2058 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2059 if (error) 2060 goto out; 2061 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2062 SCARG(uap, link)); 2063 if ((error = namei(&nd)) != 0) 2064 goto out; 2065 if (nd.ni_vp) { 2066 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2067 if (nd.ni_dvp == nd.ni_vp) 2068 vrele(nd.ni_dvp); 2069 else 2070 vput(nd.ni_dvp); 2071 vrele(nd.ni_vp); 2072 error = EEXIST; 2073 goto out; 2074 } 2075 VATTR_NULL(&vattr); 2076 vattr.va_type = VLNK; 2077 /* We will read cwdi->cwdi_cmask unlocked. */ 2078 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2079 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2080 if (error == 0) 2081 vput(nd.ni_vp); 2082 out: 2083 PNBUF_PUT(path); 2084 return (error); 2085 } 2086 2087 /* 2088 * Delete a whiteout from the filesystem. 2089 */ 2090 /* ARGSUSED */ 2091 int 2092 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2093 { 2094 /* { 2095 syscallarg(const char *) path; 2096 } */ 2097 int error; 2098 struct nameidata nd; 2099 2100 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2101 UIO_USERSPACE, SCARG(uap, path)); 2102 error = namei(&nd); 2103 if (error) 2104 return (error); 2105 2106 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2107 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2108 if (nd.ni_dvp == nd.ni_vp) 2109 vrele(nd.ni_dvp); 2110 else 2111 vput(nd.ni_dvp); 2112 if (nd.ni_vp) 2113 vrele(nd.ni_vp); 2114 return (EEXIST); 2115 } 2116 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2117 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2118 vput(nd.ni_dvp); 2119 return (error); 2120 } 2121 2122 /* 2123 * Delete a name from the filesystem. 2124 */ 2125 /* ARGSUSED */ 2126 int 2127 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2128 { 2129 /* { 2130 syscallarg(const char *) path; 2131 } */ 2132 2133 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2134 } 2135 2136 int 2137 do_sys_unlink(const char *arg, enum uio_seg seg) 2138 { 2139 struct vnode *vp; 2140 int error; 2141 struct nameidata nd; 2142 kauth_cred_t cred; 2143 char *path; 2144 const char *cpath; 2145 2146 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2147 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2148 2149 if ((error = namei(&nd)) != 0) 2150 goto out; 2151 vp = nd.ni_vp; 2152 2153 /* 2154 * The root of a mounted filesystem cannot be deleted. 2155 */ 2156 if (vp->v_vflag & VV_ROOT) { 2157 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2158 if (nd.ni_dvp == vp) 2159 vrele(nd.ni_dvp); 2160 else 2161 vput(nd.ni_dvp); 2162 vput(vp); 2163 error = EBUSY; 2164 goto out; 2165 } 2166 2167 #if NVERIEXEC > 0 2168 /* Handle remove requests for veriexec entries. */ 2169 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2170 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2171 if (nd.ni_dvp == vp) 2172 vrele(nd.ni_dvp); 2173 else 2174 vput(nd.ni_dvp); 2175 vput(vp); 2176 goto out; 2177 } 2178 #endif /* NVERIEXEC > 0 */ 2179 2180 cred = kauth_cred_get(); 2181 #ifdef FILEASSOC 2182 (void)fileassoc_file_delete(vp); 2183 #endif /* FILEASSOC */ 2184 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2185 out: 2186 VERIEXEC_PATH_PUT(path); 2187 return (error); 2188 } 2189 2190 /* 2191 * Reposition read/write file offset. 2192 */ 2193 int 2194 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2195 { 2196 /* { 2197 syscallarg(int) fd; 2198 syscallarg(int) pad; 2199 syscallarg(off_t) offset; 2200 syscallarg(int) whence; 2201 } */ 2202 kauth_cred_t cred = l->l_cred; 2203 file_t *fp; 2204 struct vnode *vp; 2205 struct vattr vattr; 2206 off_t newoff; 2207 int error, fd; 2208 2209 fd = SCARG(uap, fd); 2210 2211 if ((fp = fd_getfile(fd)) == NULL) 2212 return (EBADF); 2213 2214 vp = fp->f_data; 2215 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2216 error = ESPIPE; 2217 goto out; 2218 } 2219 2220 switch (SCARG(uap, whence)) { 2221 case SEEK_CUR: 2222 newoff = fp->f_offset + SCARG(uap, offset); 2223 break; 2224 case SEEK_END: 2225 error = VOP_GETATTR(vp, &vattr, cred); 2226 if (error) { 2227 goto out; 2228 } 2229 newoff = SCARG(uap, offset) + vattr.va_size; 2230 break; 2231 case SEEK_SET: 2232 newoff = SCARG(uap, offset); 2233 break; 2234 default: 2235 error = EINVAL; 2236 goto out; 2237 } 2238 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2239 *(off_t *)retval = fp->f_offset = newoff; 2240 } 2241 out: 2242 fd_putfile(fd); 2243 return (error); 2244 } 2245 2246 /* 2247 * Positional read system call. 2248 */ 2249 int 2250 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2251 { 2252 /* { 2253 syscallarg(int) fd; 2254 syscallarg(void *) buf; 2255 syscallarg(size_t) nbyte; 2256 syscallarg(off_t) offset; 2257 } */ 2258 file_t *fp; 2259 struct vnode *vp; 2260 off_t offset; 2261 int error, fd = SCARG(uap, fd); 2262 2263 if ((fp = fd_getfile(fd)) == NULL) 2264 return (EBADF); 2265 2266 if ((fp->f_flag & FREAD) == 0) { 2267 fd_putfile(fd); 2268 return (EBADF); 2269 } 2270 2271 vp = fp->f_data; 2272 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2273 error = ESPIPE; 2274 goto out; 2275 } 2276 2277 offset = SCARG(uap, offset); 2278 2279 /* 2280 * XXX This works because no file systems actually 2281 * XXX take any action on the seek operation. 2282 */ 2283 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2284 goto out; 2285 2286 /* dofileread() will unuse the descriptor for us */ 2287 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2288 &offset, 0, retval)); 2289 2290 out: 2291 fd_putfile(fd); 2292 return (error); 2293 } 2294 2295 /* 2296 * Positional scatter read system call. 2297 */ 2298 int 2299 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2300 { 2301 /* { 2302 syscallarg(int) fd; 2303 syscallarg(const struct iovec *) iovp; 2304 syscallarg(int) iovcnt; 2305 syscallarg(off_t) offset; 2306 } */ 2307 off_t offset = SCARG(uap, offset); 2308 2309 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2310 SCARG(uap, iovcnt), &offset, 0, retval); 2311 } 2312 2313 /* 2314 * Positional write system call. 2315 */ 2316 int 2317 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2318 { 2319 /* { 2320 syscallarg(int) fd; 2321 syscallarg(const void *) buf; 2322 syscallarg(size_t) nbyte; 2323 syscallarg(off_t) offset; 2324 } */ 2325 file_t *fp; 2326 struct vnode *vp; 2327 off_t offset; 2328 int error, fd = SCARG(uap, fd); 2329 2330 if ((fp = fd_getfile(fd)) == NULL) 2331 return (EBADF); 2332 2333 if ((fp->f_flag & FWRITE) == 0) { 2334 fd_putfile(fd); 2335 return (EBADF); 2336 } 2337 2338 vp = fp->f_data; 2339 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2340 error = ESPIPE; 2341 goto out; 2342 } 2343 2344 offset = SCARG(uap, offset); 2345 2346 /* 2347 * XXX This works because no file systems actually 2348 * XXX take any action on the seek operation. 2349 */ 2350 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2351 goto out; 2352 2353 /* dofilewrite() will unuse the descriptor for us */ 2354 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2355 &offset, 0, retval)); 2356 2357 out: 2358 fd_putfile(fd); 2359 return (error); 2360 } 2361 2362 /* 2363 * Positional gather write system call. 2364 */ 2365 int 2366 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2367 { 2368 /* { 2369 syscallarg(int) fd; 2370 syscallarg(const struct iovec *) iovp; 2371 syscallarg(int) iovcnt; 2372 syscallarg(off_t) offset; 2373 } */ 2374 off_t offset = SCARG(uap, offset); 2375 2376 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2377 SCARG(uap, iovcnt), &offset, 0, retval); 2378 } 2379 2380 /* 2381 * Check access permissions. 2382 */ 2383 int 2384 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2385 { 2386 /* { 2387 syscallarg(const char *) path; 2388 syscallarg(int) flags; 2389 } */ 2390 kauth_cred_t cred; 2391 struct vnode *vp; 2392 int error, flags; 2393 struct nameidata nd; 2394 2395 cred = kauth_cred_dup(l->l_cred); 2396 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2397 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2398 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2399 SCARG(uap, path)); 2400 /* Override default credentials */ 2401 nd.ni_cnd.cn_cred = cred; 2402 if ((error = namei(&nd)) != 0) 2403 goto out; 2404 vp = nd.ni_vp; 2405 2406 /* Flags == 0 means only check for existence. */ 2407 if (SCARG(uap, flags)) { 2408 flags = 0; 2409 if (SCARG(uap, flags) & R_OK) 2410 flags |= VREAD; 2411 if (SCARG(uap, flags) & W_OK) 2412 flags |= VWRITE; 2413 if (SCARG(uap, flags) & X_OK) 2414 flags |= VEXEC; 2415 2416 error = VOP_ACCESS(vp, flags, cred); 2417 if (!error && (flags & VWRITE)) 2418 error = vn_writechk(vp); 2419 } 2420 vput(vp); 2421 out: 2422 kauth_cred_free(cred); 2423 return (error); 2424 } 2425 2426 /* 2427 * Common code for all sys_stat functions, including compat versions. 2428 */ 2429 int 2430 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2431 { 2432 int error; 2433 struct nameidata nd; 2434 2435 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2436 UIO_USERSPACE, path); 2437 error = namei(&nd); 2438 if (error != 0) 2439 return error; 2440 error = vn_stat(nd.ni_vp, sb); 2441 vput(nd.ni_vp); 2442 return error; 2443 } 2444 2445 /* 2446 * Get file status; this version follows links. 2447 */ 2448 /* ARGSUSED */ 2449 int 2450 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval) 2451 { 2452 /* { 2453 syscallarg(const char *) path; 2454 syscallarg(struct stat *) ub; 2455 } */ 2456 struct stat sb; 2457 int error; 2458 2459 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2460 if (error) 2461 return error; 2462 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2463 } 2464 2465 /* 2466 * Get file status; this version does not follow links. 2467 */ 2468 /* ARGSUSED */ 2469 int 2470 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval) 2471 { 2472 /* { 2473 syscallarg(const char *) path; 2474 syscallarg(struct stat *) ub; 2475 } */ 2476 struct stat sb; 2477 int error; 2478 2479 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2480 if (error) 2481 return error; 2482 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2483 } 2484 2485 /* 2486 * Get configurable pathname variables. 2487 */ 2488 /* ARGSUSED */ 2489 int 2490 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2491 { 2492 /* { 2493 syscallarg(const char *) path; 2494 syscallarg(int) name; 2495 } */ 2496 int error; 2497 struct nameidata nd; 2498 2499 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2500 SCARG(uap, path)); 2501 if ((error = namei(&nd)) != 0) 2502 return (error); 2503 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2504 vput(nd.ni_vp); 2505 return (error); 2506 } 2507 2508 /* 2509 * Return target name of a symbolic link. 2510 */ 2511 /* ARGSUSED */ 2512 int 2513 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2514 { 2515 /* { 2516 syscallarg(const char *) path; 2517 syscallarg(char *) buf; 2518 syscallarg(size_t) count; 2519 } */ 2520 struct vnode *vp; 2521 struct iovec aiov; 2522 struct uio auio; 2523 int error; 2524 struct nameidata nd; 2525 2526 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2527 SCARG(uap, path)); 2528 if ((error = namei(&nd)) != 0) 2529 return (error); 2530 vp = nd.ni_vp; 2531 if (vp->v_type != VLNK) 2532 error = EINVAL; 2533 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2534 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2535 aiov.iov_base = SCARG(uap, buf); 2536 aiov.iov_len = SCARG(uap, count); 2537 auio.uio_iov = &aiov; 2538 auio.uio_iovcnt = 1; 2539 auio.uio_offset = 0; 2540 auio.uio_rw = UIO_READ; 2541 KASSERT(l == curlwp); 2542 auio.uio_vmspace = l->l_proc->p_vmspace; 2543 auio.uio_resid = SCARG(uap, count); 2544 error = VOP_READLINK(vp, &auio, l->l_cred); 2545 } 2546 vput(vp); 2547 *retval = SCARG(uap, count) - auio.uio_resid; 2548 return (error); 2549 } 2550 2551 /* 2552 * Change flags of a file given a path name. 2553 */ 2554 /* ARGSUSED */ 2555 int 2556 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2557 { 2558 /* { 2559 syscallarg(const char *) path; 2560 syscallarg(u_long) flags; 2561 } */ 2562 struct vnode *vp; 2563 int error; 2564 struct nameidata nd; 2565 2566 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2567 SCARG(uap, path)); 2568 if ((error = namei(&nd)) != 0) 2569 return (error); 2570 vp = nd.ni_vp; 2571 error = change_flags(vp, SCARG(uap, flags), l); 2572 vput(vp); 2573 return (error); 2574 } 2575 2576 /* 2577 * Change flags of a file given a file descriptor. 2578 */ 2579 /* ARGSUSED */ 2580 int 2581 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2582 { 2583 /* { 2584 syscallarg(int) fd; 2585 syscallarg(u_long) flags; 2586 } */ 2587 struct vnode *vp; 2588 file_t *fp; 2589 int error; 2590 2591 /* fd_getvnode() will use the descriptor for us */ 2592 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2593 return (error); 2594 vp = fp->f_data; 2595 error = change_flags(vp, SCARG(uap, flags), l); 2596 VOP_UNLOCK(vp, 0); 2597 fd_putfile(SCARG(uap, fd)); 2598 return (error); 2599 } 2600 2601 /* 2602 * Change flags of a file given a path name; this version does 2603 * not follow links. 2604 */ 2605 int 2606 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2607 { 2608 /* { 2609 syscallarg(const char *) path; 2610 syscallarg(u_long) flags; 2611 } */ 2612 struct vnode *vp; 2613 int error; 2614 struct nameidata nd; 2615 2616 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2617 SCARG(uap, path)); 2618 if ((error = namei(&nd)) != 0) 2619 return (error); 2620 vp = nd.ni_vp; 2621 error = change_flags(vp, SCARG(uap, flags), l); 2622 vput(vp); 2623 return (error); 2624 } 2625 2626 /* 2627 * Common routine to change flags of a file. 2628 */ 2629 int 2630 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2631 { 2632 struct vattr vattr; 2633 int error; 2634 2635 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2636 /* 2637 * Non-superusers cannot change the flags on devices, even if they 2638 * own them. 2639 */ 2640 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2641 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2642 goto out; 2643 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2644 error = EINVAL; 2645 goto out; 2646 } 2647 } 2648 VATTR_NULL(&vattr); 2649 vattr.va_flags = flags; 2650 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2651 out: 2652 return (error); 2653 } 2654 2655 /* 2656 * Change mode of a file given path name; this version follows links. 2657 */ 2658 /* ARGSUSED */ 2659 int 2660 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2661 { 2662 /* { 2663 syscallarg(const char *) path; 2664 syscallarg(int) mode; 2665 } */ 2666 int error; 2667 struct nameidata nd; 2668 2669 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2670 SCARG(uap, path)); 2671 if ((error = namei(&nd)) != 0) 2672 return (error); 2673 2674 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2675 2676 vrele(nd.ni_vp); 2677 return (error); 2678 } 2679 2680 /* 2681 * Change mode of a file given a file descriptor. 2682 */ 2683 /* ARGSUSED */ 2684 int 2685 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2686 { 2687 /* { 2688 syscallarg(int) fd; 2689 syscallarg(int) mode; 2690 } */ 2691 file_t *fp; 2692 int error; 2693 2694 /* fd_getvnode() will use the descriptor for us */ 2695 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2696 return (error); 2697 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2698 fd_putfile(SCARG(uap, fd)); 2699 return (error); 2700 } 2701 2702 /* 2703 * Change mode of a file given path name; this version does not follow links. 2704 */ 2705 /* ARGSUSED */ 2706 int 2707 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2708 { 2709 /* { 2710 syscallarg(const char *) path; 2711 syscallarg(int) mode; 2712 } */ 2713 int error; 2714 struct nameidata nd; 2715 2716 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2717 SCARG(uap, path)); 2718 if ((error = namei(&nd)) != 0) 2719 return (error); 2720 2721 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2722 2723 vrele(nd.ni_vp); 2724 return (error); 2725 } 2726 2727 /* 2728 * Common routine to set mode given a vnode. 2729 */ 2730 static int 2731 change_mode(struct vnode *vp, int mode, struct lwp *l) 2732 { 2733 struct vattr vattr; 2734 int error; 2735 2736 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2737 VATTR_NULL(&vattr); 2738 vattr.va_mode = mode & ALLPERMS; 2739 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2740 VOP_UNLOCK(vp, 0); 2741 return (error); 2742 } 2743 2744 /* 2745 * Set ownership given a path name; this version follows links. 2746 */ 2747 /* ARGSUSED */ 2748 int 2749 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2750 { 2751 /* { 2752 syscallarg(const char *) path; 2753 syscallarg(uid_t) uid; 2754 syscallarg(gid_t) gid; 2755 } */ 2756 int error; 2757 struct nameidata nd; 2758 2759 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2760 SCARG(uap, path)); 2761 if ((error = namei(&nd)) != 0) 2762 return (error); 2763 2764 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2765 2766 vrele(nd.ni_vp); 2767 return (error); 2768 } 2769 2770 /* 2771 * Set ownership given a path name; this version follows links. 2772 * Provides POSIX semantics. 2773 */ 2774 /* ARGSUSED */ 2775 int 2776 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2777 { 2778 /* { 2779 syscallarg(const char *) path; 2780 syscallarg(uid_t) uid; 2781 syscallarg(gid_t) gid; 2782 } */ 2783 int error; 2784 struct nameidata nd; 2785 2786 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2787 SCARG(uap, path)); 2788 if ((error = namei(&nd)) != 0) 2789 return (error); 2790 2791 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2792 2793 vrele(nd.ni_vp); 2794 return (error); 2795 } 2796 2797 /* 2798 * Set ownership given a file descriptor. 2799 */ 2800 /* ARGSUSED */ 2801 int 2802 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2803 { 2804 /* { 2805 syscallarg(int) fd; 2806 syscallarg(uid_t) uid; 2807 syscallarg(gid_t) gid; 2808 } */ 2809 int error; 2810 file_t *fp; 2811 2812 /* fd_getvnode() will use the descriptor for us */ 2813 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2814 return (error); 2815 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2816 l, 0); 2817 fd_putfile(SCARG(uap, fd)); 2818 return (error); 2819 } 2820 2821 /* 2822 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2823 */ 2824 /* ARGSUSED */ 2825 int 2826 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2827 { 2828 /* { 2829 syscallarg(int) fd; 2830 syscallarg(uid_t) uid; 2831 syscallarg(gid_t) gid; 2832 } */ 2833 int error; 2834 file_t *fp; 2835 2836 /* fd_getvnode() will use the descriptor for us */ 2837 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2838 return (error); 2839 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2840 l, 1); 2841 fd_putfile(SCARG(uap, fd)); 2842 return (error); 2843 } 2844 2845 /* 2846 * Set ownership given a path name; this version does not follow links. 2847 */ 2848 /* ARGSUSED */ 2849 int 2850 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2851 { 2852 /* { 2853 syscallarg(const char *) path; 2854 syscallarg(uid_t) uid; 2855 syscallarg(gid_t) gid; 2856 } */ 2857 int error; 2858 struct nameidata nd; 2859 2860 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2861 SCARG(uap, path)); 2862 if ((error = namei(&nd)) != 0) 2863 return (error); 2864 2865 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2866 2867 vrele(nd.ni_vp); 2868 return (error); 2869 } 2870 2871 /* 2872 * Set ownership given a path name; this version does not follow links. 2873 * Provides POSIX/XPG semantics. 2874 */ 2875 /* ARGSUSED */ 2876 int 2877 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2878 { 2879 /* { 2880 syscallarg(const char *) path; 2881 syscallarg(uid_t) uid; 2882 syscallarg(gid_t) gid; 2883 } */ 2884 int error; 2885 struct nameidata nd; 2886 2887 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2888 SCARG(uap, path)); 2889 if ((error = namei(&nd)) != 0) 2890 return (error); 2891 2892 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2893 2894 vrele(nd.ni_vp); 2895 return (error); 2896 } 2897 2898 /* 2899 * Common routine to set ownership given a vnode. 2900 */ 2901 static int 2902 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2903 int posix_semantics) 2904 { 2905 struct vattr vattr; 2906 mode_t newmode; 2907 int error; 2908 2909 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2910 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2911 goto out; 2912 2913 #define CHANGED(x) ((int)(x) != -1) 2914 newmode = vattr.va_mode; 2915 if (posix_semantics) { 2916 /* 2917 * POSIX/XPG semantics: if the caller is not the super-user, 2918 * clear set-user-id and set-group-id bits. Both POSIX and 2919 * the XPG consider the behaviour for calls by the super-user 2920 * implementation-defined; we leave the set-user-id and set- 2921 * group-id settings intact in that case. 2922 */ 2923 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2924 NULL) != 0) 2925 newmode &= ~(S_ISUID | S_ISGID); 2926 } else { 2927 /* 2928 * NetBSD semantics: when changing owner and/or group, 2929 * clear the respective bit(s). 2930 */ 2931 if (CHANGED(uid)) 2932 newmode &= ~S_ISUID; 2933 if (CHANGED(gid)) 2934 newmode &= ~S_ISGID; 2935 } 2936 /* Update va_mode iff altered. */ 2937 if (vattr.va_mode == newmode) 2938 newmode = VNOVAL; 2939 2940 VATTR_NULL(&vattr); 2941 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2942 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2943 vattr.va_mode = newmode; 2944 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2945 #undef CHANGED 2946 2947 out: 2948 VOP_UNLOCK(vp, 0); 2949 return (error); 2950 } 2951 2952 /* 2953 * Set the access and modification times given a path name; this 2954 * version follows links. 2955 */ 2956 /* ARGSUSED */ 2957 int 2958 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval) 2959 { 2960 /* { 2961 syscallarg(const char *) path; 2962 syscallarg(const struct timeval *) tptr; 2963 } */ 2964 2965 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2966 SCARG(uap, tptr), UIO_USERSPACE); 2967 } 2968 2969 /* 2970 * Set the access and modification times given a file descriptor. 2971 */ 2972 /* ARGSUSED */ 2973 int 2974 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval) 2975 { 2976 /* { 2977 syscallarg(int) fd; 2978 syscallarg(const struct timeval *) tptr; 2979 } */ 2980 int error; 2981 file_t *fp; 2982 2983 /* fd_getvnode() will use the descriptor for us */ 2984 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2985 return (error); 2986 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2987 UIO_USERSPACE); 2988 fd_putfile(SCARG(uap, fd)); 2989 return (error); 2990 } 2991 2992 /* 2993 * Set the access and modification times given a path name; this 2994 * version does not follow links. 2995 */ 2996 int 2997 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval) 2998 { 2999 /* { 3000 syscallarg(const char *) path; 3001 syscallarg(const struct timeval *) tptr; 3002 } */ 3003 3004 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3005 SCARG(uap, tptr), UIO_USERSPACE); 3006 } 3007 3008 /* 3009 * Common routine to set access and modification times given a vnode. 3010 */ 3011 int 3012 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3013 const struct timeval *tptr, enum uio_seg seg) 3014 { 3015 struct vattr vattr; 3016 struct nameidata nd; 3017 int error; 3018 3019 VATTR_NULL(&vattr); 3020 if (tptr == NULL) { 3021 nanotime(&vattr.va_atime); 3022 vattr.va_mtime = vattr.va_atime; 3023 vattr.va_vaflags |= VA_UTIMES_NULL; 3024 } else { 3025 struct timeval tv[2]; 3026 3027 if (seg != UIO_SYSSPACE) { 3028 error = copyin(tptr, &tv, sizeof (tv)); 3029 if (error != 0) 3030 return error; 3031 tptr = tv; 3032 } 3033 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime); 3034 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime); 3035 } 3036 3037 if (vp == NULL) { 3038 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3039 if ((error = namei(&nd)) != 0) 3040 return (error); 3041 vp = nd.ni_vp; 3042 } else 3043 nd.ni_vp = NULL; 3044 3045 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3046 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3047 VOP_UNLOCK(vp, 0); 3048 3049 if (nd.ni_vp != NULL) 3050 vrele(nd.ni_vp); 3051 3052 return (error); 3053 } 3054 3055 /* 3056 * Truncate a file given its path name. 3057 */ 3058 /* ARGSUSED */ 3059 int 3060 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3061 { 3062 /* { 3063 syscallarg(const char *) path; 3064 syscallarg(int) pad; 3065 syscallarg(off_t) length; 3066 } */ 3067 struct vnode *vp; 3068 struct vattr vattr; 3069 int error; 3070 struct nameidata nd; 3071 3072 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3073 SCARG(uap, path)); 3074 if ((error = namei(&nd)) != 0) 3075 return (error); 3076 vp = nd.ni_vp; 3077 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3078 if (vp->v_type == VDIR) 3079 error = EISDIR; 3080 else if ((error = vn_writechk(vp)) == 0 && 3081 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3082 VATTR_NULL(&vattr); 3083 vattr.va_size = SCARG(uap, length); 3084 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3085 } 3086 vput(vp); 3087 return (error); 3088 } 3089 3090 /* 3091 * Truncate a file given a file descriptor. 3092 */ 3093 /* ARGSUSED */ 3094 int 3095 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3096 { 3097 /* { 3098 syscallarg(int) fd; 3099 syscallarg(int) pad; 3100 syscallarg(off_t) length; 3101 } */ 3102 struct vattr vattr; 3103 struct vnode *vp; 3104 file_t *fp; 3105 int error; 3106 3107 /* fd_getvnode() will use the descriptor for us */ 3108 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3109 return (error); 3110 if ((fp->f_flag & FWRITE) == 0) { 3111 error = EINVAL; 3112 goto out; 3113 } 3114 vp = fp->f_data; 3115 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3116 if (vp->v_type == VDIR) 3117 error = EISDIR; 3118 else if ((error = vn_writechk(vp)) == 0) { 3119 VATTR_NULL(&vattr); 3120 vattr.va_size = SCARG(uap, length); 3121 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3122 } 3123 VOP_UNLOCK(vp, 0); 3124 out: 3125 fd_putfile(SCARG(uap, fd)); 3126 return (error); 3127 } 3128 3129 /* 3130 * Sync an open file. 3131 */ 3132 /* ARGSUSED */ 3133 int 3134 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3135 { 3136 /* { 3137 syscallarg(int) fd; 3138 } */ 3139 struct vnode *vp; 3140 file_t *fp; 3141 int error; 3142 3143 /* fd_getvnode() will use the descriptor for us */ 3144 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3145 return (error); 3146 vp = fp->f_data; 3147 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3148 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3149 if (error == 0 && bioopsp != NULL && 3150 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3151 (*bioopsp->io_fsync)(vp, 0); 3152 VOP_UNLOCK(vp, 0); 3153 fd_putfile(SCARG(uap, fd)); 3154 return (error); 3155 } 3156 3157 /* 3158 * Sync a range of file data. API modeled after that found in AIX. 3159 * 3160 * FDATASYNC indicates that we need only save enough metadata to be able 3161 * to re-read the written data. Note we duplicate AIX's requirement that 3162 * the file be open for writing. 3163 */ 3164 /* ARGSUSED */ 3165 int 3166 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3167 { 3168 /* { 3169 syscallarg(int) fd; 3170 syscallarg(int) flags; 3171 syscallarg(off_t) start; 3172 syscallarg(off_t) length; 3173 } */ 3174 struct vnode *vp; 3175 file_t *fp; 3176 int flags, nflags; 3177 off_t s, e, len; 3178 int error; 3179 3180 /* fd_getvnode() will use the descriptor for us */ 3181 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3182 return (error); 3183 3184 if ((fp->f_flag & FWRITE) == 0) { 3185 error = EBADF; 3186 goto out; 3187 } 3188 3189 flags = SCARG(uap, flags); 3190 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3191 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3192 error = EINVAL; 3193 goto out; 3194 } 3195 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3196 if (flags & FDATASYNC) 3197 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3198 else 3199 nflags = FSYNC_WAIT; 3200 if (flags & FDISKSYNC) 3201 nflags |= FSYNC_CACHE; 3202 3203 len = SCARG(uap, length); 3204 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3205 if (len) { 3206 s = SCARG(uap, start); 3207 e = s + len; 3208 if (e < s) { 3209 error = EINVAL; 3210 goto out; 3211 } 3212 } else { 3213 e = 0; 3214 s = 0; 3215 } 3216 3217 vp = fp->f_data; 3218 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3219 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3220 3221 if (error == 0 && bioopsp != NULL && 3222 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3223 (*bioopsp->io_fsync)(vp, nflags); 3224 3225 VOP_UNLOCK(vp, 0); 3226 out: 3227 fd_putfile(SCARG(uap, fd)); 3228 return (error); 3229 } 3230 3231 /* 3232 * Sync the data of an open file. 3233 */ 3234 /* ARGSUSED */ 3235 int 3236 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3237 { 3238 /* { 3239 syscallarg(int) fd; 3240 } */ 3241 struct vnode *vp; 3242 file_t *fp; 3243 int error; 3244 3245 /* fd_getvnode() will use the descriptor for us */ 3246 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3247 return (error); 3248 if ((fp->f_flag & FWRITE) == 0) { 3249 fd_putfile(SCARG(uap, fd)); 3250 return (EBADF); 3251 } 3252 vp = fp->f_data; 3253 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3254 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3255 VOP_UNLOCK(vp, 0); 3256 fd_putfile(SCARG(uap, fd)); 3257 return (error); 3258 } 3259 3260 /* 3261 * Rename files, (standard) BSD semantics frontend. 3262 */ 3263 /* ARGSUSED */ 3264 int 3265 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3266 { 3267 /* { 3268 syscallarg(const char *) from; 3269 syscallarg(const char *) to; 3270 } */ 3271 3272 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3273 } 3274 3275 /* 3276 * Rename files, POSIX semantics frontend. 3277 */ 3278 /* ARGSUSED */ 3279 int 3280 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3281 { 3282 /* { 3283 syscallarg(const char *) from; 3284 syscallarg(const char *) to; 3285 } */ 3286 3287 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3288 } 3289 3290 /* 3291 * Rename files. Source and destination must either both be directories, 3292 * or both not be directories. If target is a directory, it must be empty. 3293 * If `from' and `to' refer to the same object, the value of the `retain' 3294 * argument is used to determine whether `from' will be 3295 * 3296 * (retain == 0) deleted unless `from' and `to' refer to the same 3297 * object in the file system's name space (BSD). 3298 * (retain == 1) always retained (POSIX). 3299 */ 3300 int 3301 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3302 { 3303 struct vnode *tvp, *fvp, *tdvp; 3304 struct nameidata fromnd, tond; 3305 struct mount *fs; 3306 struct lwp *l = curlwp; 3307 struct proc *p; 3308 uint32_t saveflag; 3309 int error; 3310 3311 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3312 seg, from); 3313 if ((error = namei(&fromnd)) != 0) 3314 return (error); 3315 if (fromnd.ni_dvp != fromnd.ni_vp) 3316 VOP_UNLOCK(fromnd.ni_dvp, 0); 3317 fvp = fromnd.ni_vp; 3318 3319 fs = fvp->v_mount; 3320 error = VFS_RENAMELOCK_ENTER(fs); 3321 if (error) { 3322 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3323 vrele(fromnd.ni_dvp); 3324 vrele(fvp); 3325 goto out1; 3326 } 3327 3328 /* 3329 * close, partially, yet another race - ideally we should only 3330 * go as far as getting fromnd.ni_dvp before getting the per-fs 3331 * lock, and then continue to get fromnd.ni_vp, but we can't do 3332 * that with namei as it stands. 3333 * 3334 * This still won't prevent rmdir from nuking fromnd.ni_vp 3335 * under us. The real fix is to get the locks in the right 3336 * order and do the lookups in the right places, but that's a 3337 * major rototill. 3338 * 3339 * Preserve the SAVESTART in cn_flags, because who knows what 3340 * might happen if we don't. 3341 * 3342 * Note: this logic (as well as this whole function) is cloned 3343 * in nfs_serv.c. Proceed accordingly. 3344 */ 3345 vrele(fvp); 3346 if ((fromnd.ni_cnd.cn_namelen == 1 && 3347 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3348 (fromnd.ni_cnd.cn_namelen == 2 && 3349 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3350 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3351 error = EINVAL; 3352 VFS_RENAMELOCK_EXIT(fs); 3353 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3354 vrele(fromnd.ni_dvp); 3355 goto out1; 3356 } 3357 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3358 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3359 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3360 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3361 fromnd.ni_cnd.cn_flags |= saveflag; 3362 if (error) { 3363 VOP_UNLOCK(fromnd.ni_dvp, 0); 3364 VFS_RENAMELOCK_EXIT(fs); 3365 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3366 vrele(fromnd.ni_dvp); 3367 goto out1; 3368 } 3369 VOP_UNLOCK(fromnd.ni_vp, 0); 3370 if (fromnd.ni_dvp != fromnd.ni_vp) 3371 VOP_UNLOCK(fromnd.ni_dvp, 0); 3372 fvp = fromnd.ni_vp; 3373 3374 NDINIT(&tond, RENAME, 3375 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3376 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3377 seg, to); 3378 if ((error = namei(&tond)) != 0) { 3379 VFS_RENAMELOCK_EXIT(fs); 3380 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3381 vrele(fromnd.ni_dvp); 3382 vrele(fvp); 3383 goto out1; 3384 } 3385 tdvp = tond.ni_dvp; 3386 tvp = tond.ni_vp; 3387 3388 if (tvp != NULL) { 3389 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3390 error = ENOTDIR; 3391 goto out; 3392 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3393 error = EISDIR; 3394 goto out; 3395 } 3396 } 3397 3398 if (fvp == tdvp) 3399 error = EINVAL; 3400 3401 /* 3402 * Source and destination refer to the same object. 3403 */ 3404 if (fvp == tvp) { 3405 if (retain) 3406 error = -1; 3407 else if (fromnd.ni_dvp == tdvp && 3408 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3409 !memcmp(fromnd.ni_cnd.cn_nameptr, 3410 tond.ni_cnd.cn_nameptr, 3411 fromnd.ni_cnd.cn_namelen)) 3412 error = -1; 3413 } 3414 3415 #if NVERIEXEC > 0 3416 if (!error) { 3417 char *f1, *f2; 3418 3419 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3420 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3421 3422 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3423 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3424 3425 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3426 3427 free(f1, M_TEMP); 3428 free(f2, M_TEMP); 3429 } 3430 #endif /* NVERIEXEC > 0 */ 3431 3432 out: 3433 p = l->l_proc; 3434 if (!error) { 3435 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3436 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3437 VFS_RENAMELOCK_EXIT(fs); 3438 } else { 3439 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3440 if (tdvp == tvp) 3441 vrele(tdvp); 3442 else 3443 vput(tdvp); 3444 if (tvp) 3445 vput(tvp); 3446 VFS_RENAMELOCK_EXIT(fs); 3447 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3448 vrele(fromnd.ni_dvp); 3449 vrele(fvp); 3450 } 3451 vrele(tond.ni_startdir); 3452 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3453 out1: 3454 if (fromnd.ni_startdir) 3455 vrele(fromnd.ni_startdir); 3456 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3457 return (error == -1 ? 0 : error); 3458 } 3459 3460 /* 3461 * Make a directory file. 3462 */ 3463 /* ARGSUSED */ 3464 int 3465 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3466 { 3467 /* { 3468 syscallarg(const char *) path; 3469 syscallarg(int) mode; 3470 } */ 3471 struct proc *p = l->l_proc; 3472 struct vnode *vp; 3473 struct vattr vattr; 3474 int error; 3475 struct nameidata nd; 3476 3477 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3478 SCARG(uap, path)); 3479 if ((error = namei(&nd)) != 0) 3480 return (error); 3481 vp = nd.ni_vp; 3482 if (vp != NULL) { 3483 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3484 if (nd.ni_dvp == vp) 3485 vrele(nd.ni_dvp); 3486 else 3487 vput(nd.ni_dvp); 3488 vrele(vp); 3489 return (EEXIST); 3490 } 3491 VATTR_NULL(&vattr); 3492 vattr.va_type = VDIR; 3493 /* We will read cwdi->cwdi_cmask unlocked. */ 3494 vattr.va_mode = 3495 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3496 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3497 if (!error) 3498 vput(nd.ni_vp); 3499 return (error); 3500 } 3501 3502 /* 3503 * Remove a directory file. 3504 */ 3505 /* ARGSUSED */ 3506 int 3507 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3508 { 3509 /* { 3510 syscallarg(const char *) path; 3511 } */ 3512 struct vnode *vp; 3513 int error; 3514 struct nameidata nd; 3515 3516 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3517 SCARG(uap, path)); 3518 if ((error = namei(&nd)) != 0) 3519 return (error); 3520 vp = nd.ni_vp; 3521 if (vp->v_type != VDIR) { 3522 error = ENOTDIR; 3523 goto out; 3524 } 3525 /* 3526 * No rmdir "." please. 3527 */ 3528 if (nd.ni_dvp == vp) { 3529 error = EINVAL; 3530 goto out; 3531 } 3532 /* 3533 * The root of a mounted filesystem cannot be deleted. 3534 */ 3535 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3536 error = EBUSY; 3537 goto out; 3538 } 3539 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3540 return (error); 3541 3542 out: 3543 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3544 if (nd.ni_dvp == vp) 3545 vrele(nd.ni_dvp); 3546 else 3547 vput(nd.ni_dvp); 3548 vput(vp); 3549 return (error); 3550 } 3551 3552 /* 3553 * Read a block of directory entries in a file system independent format. 3554 */ 3555 int 3556 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3557 { 3558 /* { 3559 syscallarg(int) fd; 3560 syscallarg(char *) buf; 3561 syscallarg(size_t) count; 3562 } */ 3563 file_t *fp; 3564 int error, done; 3565 3566 /* fd_getvnode() will use the descriptor for us */ 3567 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3568 return (error); 3569 if ((fp->f_flag & FREAD) == 0) { 3570 error = EBADF; 3571 goto out; 3572 } 3573 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3574 SCARG(uap, count), &done, l, 0, 0); 3575 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3576 *retval = done; 3577 out: 3578 fd_putfile(SCARG(uap, fd)); 3579 return (error); 3580 } 3581 3582 /* 3583 * Set the mode mask for creation of filesystem nodes. 3584 */ 3585 int 3586 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3587 { 3588 /* { 3589 syscallarg(mode_t) newmask; 3590 } */ 3591 struct proc *p = l->l_proc; 3592 struct cwdinfo *cwdi; 3593 3594 /* 3595 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3596 * important is that we serialize changes to the mask. The 3597 * rw_exit() will issue a write memory barrier on our behalf, 3598 * and force the changes out to other CPUs (as it must use an 3599 * atomic operation, draining the local CPU's store buffers). 3600 */ 3601 cwdi = p->p_cwdi; 3602 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3603 *retval = cwdi->cwdi_cmask; 3604 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3605 rw_exit(&cwdi->cwdi_lock); 3606 3607 return (0); 3608 } 3609 3610 int 3611 dorevoke(struct vnode *vp, kauth_cred_t cred) 3612 { 3613 struct vattr vattr; 3614 int error; 3615 3616 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3617 return error; 3618 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3619 (error = kauth_authorize_generic(cred, 3620 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3621 VOP_REVOKE(vp, REVOKEALL); 3622 return (error); 3623 } 3624 3625 /* 3626 * Void all references to file by ripping underlying filesystem 3627 * away from vnode. 3628 */ 3629 /* ARGSUSED */ 3630 int 3631 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3632 { 3633 /* { 3634 syscallarg(const char *) path; 3635 } */ 3636 struct vnode *vp; 3637 int error; 3638 struct nameidata nd; 3639 3640 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3641 SCARG(uap, path)); 3642 if ((error = namei(&nd)) != 0) 3643 return (error); 3644 vp = nd.ni_vp; 3645 error = dorevoke(vp, l->l_cred); 3646 vrele(vp); 3647 return (error); 3648 } 3649 3650 /* 3651 * Convert a user file descriptor to a kernel file entry. 3652 */ 3653 int 3654 getvnode(int fd, file_t **fpp) 3655 { 3656 struct vnode *vp; 3657 file_t *fp; 3658 3659 if ((fp = fd_getfile(fd)) == NULL) 3660 return (EBADF); 3661 3662 if (fp->f_type != DTYPE_VNODE) { 3663 fd_putfile(fd); 3664 return (EINVAL); 3665 } 3666 3667 vp = fp->f_data; 3668 if (vp->v_type == VBAD) { 3669 fd_putfile(fd); 3670 return (EBADF); 3671 } 3672 3673 *fpp = fp; 3674 return (0); 3675 } 3676