1 /* $NetBSD: vfs_syscalls.c,v 1.354 2008/04/30 12:49:17 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 63 */ 64 65 #include <sys/cdefs.h> 66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.354 2008/04/30 12:49:17 ad Exp $"); 67 68 #include "opt_compat_netbsd.h" 69 #include "opt_compat_43.h" 70 #include "opt_fileassoc.h" 71 #include "fss.h" 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/namei.h> 77 #include <sys/filedesc.h> 78 #include <sys/kernel.h> 79 #include <sys/file.h> 80 #include <sys/stat.h> 81 #include <sys/vnode.h> 82 #include <sys/mount.h> 83 #include <sys/proc.h> 84 #include <sys/uio.h> 85 #include <sys/malloc.h> 86 #include <sys/kmem.h> 87 #include <sys/dirent.h> 88 #include <sys/sysctl.h> 89 #include <sys/syscallargs.h> 90 #include <sys/vfs_syscalls.h> 91 #include <sys/ktrace.h> 92 #ifdef FILEASSOC 93 #include <sys/fileassoc.h> 94 #endif /* FILEASSOC */ 95 #include <sys/verified_exec.h> 96 #include <sys/kauth.h> 97 #include <sys/atomic.h> 98 99 #include <miscfs/genfs/genfs.h> 100 #include <miscfs/syncfs/syncfs.h> 101 #include <miscfs/specfs/specdev.h> 102 103 #ifdef COMPAT_30 104 #include "opt_nfsserver.h" 105 #include <nfs/rpcv2.h> 106 #endif 107 #include <nfs/nfsproto.h> 108 #ifdef COMPAT_30 109 #include <nfs/nfs.h> 110 #include <nfs/nfs_var.h> 111 #endif 112 113 #if NFSS > 0 114 #include <dev/fssvar.h> 115 #endif 116 117 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 118 119 static int change_dir(struct nameidata *, struct lwp *); 120 static int change_flags(struct vnode *, u_long, struct lwp *); 121 static int change_mode(struct vnode *, int, struct lwp *l); 122 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 123 124 void checkdirs(struct vnode *); 125 126 int dovfsusermount = 0; 127 128 /* 129 * Virtual File System System Calls 130 */ 131 132 /* 133 * Mount a file system. 
134 */ 135 136 #if defined(COMPAT_09) || defined(COMPAT_43) 137 /* 138 * This table is used to maintain compatibility with 4.3BSD 139 * and NetBSD 0.9 mount syscalls. Note, the order is important! 140 * 141 * Do not modify this table. It should only contain filesystems 142 * supported by NetBSD 0.9 and 4.3BSD. 143 */ 144 const char * const mountcompatnames[] = { 145 NULL, /* 0 = MOUNT_NONE */ 146 MOUNT_FFS, /* 1 = MOUNT_UFS */ 147 MOUNT_NFS, /* 2 */ 148 MOUNT_MFS, /* 3 */ 149 MOUNT_MSDOS, /* 4 */ 150 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 151 MOUNT_FDESC, /* 6 */ 152 MOUNT_KERNFS, /* 7 */ 153 NULL, /* 8 = MOUNT_DEVFS */ 154 MOUNT_AFS, /* 9 */ 155 }; 156 const int nmountcompatnames = sizeof(mountcompatnames) / 157 sizeof(mountcompatnames[0]); 158 #endif /* COMPAT_09 || COMPAT_43 */ 159 160 static int 161 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 162 void *data, size_t *data_len) 163 { 164 struct mount *mp; 165 int error = 0, saved_flags; 166 167 mp = vp->v_mount; 168 saved_flags = mp->mnt_flag; 169 170 /* We can operate only on VV_ROOT nodes. */ 171 if ((vp->v_vflag & VV_ROOT) == 0) { 172 error = EINVAL; 173 goto out; 174 } 175 176 /* 177 * We only allow the filesystem to be reloaded if it 178 * is currently mounted read-only. 179 */ 180 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 181 error = EOPNOTSUPP; /* Needs translation */ 182 goto out; 183 } 184 185 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 186 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 187 if (error) 188 goto out; 189 190 if (vfs_busy(mp, RW_WRITER)) { 191 error = EPERM; 192 goto out; 193 } 194 195 mp->mnt_flag &= ~MNT_OP_FLAGS; 196 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 197 198 /* 199 * Set the mount level flags. 
200 */ 201 if (flags & MNT_RDONLY) 202 mp->mnt_flag |= MNT_RDONLY; 203 else if (mp->mnt_flag & MNT_RDONLY) 204 mp->mnt_iflag |= IMNT_WANTRDWR; 205 mp->mnt_flag &= 206 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 209 mp->mnt_flag |= flags & 210 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 211 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 212 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 213 MNT_IGNORE); 214 215 error = VFS_MOUNT(mp, path, data, data_len); 216 217 #if defined(COMPAT_30) && defined(NFSSERVER) 218 if (error && data != NULL) { 219 int error2; 220 221 /* Update failed; let's try and see if it was an 222 * export request. */ 223 error2 = nfs_update_exports_30(mp, path, data, l); 224 225 /* Only update error code if the export request was 226 * understood but some problem occurred while 227 * processing it. */ 228 if (error2 != EJUSTRETURN) 229 error = error2; 230 } 231 #endif 232 if (mp->mnt_iflag & IMNT_WANTRDWR) 233 mp->mnt_flag &= ~MNT_RDONLY; 234 if (error) 235 mp->mnt_flag = saved_flags; 236 mp->mnt_flag &= ~MNT_OP_FLAGS; 237 mp->mnt_iflag &= ~IMNT_WANTRDWR; 238 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 239 if (mp->mnt_syncer == NULL) 240 error = vfs_allocate_syncvnode(mp); 241 } else { 242 if (mp->mnt_syncer != NULL) 243 vfs_deallocate_syncvnode(mp); 244 } 245 vfs_unbusy(mp, false, NULL); 246 247 out: 248 return (error); 249 } 250 251 static int 252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 253 { 254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 255 int error; 256 257 /* Copy file-system type from userspace. */ 258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 259 if (error) { 260 #if defined(COMPAT_09) || defined(COMPAT_43) 261 /* 262 * Historically, filesystem types were identified by numbers. 
263 * If we get an integer for the filesystem type instead of a 264 * string, we check to see if it matches one of the historic 265 * filesystem types. 266 */ 267 u_long fsindex = (u_long)fstype; 268 if (fsindex >= nmountcompatnames || 269 mountcompatnames[fsindex] == NULL) 270 return ENODEV; 271 strlcpy(fstypename, mountcompatnames[fsindex], 272 sizeof(fstypename)); 273 #else 274 return error; 275 #endif 276 } 277 278 #ifdef COMPAT_10 279 /* Accept `ufs' as an alias for `ffs'. */ 280 if (strcmp(fstypename, "ufs") == 0) 281 fstypename[0] = 'f'; 282 #endif 283 284 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL) 285 return ENODEV; 286 return 0; 287 } 288 289 static int 290 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 291 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 292 { 293 struct mount *mp = NULL; 294 struct vnode *vp = *vpp; 295 struct vattr va; 296 int error; 297 298 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 299 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 300 if (error) 301 return error; 302 303 /* Can't make a non-dir a mount-point (from here anyway). */ 304 if (vp->v_type != VDIR) 305 return ENOTDIR; 306 307 /* 308 * If the user is not root, ensure that they own the directory 309 * onto which we are attempting to mount. 310 */ 311 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 312 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 313 (error = kauth_authorize_generic(l->l_cred, 314 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 315 return error; 316 } 317 318 if (flags & MNT_EXPORTED) 319 return EINVAL; 320 321 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 322 return error; 323 324 /* 325 * Check if a file-system is not already mounted on this vnode. 
326 */ 327 if (vp->v_mountedhere != NULL) 328 return EBUSY; 329 330 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 331 if (mp == NULL) 332 return ENOMEM; 333 334 mp->mnt_op = vfsops; 335 mp->mnt_refcnt = 1; 336 337 TAILQ_INIT(&mp->mnt_vnodelist); 338 rw_init(&mp->mnt_lock); 339 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 340 (void)vfs_busy(mp, RW_WRITER); 341 342 mp->mnt_vnodecovered = vp; 343 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 344 mount_initspecific(mp); 345 346 /* 347 * The underlying file system may refuse the mount for 348 * various reasons. Allow the user to force it to happen. 349 * 350 * Set the mount level flags. 351 */ 352 mp->mnt_flag = flags & 353 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 354 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 355 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 356 MNT_IGNORE | MNT_RDONLY); 357 358 error = VFS_MOUNT(mp, path, data, data_len); 359 mp->mnt_flag &= ~MNT_OP_FLAGS; 360 361 /* 362 * Put the new filesystem on the mount list after root. 363 */ 364 cache_purge(vp); 365 if (error != 0) { 366 vp->v_mountedhere = NULL; 367 vfs_unbusy(mp, false, NULL); 368 vfs_destroy(mp, false); 369 return error; 370 } 371 372 mp->mnt_iflag &= ~IMNT_WANTRDWR; 373 mutex_enter(&mountlist_lock); 374 vp->v_mountedhere = mp; 375 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 376 mp->mnt_iflag |= IMNT_ONLIST; 377 mutex_exit(&mountlist_lock); 378 vn_restorerecurse(vp, recurse); 379 VOP_UNLOCK(vp, 0); 380 checkdirs(vp); 381 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 382 error = vfs_allocate_syncvnode(mp); 383 /* Hold an additional reference to the mount across VFS_START(). */ 384 vfs_unbusy(mp, true, NULL); 385 (void) VFS_STATVFS(mp, &mp->mnt_stat); 386 error = VFS_START(mp, 0); 387 if (error) { 388 vrele(vp); 389 vfs_destroy(mp, false); 390 } 391 /* Drop reference held for VFS_START(). 
*/ 392 vfs_destroy(mp, false); 393 *vpp = NULL; 394 return error; 395 } 396 397 static int 398 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 399 void *data, size_t *data_len) 400 { 401 struct mount *mp; 402 int error; 403 404 /* If MNT_GETARGS is specified, it should be the only flag. */ 405 if (flags & ~MNT_GETARGS) 406 return EINVAL; 407 408 mp = vp->v_mount; 409 410 /* XXX: probably some notion of "can see" here if we want isolation. */ 411 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 412 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 413 if (error) 414 return error; 415 416 if ((vp->v_vflag & VV_ROOT) == 0) 417 return EINVAL; 418 419 if (vfs_busy(mp, RW_WRITER)) 420 return EPERM; 421 422 mp->mnt_flag &= ~MNT_OP_FLAGS; 423 mp->mnt_flag |= MNT_GETARGS; 424 error = VFS_MOUNT(mp, path, data, data_len); 425 mp->mnt_flag &= ~MNT_OP_FLAGS; 426 427 vfs_unbusy(mp, false, NULL); 428 return (error); 429 } 430 431 #ifdef COMPAT_40 432 /* ARGSUSED */ 433 int 434 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval) 435 { 436 /* { 437 syscallarg(const char *) type; 438 syscallarg(const char *) path; 439 syscallarg(int) flags; 440 syscallarg(void *) data; 441 } */ 442 register_t dummy; 443 444 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 445 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 446 } 447 #endif 448 449 int 450 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 451 { 452 /* { 453 syscallarg(const char *) type; 454 syscallarg(const char *) path; 455 syscallarg(int) flags; 456 syscallarg(void *) data; 457 syscallarg(size_t) data_len; 458 } */ 459 460 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 461 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 462 SCARG(uap, data_len), retval); 463 } 464 465 int 466 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 467 const char 
*path, int flags, void *data, enum uio_seg data_seg, 468 size_t data_len, register_t *retval) 469 { 470 struct vnode *vp; 471 struct nameidata nd; 472 void *data_buf = data; 473 u_int recurse; 474 int error; 475 476 /* 477 * Get vnode to be covered 478 */ 479 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 480 if ((error = namei(&nd)) != 0) 481 return (error); 482 vp = nd.ni_vp; 483 484 /* 485 * A lookup in VFS_MOUNT might result in an attempt to 486 * lock this vnode again, so make the lock recursive. 487 */ 488 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 489 recurse = vn_setrecurse(vp); 490 491 if (vfsops == NULL) { 492 if (flags & (MNT_GETARGS | MNT_UPDATE)) 493 vfsops = vp->v_mount->mnt_op; 494 else { 495 /* 'type' is userspace */ 496 error = mount_get_vfsops(type, &vfsops); 497 if (error != 0) 498 goto done; 499 } 500 } 501 502 if (data != NULL && data_seg == UIO_USERSPACE) { 503 if (data_len == 0) { 504 /* No length supplied, use default for filesystem */ 505 data_len = vfsops->vfs_min_mount_data; 506 if (data_len > VFS_MAX_MOUNT_DATA) { 507 /* maybe a force loaded old LKM */ 508 error = EINVAL; 509 goto done; 510 } 511 #ifdef COMPAT_30 512 /* Hopefully a longer buffer won't make copyin() fail */ 513 if (flags & MNT_UPDATE 514 && data_len < sizeof (struct mnt_export_args30)) 515 data_len = sizeof (struct mnt_export_args30); 516 #endif 517 } 518 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 519 520 /* NFS needs the buffer even for mnt_getargs .... 
*/ 521 error = copyin(data, data_buf, data_len); 522 if (error != 0) 523 goto done; 524 } 525 526 if (flags & MNT_GETARGS) { 527 if (data_len == 0) { 528 error = EINVAL; 529 goto done; 530 } 531 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 532 if (error != 0) 533 goto done; 534 if (data_seg == UIO_USERSPACE) 535 error = copyout(data_buf, data, data_len); 536 *retval = data_len; 537 } else if (flags & MNT_UPDATE) { 538 error = mount_update(l, vp, path, flags, data_buf, &data_len); 539 } else { 540 /* Locking is handled internally in mount_domount(). */ 541 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 542 &data_len, recurse); 543 } 544 545 done: 546 if (vp != NULL) { 547 vn_restorerecurse(vp, recurse); 548 vput(vp); 549 } 550 if (data_buf != data) 551 free(data_buf, M_TEMP); 552 return (error); 553 } 554 555 /* 556 * Scan all active processes to see if any of them have a current 557 * or root directory onto which the new filesystem has just been 558 * mounted. If so, replace them with the new mount point. 559 */ 560 void 561 checkdirs(struct vnode *olddp) 562 { 563 struct cwdinfo *cwdi; 564 struct vnode *newdp; 565 struct proc *p; 566 567 if (olddp->v_usecount == 1) 568 return; 569 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 570 panic("mount: lost mount"); 571 mutex_enter(proc_lock); 572 /* XXXAD Should not be acquiring these locks with proc_lock held!! 
*/ 573 PROCLIST_FOREACH(p, &allproc) { 574 if ((p->p_flag & PK_MARKER) != 0) 575 continue; 576 cwdi = p->p_cwdi; 577 if (!cwdi) 578 continue; 579 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 580 if (cwdi->cwdi_cdir == olddp) { 581 vrele(cwdi->cwdi_cdir); 582 VREF(newdp); 583 cwdi->cwdi_cdir = newdp; 584 } 585 if (cwdi->cwdi_rdir == olddp) { 586 vrele(cwdi->cwdi_rdir); 587 VREF(newdp); 588 cwdi->cwdi_rdir = newdp; 589 } 590 rw_exit(&cwdi->cwdi_lock); 591 } 592 mutex_exit(proc_lock); 593 if (rootvnode == olddp) { 594 vrele(rootvnode); 595 VREF(newdp); 596 rootvnode = newdp; 597 } 598 vput(newdp); 599 } 600 601 /* 602 * Unmount a file system. 603 * 604 * Note: unmount takes a path to the vnode mounted on as argument, 605 * not special file (as before). 606 */ 607 /* ARGSUSED */ 608 int 609 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 610 { 611 /* { 612 syscallarg(const char *) path; 613 syscallarg(int) flags; 614 } */ 615 struct vnode *vp; 616 struct mount *mp; 617 int error; 618 struct nameidata nd; 619 620 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 621 SCARG(uap, path)); 622 if ((error = namei(&nd)) != 0) 623 return (error); 624 vp = nd.ni_vp; 625 mp = vp->v_mount; 626 VOP_UNLOCK(vp, 0); 627 628 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 629 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 630 if (error) { 631 vrele(vp); 632 return (error); 633 } 634 635 /* 636 * Don't allow unmounting the root file system. 637 */ 638 if (mp->mnt_flag & MNT_ROOTFS) { 639 vrele(vp); 640 return (EINVAL); 641 } 642 643 /* 644 * Must be the root of the filesystem 645 */ 646 if ((vp->v_vflag & VV_ROOT) == 0) { 647 vrele(vp); 648 return (EINVAL); 649 } 650 651 /* 652 * XXX Freeze syncer. Must do this before locking the 653 * mount point. See dounmount() for details. 
654 */ 655 mutex_enter(&syncer_mutex); 656 error = vfs_busy(mp, RW_WRITER); 657 vrele(vp); 658 if (error != 0) { 659 mutex_exit(&syncer_mutex); 660 return (error); 661 } 662 663 return (dounmount(mp, SCARG(uap, flags), l)); 664 } 665 666 /* 667 * Lock mount and keep additional reference across unmount. 668 */ 669 static void 670 dounmount_lock(struct mount *mp) 671 { 672 673 KASSERT(rw_write_held(&mp->mnt_lock)); 674 KASSERT(mp->mnt_unmounter == NULL); 675 676 mp->mnt_unmounter = curlwp; 677 vfs_unbusy(mp, true, NULL); 678 } 679 680 /* 681 * Unlock mount and drop additional reference. 682 */ 683 static void 684 dounmount_unlock(struct mount *mp) 685 { 686 687 KASSERT(mp->mnt_unmounter == curlwp); 688 689 mutex_enter(&mount_lock); 690 mp->mnt_unmounter = NULL; 691 cv_broadcast(&mount_cv); 692 mutex_exit(&mount_lock); 693 vfs_destroy(mp, false); 694 } 695 696 /* 697 * Do the actual file system unmount. File system is assumed to have been 698 * marked busy by the caller. 699 */ 700 int 701 dounmount(struct mount *mp, int flags, struct lwp *l) 702 { 703 struct vnode *coveredvp; 704 int error; 705 int async; 706 int used_syncer; 707 708 KASSERT(rw_write_held(&mp->mnt_lock)); 709 710 #if NVERIEXEC > 0 711 error = veriexec_unmountchk(mp); 712 if (error) 713 return (error); 714 #endif /* NVERIEXEC > 0 */ 715 716 dounmount_lock(mp); 717 used_syncer = (mp->mnt_syncer != NULL); 718 719 /* 720 * XXX Syncer must be frozen when we get here. This should really 721 * be done on a per-mountpoint basis, but especially the softdep 722 * code possibly called from the syncer doesn't exactly work on a 723 * per-mountpoint basis, so the softdep code would become a maze 724 * of vfs_busy() calls. 725 * 726 * The caller of dounmount() must acquire syncer_mutex because 727 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 728 * order, and we must preserve that order to avoid deadlock. 
729 * 730 * So, if the file system did not use the syncer, now is 731 * the time to release the syncer_mutex. 732 */ 733 if (used_syncer == 0) 734 mutex_exit(&syncer_mutex); 735 736 mp->mnt_iflag |= IMNT_UNMOUNT; 737 async = mp->mnt_flag & MNT_ASYNC; 738 mp->mnt_flag &= ~MNT_ASYNC; 739 cache_purgevfs(mp); /* remove cache entries for this file sys */ 740 if (mp->mnt_syncer != NULL) 741 vfs_deallocate_syncvnode(mp); 742 error = 0; 743 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 744 #if NFSS > 0 745 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 746 #endif 747 if (error == 0) 748 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 749 } 750 vfs_scrubvnlist(mp); 751 if (error == 0 || (flags & MNT_FORCE)) 752 error = VFS_UNMOUNT(mp, flags); 753 if (error) { 754 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 755 (void) vfs_allocate_syncvnode(mp); 756 mp->mnt_iflag &= ~IMNT_UNMOUNT; 757 mp->mnt_flag |= async; 758 if (used_syncer) 759 mutex_exit(&syncer_mutex); 760 dounmount_unlock(mp); 761 return (error); 762 } 763 vfs_scrubvnlist(mp); 764 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 765 coveredvp->v_mountedhere = NULL; 766 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 767 panic("unmount: dangling vnode"); 768 mp->mnt_iflag |= IMNT_GONE; 769 if (used_syncer) 770 mutex_exit(&syncer_mutex); 771 vfs_hooks_unmount(mp); 772 dounmount_unlock(mp); 773 vfs_destroy(mp, false); 774 if (coveredvp != NULLVP) 775 vrele(coveredvp); 776 return (0); 777 } 778 779 /* 780 * Sync each mounted filesystem. 
781 */ 782 #ifdef DEBUG 783 int syncprt = 0; 784 struct ctldebug debug0 = { "syncprt", &syncprt }; 785 #endif 786 787 /* ARGSUSED */ 788 int 789 sys_sync(struct lwp *l, const void *v, register_t *retval) 790 { 791 struct mount *mp, *nmp; 792 int asyncflag; 793 794 if (l == NULL) 795 l = &lwp0; 796 797 mutex_enter(&mountlist_lock); 798 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 799 mp = nmp) { 800 if (vfs_trybusy(mp, RW_READER, &nmp)) { 801 continue; 802 } 803 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 804 asyncflag = mp->mnt_flag & MNT_ASYNC; 805 mp->mnt_flag &= ~MNT_ASYNC; 806 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 807 if (asyncflag) 808 mp->mnt_flag |= MNT_ASYNC; 809 } 810 mutex_enter(&mountlist_lock); 811 vfs_unbusy(mp, false, &nmp); 812 813 } 814 mutex_exit(&mountlist_lock); 815 #ifdef DEBUG 816 if (syncprt) 817 vfs_bufstats(); 818 #endif /* DEBUG */ 819 return (0); 820 } 821 822 /* 823 * Change filesystem quotas. 824 */ 825 /* ARGSUSED */ 826 int 827 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 828 { 829 /* { 830 syscallarg(const char *) path; 831 syscallarg(int) cmd; 832 syscallarg(int) uid; 833 syscallarg(void *) arg; 834 } */ 835 struct mount *mp; 836 int error; 837 struct nameidata nd; 838 839 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 840 SCARG(uap, path)); 841 if ((error = namei(&nd)) != 0) 842 return (error); 843 mp = nd.ni_vp->v_mount; 844 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 845 SCARG(uap, arg)); 846 vrele(nd.ni_vp); 847 return (error); 848 } 849 850 int 851 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 852 int root) 853 { 854 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 855 int error = 0; 856 857 /* 858 * If MNT_NOWAIT or MNT_LAZY is specified, do not 859 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 860 * overrides MNT_NOWAIT. 
861 */ 862 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 863 (flags != MNT_WAIT && flags != 0)) { 864 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 865 goto done; 866 } 867 868 /* Get the filesystem stats now */ 869 memset(sp, 0, sizeof(*sp)); 870 if ((error = VFS_STATVFS(mp, sp)) != 0) { 871 return error; 872 } 873 874 if (cwdi->cwdi_rdir == NULL) 875 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 876 done: 877 if (cwdi->cwdi_rdir != NULL) { 878 size_t len; 879 char *bp; 880 char *path = PNBUF_GET(); 881 882 bp = path + MAXPATHLEN; 883 *--bp = '\0'; 884 rw_enter(&cwdi->cwdi_lock, RW_READER); 885 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 886 MAXPATHLEN / 2, 0, l); 887 rw_exit(&cwdi->cwdi_lock); 888 if (error) { 889 PNBUF_PUT(path); 890 return error; 891 } 892 len = strlen(bp); 893 /* 894 * for mount points that are below our root, we can see 895 * them, so we fix up the pathname and return them. The 896 * rest we cannot see, so we don't allow viewing the 897 * data. 898 */ 899 if (strncmp(bp, sp->f_mntonname, len) == 0) { 900 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 901 sizeof(sp->f_mntonname)); 902 if (sp->f_mntonname[0] == '\0') 903 (void)strlcpy(sp->f_mntonname, "/", 904 sizeof(sp->f_mntonname)); 905 } else { 906 if (root) 907 (void)strlcpy(sp->f_mntonname, "/", 908 sizeof(sp->f_mntonname)); 909 else 910 error = EPERM; 911 } 912 PNBUF_PUT(path); 913 } 914 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 915 return error; 916 } 917 918 /* 919 * Get filesystem statistics by path. 
920 */ 921 int 922 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 923 { 924 struct mount *mp; 925 int error; 926 struct nameidata nd; 927 928 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 929 if ((error = namei(&nd)) != 0) 930 return error; 931 mp = nd.ni_vp->v_mount; 932 error = dostatvfs(mp, sb, l, flags, 1); 933 vrele(nd.ni_vp); 934 return error; 935 } 936 937 /* ARGSUSED */ 938 int 939 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 940 { 941 /* { 942 syscallarg(const char *) path; 943 syscallarg(struct statvfs *) buf; 944 syscallarg(int) flags; 945 } */ 946 struct statvfs *sb; 947 int error; 948 949 sb = STATVFSBUF_GET(); 950 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 951 if (error == 0) 952 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 953 STATVFSBUF_PUT(sb); 954 return error; 955 } 956 957 /* 958 * Get filesystem statistics by fd. 959 */ 960 int 961 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 962 { 963 file_t *fp; 964 struct mount *mp; 965 int error; 966 967 /* fd_getvnode() will use the descriptor for us */ 968 if ((error = fd_getvnode(fd, &fp)) != 0) 969 return (error); 970 mp = ((struct vnode *)fp->f_data)->v_mount; 971 error = dostatvfs(mp, sb, curlwp, flags, 1); 972 fd_putfile(fd); 973 return error; 974 } 975 976 /* ARGSUSED */ 977 int 978 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 979 { 980 /* { 981 syscallarg(int) fd; 982 syscallarg(struct statvfs *) buf; 983 syscallarg(int) flags; 984 } */ 985 struct statvfs *sb; 986 int error; 987 988 sb = STATVFSBUF_GET(); 989 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 990 if (error == 0) 991 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 992 STATVFSBUF_PUT(sb); 993 return error; 994 } 995 996 997 /* 998 * Get statistics on all filesystems. 
999 */ 1000 int 1001 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1002 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1003 register_t *retval) 1004 { 1005 int root = 0; 1006 struct proc *p = l->l_proc; 1007 struct mount *mp, *nmp; 1008 struct statvfs *sb; 1009 size_t count, maxcount; 1010 int error = 0; 1011 1012 sb = STATVFSBUF_GET(); 1013 maxcount = bufsize / entry_sz; 1014 mutex_enter(&mountlist_lock); 1015 count = 0; 1016 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1017 mp = nmp) { 1018 if (vfs_trybusy(mp, RW_READER, &nmp)) { 1019 continue; 1020 } 1021 if (sfsp && count < maxcount) { 1022 error = dostatvfs(mp, sb, l, flags, 0); 1023 if (error) { 1024 mutex_enter(&mountlist_lock); 1025 vfs_unbusy(mp, false, &nmp); 1026 continue; 1027 } 1028 error = copyfn(sb, sfsp, entry_sz); 1029 if (error) { 1030 vfs_unbusy(mp, false, NULL); 1031 goto out; 1032 } 1033 sfsp = (char *)sfsp + entry_sz; 1034 root |= strcmp(sb->f_mntonname, "/") == 0; 1035 } 1036 count++; 1037 mutex_enter(&mountlist_lock); 1038 vfs_unbusy(mp, false, &nmp); 1039 } 1040 1041 mutex_exit(&mountlist_lock); 1042 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1043 /* 1044 * fake a root entry 1045 */ 1046 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1047 sb, l, flags, 1); 1048 if (error != 0) 1049 goto out; 1050 if (sfsp) 1051 error = copyfn(sb, sfsp, entry_sz); 1052 count++; 1053 } 1054 if (sfsp && count > maxcount) 1055 *retval = maxcount; 1056 else 1057 *retval = count; 1058 out: 1059 STATVFSBUF_PUT(sb); 1060 return error; 1061 } 1062 1063 int 1064 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1065 { 1066 /* { 1067 syscallarg(struct statvfs *) buf; 1068 syscallarg(size_t) bufsize; 1069 syscallarg(int) flags; 1070 } */ 1071 1072 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1073 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1074 } 1075 1076 /* 1077 * Change current 
working directory to a given file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct cwdinfo *cwd;
	struct vnode *dvp, *rootvp;
	struct mount *covering;
	file_t *fp;
	int fd, rv;

	/* fd_getvnode() will use the descriptor for us */
	fd = SCARG(uap, fd);
	rv = fd_getvnode(fd, &fp);
	if (rv != 0)
		return (rv);
	dvp = fp->f_data;

	/* Take our own reference and check it is a searchable directory. */
	VREF(dvp);
	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	rv = (dvp->v_type != VDIR) ? ENOTDIR :
	    VOP_ACCESS(dvp, VEXEC, l->l_cred);
	if (rv) {
		vput(dvp);
		goto out;
	}

	/* Step onto the root of whatever is mounted on the directory. */
	for (;;) {
		covering = dvp->v_mountedhere;
		if (covering == NULL)
			break;
		if (vfs_busy(covering, RW_READER))
			continue;
		vput(dvp);
		rv = VFS_ROOT(covering, &rootvp);
		vfs_unbusy(covering, false, NULL);
		if (rv)
			goto out;
		dvp = rootvp;
	}
	VOP_UNLOCK(dvp, 0);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwd = l->l_proc->p_cwdi;
	rw_enter(&cwd->cwdi_lock, RW_WRITER);
	if (cwd->cwdi_rdir == NULL || vn_isunder(dvp, NULL, l)) {
		/* Swap in the new directory; our reference moves with it. */
		vrele(cwd->cwdi_cdir);
		cwd->cwdi_cdir = dvp;
	} else {
		vrele(dvp);
		rv = EPERM;	/* operation not permitted */
	}
	rw_exit(&cwd->cwdi_lock);

 out:
	fd_putfile(fd);
	return (rv);
}

/*
 * Change this process's notion of the root directory to a given file
 * descriptor.
1144 */ 1145 int 1146 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1147 { 1148 struct proc *p = l->l_proc; 1149 struct cwdinfo *cwdi; 1150 struct vnode *vp; 1151 file_t *fp; 1152 int error, fd = SCARG(uap, fd); 1153 1154 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1155 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1156 return error; 1157 /* fd_getvnode() will use the descriptor for us */ 1158 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1159 return error; 1160 vp = fp->f_data; 1161 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1162 if (vp->v_type != VDIR) 1163 error = ENOTDIR; 1164 else 1165 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1166 VOP_UNLOCK(vp, 0); 1167 if (error) 1168 goto out; 1169 VREF(vp); 1170 1171 /* 1172 * Prevent escaping from chroot by putting the root under 1173 * the working directory. Silently chdir to / if we aren't 1174 * already there. 1175 */ 1176 cwdi = p->p_cwdi; 1177 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1178 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1179 /* 1180 * XXX would be more failsafe to change directory to a 1181 * deadfs node here instead 1182 */ 1183 vrele(cwdi->cwdi_cdir); 1184 VREF(vp); 1185 cwdi->cwdi_cdir = vp; 1186 } 1187 1188 if (cwdi->cwdi_rdir != NULL) 1189 vrele(cwdi->cwdi_rdir); 1190 cwdi->cwdi_rdir = vp; 1191 rw_exit(&cwdi->cwdi_lock); 1192 1193 out: 1194 fd_putfile(fd); 1195 return (error); 1196 } 1197 1198 /* 1199 * Change current working directory (``.''). 
1200 */ 1201 /* ARGSUSED */ 1202 int 1203 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1204 { 1205 /* { 1206 syscallarg(const char *) path; 1207 } */ 1208 struct proc *p = l->l_proc; 1209 struct cwdinfo *cwdi; 1210 int error; 1211 struct nameidata nd; 1212 1213 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1214 SCARG(uap, path)); 1215 if ((error = change_dir(&nd, l)) != 0) 1216 return (error); 1217 cwdi = p->p_cwdi; 1218 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1219 vrele(cwdi->cwdi_cdir); 1220 cwdi->cwdi_cdir = nd.ni_vp; 1221 rw_exit(&cwdi->cwdi_lock); 1222 return (0); 1223 } 1224 1225 /* 1226 * Change notion of root (``/'') directory. 1227 */ 1228 /* ARGSUSED */ 1229 int 1230 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1231 { 1232 /* { 1233 syscallarg(const char *) path; 1234 } */ 1235 struct proc *p = l->l_proc; 1236 struct cwdinfo *cwdi; 1237 struct vnode *vp; 1238 int error; 1239 struct nameidata nd; 1240 1241 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1242 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1243 return (error); 1244 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1245 SCARG(uap, path)); 1246 if ((error = change_dir(&nd, l)) != 0) 1247 return (error); 1248 1249 cwdi = p->p_cwdi; 1250 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1251 if (cwdi->cwdi_rdir != NULL) 1252 vrele(cwdi->cwdi_rdir); 1253 vp = nd.ni_vp; 1254 cwdi->cwdi_rdir = vp; 1255 1256 /* 1257 * Prevent escaping from chroot by putting the root under 1258 * the working directory. Silently chdir to / if we aren't 1259 * already there. 
1260 */ 1261 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1262 /* 1263 * XXX would be more failsafe to change directory to a 1264 * deadfs node here instead 1265 */ 1266 vrele(cwdi->cwdi_cdir); 1267 VREF(vp); 1268 cwdi->cwdi_cdir = vp; 1269 } 1270 rw_exit(&cwdi->cwdi_lock); 1271 1272 return (0); 1273 } 1274 1275 /* 1276 * Common routine for chroot and chdir. 1277 */ 1278 static int 1279 change_dir(struct nameidata *ndp, struct lwp *l) 1280 { 1281 struct vnode *vp; 1282 int error; 1283 1284 if ((error = namei(ndp)) != 0) 1285 return (error); 1286 vp = ndp->ni_vp; 1287 if (vp->v_type != VDIR) 1288 error = ENOTDIR; 1289 else 1290 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1291 1292 if (error) 1293 vput(vp); 1294 else 1295 VOP_UNLOCK(vp, 0); 1296 return (error); 1297 } 1298 1299 /* 1300 * Check permissions, allocate an open file structure, 1301 * and call the device open routine if any. 1302 */ 1303 int 1304 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1305 { 1306 /* { 1307 syscallarg(const char *) path; 1308 syscallarg(int) flags; 1309 syscallarg(int) mode; 1310 } */ 1311 struct proc *p = l->l_proc; 1312 struct cwdinfo *cwdi = p->p_cwdi; 1313 file_t *fp; 1314 struct vnode *vp; 1315 int flags, cmode; 1316 int type, indx, error; 1317 struct flock lf; 1318 struct nameidata nd; 1319 1320 flags = FFLAGS(SCARG(uap, flags)); 1321 if ((flags & (FREAD | FWRITE)) == 0) 1322 return (EINVAL); 1323 if ((error = fd_allocfile(&fp, &indx)) != 0) 1324 return (error); 1325 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1326 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1327 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1328 SCARG(uap, path)); 1329 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1330 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1331 fd_abort(p, fp, indx); 1332 if ((error == EDUPFD || error == EMOVEFD) && 1333 l->l_dupfd >= 0 && /* XXX from fdopen */ 1334 (error = 1335 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1336 *retval = indx; 1337 return (0); 1338 } 1339 if (error == ERESTART) 1340 error = EINTR; 1341 return (error); 1342 } 1343 1344 l->l_dupfd = 0; 1345 vp = nd.ni_vp; 1346 fp->f_flag = flags & FMASK; 1347 fp->f_type = DTYPE_VNODE; 1348 fp->f_ops = &vnops; 1349 fp->f_data = vp; 1350 if (flags & (O_EXLOCK | O_SHLOCK)) { 1351 lf.l_whence = SEEK_SET; 1352 lf.l_start = 0; 1353 lf.l_len = 0; 1354 if (flags & O_EXLOCK) 1355 lf.l_type = F_WRLCK; 1356 else 1357 lf.l_type = F_RDLCK; 1358 type = F_FLOCK; 1359 if ((flags & FNONBLOCK) == 0) 1360 type |= F_WAIT; 1361 VOP_UNLOCK(vp, 0); 1362 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1363 if (error) { 1364 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1365 fd_abort(p, fp, indx); 1366 return (error); 1367 } 1368 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1369 atomic_or_uint(&fp->f_flag, FHASLOCK); 1370 } 1371 VOP_UNLOCK(vp, 0); 1372 *retval = indx; 1373 fd_affix(p, fp, indx); 1374 return (0); 1375 } 1376 1377 static void 1378 vfs__fhfree(fhandle_t *fhp) 1379 { 1380 size_t fhsize; 1381 1382 if (fhp == NULL) { 1383 return; 1384 } 1385 fhsize = FHANDLE_SIZE(fhp); 1386 kmem_free(fhp, fhsize); 1387 } 1388 1389 /* 1390 * vfs_composefh: compose a filehandle. 
1391 */ 1392 1393 int 1394 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1395 { 1396 struct mount *mp; 1397 struct fid *fidp; 1398 int error; 1399 size_t needfhsize; 1400 size_t fidsize; 1401 1402 mp = vp->v_mount; 1403 fidp = NULL; 1404 if (*fh_size < FHANDLE_SIZE_MIN) { 1405 fidsize = 0; 1406 } else { 1407 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1408 if (fhp != NULL) { 1409 memset(fhp, 0, *fh_size); 1410 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1411 fidp = &fhp->fh_fid; 1412 } 1413 } 1414 error = VFS_VPTOFH(vp, fidp, &fidsize); 1415 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1416 if (error == 0 && *fh_size < needfhsize) { 1417 error = E2BIG; 1418 } 1419 *fh_size = needfhsize; 1420 return error; 1421 } 1422 1423 int 1424 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1425 { 1426 struct mount *mp; 1427 fhandle_t *fhp; 1428 size_t fhsize; 1429 size_t fidsize; 1430 int error; 1431 1432 *fhpp = NULL; 1433 mp = vp->v_mount; 1434 fidsize = 0; 1435 error = VFS_VPTOFH(vp, NULL, &fidsize); 1436 KASSERT(error != 0); 1437 if (error != E2BIG) { 1438 goto out; 1439 } 1440 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1441 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1442 if (fhp == NULL) { 1443 error = ENOMEM; 1444 goto out; 1445 } 1446 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1447 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1448 if (error == 0) { 1449 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1450 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1451 *fhpp = fhp; 1452 } else { 1453 kmem_free(fhp, fhsize); 1454 } 1455 out: 1456 return error; 1457 } 1458 1459 void 1460 vfs_composefh_free(fhandle_t *fhp) 1461 { 1462 1463 vfs__fhfree(fhp); 1464 } 1465 1466 /* 1467 * vfs_fhtovp: lookup a vnode by a filehandle. 
1468 */ 1469 1470 int 1471 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1472 { 1473 struct mount *mp; 1474 int error; 1475 1476 *vpp = NULL; 1477 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1478 if (mp == NULL) { 1479 error = ESTALE; 1480 goto out; 1481 } 1482 if (mp->mnt_op->vfs_fhtovp == NULL) { 1483 error = EOPNOTSUPP; 1484 goto out; 1485 } 1486 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1487 out: 1488 return error; 1489 } 1490 1491 /* 1492 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1493 * the needed size. 1494 */ 1495 1496 int 1497 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1498 { 1499 fhandle_t *fhp; 1500 int error; 1501 1502 *fhpp = NULL; 1503 if (fhsize > FHANDLE_SIZE_MAX) { 1504 return EINVAL; 1505 } 1506 if (fhsize < FHANDLE_SIZE_MIN) { 1507 return EINVAL; 1508 } 1509 again: 1510 fhp = kmem_alloc(fhsize, KM_SLEEP); 1511 if (fhp == NULL) { 1512 return ENOMEM; 1513 } 1514 error = copyin(ufhp, fhp, fhsize); 1515 if (error == 0) { 1516 /* XXX this check shouldn't be here */ 1517 if (FHANDLE_SIZE(fhp) == fhsize) { 1518 *fhpp = fhp; 1519 return 0; 1520 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1521 /* 1522 * a kludge for nfsv2 padded handles. 1523 */ 1524 size_t sz; 1525 1526 sz = FHANDLE_SIZE(fhp); 1527 kmem_free(fhp, fhsize); 1528 fhsize = sz; 1529 goto again; 1530 } else { 1531 /* 1532 * userland told us wrong size. 
1533 */ 1534 error = EINVAL; 1535 } 1536 } 1537 kmem_free(fhp, fhsize); 1538 return error; 1539 } 1540 1541 void 1542 vfs_copyinfh_free(fhandle_t *fhp) 1543 { 1544 1545 vfs__fhfree(fhp); 1546 } 1547 1548 /* 1549 * Get file handle system call 1550 */ 1551 int 1552 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1553 { 1554 /* { 1555 syscallarg(char *) fname; 1556 syscallarg(fhandle_t *) fhp; 1557 syscallarg(size_t *) fh_size; 1558 } */ 1559 struct vnode *vp; 1560 fhandle_t *fh; 1561 int error; 1562 struct nameidata nd; 1563 size_t sz; 1564 size_t usz; 1565 1566 /* 1567 * Must be super user 1568 */ 1569 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1570 0, NULL, NULL, NULL); 1571 if (error) 1572 return (error); 1573 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1574 SCARG(uap, fname)); 1575 error = namei(&nd); 1576 if (error) 1577 return (error); 1578 vp = nd.ni_vp; 1579 error = vfs_composefh_alloc(vp, &fh); 1580 vput(vp); 1581 if (error != 0) { 1582 goto out; 1583 } 1584 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1585 if (error != 0) { 1586 goto out; 1587 } 1588 sz = FHANDLE_SIZE(fh); 1589 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1590 if (error != 0) { 1591 goto out; 1592 } 1593 if (usz >= sz) { 1594 error = copyout(fh, SCARG(uap, fhp), sz); 1595 } else { 1596 error = E2BIG; 1597 } 1598 out: 1599 vfs_composefh_free(fh); 1600 return (error); 1601 } 1602 1603 /* 1604 * Open a file given a file handle. 1605 * 1606 * Check permissions, allocate an open file structure, 1607 * and call the device open routine if any. 
1608 */ 1609 1610 int 1611 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1612 register_t *retval) 1613 { 1614 file_t *fp; 1615 struct vnode *vp = NULL; 1616 kauth_cred_t cred = l->l_cred; 1617 file_t *nfp; 1618 int type, indx, error=0; 1619 struct flock lf; 1620 struct vattr va; 1621 fhandle_t *fh; 1622 int flags; 1623 proc_t *p; 1624 1625 p = curproc; 1626 1627 /* 1628 * Must be super user 1629 */ 1630 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1631 0, NULL, NULL, NULL))) 1632 return (error); 1633 1634 flags = FFLAGS(oflags); 1635 if ((flags & (FREAD | FWRITE)) == 0) 1636 return (EINVAL); 1637 if ((flags & O_CREAT)) 1638 return (EINVAL); 1639 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1640 return (error); 1641 fp = nfp; 1642 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1643 if (error != 0) { 1644 goto bad; 1645 } 1646 error = vfs_fhtovp(fh, &vp); 1647 if (error != 0) { 1648 goto bad; 1649 } 1650 1651 /* Now do an effective vn_open */ 1652 1653 if (vp->v_type == VSOCK) { 1654 error = EOPNOTSUPP; 1655 goto bad; 1656 } 1657 error = vn_openchk(vp, cred, flags); 1658 if (error != 0) 1659 goto bad; 1660 if (flags & O_TRUNC) { 1661 VOP_UNLOCK(vp, 0); /* XXX */ 1662 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1663 VATTR_NULL(&va); 1664 va.va_size = 0; 1665 error = VOP_SETATTR(vp, &va, cred); 1666 if (error) 1667 goto bad; 1668 } 1669 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1670 goto bad; 1671 if (flags & FWRITE) { 1672 mutex_enter(&vp->v_interlock); 1673 vp->v_writecount++; 1674 mutex_exit(&vp->v_interlock); 1675 } 1676 1677 /* done with modified vn_open, now finish what sys_open does. 
*/ 1678 1679 fp->f_flag = flags & FMASK; 1680 fp->f_type = DTYPE_VNODE; 1681 fp->f_ops = &vnops; 1682 fp->f_data = vp; 1683 if (flags & (O_EXLOCK | O_SHLOCK)) { 1684 lf.l_whence = SEEK_SET; 1685 lf.l_start = 0; 1686 lf.l_len = 0; 1687 if (flags & O_EXLOCK) 1688 lf.l_type = F_WRLCK; 1689 else 1690 lf.l_type = F_RDLCK; 1691 type = F_FLOCK; 1692 if ((flags & FNONBLOCK) == 0) 1693 type |= F_WAIT; 1694 VOP_UNLOCK(vp, 0); 1695 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1696 if (error) { 1697 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1698 fd_abort(p, fp, indx); 1699 return (error); 1700 } 1701 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1702 atomic_or_uint(&fp->f_flag, FHASLOCK); 1703 } 1704 VOP_UNLOCK(vp, 0); 1705 *retval = indx; 1706 fd_affix(p, fp, indx); 1707 vfs_copyinfh_free(fh); 1708 return (0); 1709 1710 bad: 1711 fd_abort(p, fp, indx); 1712 if (vp != NULL) 1713 vput(vp); 1714 vfs_copyinfh_free(fh); 1715 return (error); 1716 } 1717 1718 int 1719 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1720 { 1721 /* { 1722 syscallarg(const void *) fhp; 1723 syscallarg(size_t) fh_size; 1724 syscallarg(int) flags; 1725 } */ 1726 1727 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1728 SCARG(uap, flags), retval); 1729 } 1730 1731 int 1732 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1733 { 1734 int error; 1735 fhandle_t *fh; 1736 struct vnode *vp; 1737 1738 /* 1739 * Must be super user 1740 */ 1741 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1742 0, NULL, NULL, NULL))) 1743 return (error); 1744 1745 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1746 if (error != 0) 1747 return error; 1748 1749 error = vfs_fhtovp(fh, &vp); 1750 vfs_copyinfh_free(fh); 1751 if (error != 0) 1752 return error; 1753 1754 error = vn_stat(vp, sb); 1755 vput(vp); 1756 return error; 1757 } 1758 1759 1760 /* ARGSUSED */ 1761 int 1762 sys___fhstat40(struct lwp *l, const struct 
sys___fhstat40_args *uap, register_t *retval) 1763 { 1764 /* { 1765 syscallarg(const void *) fhp; 1766 syscallarg(size_t) fh_size; 1767 syscallarg(struct stat *) sb; 1768 } */ 1769 struct stat sb; 1770 int error; 1771 1772 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1773 if (error) 1774 return error; 1775 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1776 } 1777 1778 int 1779 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1780 int flags) 1781 { 1782 fhandle_t *fh; 1783 struct mount *mp; 1784 struct vnode *vp; 1785 int error; 1786 1787 /* 1788 * Must be super user 1789 */ 1790 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1791 0, NULL, NULL, NULL))) 1792 return error; 1793 1794 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1795 if (error != 0) 1796 return error; 1797 1798 error = vfs_fhtovp(fh, &vp); 1799 vfs_copyinfh_free(fh); 1800 if (error != 0) 1801 return error; 1802 1803 mp = vp->v_mount; 1804 error = dostatvfs(mp, sb, l, flags, 1); 1805 vput(vp); 1806 return error; 1807 } 1808 1809 /* ARGSUSED */ 1810 int 1811 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1812 { 1813 /* { 1814 syscallarg(const void *) fhp; 1815 syscallarg(size_t) fh_size; 1816 syscallarg(struct statvfs *) buf; 1817 syscallarg(int) flags; 1818 } */ 1819 struct statvfs *sb = STATVFSBUF_GET(); 1820 int error; 1821 1822 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1823 SCARG(uap, flags)); 1824 if (error == 0) 1825 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1826 STATVFSBUF_PUT(sb); 1827 return error; 1828 } 1829 1830 /* 1831 * Create a special file. 
1832 */ 1833 /* ARGSUSED */ 1834 int 1835 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval) 1836 { 1837 /* { 1838 syscallarg(const char *) path; 1839 syscallarg(int) mode; 1840 syscallarg(int) dev; 1841 } */ 1842 struct proc *p = l->l_proc; 1843 struct vnode *vp; 1844 struct vattr vattr; 1845 int error, optype; 1846 struct nameidata nd; 1847 char *path; 1848 const char *cpath; 1849 enum uio_seg seg = UIO_USERSPACE; 1850 1851 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1852 0, NULL, NULL, NULL)) != 0) 1853 return (error); 1854 1855 optype = VOP_MKNOD_DESCOFFSET; 1856 1857 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1858 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1859 1860 if ((error = namei(&nd)) != 0) 1861 goto out; 1862 vp = nd.ni_vp; 1863 if (vp != NULL) 1864 error = EEXIST; 1865 else { 1866 VATTR_NULL(&vattr); 1867 /* We will read cwdi->cwdi_cmask unlocked. */ 1868 vattr.va_mode = 1869 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1870 vattr.va_rdev = SCARG(uap, dev); 1871 1872 switch (SCARG(uap, mode) & S_IFMT) { 1873 case S_IFMT: /* used by badsect to flag bad sectors */ 1874 vattr.va_type = VBAD; 1875 break; 1876 case S_IFCHR: 1877 vattr.va_type = VCHR; 1878 break; 1879 case S_IFBLK: 1880 vattr.va_type = VBLK; 1881 break; 1882 case S_IFWHT: 1883 optype = VOP_WHITEOUT_DESCOFFSET; 1884 break; 1885 case S_IFREG: 1886 #if NVERIEXEC > 0 1887 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1888 O_CREAT); 1889 #endif /* NVERIEXEC > 0 */ 1890 vattr.va_type = VREG; 1891 vattr.va_rdev = VNOVAL; 1892 optype = VOP_CREATE_DESCOFFSET; 1893 break; 1894 default: 1895 error = EINVAL; 1896 break; 1897 } 1898 } 1899 if (!error) { 1900 switch (optype) { 1901 case VOP_WHITEOUT_DESCOFFSET: 1902 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1903 if (error) 1904 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1905 vput(nd.ni_dvp); 1906 break; 1907 1908 case VOP_MKNOD_DESCOFFSET: 1909 error = 
VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1910 &nd.ni_cnd, &vattr); 1911 if (error == 0) 1912 vput(nd.ni_vp); 1913 break; 1914 1915 case VOP_CREATE_DESCOFFSET: 1916 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1917 &nd.ni_cnd, &vattr); 1918 if (error == 0) 1919 vput(nd.ni_vp); 1920 break; 1921 } 1922 } else { 1923 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1924 if (nd.ni_dvp == vp) 1925 vrele(nd.ni_dvp); 1926 else 1927 vput(nd.ni_dvp); 1928 if (vp) 1929 vrele(vp); 1930 } 1931 out: 1932 VERIEXEC_PATH_PUT(path); 1933 return (error); 1934 } 1935 1936 /* 1937 * Create a named pipe. 1938 */ 1939 /* ARGSUSED */ 1940 int 1941 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1942 { 1943 /* { 1944 syscallarg(const char *) path; 1945 syscallarg(int) mode; 1946 } */ 1947 struct proc *p = l->l_proc; 1948 struct vattr vattr; 1949 int error; 1950 struct nameidata nd; 1951 1952 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1953 SCARG(uap, path)); 1954 if ((error = namei(&nd)) != 0) 1955 return (error); 1956 if (nd.ni_vp != NULL) { 1957 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1958 if (nd.ni_dvp == nd.ni_vp) 1959 vrele(nd.ni_dvp); 1960 else 1961 vput(nd.ni_dvp); 1962 vrele(nd.ni_vp); 1963 return (EEXIST); 1964 } 1965 VATTR_NULL(&vattr); 1966 vattr.va_type = VFIFO; 1967 /* We will read cwdi->cwdi_cmask unlocked. */ 1968 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1969 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1970 if (error == 0) 1971 vput(nd.ni_vp); 1972 return (error); 1973 } 1974 1975 /* 1976 * Make a hard file link. 
1977 */ 1978 /* ARGSUSED */ 1979 int 1980 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1981 { 1982 /* { 1983 syscallarg(const char *) path; 1984 syscallarg(const char *) link; 1985 } */ 1986 struct vnode *vp; 1987 struct nameidata nd; 1988 int error; 1989 1990 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1991 SCARG(uap, path)); 1992 if ((error = namei(&nd)) != 0) 1993 return (error); 1994 vp = nd.ni_vp; 1995 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1996 SCARG(uap, link)); 1997 if ((error = namei(&nd)) != 0) 1998 goto out; 1999 if (nd.ni_vp) { 2000 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2001 if (nd.ni_dvp == nd.ni_vp) 2002 vrele(nd.ni_dvp); 2003 else 2004 vput(nd.ni_dvp); 2005 vrele(nd.ni_vp); 2006 error = EEXIST; 2007 goto out; 2008 } 2009 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2010 out: 2011 vrele(vp); 2012 return (error); 2013 } 2014 2015 /* 2016 * Make a symbolic link. 2017 */ 2018 /* ARGSUSED */ 2019 int 2020 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2021 { 2022 /* { 2023 syscallarg(const char *) path; 2024 syscallarg(const char *) link; 2025 } */ 2026 struct proc *p = l->l_proc; 2027 struct vattr vattr; 2028 char *path; 2029 int error; 2030 struct nameidata nd; 2031 2032 path = PNBUF_GET(); 2033 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2034 if (error) 2035 goto out; 2036 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2037 SCARG(uap, link)); 2038 if ((error = namei(&nd)) != 0) 2039 goto out; 2040 if (nd.ni_vp) { 2041 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2042 if (nd.ni_dvp == nd.ni_vp) 2043 vrele(nd.ni_dvp); 2044 else 2045 vput(nd.ni_dvp); 2046 vrele(nd.ni_vp); 2047 error = EEXIST; 2048 goto out; 2049 } 2050 VATTR_NULL(&vattr); 2051 vattr.va_type = VLNK; 2052 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2053 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2054 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2055 if (error == 0) 2056 vput(nd.ni_vp); 2057 out: 2058 PNBUF_PUT(path); 2059 return (error); 2060 } 2061 2062 /* 2063 * Delete a whiteout from the filesystem. 2064 */ 2065 /* ARGSUSED */ 2066 int 2067 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2068 { 2069 /* { 2070 syscallarg(const char *) path; 2071 } */ 2072 int error; 2073 struct nameidata nd; 2074 2075 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2076 UIO_USERSPACE, SCARG(uap, path)); 2077 error = namei(&nd); 2078 if (error) 2079 return (error); 2080 2081 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2082 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2083 if (nd.ni_dvp == nd.ni_vp) 2084 vrele(nd.ni_dvp); 2085 else 2086 vput(nd.ni_dvp); 2087 if (nd.ni_vp) 2088 vrele(nd.ni_vp); 2089 return (EEXIST); 2090 } 2091 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2092 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2093 vput(nd.ni_dvp); 2094 return (error); 2095 } 2096 2097 /* 2098 * Delete a name from the filesystem. 2099 */ 2100 /* ARGSUSED */ 2101 int 2102 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2103 { 2104 /* { 2105 syscallarg(const char *) path; 2106 } */ 2107 2108 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2109 } 2110 2111 int 2112 do_sys_unlink(const char *arg, enum uio_seg seg) 2113 { 2114 struct vnode *vp; 2115 int error; 2116 struct nameidata nd; 2117 kauth_cred_t cred; 2118 char *path; 2119 const char *cpath; 2120 2121 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2122 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2123 2124 if ((error = namei(&nd)) != 0) 2125 goto out; 2126 vp = nd.ni_vp; 2127 2128 /* 2129 * The root of a mounted filesystem cannot be deleted. 
2130 */ 2131 if (vp->v_vflag & VV_ROOT) { 2132 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2133 if (nd.ni_dvp == vp) 2134 vrele(nd.ni_dvp); 2135 else 2136 vput(nd.ni_dvp); 2137 vput(vp); 2138 error = EBUSY; 2139 goto out; 2140 } 2141 2142 #if NVERIEXEC > 0 2143 /* Handle remove requests for veriexec entries. */ 2144 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2145 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2146 if (nd.ni_dvp == vp) 2147 vrele(nd.ni_dvp); 2148 else 2149 vput(nd.ni_dvp); 2150 vput(vp); 2151 goto out; 2152 } 2153 #endif /* NVERIEXEC > 0 */ 2154 2155 cred = kauth_cred_get(); 2156 #ifdef FILEASSOC 2157 (void)fileassoc_file_delete(vp); 2158 #endif /* FILEASSOC */ 2159 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2160 out: 2161 VERIEXEC_PATH_PUT(path); 2162 return (error); 2163 } 2164 2165 /* 2166 * Reposition read/write file offset. 2167 */ 2168 int 2169 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2170 { 2171 /* { 2172 syscallarg(int) fd; 2173 syscallarg(int) pad; 2174 syscallarg(off_t) offset; 2175 syscallarg(int) whence; 2176 } */ 2177 kauth_cred_t cred = l->l_cred; 2178 file_t *fp; 2179 struct vnode *vp; 2180 struct vattr vattr; 2181 off_t newoff; 2182 int error, fd; 2183 2184 fd = SCARG(uap, fd); 2185 2186 if ((fp = fd_getfile(fd)) == NULL) 2187 return (EBADF); 2188 2189 vp = fp->f_data; 2190 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2191 error = ESPIPE; 2192 goto out; 2193 } 2194 2195 switch (SCARG(uap, whence)) { 2196 case SEEK_CUR: 2197 newoff = fp->f_offset + SCARG(uap, offset); 2198 break; 2199 case SEEK_END: 2200 error = VOP_GETATTR(vp, &vattr, cred); 2201 if (error) { 2202 goto out; 2203 } 2204 newoff = SCARG(uap, offset) + vattr.va_size; 2205 break; 2206 case SEEK_SET: 2207 newoff = SCARG(uap, offset); 2208 break; 2209 default: 2210 error = EINVAL; 2211 goto out; 2212 } 2213 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2214 *(off_t *)retval = fp->f_offset = 
newoff; 2215 } 2216 out: 2217 fd_putfile(fd); 2218 return (error); 2219 } 2220 2221 /* 2222 * Positional read system call. 2223 */ 2224 int 2225 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2226 { 2227 /* { 2228 syscallarg(int) fd; 2229 syscallarg(void *) buf; 2230 syscallarg(size_t) nbyte; 2231 syscallarg(off_t) offset; 2232 } */ 2233 file_t *fp; 2234 struct vnode *vp; 2235 off_t offset; 2236 int error, fd = SCARG(uap, fd); 2237 2238 if ((fp = fd_getfile(fd)) == NULL) 2239 return (EBADF); 2240 2241 if ((fp->f_flag & FREAD) == 0) { 2242 fd_putfile(fd); 2243 return (EBADF); 2244 } 2245 2246 vp = fp->f_data; 2247 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2248 error = ESPIPE; 2249 goto out; 2250 } 2251 2252 offset = SCARG(uap, offset); 2253 2254 /* 2255 * XXX This works because no file systems actually 2256 * XXX take any action on the seek operation. 2257 */ 2258 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2259 goto out; 2260 2261 /* dofileread() will unuse the descriptor for us */ 2262 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2263 &offset, 0, retval)); 2264 2265 out: 2266 fd_putfile(fd); 2267 return (error); 2268 } 2269 2270 /* 2271 * Positional scatter read system call. 2272 */ 2273 int 2274 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2275 { 2276 /* { 2277 syscallarg(int) fd; 2278 syscallarg(const struct iovec *) iovp; 2279 syscallarg(int) iovcnt; 2280 syscallarg(off_t) offset; 2281 } */ 2282 off_t offset = SCARG(uap, offset); 2283 2284 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2285 SCARG(uap, iovcnt), &offset, 0, retval); 2286 } 2287 2288 /* 2289 * Positional write system call. 
2290 */ 2291 int 2292 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2293 { 2294 /* { 2295 syscallarg(int) fd; 2296 syscallarg(const void *) buf; 2297 syscallarg(size_t) nbyte; 2298 syscallarg(off_t) offset; 2299 } */ 2300 file_t *fp; 2301 struct vnode *vp; 2302 off_t offset; 2303 int error, fd = SCARG(uap, fd); 2304 2305 if ((fp = fd_getfile(fd)) == NULL) 2306 return (EBADF); 2307 2308 if ((fp->f_flag & FWRITE) == 0) { 2309 fd_putfile(fd); 2310 return (EBADF); 2311 } 2312 2313 vp = fp->f_data; 2314 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2315 error = ESPIPE; 2316 goto out; 2317 } 2318 2319 offset = SCARG(uap, offset); 2320 2321 /* 2322 * XXX This works because no file systems actually 2323 * XXX take any action on the seek operation. 2324 */ 2325 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2326 goto out; 2327 2328 /* dofilewrite() will unuse the descriptor for us */ 2329 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2330 &offset, 0, retval)); 2331 2332 out: 2333 fd_putfile(fd); 2334 return (error); 2335 } 2336 2337 /* 2338 * Positional gather write system call. 2339 */ 2340 int 2341 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2342 { 2343 /* { 2344 syscallarg(int) fd; 2345 syscallarg(const struct iovec *) iovp; 2346 syscallarg(int) iovcnt; 2347 syscallarg(off_t) offset; 2348 } */ 2349 off_t offset = SCARG(uap, offset); 2350 2351 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2352 SCARG(uap, iovcnt), &offset, 0, retval); 2353 } 2354 2355 /* 2356 * Check access permissions. 
2357 */ 2358 int 2359 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2360 { 2361 /* { 2362 syscallarg(const char *) path; 2363 syscallarg(int) flags; 2364 } */ 2365 kauth_cred_t cred; 2366 struct vnode *vp; 2367 int error, flags; 2368 struct nameidata nd; 2369 2370 cred = kauth_cred_dup(l->l_cred); 2371 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2372 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2373 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2374 SCARG(uap, path)); 2375 /* Override default credentials */ 2376 nd.ni_cnd.cn_cred = cred; 2377 if ((error = namei(&nd)) != 0) 2378 goto out; 2379 vp = nd.ni_vp; 2380 2381 /* Flags == 0 means only check for existence. */ 2382 if (SCARG(uap, flags)) { 2383 flags = 0; 2384 if (SCARG(uap, flags) & R_OK) 2385 flags |= VREAD; 2386 if (SCARG(uap, flags) & W_OK) 2387 flags |= VWRITE; 2388 if (SCARG(uap, flags) & X_OK) 2389 flags |= VEXEC; 2390 2391 error = VOP_ACCESS(vp, flags, cred); 2392 if (!error && (flags & VWRITE)) 2393 error = vn_writechk(vp); 2394 } 2395 vput(vp); 2396 out: 2397 kauth_cred_free(cred); 2398 return (error); 2399 } 2400 2401 /* 2402 * Common code for all sys_stat functions, including compat versions. 2403 */ 2404 int 2405 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2406 { 2407 int error; 2408 struct nameidata nd; 2409 2410 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2411 UIO_USERSPACE, path); 2412 error = namei(&nd); 2413 if (error != 0) 2414 return error; 2415 error = vn_stat(nd.ni_vp, sb); 2416 vput(nd.ni_vp); 2417 return error; 2418 } 2419 2420 /* 2421 * Get file status; this version follows links. 
2422 */ 2423 /* ARGSUSED */ 2424 int 2425 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval) 2426 { 2427 /* { 2428 syscallarg(const char *) path; 2429 syscallarg(struct stat *) ub; 2430 } */ 2431 struct stat sb; 2432 int error; 2433 2434 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2435 if (error) 2436 return error; 2437 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2438 } 2439 2440 /* 2441 * Get file status; this version does not follow links. 2442 */ 2443 /* ARGSUSED */ 2444 int 2445 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval) 2446 { 2447 /* { 2448 syscallarg(const char *) path; 2449 syscallarg(struct stat *) ub; 2450 } */ 2451 struct stat sb; 2452 int error; 2453 2454 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2455 if (error) 2456 return error; 2457 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2458 } 2459 2460 /* 2461 * Get configurable pathname variables. 2462 */ 2463 /* ARGSUSED */ 2464 int 2465 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2466 { 2467 /* { 2468 syscallarg(const char *) path; 2469 syscallarg(int) name; 2470 } */ 2471 int error; 2472 struct nameidata nd; 2473 2474 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2475 SCARG(uap, path)); 2476 if ((error = namei(&nd)) != 0) 2477 return (error); 2478 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2479 vput(nd.ni_vp); 2480 return (error); 2481 } 2482 2483 /* 2484 * Return target name of a symbolic link. 
2485 */ 2486 /* ARGSUSED */ 2487 int 2488 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2489 { 2490 /* { 2491 syscallarg(const char *) path; 2492 syscallarg(char *) buf; 2493 syscallarg(size_t) count; 2494 } */ 2495 struct vnode *vp; 2496 struct iovec aiov; 2497 struct uio auio; 2498 int error; 2499 struct nameidata nd; 2500 2501 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2502 SCARG(uap, path)); 2503 if ((error = namei(&nd)) != 0) 2504 return (error); 2505 vp = nd.ni_vp; 2506 if (vp->v_type != VLNK) 2507 error = EINVAL; 2508 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2509 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2510 aiov.iov_base = SCARG(uap, buf); 2511 aiov.iov_len = SCARG(uap, count); 2512 auio.uio_iov = &aiov; 2513 auio.uio_iovcnt = 1; 2514 auio.uio_offset = 0; 2515 auio.uio_rw = UIO_READ; 2516 KASSERT(l == curlwp); 2517 auio.uio_vmspace = l->l_proc->p_vmspace; 2518 auio.uio_resid = SCARG(uap, count); 2519 error = VOP_READLINK(vp, &auio, l->l_cred); 2520 } 2521 vput(vp); 2522 *retval = SCARG(uap, count) - auio.uio_resid; 2523 return (error); 2524 } 2525 2526 /* 2527 * Change flags of a file given a path name. 2528 */ 2529 /* ARGSUSED */ 2530 int 2531 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2532 { 2533 /* { 2534 syscallarg(const char *) path; 2535 syscallarg(u_long) flags; 2536 } */ 2537 struct vnode *vp; 2538 int error; 2539 struct nameidata nd; 2540 2541 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2542 SCARG(uap, path)); 2543 if ((error = namei(&nd)) != 0) 2544 return (error); 2545 vp = nd.ni_vp; 2546 error = change_flags(vp, SCARG(uap, flags), l); 2547 vput(vp); 2548 return (error); 2549 } 2550 2551 /* 2552 * Change flags of a file given a file descriptor. 
2553 */ 2554 /* ARGSUSED */ 2555 int 2556 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2557 { 2558 /* { 2559 syscallarg(int) fd; 2560 syscallarg(u_long) flags; 2561 } */ 2562 struct vnode *vp; 2563 file_t *fp; 2564 int error; 2565 2566 /* fd_getvnode() will use the descriptor for us */ 2567 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2568 return (error); 2569 vp = fp->f_data; 2570 error = change_flags(vp, SCARG(uap, flags), l); 2571 VOP_UNLOCK(vp, 0); 2572 fd_putfile(SCARG(uap, fd)); 2573 return (error); 2574 } 2575 2576 /* 2577 * Change flags of a file given a path name; this version does 2578 * not follow links. 2579 */ 2580 int 2581 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2582 { 2583 /* { 2584 syscallarg(const char *) path; 2585 syscallarg(u_long) flags; 2586 } */ 2587 struct vnode *vp; 2588 int error; 2589 struct nameidata nd; 2590 2591 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2592 SCARG(uap, path)); 2593 if ((error = namei(&nd)) != 0) 2594 return (error); 2595 vp = nd.ni_vp; 2596 error = change_flags(vp, SCARG(uap, flags), l); 2597 vput(vp); 2598 return (error); 2599 } 2600 2601 /* 2602 * Common routine to change flags of a file. 2603 */ 2604 int 2605 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2606 { 2607 struct vattr vattr; 2608 int error; 2609 2610 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2611 /* 2612 * Non-superusers cannot change the flags on devices, even if they 2613 * own them. 
2614 */ 2615 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2616 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2617 goto out; 2618 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2619 error = EINVAL; 2620 goto out; 2621 } 2622 } 2623 VATTR_NULL(&vattr); 2624 vattr.va_flags = flags; 2625 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2626 out: 2627 return (error); 2628 } 2629 2630 /* 2631 * Change mode of a file given path name; this version follows links. 2632 */ 2633 /* ARGSUSED */ 2634 int 2635 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2636 { 2637 /* { 2638 syscallarg(const char *) path; 2639 syscallarg(int) mode; 2640 } */ 2641 int error; 2642 struct nameidata nd; 2643 2644 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2645 SCARG(uap, path)); 2646 if ((error = namei(&nd)) != 0) 2647 return (error); 2648 2649 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2650 2651 vrele(nd.ni_vp); 2652 return (error); 2653 } 2654 2655 /* 2656 * Change mode of a file given a file descriptor. 2657 */ 2658 /* ARGSUSED */ 2659 int 2660 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2661 { 2662 /* { 2663 syscallarg(int) fd; 2664 syscallarg(int) mode; 2665 } */ 2666 file_t *fp; 2667 int error; 2668 2669 /* fd_getvnode() will use the descriptor for us */ 2670 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2671 return (error); 2672 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2673 fd_putfile(SCARG(uap, fd)); 2674 return (error); 2675 } 2676 2677 /* 2678 * Change mode of a file given path name; this version does not follow links. 
2679 */ 2680 /* ARGSUSED */ 2681 int 2682 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2683 { 2684 /* { 2685 syscallarg(const char *) path; 2686 syscallarg(int) mode; 2687 } */ 2688 int error; 2689 struct nameidata nd; 2690 2691 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2692 SCARG(uap, path)); 2693 if ((error = namei(&nd)) != 0) 2694 return (error); 2695 2696 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2697 2698 vrele(nd.ni_vp); 2699 return (error); 2700 } 2701 2702 /* 2703 * Common routine to set mode given a vnode. 2704 */ 2705 static int 2706 change_mode(struct vnode *vp, int mode, struct lwp *l) 2707 { 2708 struct vattr vattr; 2709 int error; 2710 2711 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2712 VATTR_NULL(&vattr); 2713 vattr.va_mode = mode & ALLPERMS; 2714 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2715 VOP_UNLOCK(vp, 0); 2716 return (error); 2717 } 2718 2719 /* 2720 * Set ownership given a path name; this version follows links. 2721 */ 2722 /* ARGSUSED */ 2723 int 2724 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2725 { 2726 /* { 2727 syscallarg(const char *) path; 2728 syscallarg(uid_t) uid; 2729 syscallarg(gid_t) gid; 2730 } */ 2731 int error; 2732 struct nameidata nd; 2733 2734 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2735 SCARG(uap, path)); 2736 if ((error = namei(&nd)) != 0) 2737 return (error); 2738 2739 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2740 2741 vrele(nd.ni_vp); 2742 return (error); 2743 } 2744 2745 /* 2746 * Set ownership given a path name; this version follows links. 2747 * Provides POSIX semantics. 
2748 */ 2749 /* ARGSUSED */ 2750 int 2751 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2752 { 2753 /* { 2754 syscallarg(const char *) path; 2755 syscallarg(uid_t) uid; 2756 syscallarg(gid_t) gid; 2757 } */ 2758 int error; 2759 struct nameidata nd; 2760 2761 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2762 SCARG(uap, path)); 2763 if ((error = namei(&nd)) != 0) 2764 return (error); 2765 2766 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2767 2768 vrele(nd.ni_vp); 2769 return (error); 2770 } 2771 2772 /* 2773 * Set ownership given a file descriptor. 2774 */ 2775 /* ARGSUSED */ 2776 int 2777 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2778 { 2779 /* { 2780 syscallarg(int) fd; 2781 syscallarg(uid_t) uid; 2782 syscallarg(gid_t) gid; 2783 } */ 2784 int error; 2785 file_t *fp; 2786 2787 /* fd_getvnode() will use the descriptor for us */ 2788 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2789 return (error); 2790 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2791 l, 0); 2792 fd_putfile(SCARG(uap, fd)); 2793 return (error); 2794 } 2795 2796 /* 2797 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2798 */ 2799 /* ARGSUSED */ 2800 int 2801 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2802 { 2803 /* { 2804 syscallarg(int) fd; 2805 syscallarg(uid_t) uid; 2806 syscallarg(gid_t) gid; 2807 } */ 2808 int error; 2809 file_t *fp; 2810 2811 /* fd_getvnode() will use the descriptor for us */ 2812 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2813 return (error); 2814 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2815 l, 1); 2816 fd_putfile(SCARG(uap, fd)); 2817 return (error); 2818 } 2819 2820 /* 2821 * Set ownership given a path name; this version does not follow links. 
2822 */ 2823 /* ARGSUSED */ 2824 int 2825 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2826 { 2827 /* { 2828 syscallarg(const char *) path; 2829 syscallarg(uid_t) uid; 2830 syscallarg(gid_t) gid; 2831 } */ 2832 int error; 2833 struct nameidata nd; 2834 2835 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2836 SCARG(uap, path)); 2837 if ((error = namei(&nd)) != 0) 2838 return (error); 2839 2840 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2841 2842 vrele(nd.ni_vp); 2843 return (error); 2844 } 2845 2846 /* 2847 * Set ownership given a path name; this version does not follow links. 2848 * Provides POSIX/XPG semantics. 2849 */ 2850 /* ARGSUSED */ 2851 int 2852 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2853 { 2854 /* { 2855 syscallarg(const char *) path; 2856 syscallarg(uid_t) uid; 2857 syscallarg(gid_t) gid; 2858 } */ 2859 int error; 2860 struct nameidata nd; 2861 2862 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2863 SCARG(uap, path)); 2864 if ((error = namei(&nd)) != 0) 2865 return (error); 2866 2867 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2868 2869 vrele(nd.ni_vp); 2870 return (error); 2871 } 2872 2873 /* 2874 * Common routine to set ownership given a vnode. 2875 */ 2876 static int 2877 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2878 int posix_semantics) 2879 { 2880 struct vattr vattr; 2881 mode_t newmode; 2882 int error; 2883 2884 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2885 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2886 goto out; 2887 2888 #define CHANGED(x) ((int)(x) != -1) 2889 newmode = vattr.va_mode; 2890 if (posix_semantics) { 2891 /* 2892 * POSIX/XPG semantics: if the caller is not the super-user, 2893 * clear set-user-id and set-group-id bits. 
Both POSIX and 2894 * the XPG consider the behaviour for calls by the super-user 2895 * implementation-defined; we leave the set-user-id and set- 2896 * group-id settings intact in that case. 2897 */ 2898 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2899 NULL) != 0) 2900 newmode &= ~(S_ISUID | S_ISGID); 2901 } else { 2902 /* 2903 * NetBSD semantics: when changing owner and/or group, 2904 * clear the respective bit(s). 2905 */ 2906 if (CHANGED(uid)) 2907 newmode &= ~S_ISUID; 2908 if (CHANGED(gid)) 2909 newmode &= ~S_ISGID; 2910 } 2911 /* Update va_mode iff altered. */ 2912 if (vattr.va_mode == newmode) 2913 newmode = VNOVAL; 2914 2915 VATTR_NULL(&vattr); 2916 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2917 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2918 vattr.va_mode = newmode; 2919 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2920 #undef CHANGED 2921 2922 out: 2923 VOP_UNLOCK(vp, 0); 2924 return (error); 2925 } 2926 2927 /* 2928 * Set the access and modification times given a path name; this 2929 * version follows links. 2930 */ 2931 /* ARGSUSED */ 2932 int 2933 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval) 2934 { 2935 /* { 2936 syscallarg(const char *) path; 2937 syscallarg(const struct timeval *) tptr; 2938 } */ 2939 2940 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2941 SCARG(uap, tptr), UIO_USERSPACE); 2942 } 2943 2944 /* 2945 * Set the access and modification times given a file descriptor. 
2946 */ 2947 /* ARGSUSED */ 2948 int 2949 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval) 2950 { 2951 /* { 2952 syscallarg(int) fd; 2953 syscallarg(const struct timeval *) tptr; 2954 } */ 2955 int error; 2956 file_t *fp; 2957 2958 /* fd_getvnode() will use the descriptor for us */ 2959 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2960 return (error); 2961 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2962 UIO_USERSPACE); 2963 fd_putfile(SCARG(uap, fd)); 2964 return (error); 2965 } 2966 2967 /* 2968 * Set the access and modification times given a path name; this 2969 * version does not follow links. 2970 */ 2971 int 2972 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval) 2973 { 2974 /* { 2975 syscallarg(const char *) path; 2976 syscallarg(const struct timeval *) tptr; 2977 } */ 2978 2979 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2980 SCARG(uap, tptr), UIO_USERSPACE); 2981 } 2982 2983 /* 2984 * Common routine to set access and modification times given a vnode. 
2985 */ 2986 int 2987 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2988 const struct timeval *tptr, enum uio_seg seg) 2989 { 2990 struct vattr vattr; 2991 struct nameidata nd; 2992 int error; 2993 2994 VATTR_NULL(&vattr); 2995 if (tptr == NULL) { 2996 nanotime(&vattr.va_atime); 2997 vattr.va_mtime = vattr.va_atime; 2998 vattr.va_vaflags |= VA_UTIMES_NULL; 2999 } else { 3000 struct timeval tv[2]; 3001 3002 if (seg != UIO_SYSSPACE) { 3003 error = copyin(tptr, &tv, sizeof (tv)); 3004 if (error != 0) 3005 return error; 3006 tptr = tv; 3007 } 3008 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime); 3009 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime); 3010 } 3011 3012 if (vp == NULL) { 3013 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3014 if ((error = namei(&nd)) != 0) 3015 return (error); 3016 vp = nd.ni_vp; 3017 } else 3018 nd.ni_vp = NULL; 3019 3020 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3021 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3022 VOP_UNLOCK(vp, 0); 3023 3024 if (nd.ni_vp != NULL) 3025 vrele(nd.ni_vp); 3026 3027 return (error); 3028 } 3029 3030 /* 3031 * Truncate a file given its path name. 
3032 */ 3033 /* ARGSUSED */ 3034 int 3035 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3036 { 3037 /* { 3038 syscallarg(const char *) path; 3039 syscallarg(int) pad; 3040 syscallarg(off_t) length; 3041 } */ 3042 struct vnode *vp; 3043 struct vattr vattr; 3044 int error; 3045 struct nameidata nd; 3046 3047 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3048 SCARG(uap, path)); 3049 if ((error = namei(&nd)) != 0) 3050 return (error); 3051 vp = nd.ni_vp; 3052 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3053 if (vp->v_type == VDIR) 3054 error = EISDIR; 3055 else if ((error = vn_writechk(vp)) == 0 && 3056 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3057 VATTR_NULL(&vattr); 3058 vattr.va_size = SCARG(uap, length); 3059 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3060 } 3061 vput(vp); 3062 return (error); 3063 } 3064 3065 /* 3066 * Truncate a file given a file descriptor. 3067 */ 3068 /* ARGSUSED */ 3069 int 3070 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3071 { 3072 /* { 3073 syscallarg(int) fd; 3074 syscallarg(int) pad; 3075 syscallarg(off_t) length; 3076 } */ 3077 struct vattr vattr; 3078 struct vnode *vp; 3079 file_t *fp; 3080 int error; 3081 3082 /* fd_getvnode() will use the descriptor for us */ 3083 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3084 return (error); 3085 if ((fp->f_flag & FWRITE) == 0) { 3086 error = EINVAL; 3087 goto out; 3088 } 3089 vp = fp->f_data; 3090 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3091 if (vp->v_type == VDIR) 3092 error = EISDIR; 3093 else if ((error = vn_writechk(vp)) == 0) { 3094 VATTR_NULL(&vattr); 3095 vattr.va_size = SCARG(uap, length); 3096 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3097 } 3098 VOP_UNLOCK(vp, 0); 3099 out: 3100 fd_putfile(SCARG(uap, fd)); 3101 return (error); 3102 } 3103 3104 /* 3105 * Sync an open file. 
3106 */ 3107 /* ARGSUSED */ 3108 int 3109 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3110 { 3111 /* { 3112 syscallarg(int) fd; 3113 } */ 3114 struct vnode *vp; 3115 file_t *fp; 3116 int error; 3117 3118 /* fd_getvnode() will use the descriptor for us */ 3119 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3120 return (error); 3121 vp = fp->f_data; 3122 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3123 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3124 if (error == 0 && bioopsp != NULL && 3125 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3126 (*bioopsp->io_fsync)(vp, 0); 3127 VOP_UNLOCK(vp, 0); 3128 fd_putfile(SCARG(uap, fd)); 3129 return (error); 3130 } 3131 3132 /* 3133 * Sync a range of file data. API modeled after that found in AIX. 3134 * 3135 * FDATASYNC indicates that we need only save enough metadata to be able 3136 * to re-read the written data. Note we duplicate AIX's requirement that 3137 * the file be open for writing. 3138 */ 3139 /* ARGSUSED */ 3140 int 3141 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3142 { 3143 /* { 3144 syscallarg(int) fd; 3145 syscallarg(int) flags; 3146 syscallarg(off_t) start; 3147 syscallarg(off_t) length; 3148 } */ 3149 struct vnode *vp; 3150 file_t *fp; 3151 int flags, nflags; 3152 off_t s, e, len; 3153 int error; 3154 3155 /* fd_getvnode() will use the descriptor for us */ 3156 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3157 return (error); 3158 3159 if ((fp->f_flag & FWRITE) == 0) { 3160 error = EBADF; 3161 goto out; 3162 } 3163 3164 flags = SCARG(uap, flags); 3165 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3166 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3167 error = EINVAL; 3168 goto out; 3169 } 3170 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3171 if (flags & FDATASYNC) 3172 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3173 else 3174 nflags = FSYNC_WAIT; 3175 if (flags & FDISKSYNC) 3176 nflags |= 
FSYNC_CACHE; 3177 3178 len = SCARG(uap, length); 3179 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3180 if (len) { 3181 s = SCARG(uap, start); 3182 e = s + len; 3183 if (e < s) { 3184 error = EINVAL; 3185 goto out; 3186 } 3187 } else { 3188 e = 0; 3189 s = 0; 3190 } 3191 3192 vp = fp->f_data; 3193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3194 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3195 3196 if (error == 0 && bioopsp != NULL && 3197 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3198 (*bioopsp->io_fsync)(vp, nflags); 3199 3200 VOP_UNLOCK(vp, 0); 3201 out: 3202 fd_putfile(SCARG(uap, fd)); 3203 return (error); 3204 } 3205 3206 /* 3207 * Sync the data of an open file. 3208 */ 3209 /* ARGSUSED */ 3210 int 3211 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3212 { 3213 /* { 3214 syscallarg(int) fd; 3215 } */ 3216 struct vnode *vp; 3217 file_t *fp; 3218 int error; 3219 3220 /* fd_getvnode() will use the descriptor for us */ 3221 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3222 return (error); 3223 if ((fp->f_flag & FWRITE) == 0) { 3224 fd_putfile(SCARG(uap, fd)); 3225 return (EBADF); 3226 } 3227 vp = fp->f_data; 3228 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3229 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3230 VOP_UNLOCK(vp, 0); 3231 fd_putfile(SCARG(uap, fd)); 3232 return (error); 3233 } 3234 3235 /* 3236 * Rename files, (standard) BSD semantics frontend. 3237 */ 3238 /* ARGSUSED */ 3239 int 3240 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3241 { 3242 /* { 3243 syscallarg(const char *) from; 3244 syscallarg(const char *) to; 3245 } */ 3246 3247 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3248 } 3249 3250 /* 3251 * Rename files, POSIX semantics frontend. 
3252 */ 3253 /* ARGSUSED */ 3254 int 3255 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3256 { 3257 /* { 3258 syscallarg(const char *) from; 3259 syscallarg(const char *) to; 3260 } */ 3261 3262 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3263 } 3264 3265 /* 3266 * Rename files. Source and destination must either both be directories, 3267 * or both not be directories. If target is a directory, it must be empty. 3268 * If `from' and `to' refer to the same object, the value of the `retain' 3269 * argument is used to determine whether `from' will be 3270 * 3271 * (retain == 0) deleted unless `from' and `to' refer to the same 3272 * object in the file system's name space (BSD). 3273 * (retain == 1) always retained (POSIX). 3274 */ 3275 int 3276 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3277 { 3278 struct vnode *tvp, *fvp, *tdvp; 3279 struct nameidata fromnd, tond; 3280 struct mount *fs; 3281 struct lwp *l = curlwp; 3282 struct proc *p; 3283 uint32_t saveflag; 3284 int error; 3285 3286 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3287 seg, from); 3288 if ((error = namei(&fromnd)) != 0) 3289 return (error); 3290 if (fromnd.ni_dvp != fromnd.ni_vp) 3291 VOP_UNLOCK(fromnd.ni_dvp, 0); 3292 fvp = fromnd.ni_vp; 3293 3294 fs = fvp->v_mount; 3295 error = VFS_RENAMELOCK_ENTER(fs); 3296 if (error) { 3297 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3298 vrele(fromnd.ni_dvp); 3299 vrele(fvp); 3300 goto out1; 3301 } 3302 3303 /* 3304 * close, partially, yet another race - ideally we should only 3305 * go as far as getting fromnd.ni_dvp before getting the per-fs 3306 * lock, and then continue to get fromnd.ni_vp, but we can't do 3307 * that with namei as it stands. 3308 * 3309 * This still won't prevent rmdir from nuking fromnd.ni_vp 3310 * under us. 
The real fix is to get the locks in the right 3311 * order and do the lookups in the right places, but that's a 3312 * major rototill. 3313 * 3314 * Preserve the SAVESTART in cn_flags, because who knows what 3315 * might happen if we don't. 3316 * 3317 * Note: this logic (as well as this whole function) is cloned 3318 * in nfs_serv.c. Proceed accordingly. 3319 */ 3320 vrele(fvp); 3321 if ((fromnd.ni_cnd.cn_namelen == 1 && 3322 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3323 (fromnd.ni_cnd.cn_namelen == 2 && 3324 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3325 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3326 error = EINVAL; 3327 VFS_RENAMELOCK_EXIT(fs); 3328 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3329 vrele(fromnd.ni_dvp); 3330 goto out1; 3331 } 3332 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3333 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3334 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3335 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3336 fromnd.ni_cnd.cn_flags |= saveflag; 3337 if (error) { 3338 VOP_UNLOCK(fromnd.ni_dvp, 0); 3339 VFS_RENAMELOCK_EXIT(fs); 3340 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3341 vrele(fromnd.ni_dvp); 3342 goto out1; 3343 } 3344 VOP_UNLOCK(fromnd.ni_vp, 0); 3345 if (fromnd.ni_dvp != fromnd.ni_vp) 3346 VOP_UNLOCK(fromnd.ni_dvp, 0); 3347 fvp = fromnd.ni_vp; 3348 3349 NDINIT(&tond, RENAME, 3350 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3351 | (fvp->v_type == VDIR ? 
CREATEDIR : 0), 3352 seg, to); 3353 if ((error = namei(&tond)) != 0) { 3354 VFS_RENAMELOCK_EXIT(fs); 3355 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3356 vrele(fromnd.ni_dvp); 3357 vrele(fvp); 3358 goto out1; 3359 } 3360 tdvp = tond.ni_dvp; 3361 tvp = tond.ni_vp; 3362 3363 if (tvp != NULL) { 3364 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3365 error = ENOTDIR; 3366 goto out; 3367 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3368 error = EISDIR; 3369 goto out; 3370 } 3371 } 3372 3373 if (fvp == tdvp) 3374 error = EINVAL; 3375 3376 /* 3377 * Source and destination refer to the same object. 3378 */ 3379 if (fvp == tvp) { 3380 if (retain) 3381 error = -1; 3382 else if (fromnd.ni_dvp == tdvp && 3383 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3384 !memcmp(fromnd.ni_cnd.cn_nameptr, 3385 tond.ni_cnd.cn_nameptr, 3386 fromnd.ni_cnd.cn_namelen)) 3387 error = -1; 3388 } 3389 3390 #if NVERIEXEC > 0 3391 if (!error) { 3392 char *f1, *f2; 3393 3394 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3395 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3396 3397 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3398 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3399 3400 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3401 3402 free(f1, M_TEMP); 3403 free(f2, M_TEMP); 3404 } 3405 #endif /* NVERIEXEC > 0 */ 3406 3407 out: 3408 p = l->l_proc; 3409 if (!error) { 3410 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3411 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3412 VFS_RENAMELOCK_EXIT(fs); 3413 } else { 3414 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3415 if (tdvp == tvp) 3416 vrele(tdvp); 3417 else 3418 vput(tdvp); 3419 if (tvp) 3420 vput(tvp); 3421 VFS_RENAMELOCK_EXIT(fs); 3422 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3423 vrele(fromnd.ni_dvp); 3424 vrele(fvp); 3425 } 3426 vrele(tond.ni_startdir); 3427 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3428 out1: 3429 if (fromnd.ni_startdir) 3430 
vrele(fromnd.ni_startdir); 3431 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3432 return (error == -1 ? 0 : error); 3433 } 3434 3435 /* 3436 * Make a directory file. 3437 */ 3438 /* ARGSUSED */ 3439 int 3440 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3441 { 3442 /* { 3443 syscallarg(const char *) path; 3444 syscallarg(int) mode; 3445 } */ 3446 struct proc *p = l->l_proc; 3447 struct vnode *vp; 3448 struct vattr vattr; 3449 int error; 3450 struct nameidata nd; 3451 3452 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3453 SCARG(uap, path)); 3454 if ((error = namei(&nd)) != 0) 3455 return (error); 3456 vp = nd.ni_vp; 3457 if (vp != NULL) { 3458 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3459 if (nd.ni_dvp == vp) 3460 vrele(nd.ni_dvp); 3461 else 3462 vput(nd.ni_dvp); 3463 vrele(vp); 3464 return (EEXIST); 3465 } 3466 VATTR_NULL(&vattr); 3467 vattr.va_type = VDIR; 3468 /* We will read cwdi->cwdi_cmask unlocked. */ 3469 vattr.va_mode = 3470 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3471 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3472 if (!error) 3473 vput(nd.ni_vp); 3474 return (error); 3475 } 3476 3477 /* 3478 * Remove a directory file. 3479 */ 3480 /* ARGSUSED */ 3481 int 3482 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3483 { 3484 /* { 3485 syscallarg(const char *) path; 3486 } */ 3487 struct vnode *vp; 3488 int error; 3489 struct nameidata nd; 3490 3491 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3492 SCARG(uap, path)); 3493 if ((error = namei(&nd)) != 0) 3494 return (error); 3495 vp = nd.ni_vp; 3496 if (vp->v_type != VDIR) { 3497 error = ENOTDIR; 3498 goto out; 3499 } 3500 /* 3501 * No rmdir "." please. 3502 */ 3503 if (nd.ni_dvp == vp) { 3504 error = EINVAL; 3505 goto out; 3506 } 3507 /* 3508 * The root of a mounted filesystem cannot be deleted. 
3509 */ 3510 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3511 error = EBUSY; 3512 goto out; 3513 } 3514 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3515 return (error); 3516 3517 out: 3518 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3519 if (nd.ni_dvp == vp) 3520 vrele(nd.ni_dvp); 3521 else 3522 vput(nd.ni_dvp); 3523 vput(vp); 3524 return (error); 3525 } 3526 3527 /* 3528 * Read a block of directory entries in a file system independent format. 3529 */ 3530 int 3531 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3532 { 3533 /* { 3534 syscallarg(int) fd; 3535 syscallarg(char *) buf; 3536 syscallarg(size_t) count; 3537 } */ 3538 file_t *fp; 3539 int error, done; 3540 3541 /* fd_getvnode() will use the descriptor for us */ 3542 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3543 return (error); 3544 if ((fp->f_flag & FREAD) == 0) { 3545 error = EBADF; 3546 goto out; 3547 } 3548 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3549 SCARG(uap, count), &done, l, 0, 0); 3550 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3551 *retval = done; 3552 out: 3553 fd_putfile(SCARG(uap, fd)); 3554 return (error); 3555 } 3556 3557 /* 3558 * Set the mode mask for creation of filesystem nodes. 3559 */ 3560 int 3561 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3562 { 3563 /* { 3564 syscallarg(mode_t) newmask; 3565 } */ 3566 struct proc *p = l->l_proc; 3567 struct cwdinfo *cwdi; 3568 3569 /* 3570 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3571 * important is that we serialize changes to the mask. The 3572 * rw_exit() will issue a write memory barrier on our behalf, 3573 * and force the changes out to other CPUs (as it must use an 3574 * atomic operation, draining the local CPU's store buffers). 
3575 */ 3576 cwdi = p->p_cwdi; 3577 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3578 *retval = cwdi->cwdi_cmask; 3579 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3580 rw_exit(&cwdi->cwdi_lock); 3581 3582 return (0); 3583 } 3584 3585 int 3586 dorevoke(struct vnode *vp, kauth_cred_t cred) 3587 { 3588 struct vattr vattr; 3589 int error; 3590 3591 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3592 return error; 3593 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3594 (error = kauth_authorize_generic(cred, 3595 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3596 VOP_REVOKE(vp, REVOKEALL); 3597 return (error); 3598 } 3599 3600 /* 3601 * Void all references to file by ripping underlying filesystem 3602 * away from vnode. 3603 */ 3604 /* ARGSUSED */ 3605 int 3606 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3607 { 3608 /* { 3609 syscallarg(const char *) path; 3610 } */ 3611 struct vnode *vp; 3612 int error; 3613 struct nameidata nd; 3614 3615 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3616 SCARG(uap, path)); 3617 if ((error = namei(&nd)) != 0) 3618 return (error); 3619 vp = nd.ni_vp; 3620 error = dorevoke(vp, l->l_cred); 3621 vrele(vp); 3622 return (error); 3623 } 3624 3625 /* 3626 * Convert a user file descriptor to a kernel file entry. 3627 */ 3628 int 3629 getvnode(int fd, file_t **fpp) 3630 { 3631 struct vnode *vp; 3632 file_t *fp; 3633 3634 if ((fp = fd_getfile(fd)) == NULL) 3635 return (EBADF); 3636 3637 if (fp->f_type != DTYPE_VNODE) { 3638 fd_putfile(fd); 3639 return (EINVAL); 3640 } 3641 3642 vp = fp->f_data; 3643 if (vp->v_type == VBAD) { 3644 fd_putfile(fd); 3645 return (EBADF); 3646 } 3647 3648 *fpp = fp; 3649 return (0); 3650 } 3651