1 /* $NetBSD: vfs_syscalls.c,v 1.350 2008/04/25 13:40:55 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the NetBSD 18 * Foundation, Inc. and its contributors. 19 * 4. Neither the name of The NetBSD Foundation nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright (c) 1989, 1993 38 * The Regents of the University of California. All rights reserved. 39 * (c) UNIX System Laboratories, Inc. 40 * All or some portions of this file are derived from material licensed 41 * to the University of California by American Telephone and Telegraph 42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 43 * the permission of UNIX System Laboratories, Inc. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 * 69 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.350 2008/04/25 13:40:55 joerg Exp $"); 74 75 #include "opt_compat_netbsd.h" 76 #include "opt_compat_43.h" 77 #include "opt_fileassoc.h" 78 #include "fss.h" 79 #include "veriexec.h" 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/namei.h> 84 #include <sys/filedesc.h> 85 #include <sys/kernel.h> 86 #include <sys/file.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/malloc.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/ktrace.h> 99 #ifdef FILEASSOC 100 #include <sys/fileassoc.h> 101 #endif /* FILEASSOC */ 102 #include <sys/verified_exec.h> 103 #include <sys/kauth.h> 104 #include <sys/atomic.h> 105 106 #include <miscfs/genfs/genfs.h> 107 #include <miscfs/syncfs/syncfs.h> 108 #include <miscfs/specfs/specdev.h> 109 110 #ifdef COMPAT_30 111 #include "opt_nfsserver.h" 112 #include <nfs/rpcv2.h> 113 #endif 114 #include <nfs/nfsproto.h> 115 #ifdef COMPAT_30 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 #endif 119 120 #if NFSS > 0 121 #include <dev/fssvar.h> 122 #endif 123 124 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 125 126 static int change_dir(struct nameidata *, struct lwp *); 127 static int change_flags(struct vnode *, u_long, struct lwp *); 128 static int change_mode(struct vnode *, int, struct lwp *l); 129 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 130 131 void checkdirs(struct vnode *); 132 133 int dovfsusermount = 0; 134 135 /* 136 * Virtual File System System Calls 137 */ 138 139 /* 140 * Mount a file system. 141 */ 142 143 #if defined(COMPAT_09) || defined(COMPAT_43) 144 /* 145 * This table is used to maintain compatibility with 4.3BSD 146 * and NetBSD 0.9 mount syscalls. Note, the order is important! 147 * 148 * Do not modify this table. It should only contain filesystems 149 * supported by NetBSD 0.9 and 4.3BSD. 150 */ 151 const char * const mountcompatnames[] = { 152 NULL, /* 0 = MOUNT_NONE */ 153 MOUNT_FFS, /* 1 = MOUNT_UFS */ 154 MOUNT_NFS, /* 2 */ 155 MOUNT_MFS, /* 3 */ 156 MOUNT_MSDOS, /* 4 */ 157 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 158 MOUNT_FDESC, /* 6 */ 159 MOUNT_KERNFS, /* 7 */ 160 NULL, /* 8 = MOUNT_DEVFS */ 161 MOUNT_AFS, /* 9 */ 162 }; 163 const int nmountcompatnames = sizeof(mountcompatnames) / 164 sizeof(mountcompatnames[0]); 165 #endif /* COMPAT_09 || COMPAT_43 */ 166 167 static int 168 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 169 void *data, size_t *data_len) 170 { 171 struct mount *mp; 172 int error = 0, saved_flags; 173 174 mp = vp->v_mount; 175 saved_flags = mp->mnt_flag; 176 177 /* We can operate only on VV_ROOT nodes. */ 178 if ((vp->v_vflag & VV_ROOT) == 0) { 179 error = EINVAL; 180 goto out; 181 } 182 183 /* 184 * We only allow the filesystem to be reloaded if it 185 * is currently mounted read-only. 186 */ 187 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) { 188 error = EOPNOTSUPP; /* Needs translation */ 189 goto out; 190 } 191 192 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 193 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 194 if (error) 195 goto out; 196 197 if (vfs_trybusy(mp, RW_WRITER, 0)) { 198 error = EPERM; 199 goto out; 200 } 201 202 mp->mnt_flag &= ~MNT_OP_FLAGS; 203 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 204 205 /* 206 * Set the mount level flags. 207 */ 208 if (flags & MNT_RDONLY) 209 mp->mnt_flag |= MNT_RDONLY; 210 else if (mp->mnt_flag & MNT_RDONLY) 211 mp->mnt_iflag |= IMNT_WANTRDWR; 212 mp->mnt_flag &= 213 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 214 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 215 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 216 mp->mnt_flag |= flags & 217 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 218 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 219 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 220 MNT_IGNORE); 221 222 error = VFS_MOUNT(mp, path, data, data_len); 223 224 #if defined(COMPAT_30) && defined(NFSSERVER) 225 if (error && data != NULL) { 226 int error2; 227 228 /* Update failed; let's try and see if it was an 229 * export request. */ 230 error2 = nfs_update_exports_30(mp, path, data, l); 231 232 /* Only update error code if the export request was 233 * understood but some problem occurred while 234 * processing it. */ 235 if (error2 != EJUSTRETURN) 236 error = error2; 237 } 238 #endif 239 if (mp->mnt_iflag & IMNT_WANTRDWR) 240 mp->mnt_flag &= ~MNT_RDONLY; 241 if (error) 242 mp->mnt_flag = saved_flags; 243 mp->mnt_flag &= ~MNT_OP_FLAGS; 244 mp->mnt_iflag &= ~IMNT_WANTRDWR; 245 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 246 if (mp->mnt_syncer == NULL) 247 error = vfs_allocate_syncvnode(mp); 248 } else { 249 if (mp->mnt_syncer != NULL) 250 vfs_deallocate_syncvnode(mp); 251 } 252 vfs_unbusy(mp, false); 253 254 out: 255 return (error); 256 } 257 258 static int 259 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 260 { 261 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 262 int error; 263 264 /* Copy file-system type from userspace. */ 265 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 266 if (error) { 267 #if defined(COMPAT_09) || defined(COMPAT_43) 268 /* 269 * Historically, filesystem types were identified by numbers. 270 * If we get an integer for the filesystem type instead of a 271 * string, we check to see if it matches one of the historic 272 * filesystem types. 273 */ 274 u_long fsindex = (u_long)fstype; 275 if (fsindex >= nmountcompatnames || 276 mountcompatnames[fsindex] == NULL) 277 return ENODEV; 278 strlcpy(fstypename, mountcompatnames[fsindex], 279 sizeof(fstypename)); 280 #else 281 return error; 282 #endif 283 } 284 285 #ifdef COMPAT_10 286 /* Accept `ufs' as an alias for `ffs'. */ 287 if (strcmp(fstypename, "ufs") == 0) 288 fstypename[0] = 'f'; 289 #endif 290 291 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL) 292 return ENODEV; 293 return 0; 294 } 295 296 static int 297 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 298 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 299 { 300 struct mount *mp = NULL; 301 struct vnode *vp = *vpp; 302 struct vattr va; 303 int error; 304 305 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 306 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 307 if (error) 308 return error; 309 310 /* Can't make a non-dir a mount-point (from here anyway). */ 311 if (vp->v_type != VDIR) 312 return ENOTDIR; 313 314 /* 315 * If the user is not root, ensure that they own the directory 316 * onto which we are attempting to mount. 317 */ 318 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 319 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 320 (error = kauth_authorize_generic(l->l_cred, 321 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 322 return error; 323 } 324 325 if (flags & MNT_EXPORTED) 326 return EINVAL; 327 328 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 329 return error; 330 331 /* 332 * Check if a file-system is not already mounted on this vnode. 333 */ 334 if (vp->v_mountedhere != NULL) 335 return EBUSY; 336 337 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 338 if (mp == NULL) 339 return ENOMEM; 340 341 mp->mnt_op = vfsops; 342 mp->mnt_refcnt = 1; 343 344 TAILQ_INIT(&mp->mnt_vnodelist); 345 rw_init(&mp->mnt_lock); 346 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 347 (void)vfs_busy(mp, RW_WRITER, 0); 348 349 mp->mnt_vnodecovered = vp; 350 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 351 mount_initspecific(mp); 352 353 /* 354 * The underlying file system may refuse the mount for 355 * various reasons. Allow the user to force it to happen. 356 * 357 * Set the mount level flags. 358 */ 359 mp->mnt_flag = flags & 360 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 361 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 362 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 363 MNT_IGNORE | MNT_RDONLY); 364 365 error = VFS_MOUNT(mp, path, data, data_len); 366 mp->mnt_flag &= ~MNT_OP_FLAGS; 367 368 /* 369 * Put the new filesystem on the mount list after root. 370 */ 371 cache_purge(vp); 372 if (error != 0) { 373 vp->v_mountedhere = NULL; 374 mp->mnt_op->vfs_refcount--; 375 vfs_unbusy(mp, false); 376 vfs_destroy(mp); 377 return error; 378 } 379 380 mp->mnt_iflag &= ~IMNT_WANTRDWR; 381 mutex_enter(&mountlist_lock); 382 vp->v_mountedhere = mp; 383 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 384 mutex_exit(&mountlist_lock); 385 vn_restorerecurse(vp, recurse); 386 VOP_UNLOCK(vp, 0); 387 checkdirs(vp); 388 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 389 error = vfs_allocate_syncvnode(mp); 390 /* Hold an additional reference to the mount across VFS_START(). */ 391 vfs_unbusy(mp, true); 392 (void) VFS_STATVFS(mp, &mp->mnt_stat); 393 error = VFS_START(mp, 0); 394 if (error) { 395 vrele(vp); 396 vfs_destroy(mp); 397 } 398 /* Drop reference held for VFS_START(). */ 399 vfs_destroy(mp); 400 *vpp = NULL; 401 return error; 402 } 403 404 static int 405 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 406 void *data, size_t *data_len) 407 { 408 struct mount *mp; 409 int error; 410 411 /* If MNT_GETARGS is specified, it should be the only flag. */ 412 if (flags & ~MNT_GETARGS) 413 return EINVAL; 414 415 mp = vp->v_mount; 416 417 /* XXX: probably some notion of "can see" here if we want isolation. */ 418 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 419 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 420 if (error) 421 return error; 422 423 if ((vp->v_vflag & VV_ROOT) == 0) 424 return EINVAL; 425 426 if (vfs_trybusy(mp, RW_WRITER, NULL)) 427 return EPERM; 428 429 mp->mnt_flag &= ~MNT_OP_FLAGS; 430 mp->mnt_flag |= MNT_GETARGS; 431 error = VFS_MOUNT(mp, path, data, data_len); 432 mp->mnt_flag &= ~MNT_OP_FLAGS; 433 434 vfs_unbusy(mp, false); 435 return (error); 436 } 437 438 #ifdef COMPAT_40 439 /* ARGSUSED */ 440 int 441 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval) 442 { 443 /* { 444 syscallarg(const char *) type; 445 syscallarg(const char *) path; 446 syscallarg(int) flags; 447 syscallarg(void *) data; 448 } */ 449 register_t dummy; 450 451 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 452 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 453 } 454 #endif 455 456 int 457 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 458 { 459 /* { 460 syscallarg(const char *) type; 461 syscallarg(const char *) path; 462 syscallarg(int) flags; 463 syscallarg(void *) data; 464 syscallarg(size_t) data_len; 465 } */ 466 467 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 468 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 469 SCARG(uap, data_len), retval); 470 } 471 472 int 473 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 474 const char *path, int flags, void *data, enum uio_seg data_seg, 475 size_t data_len, register_t *retval) 476 { 477 struct vnode *vp; 478 struct nameidata nd; 479 void *data_buf = data; 480 u_int recurse; 481 int error; 482 483 /* 484 * Get vnode to be covered 485 */ 486 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 487 if ((error = namei(&nd)) != 0) 488 return (error); 489 vp = nd.ni_vp; 490 491 /* 492 * A lookup in VFS_MOUNT might result in an attempt to 493 * lock this vnode again, so make the lock recursive. 494 */ 495 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 496 recurse = vn_setrecurse(vp); 497 498 if (vfsops == NULL) { 499 if (flags & (MNT_GETARGS | MNT_UPDATE)) 500 vfsops = vp->v_mount->mnt_op; 501 else { 502 /* 'type' is userspace */ 503 error = mount_get_vfsops(type, &vfsops); 504 if (error != 0) 505 goto done; 506 } 507 } 508 509 if (data != NULL && data_seg == UIO_USERSPACE) { 510 if (data_len == 0) { 511 /* No length supplied, use default for filesystem */ 512 data_len = vfsops->vfs_min_mount_data; 513 if (data_len > VFS_MAX_MOUNT_DATA) { 514 /* maybe a force loaded old LKM */ 515 error = EINVAL; 516 goto done; 517 } 518 #ifdef COMPAT_30 519 /* Hopefully a longer buffer won't make copyin() fail */ 520 if (flags & MNT_UPDATE 521 && data_len < sizeof (struct mnt_export_args30)) 522 data_len = sizeof (struct mnt_export_args30); 523 #endif 524 } 525 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 526 527 /* NFS needs the buffer even for mnt_getargs .... */ 528 error = copyin(data, data_buf, data_len); 529 if (error != 0) 530 goto done; 531 } 532 533 if (flags & MNT_GETARGS) { 534 if (data_len == 0) { 535 error = EINVAL; 536 goto done; 537 } 538 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 539 if (error != 0) 540 goto done; 541 if (data_seg == UIO_USERSPACE) 542 error = copyout(data_buf, data, data_len); 543 *retval = data_len; 544 } else if (flags & MNT_UPDATE) { 545 error = mount_update(l, vp, path, flags, data_buf, &data_len); 546 } else { 547 /* Locking is handled internally in mount_domount(). */ 548 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 549 &data_len, recurse); 550 } 551 552 done: 553 if (vp != NULL) { 554 vn_restorerecurse(vp, recurse); 555 vput(vp); 556 } 557 if (data_buf != data) 558 free(data_buf, M_TEMP); 559 return (error); 560 } 561 562 /* 563 * Scan all active processes to see if any of them have a current 564 * or root directory onto which the new filesystem has just been 565 * mounted. If so, replace them with the new mount point. 566 */ 567 void 568 checkdirs(struct vnode *olddp) 569 { 570 struct cwdinfo *cwdi; 571 struct vnode *newdp; 572 struct proc *p; 573 574 if (olddp->v_usecount == 1) 575 return; 576 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 577 panic("mount: lost mount"); 578 mutex_enter(proc_lock); 579 /* XXXAD Should not be acquiring these locks with proc_lock held!! */ 580 PROCLIST_FOREACH(p, &allproc) { 581 cwdi = p->p_cwdi; 582 if (!cwdi) 583 continue; 584 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 585 if (cwdi->cwdi_cdir == olddp) { 586 vrele(cwdi->cwdi_cdir); 587 VREF(newdp); 588 cwdi->cwdi_cdir = newdp; 589 } 590 if (cwdi->cwdi_rdir == olddp) { 591 vrele(cwdi->cwdi_rdir); 592 VREF(newdp); 593 cwdi->cwdi_rdir = newdp; 594 } 595 rw_exit(&cwdi->cwdi_lock); 596 } 597 mutex_exit(proc_lock); 598 if (rootvnode == olddp) { 599 vrele(rootvnode); 600 VREF(newdp); 601 rootvnode = newdp; 602 } 603 vput(newdp); 604 } 605 606 /* 607 * Unmount a file system. 608 * 609 * Note: unmount takes a path to the vnode mounted on as argument, 610 * not special file (as before). 611 */ 612 /* ARGSUSED */ 613 int 614 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 615 { 616 /* { 617 syscallarg(const char *) path; 618 syscallarg(int) flags; 619 } */ 620 struct vnode *vp; 621 struct mount *mp; 622 int error; 623 struct nameidata nd; 624 625 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 626 SCARG(uap, path)); 627 if ((error = namei(&nd)) != 0) 628 return (error); 629 vp = nd.ni_vp; 630 mp = vp->v_mount; 631 VOP_UNLOCK(vp, 0); 632 633 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 634 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 635 if (error) { 636 vrele(vp); 637 return (error); 638 } 639 640 /* 641 * Don't allow unmounting the root file system. 642 */ 643 if (mp->mnt_flag & MNT_ROOTFS) { 644 vrele(vp); 645 return (EINVAL); 646 } 647 648 /* 649 * Must be the root of the filesystem 650 */ 651 if ((vp->v_vflag & VV_ROOT) == 0) { 652 vrele(vp); 653 return (EINVAL); 654 } 655 656 /* 657 * XXX Freeze syncer. Must do this before locking the 658 * mount point. See dounmount() for details. 659 */ 660 mutex_enter(&syncer_mutex); 661 error = vfs_busy(mp, RW_WRITER, NULL); 662 vrele(vp); 663 if (error != 0) { 664 mutex_exit(&syncer_mutex); 665 return (error); 666 } 667 668 return (dounmount(mp, SCARG(uap, flags), l)); 669 } 670 671 /* 672 * Do the actual file system unmount. File system is assumed to have been 673 * marked busy by the caller. 674 */ 675 int 676 dounmount(struct mount *mp, int flags, struct lwp *l) 677 { 678 struct vnode *coveredvp; 679 int error; 680 int async; 681 int used_syncer; 682 683 KASSERT(rw_write_held(&mp->mnt_lock)); 684 685 #if NVERIEXEC > 0 686 error = veriexec_unmountchk(mp); 687 if (error) 688 return (error); 689 #endif /* NVERIEXEC > 0 */ 690 691 used_syncer = (mp->mnt_syncer != NULL); 692 693 /* 694 * XXX Syncer must be frozen when we get here. This should really 695 * be done on a per-mountpoint basis, but especially the softdep 696 * code possibly called from the syncer doesn't exactly work on a 697 * per-mountpoint basis, so the softdep code would become a maze 698 * of vfs_busy() calls. 699 * 700 * The caller of dounmount() must acquire syncer_mutex because 701 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 702 * order, and we must preserve that order to avoid deadlock. 703 * 704 * So, if the file system did not use the syncer, now is 705 * the time to release the syncer_mutex. 706 */ 707 if (used_syncer == 0) 708 mutex_exit(&syncer_mutex); 709 710 mp->mnt_iflag |= IMNT_UNMOUNT; 711 async = mp->mnt_flag & MNT_ASYNC; 712 mp->mnt_flag &= ~MNT_ASYNC; 713 cache_purgevfs(mp); /* remove cache entries for this file sys */ 714 if (mp->mnt_syncer != NULL) 715 vfs_deallocate_syncvnode(mp); 716 error = 0; 717 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 718 #if NFSS > 0 719 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 720 #endif 721 if (error == 0) 722 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 723 } 724 vfs_scrubvnlist(mp); 725 if (error == 0 || (flags & MNT_FORCE)) 726 error = VFS_UNMOUNT(mp, flags); 727 if (error) { 728 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 729 (void) vfs_allocate_syncvnode(mp); 730 mp->mnt_iflag &= ~IMNT_UNMOUNT; 731 mp->mnt_flag |= async; 732 if (used_syncer) 733 mutex_exit(&syncer_mutex); 734 vfs_unbusy(mp, false); 735 return (error); 736 } 737 vfs_scrubvnlist(mp); 738 mutex_enter(&mountlist_lock); 739 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 740 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 741 coveredvp->v_mountedhere = NULL; 742 mutex_exit(&mountlist_lock); 743 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 744 panic("unmount: dangling vnode"); 745 mp->mnt_iflag |= IMNT_GONE; 746 if (used_syncer) 747 mutex_exit(&syncer_mutex); 748 vfs_hooks_unmount(mp); 749 vfs_unbusy(mp, false); 750 vfs_destroy(mp); 751 if (coveredvp != NULLVP) 752 vrele(coveredvp); 753 return (0); 754 } 755 756 /* 757 * Sync each mounted filesystem. 758 */ 759 #ifdef DEBUG 760 int syncprt = 0; 761 struct ctldebug debug0 = { "syncprt", &syncprt }; 762 #endif 763 764 /* ARGSUSED */ 765 int 766 sys_sync(struct lwp *l, const void *v, register_t *retval) 767 { 768 struct mount *mp, *nmp; 769 int asyncflag; 770 771 if (l == NULL) 772 l = &lwp0; 773 774 mutex_enter(&mountlist_lock); 775 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 776 if (vfs_trybusy(mp, RW_READER, &mountlist_lock)) { 777 nmp = mp->mnt_list.cqe_prev; 778 continue; 779 } 780 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 781 asyncflag = mp->mnt_flag & MNT_ASYNC; 782 mp->mnt_flag &= ~MNT_ASYNC; 783 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 784 if (asyncflag) 785 mp->mnt_flag |= MNT_ASYNC; 786 } 787 mutex_enter(&mountlist_lock); 788 nmp = mp->mnt_list.cqe_prev; 789 vfs_unbusy(mp, false); 790 791 } 792 mutex_exit(&mountlist_lock); 793 #ifdef DEBUG 794 if (syncprt) 795 vfs_bufstats(); 796 #endif /* DEBUG */ 797 return (0); 798 } 799 800 /* 801 * Change filesystem quotas. 802 */ 803 /* ARGSUSED */ 804 int 805 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 806 { 807 /* { 808 syscallarg(const char *) path; 809 syscallarg(int) cmd; 810 syscallarg(int) uid; 811 syscallarg(void *) arg; 812 } */ 813 struct mount *mp; 814 int error; 815 struct nameidata nd; 816 817 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 818 SCARG(uap, path)); 819 if ((error = namei(&nd)) != 0) 820 return (error); 821 mp = nd.ni_vp->v_mount; 822 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 823 SCARG(uap, arg)); 824 vrele(nd.ni_vp); 825 return (error); 826 } 827 828 int 829 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 830 int root) 831 { 832 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 833 int error = 0; 834 835 /* 836 * If MNT_NOWAIT or MNT_LAZY is specified, do not 837 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 838 * overrides MNT_NOWAIT. 839 */ 840 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 841 (flags != MNT_WAIT && flags != 0)) { 842 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 843 goto done; 844 } 845 846 /* Get the filesystem stats now */ 847 memset(sp, 0, sizeof(*sp)); 848 if ((error = VFS_STATVFS(mp, sp)) != 0) { 849 return error; 850 } 851 852 if (cwdi->cwdi_rdir == NULL) 853 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 854 done: 855 if (cwdi->cwdi_rdir != NULL) { 856 size_t len; 857 char *bp; 858 char *path = PNBUF_GET(); 859 860 bp = path + MAXPATHLEN; 861 *--bp = '\0'; 862 rw_enter(&cwdi->cwdi_lock, RW_READER); 863 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 864 MAXPATHLEN / 2, 0, l); 865 rw_exit(&cwdi->cwdi_lock); 866 if (error) { 867 PNBUF_PUT(path); 868 return error; 869 } 870 len = strlen(bp); 871 /* 872 * for mount points that are below our root, we can see 873 * them, so we fix up the pathname and return them. The 874 * rest we cannot see, so we don't allow viewing the 875 * data. 876 */ 877 if (strncmp(bp, sp->f_mntonname, len) == 0) { 878 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 879 sizeof(sp->f_mntonname)); 880 if (sp->f_mntonname[0] == '\0') 881 (void)strlcpy(sp->f_mntonname, "/", 882 sizeof(sp->f_mntonname)); 883 } else { 884 if (root) 885 (void)strlcpy(sp->f_mntonname, "/", 886 sizeof(sp->f_mntonname)); 887 else 888 error = EPERM; 889 } 890 PNBUF_PUT(path); 891 } 892 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 893 return error; 894 } 895 896 /* 897 * Get filesystem statistics by path. 898 */ 899 int 900 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 901 { 902 struct mount *mp; 903 int error; 904 struct nameidata nd; 905 906 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 907 if ((error = namei(&nd)) != 0) 908 return error; 909 mp = nd.ni_vp->v_mount; 910 error = dostatvfs(mp, sb, l, flags, 1); 911 vrele(nd.ni_vp); 912 return error; 913 } 914 915 /* ARGSUSED */ 916 int 917 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 918 { 919 /* { 920 syscallarg(const char *) path; 921 syscallarg(struct statvfs *) buf; 922 syscallarg(int) flags; 923 } */ 924 struct statvfs *sb; 925 int error; 926 927 sb = STATVFSBUF_GET(); 928 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 929 if (error == 0) 930 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 931 STATVFSBUF_PUT(sb); 932 return error; 933 } 934 935 /* 936 * Get filesystem statistics by fd. 937 */ 938 int 939 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 940 { 941 file_t *fp; 942 struct mount *mp; 943 int error; 944 945 /* fd_getvnode() will use the descriptor for us */ 946 if ((error = fd_getvnode(fd, &fp)) != 0) 947 return (error); 948 mp = ((struct vnode *)fp->f_data)->v_mount; 949 error = dostatvfs(mp, sb, curlwp, flags, 1); 950 fd_putfile(fd); 951 return error; 952 } 953 954 /* ARGSUSED */ 955 int 956 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 957 { 958 /* { 959 syscallarg(int) fd; 960 syscallarg(struct statvfs *) buf; 961 syscallarg(int) flags; 962 } */ 963 struct statvfs *sb; 964 int error; 965 966 sb = STATVFSBUF_GET(); 967 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 968 if (error == 0) 969 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 970 STATVFSBUF_PUT(sb); 971 return error; 972 } 973 974 975 /* 976 * Get statistics on all filesystems. 977 */ 978 int 979 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 980 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 981 register_t *retval) 982 { 983 int root = 0; 984 struct proc *p = l->l_proc; 985 struct mount *mp, *nmp; 986 struct statvfs *sb; 987 size_t count, maxcount; 988 int error = 0; 989 990 sb = STATVFSBUF_GET(); 991 maxcount = bufsize / entry_sz; 992 mutex_enter(&mountlist_lock); 993 count = 0; 994 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 995 mp = nmp) { 996 if (vfs_trybusy(mp, RW_READER, &mountlist_lock)) { 997 nmp = CIRCLEQ_NEXT(mp, mnt_list); 998 continue; 999 } 1000 if (sfsp && count < maxcount) { 1001 error = dostatvfs(mp, sb, l, flags, 0); 1002 if (error) { 1003 mutex_enter(&mountlist_lock); 1004 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1005 vfs_unbusy(mp, false); 1006 continue; 1007 } 1008 error = copyfn(sb, sfsp, entry_sz); 1009 if (error) { 1010 vfs_unbusy(mp, false); 1011 goto out; 1012 } 1013 sfsp = (char *)sfsp + entry_sz; 1014 root |= strcmp(sb->f_mntonname, "/") == 0; 1015 } 1016 count++; 1017 mutex_enter(&mountlist_lock); 1018 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1019 vfs_unbusy(mp, false); 1020 } 1021 1022 mutex_exit(&mountlist_lock); 1023 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1024 /* 1025 * fake a root entry 1026 */ 1027 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1028 sb, l, flags, 1); 1029 if (error != 0) 1030 goto out; 1031 if (sfsp) 1032 error = copyfn(sb, sfsp, entry_sz); 1033 count++; 1034 } 1035 if (sfsp && count > maxcount) 1036 *retval = maxcount; 1037 else 1038 *retval = count; 1039 out: 1040 STATVFSBUF_PUT(sb); 1041 return error; 1042 } 1043 1044 int 1045 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1046 { 1047 /* { 1048 syscallarg(struct statvfs *) buf; 1049 syscallarg(size_t) bufsize; 1050 syscallarg(int) flags; 1051 } */ 1052 1053 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1054 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1055 } 1056 1057 /* 1058 * Change current working directory to a given file descriptor. 1059 */ 1060 /* ARGSUSED */ 1061 int 1062 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1063 { 1064 /* { 1065 syscallarg(int) fd; 1066 } */ 1067 struct proc *p = l->l_proc; 1068 struct cwdinfo *cwdi; 1069 struct vnode *vp, *tdp; 1070 struct mount *mp; 1071 file_t *fp; 1072 int error, fd; 1073 1074 /* fd_getvnode() will use the descriptor for us */ 1075 fd = SCARG(uap, fd); 1076 if ((error = fd_getvnode(fd, &fp)) != 0) 1077 return (error); 1078 vp = fp->f_data; 1079 1080 VREF(vp); 1081 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1082 if (vp->v_type != VDIR) 1083 error = ENOTDIR; 1084 else 1085 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1086 if (error) { 1087 vput(vp); 1088 goto out; 1089 } 1090 while ((mp = vp->v_mountedhere) != NULL) { 1091 if (vfs_busy(mp, RW_READER, NULL)) 1092 continue; 1093 vput(vp); 1094 error = VFS_ROOT(mp, &tdp); 1095 vfs_unbusy(mp, false); 1096 if (error) 1097 goto out; 1098 vp = tdp; 1099 } 1100 VOP_UNLOCK(vp, 0); 1101 1102 /* 1103 * Disallow changing to a directory not under the process's 1104 * current root directory (if there is one). 1105 */ 1106 cwdi = p->p_cwdi; 1107 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1108 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1109 vrele(vp); 1110 error = EPERM; /* operation not permitted */ 1111 } else { 1112 vrele(cwdi->cwdi_cdir); 1113 cwdi->cwdi_cdir = vp; 1114 } 1115 rw_exit(&cwdi->cwdi_lock); 1116 1117 out: 1118 fd_putfile(fd); 1119 return (error); 1120 } 1121 1122 /* 1123 * Change this process's notion of the root directory to a given file 1124 * descriptor. 1125 */ 1126 int 1127 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1128 { 1129 struct proc *p = l->l_proc; 1130 struct cwdinfo *cwdi; 1131 struct vnode *vp; 1132 file_t *fp; 1133 int error, fd = SCARG(uap, fd); 1134 1135 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1136 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1137 return error; 1138 /* fd_getvnode() will use the descriptor for us */ 1139 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1140 return error; 1141 vp = fp->f_data; 1142 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1143 if (vp->v_type != VDIR) 1144 error = ENOTDIR; 1145 else 1146 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1147 VOP_UNLOCK(vp, 0); 1148 if (error) 1149 goto out; 1150 VREF(vp); 1151 1152 /* 1153 * Prevent escaping from chroot by putting the root under 1154 * the working directory. Silently chdir to / if we aren't 1155 * already there. 1156 */ 1157 cwdi = p->p_cwdi; 1158 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1159 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1160 /* 1161 * XXX would be more failsafe to change directory to a 1162 * deadfs node here instead 1163 */ 1164 vrele(cwdi->cwdi_cdir); 1165 VREF(vp); 1166 cwdi->cwdi_cdir = vp; 1167 } 1168 1169 if (cwdi->cwdi_rdir != NULL) 1170 vrele(cwdi->cwdi_rdir); 1171 cwdi->cwdi_rdir = vp; 1172 rw_exit(&cwdi->cwdi_lock); 1173 1174 out: 1175 fd_putfile(fd); 1176 return (error); 1177 } 1178 1179 /* 1180 * Change current working directory (``.''). 1181 */ 1182 /* ARGSUSED */ 1183 int 1184 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1185 { 1186 /* { 1187 syscallarg(const char *) path; 1188 } */ 1189 struct proc *p = l->l_proc; 1190 struct cwdinfo *cwdi; 1191 int error; 1192 struct nameidata nd; 1193 1194 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1195 SCARG(uap, path)); 1196 if ((error = change_dir(&nd, l)) != 0) 1197 return (error); 1198 cwdi = p->p_cwdi; 1199 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1200 vrele(cwdi->cwdi_cdir); 1201 cwdi->cwdi_cdir = nd.ni_vp; 1202 rw_exit(&cwdi->cwdi_lock); 1203 return (0); 1204 } 1205 1206 /* 1207 * Change notion of root (``/'') directory. 1208 */ 1209 /* ARGSUSED */ 1210 int 1211 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1212 { 1213 /* { 1214 syscallarg(const char *) path; 1215 } */ 1216 struct proc *p = l->l_proc; 1217 struct cwdinfo *cwdi; 1218 struct vnode *vp; 1219 int error; 1220 struct nameidata nd; 1221 1222 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1223 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1224 return (error); 1225 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1226 SCARG(uap, path)); 1227 if ((error = change_dir(&nd, l)) != 0) 1228 return (error); 1229 1230 cwdi = p->p_cwdi; 1231 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1232 if (cwdi->cwdi_rdir != NULL) 1233 vrele(cwdi->cwdi_rdir); 1234 vp = nd.ni_vp; 1235 cwdi->cwdi_rdir = vp; 1236 1237 /* 1238 * Prevent escaping from chroot by putting the root under 1239 * the working directory. Silently chdir to / if we aren't 1240 * already there. 1241 */ 1242 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1243 /* 1244 * XXX would be more failsafe to change directory to a 1245 * deadfs node here instead 1246 */ 1247 vrele(cwdi->cwdi_cdir); 1248 VREF(vp); 1249 cwdi->cwdi_cdir = vp; 1250 } 1251 rw_exit(&cwdi->cwdi_lock); 1252 1253 return (0); 1254 } 1255 1256 /* 1257 * Common routine for chroot and chdir. 1258 */ 1259 static int 1260 change_dir(struct nameidata *ndp, struct lwp *l) 1261 { 1262 struct vnode *vp; 1263 int error; 1264 1265 if ((error = namei(ndp)) != 0) 1266 return (error); 1267 vp = ndp->ni_vp; 1268 if (vp->v_type != VDIR) 1269 error = ENOTDIR; 1270 else 1271 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1272 1273 if (error) 1274 vput(vp); 1275 else 1276 VOP_UNLOCK(vp, 0); 1277 return (error); 1278 } 1279 1280 /* 1281 * Check permissions, allocate an open file structure, 1282 * and call the device open routine if any. 1283 */ 1284 int 1285 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1286 { 1287 /* { 1288 syscallarg(const char *) path; 1289 syscallarg(int) flags; 1290 syscallarg(int) mode; 1291 } */ 1292 struct proc *p = l->l_proc; 1293 struct cwdinfo *cwdi = p->p_cwdi; 1294 file_t *fp; 1295 struct vnode *vp; 1296 int flags, cmode; 1297 int type, indx, error; 1298 struct flock lf; 1299 struct nameidata nd; 1300 1301 flags = FFLAGS(SCARG(uap, flags)); 1302 if ((flags & (FREAD | FWRITE)) == 0) 1303 return (EINVAL); 1304 if ((error = fd_allocfile(&fp, &indx)) != 0) 1305 return (error); 1306 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1307 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1308 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1309 SCARG(uap, path)); 1310 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1311 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1312 fd_abort(p, fp, indx); 1313 if ((error == EDUPFD || error == EMOVEFD) && 1314 l->l_dupfd >= 0 && /* XXX from fdopen */ 1315 (error = 1316 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1317 *retval = indx; 1318 return (0); 1319 } 1320 if (error == ERESTART) 1321 error = EINTR; 1322 return (error); 1323 } 1324 1325 l->l_dupfd = 0; 1326 vp = nd.ni_vp; 1327 fp->f_flag = flags & FMASK; 1328 fp->f_type = DTYPE_VNODE; 1329 fp->f_ops = &vnops; 1330 fp->f_data = vp; 1331 if (flags & (O_EXLOCK | O_SHLOCK)) { 1332 lf.l_whence = SEEK_SET; 1333 lf.l_start = 0; 1334 lf.l_len = 0; 1335 if (flags & O_EXLOCK) 1336 lf.l_type = F_WRLCK; 1337 else 1338 lf.l_type = F_RDLCK; 1339 type = F_FLOCK; 1340 if ((flags & FNONBLOCK) == 0) 1341 type |= F_WAIT; 1342 VOP_UNLOCK(vp, 0); 1343 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1344 if (error) { 1345 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1346 fd_abort(p, fp, indx); 1347 return (error); 1348 } 1349 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1350 atomic_or_uint(&fp->f_flag, FHASLOCK); 1351 } 1352 VOP_UNLOCK(vp, 0); 1353 *retval = indx; 1354 fd_affix(p, fp, indx); 1355 return (0); 1356 } 1357 1358 static void 1359 vfs__fhfree(fhandle_t *fhp) 1360 { 1361 size_t fhsize; 1362 1363 if (fhp == NULL) { 1364 return; 1365 } 1366 fhsize = FHANDLE_SIZE(fhp); 1367 kmem_free(fhp, fhsize); 1368 } 1369 1370 /* 1371 * vfs_composefh: compose a filehandle. 1372 */ 1373 1374 int 1375 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1376 { 1377 struct mount *mp; 1378 struct fid *fidp; 1379 int error; 1380 size_t needfhsize; 1381 size_t fidsize; 1382 1383 mp = vp->v_mount; 1384 fidp = NULL; 1385 if (*fh_size < FHANDLE_SIZE_MIN) { 1386 fidsize = 0; 1387 } else { 1388 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1389 if (fhp != NULL) { 1390 memset(fhp, 0, *fh_size); 1391 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1392 fidp = &fhp->fh_fid; 1393 } 1394 } 1395 error = VFS_VPTOFH(vp, fidp, &fidsize); 1396 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1397 if (error == 0 && *fh_size < needfhsize) { 1398 error = E2BIG; 1399 } 1400 *fh_size = needfhsize; 1401 return error; 1402 } 1403 1404 int 1405 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1406 { 1407 struct mount *mp; 1408 fhandle_t *fhp; 1409 size_t fhsize; 1410 size_t fidsize; 1411 int error; 1412 1413 *fhpp = NULL; 1414 mp = vp->v_mount; 1415 fidsize = 0; 1416 error = VFS_VPTOFH(vp, NULL, &fidsize); 1417 KASSERT(error != 0); 1418 if (error != E2BIG) { 1419 goto out; 1420 } 1421 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1422 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1423 if (fhp == NULL) { 1424 error = ENOMEM; 1425 goto out; 1426 } 1427 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1428 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1429 if (error == 0) { 1430 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1431 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1432 *fhpp = fhp; 1433 } else { 1434 kmem_free(fhp, fhsize); 1435 } 1436 out: 1437 return error; 1438 } 1439 1440 void 1441 vfs_composefh_free(fhandle_t *fhp) 1442 { 1443 1444 vfs__fhfree(fhp); 1445 } 1446 1447 /* 1448 * vfs_fhtovp: lookup a vnode by a filehandle. 1449 */ 1450 1451 int 1452 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1453 { 1454 struct mount *mp; 1455 int error; 1456 1457 *vpp = NULL; 1458 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1459 if (mp == NULL) { 1460 error = ESTALE; 1461 goto out; 1462 } 1463 if (mp->mnt_op->vfs_fhtovp == NULL) { 1464 error = EOPNOTSUPP; 1465 goto out; 1466 } 1467 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1468 out: 1469 return error; 1470 } 1471 1472 /* 1473 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1474 * the needed size. 1475 */ 1476 1477 int 1478 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1479 { 1480 fhandle_t *fhp; 1481 int error; 1482 1483 *fhpp = NULL; 1484 if (fhsize > FHANDLE_SIZE_MAX) { 1485 return EINVAL; 1486 } 1487 if (fhsize < FHANDLE_SIZE_MIN) { 1488 return EINVAL; 1489 } 1490 again: 1491 fhp = kmem_alloc(fhsize, KM_SLEEP); 1492 if (fhp == NULL) { 1493 return ENOMEM; 1494 } 1495 error = copyin(ufhp, fhp, fhsize); 1496 if (error == 0) { 1497 /* XXX this check shouldn't be here */ 1498 if (FHANDLE_SIZE(fhp) == fhsize) { 1499 *fhpp = fhp; 1500 return 0; 1501 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1502 /* 1503 * a kludge for nfsv2 padded handles. 1504 */ 1505 size_t sz; 1506 1507 sz = FHANDLE_SIZE(fhp); 1508 kmem_free(fhp, fhsize); 1509 fhsize = sz; 1510 goto again; 1511 } else { 1512 /* 1513 * userland told us wrong size. 1514 */ 1515 error = EINVAL; 1516 } 1517 } 1518 kmem_free(fhp, fhsize); 1519 return error; 1520 } 1521 1522 void 1523 vfs_copyinfh_free(fhandle_t *fhp) 1524 { 1525 1526 vfs__fhfree(fhp); 1527 } 1528 1529 /* 1530 * Get file handle system call 1531 */ 1532 int 1533 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1534 { 1535 /* { 1536 syscallarg(char *) fname; 1537 syscallarg(fhandle_t *) fhp; 1538 syscallarg(size_t *) fh_size; 1539 } */ 1540 struct vnode *vp; 1541 fhandle_t *fh; 1542 int error; 1543 struct nameidata nd; 1544 size_t sz; 1545 size_t usz; 1546 1547 /* 1548 * Must be super user 1549 */ 1550 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1551 0, NULL, NULL, NULL); 1552 if (error) 1553 return (error); 1554 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1555 SCARG(uap, fname)); 1556 error = namei(&nd); 1557 if (error) 1558 return (error); 1559 vp = nd.ni_vp; 1560 error = vfs_composefh_alloc(vp, &fh); 1561 vput(vp); 1562 if (error != 0) { 1563 goto out; 1564 } 1565 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1566 if (error != 0) { 1567 goto out; 1568 } 1569 sz = FHANDLE_SIZE(fh); 1570 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1571 if (error != 0) { 1572 goto out; 1573 } 1574 if (usz >= sz) { 1575 error = copyout(fh, SCARG(uap, fhp), sz); 1576 } else { 1577 error = E2BIG; 1578 } 1579 out: 1580 vfs_composefh_free(fh); 1581 return (error); 1582 } 1583 1584 /* 1585 * Open a file given a file handle. 1586 * 1587 * Check permissions, allocate an open file structure, 1588 * and call the device open routine if any. 1589 */ 1590 1591 int 1592 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1593 register_t *retval) 1594 { 1595 file_t *fp; 1596 struct vnode *vp = NULL; 1597 kauth_cred_t cred = l->l_cred; 1598 file_t *nfp; 1599 int type, indx, error=0; 1600 struct flock lf; 1601 struct vattr va; 1602 fhandle_t *fh; 1603 int flags; 1604 proc_t *p; 1605 1606 p = curproc; 1607 1608 /* 1609 * Must be super user 1610 */ 1611 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1612 0, NULL, NULL, NULL))) 1613 return (error); 1614 1615 flags = FFLAGS(oflags); 1616 if ((flags & (FREAD | FWRITE)) == 0) 1617 return (EINVAL); 1618 if ((flags & O_CREAT)) 1619 return (EINVAL); 1620 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1621 return (error); 1622 fp = nfp; 1623 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1624 if (error != 0) { 1625 goto bad; 1626 } 1627 error = vfs_fhtovp(fh, &vp); 1628 if (error != 0) { 1629 goto bad; 1630 } 1631 1632 /* Now do an effective vn_open */ 1633 1634 if (vp->v_type == VSOCK) { 1635 error = EOPNOTSUPP; 1636 goto bad; 1637 } 1638 error = vn_openchk(vp, cred, flags); 1639 if (error != 0) 1640 goto bad; 1641 if (flags & O_TRUNC) { 1642 VOP_UNLOCK(vp, 0); /* XXX */ 1643 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1644 VATTR_NULL(&va); 1645 va.va_size = 0; 1646 error = VOP_SETATTR(vp, &va, cred); 1647 if (error) 1648 goto bad; 1649 } 1650 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1651 goto bad; 1652 if (flags & FWRITE) { 1653 mutex_enter(&vp->v_interlock); 1654 vp->v_writecount++; 1655 mutex_exit(&vp->v_interlock); 1656 } 1657 1658 /* done with modified vn_open, now finish what sys_open does. */ 1659 1660 fp->f_flag = flags & FMASK; 1661 fp->f_type = DTYPE_VNODE; 1662 fp->f_ops = &vnops; 1663 fp->f_data = vp; 1664 if (flags & (O_EXLOCK | O_SHLOCK)) { 1665 lf.l_whence = SEEK_SET; 1666 lf.l_start = 0; 1667 lf.l_len = 0; 1668 if (flags & O_EXLOCK) 1669 lf.l_type = F_WRLCK; 1670 else 1671 lf.l_type = F_RDLCK; 1672 type = F_FLOCK; 1673 if ((flags & FNONBLOCK) == 0) 1674 type |= F_WAIT; 1675 VOP_UNLOCK(vp, 0); 1676 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1677 if (error) { 1678 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1679 fd_abort(p, fp, indx); 1680 return (error); 1681 } 1682 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1683 atomic_or_uint(&fp->f_flag, FHASLOCK); 1684 } 1685 VOP_UNLOCK(vp, 0); 1686 *retval = indx; 1687 fd_affix(p, fp, indx); 1688 vfs_copyinfh_free(fh); 1689 return (0); 1690 1691 bad: 1692 fd_abort(p, fp, indx); 1693 if (vp != NULL) 1694 vput(vp); 1695 vfs_copyinfh_free(fh); 1696 return (error); 1697 } 1698 1699 int 1700 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1701 { 1702 /* { 1703 syscallarg(const void *) fhp; 1704 syscallarg(size_t) fh_size; 1705 syscallarg(int) flags; 1706 } */ 1707 1708 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1709 SCARG(uap, flags), retval); 1710 } 1711 1712 int 1713 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1714 { 1715 int error; 1716 fhandle_t *fh; 1717 struct vnode *vp; 1718 1719 /* 1720 * Must be super user 1721 */ 1722 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1723 0, NULL, NULL, NULL))) 1724 return (error); 1725 1726 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1727 if (error != 0) 1728 return error; 1729 1730 error = vfs_fhtovp(fh, &vp); 1731 vfs_copyinfh_free(fh); 1732 if (error != 0) 1733 return error; 1734 1735 error = vn_stat(vp, sb); 1736 vput(vp); 1737 return error; 1738 } 1739 1740 1741 /* ARGSUSED */ 1742 int 1743 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval) 1744 { 1745 /* { 1746 syscallarg(const void *) fhp; 1747 syscallarg(size_t) fh_size; 1748 syscallarg(struct stat *) sb; 1749 } */ 1750 struct stat sb; 1751 int error; 1752 1753 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1754 if (error) 1755 return error; 1756 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1757 } 1758 1759 int 1760 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1761 int flags) 1762 { 1763 fhandle_t *fh; 1764 struct mount *mp; 1765 struct vnode *vp; 1766 int error; 1767 1768 /* 1769 * Must be super user 1770 */ 1771 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1772 0, NULL, NULL, NULL))) 1773 return error; 1774 1775 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1776 if (error != 0) 1777 return error; 1778 1779 error = vfs_fhtovp(fh, &vp); 1780 vfs_copyinfh_free(fh); 1781 if (error != 0) 1782 return error; 1783 1784 mp = vp->v_mount; 1785 error = dostatvfs(mp, sb, l, flags, 1); 1786 vput(vp); 1787 return error; 1788 } 1789 1790 /* ARGSUSED */ 1791 int 1792 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1793 { 1794 /* { 1795 syscallarg(const void *) fhp; 1796 syscallarg(size_t) fh_size; 1797 syscallarg(struct statvfs *) buf; 1798 syscallarg(int) flags; 1799 } */ 1800 struct statvfs *sb = STATVFSBUF_GET(); 1801 int error; 1802 1803 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1804 SCARG(uap, flags)); 1805 if (error == 0) 1806 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1807 STATVFSBUF_PUT(sb); 1808 return error; 1809 } 1810 1811 /* 1812 * Create a special file. 1813 */ 1814 /* ARGSUSED */ 1815 int 1816 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval) 1817 { 1818 /* { 1819 syscallarg(const char *) path; 1820 syscallarg(int) mode; 1821 syscallarg(int) dev; 1822 } */ 1823 struct proc *p = l->l_proc; 1824 struct vnode *vp; 1825 struct vattr vattr; 1826 int error, optype; 1827 struct nameidata nd; 1828 char *path; 1829 const char *cpath; 1830 enum uio_seg seg = UIO_USERSPACE; 1831 1832 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1833 0, NULL, NULL, NULL)) != 0) 1834 return (error); 1835 1836 optype = VOP_MKNOD_DESCOFFSET; 1837 1838 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1839 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1840 1841 if ((error = namei(&nd)) != 0) 1842 goto out; 1843 vp = nd.ni_vp; 1844 if (vp != NULL) 1845 error = EEXIST; 1846 else { 1847 VATTR_NULL(&vattr); 1848 /* We will read cwdi->cwdi_cmask unlocked. */ 1849 vattr.va_mode = 1850 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1851 vattr.va_rdev = SCARG(uap, dev); 1852 1853 switch (SCARG(uap, mode) & S_IFMT) { 1854 case S_IFMT: /* used by badsect to flag bad sectors */ 1855 vattr.va_type = VBAD; 1856 break; 1857 case S_IFCHR: 1858 vattr.va_type = VCHR; 1859 break; 1860 case S_IFBLK: 1861 vattr.va_type = VBLK; 1862 break; 1863 case S_IFWHT: 1864 optype = VOP_WHITEOUT_DESCOFFSET; 1865 break; 1866 case S_IFREG: 1867 #if NVERIEXEC > 0 1868 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1869 O_CREAT); 1870 #endif /* NVERIEXEC > 0 */ 1871 vattr.va_type = VREG; 1872 vattr.va_rdev = VNOVAL; 1873 optype = VOP_CREATE_DESCOFFSET; 1874 break; 1875 default: 1876 error = EINVAL; 1877 break; 1878 } 1879 } 1880 if (!error) { 1881 switch (optype) { 1882 case VOP_WHITEOUT_DESCOFFSET: 1883 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1884 if (error) 1885 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1886 vput(nd.ni_dvp); 1887 break; 1888 1889 case VOP_MKNOD_DESCOFFSET: 1890 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1891 &nd.ni_cnd, &vattr); 1892 if (error == 0) 1893 vput(nd.ni_vp); 1894 break; 1895 1896 case VOP_CREATE_DESCOFFSET: 1897 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1898 &nd.ni_cnd, &vattr); 1899 if (error == 0) 1900 vput(nd.ni_vp); 1901 break; 1902 } 1903 } else { 1904 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1905 if (nd.ni_dvp == vp) 1906 vrele(nd.ni_dvp); 1907 else 1908 vput(nd.ni_dvp); 1909 if (vp) 1910 vrele(vp); 1911 } 1912 out: 1913 VERIEXEC_PATH_PUT(path); 1914 return (error); 1915 } 1916 1917 /* 1918 * Create a named pipe. 1919 */ 1920 /* ARGSUSED */ 1921 int 1922 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1923 { 1924 /* { 1925 syscallarg(const char *) path; 1926 syscallarg(int) mode; 1927 } */ 1928 struct proc *p = l->l_proc; 1929 struct vattr vattr; 1930 int error; 1931 struct nameidata nd; 1932 1933 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1934 SCARG(uap, path)); 1935 if ((error = namei(&nd)) != 0) 1936 return (error); 1937 if (nd.ni_vp != NULL) { 1938 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1939 if (nd.ni_dvp == nd.ni_vp) 1940 vrele(nd.ni_dvp); 1941 else 1942 vput(nd.ni_dvp); 1943 vrele(nd.ni_vp); 1944 return (EEXIST); 1945 } 1946 VATTR_NULL(&vattr); 1947 vattr.va_type = VFIFO; 1948 /* We will read cwdi->cwdi_cmask unlocked. */ 1949 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1950 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1951 if (error == 0) 1952 vput(nd.ni_vp); 1953 return (error); 1954 } 1955 1956 /* 1957 * Make a hard file link. 1958 */ 1959 /* ARGSUSED */ 1960 int 1961 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1962 { 1963 /* { 1964 syscallarg(const char *) path; 1965 syscallarg(const char *) link; 1966 } */ 1967 struct vnode *vp; 1968 struct nameidata nd; 1969 int error; 1970 1971 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1972 SCARG(uap, path)); 1973 if ((error = namei(&nd)) != 0) 1974 return (error); 1975 vp = nd.ni_vp; 1976 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1977 SCARG(uap, link)); 1978 if ((error = namei(&nd)) != 0) 1979 goto out; 1980 if (nd.ni_vp) { 1981 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1982 if (nd.ni_dvp == nd.ni_vp) 1983 vrele(nd.ni_dvp); 1984 else 1985 vput(nd.ni_dvp); 1986 vrele(nd.ni_vp); 1987 error = EEXIST; 1988 goto out; 1989 } 1990 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1991 out: 1992 vrele(vp); 1993 return (error); 1994 } 1995 1996 /* 1997 * Make a symbolic link. 1998 */ 1999 /* ARGSUSED */ 2000 int 2001 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2002 { 2003 /* { 2004 syscallarg(const char *) path; 2005 syscallarg(const char *) link; 2006 } */ 2007 struct proc *p = l->l_proc; 2008 struct vattr vattr; 2009 char *path; 2010 int error; 2011 struct nameidata nd; 2012 2013 path = PNBUF_GET(); 2014 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2015 if (error) 2016 goto out; 2017 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2018 SCARG(uap, link)); 2019 if ((error = namei(&nd)) != 0) 2020 goto out; 2021 if (nd.ni_vp) { 2022 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2023 if (nd.ni_dvp == nd.ni_vp) 2024 vrele(nd.ni_dvp); 2025 else 2026 vput(nd.ni_dvp); 2027 vrele(nd.ni_vp); 2028 error = EEXIST; 2029 goto out; 2030 } 2031 VATTR_NULL(&vattr); 2032 vattr.va_type = VLNK; 2033 /* We will read cwdi->cwdi_cmask unlocked. */ 2034 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2035 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2036 if (error == 0) 2037 vput(nd.ni_vp); 2038 out: 2039 PNBUF_PUT(path); 2040 return (error); 2041 } 2042 2043 /* 2044 * Delete a whiteout from the filesystem. 2045 */ 2046 /* ARGSUSED */ 2047 int 2048 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2049 { 2050 /* { 2051 syscallarg(const char *) path; 2052 } */ 2053 int error; 2054 struct nameidata nd; 2055 2056 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2057 UIO_USERSPACE, SCARG(uap, path)); 2058 error = namei(&nd); 2059 if (error) 2060 return (error); 2061 2062 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2063 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2064 if (nd.ni_dvp == nd.ni_vp) 2065 vrele(nd.ni_dvp); 2066 else 2067 vput(nd.ni_dvp); 2068 if (nd.ni_vp) 2069 vrele(nd.ni_vp); 2070 return (EEXIST); 2071 } 2072 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2073 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2074 vput(nd.ni_dvp); 2075 return (error); 2076 } 2077 2078 /* 2079 * Delete a name from the filesystem. 2080 */ 2081 /* ARGSUSED */ 2082 int 2083 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2084 { 2085 /* { 2086 syscallarg(const char *) path; 2087 } */ 2088 2089 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2090 } 2091 2092 int 2093 do_sys_unlink(const char *arg, enum uio_seg seg) 2094 { 2095 struct vnode *vp; 2096 int error; 2097 struct nameidata nd; 2098 kauth_cred_t cred; 2099 char *path; 2100 const char *cpath; 2101 2102 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2103 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2104 2105 if ((error = namei(&nd)) != 0) 2106 goto out; 2107 vp = nd.ni_vp; 2108 2109 /* 2110 * The root of a mounted filesystem cannot be deleted. 2111 */ 2112 if (vp->v_vflag & VV_ROOT) { 2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2114 if (nd.ni_dvp == vp) 2115 vrele(nd.ni_dvp); 2116 else 2117 vput(nd.ni_dvp); 2118 vput(vp); 2119 error = EBUSY; 2120 goto out; 2121 } 2122 2123 #if NVERIEXEC > 0 2124 /* Handle remove requests for veriexec entries. */ 2125 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2126 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2127 if (nd.ni_dvp == vp) 2128 vrele(nd.ni_dvp); 2129 else 2130 vput(nd.ni_dvp); 2131 vput(vp); 2132 goto out; 2133 } 2134 #endif /* NVERIEXEC > 0 */ 2135 2136 cred = kauth_cred_get(); 2137 #ifdef FILEASSOC 2138 (void)fileassoc_file_delete(vp); 2139 #endif /* FILEASSOC */ 2140 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2141 out: 2142 VERIEXEC_PATH_PUT(path); 2143 return (error); 2144 } 2145 2146 /* 2147 * Reposition read/write file offset. 2148 */ 2149 int 2150 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2151 { 2152 /* { 2153 syscallarg(int) fd; 2154 syscallarg(int) pad; 2155 syscallarg(off_t) offset; 2156 syscallarg(int) whence; 2157 } */ 2158 kauth_cred_t cred = l->l_cred; 2159 file_t *fp; 2160 struct vnode *vp; 2161 struct vattr vattr; 2162 off_t newoff; 2163 int error, fd; 2164 2165 fd = SCARG(uap, fd); 2166 2167 if ((fp = fd_getfile(fd)) == NULL) 2168 return (EBADF); 2169 2170 vp = fp->f_data; 2171 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2172 error = ESPIPE; 2173 goto out; 2174 } 2175 2176 switch (SCARG(uap, whence)) { 2177 case SEEK_CUR: 2178 newoff = fp->f_offset + SCARG(uap, offset); 2179 break; 2180 case SEEK_END: 2181 error = VOP_GETATTR(vp, &vattr, cred); 2182 if (error) { 2183 goto out; 2184 } 2185 newoff = SCARG(uap, offset) + vattr.va_size; 2186 break; 2187 case SEEK_SET: 2188 newoff = SCARG(uap, offset); 2189 break; 2190 default: 2191 error = EINVAL; 2192 goto out; 2193 } 2194 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2195 *(off_t *)retval = fp->f_offset = newoff; 2196 } 2197 out: 2198 fd_putfile(fd); 2199 return (error); 2200 } 2201 2202 /* 2203 * Positional read system call. 2204 */ 2205 int 2206 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2207 { 2208 /* { 2209 syscallarg(int) fd; 2210 syscallarg(void *) buf; 2211 syscallarg(size_t) nbyte; 2212 syscallarg(off_t) offset; 2213 } */ 2214 file_t *fp; 2215 struct vnode *vp; 2216 off_t offset; 2217 int error, fd = SCARG(uap, fd); 2218 2219 if ((fp = fd_getfile(fd)) == NULL) 2220 return (EBADF); 2221 2222 if ((fp->f_flag & FREAD) == 0) { 2223 fd_putfile(fd); 2224 return (EBADF); 2225 } 2226 2227 vp = fp->f_data; 2228 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2229 error = ESPIPE; 2230 goto out; 2231 } 2232 2233 offset = SCARG(uap, offset); 2234 2235 /* 2236 * XXX This works because no file systems actually 2237 * XXX take any action on the seek operation. 2238 */ 2239 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2240 goto out; 2241 2242 /* dofileread() will unuse the descriptor for us */ 2243 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2244 &offset, 0, retval)); 2245 2246 out: 2247 fd_putfile(fd); 2248 return (error); 2249 } 2250 2251 /* 2252 * Positional scatter read system call. 2253 */ 2254 int 2255 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2256 { 2257 /* { 2258 syscallarg(int) fd; 2259 syscallarg(const struct iovec *) iovp; 2260 syscallarg(int) iovcnt; 2261 syscallarg(off_t) offset; 2262 } */ 2263 off_t offset = SCARG(uap, offset); 2264 2265 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2266 SCARG(uap, iovcnt), &offset, 0, retval); 2267 } 2268 2269 /* 2270 * Positional write system call. 2271 */ 2272 int 2273 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2274 { 2275 /* { 2276 syscallarg(int) fd; 2277 syscallarg(const void *) buf; 2278 syscallarg(size_t) nbyte; 2279 syscallarg(off_t) offset; 2280 } */ 2281 file_t *fp; 2282 struct vnode *vp; 2283 off_t offset; 2284 int error, fd = SCARG(uap, fd); 2285 2286 if ((fp = fd_getfile(fd)) == NULL) 2287 return (EBADF); 2288 2289 if ((fp->f_flag & FWRITE) == 0) { 2290 fd_putfile(fd); 2291 return (EBADF); 2292 } 2293 2294 vp = fp->f_data; 2295 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2296 error = ESPIPE; 2297 goto out; 2298 } 2299 2300 offset = SCARG(uap, offset); 2301 2302 /* 2303 * XXX This works because no file systems actually 2304 * XXX take any action on the seek operation. 2305 */ 2306 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2307 goto out; 2308 2309 /* dofilewrite() will unuse the descriptor for us */ 2310 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2311 &offset, 0, retval)); 2312 2313 out: 2314 fd_putfile(fd); 2315 return (error); 2316 } 2317 2318 /* 2319 * Positional gather write system call. 2320 */ 2321 int 2322 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2323 { 2324 /* { 2325 syscallarg(int) fd; 2326 syscallarg(const struct iovec *) iovp; 2327 syscallarg(int) iovcnt; 2328 syscallarg(off_t) offset; 2329 } */ 2330 off_t offset = SCARG(uap, offset); 2331 2332 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2333 SCARG(uap, iovcnt), &offset, 0, retval); 2334 } 2335 2336 /* 2337 * Check access permissions. 2338 */ 2339 int 2340 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2341 { 2342 /* { 2343 syscallarg(const char *) path; 2344 syscallarg(int) flags; 2345 } */ 2346 kauth_cred_t cred; 2347 struct vnode *vp; 2348 int error, flags; 2349 struct nameidata nd; 2350 2351 cred = kauth_cred_dup(l->l_cred); 2352 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2353 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2354 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2355 SCARG(uap, path)); 2356 /* Override default credentials */ 2357 nd.ni_cnd.cn_cred = cred; 2358 if ((error = namei(&nd)) != 0) 2359 goto out; 2360 vp = nd.ni_vp; 2361 2362 /* Flags == 0 means only check for existence. */ 2363 if (SCARG(uap, flags)) { 2364 flags = 0; 2365 if (SCARG(uap, flags) & R_OK) 2366 flags |= VREAD; 2367 if (SCARG(uap, flags) & W_OK) 2368 flags |= VWRITE; 2369 if (SCARG(uap, flags) & X_OK) 2370 flags |= VEXEC; 2371 2372 error = VOP_ACCESS(vp, flags, cred); 2373 if (!error && (flags & VWRITE)) 2374 error = vn_writechk(vp); 2375 } 2376 vput(vp); 2377 out: 2378 kauth_cred_free(cred); 2379 return (error); 2380 } 2381 2382 /* 2383 * Common code for all sys_stat functions, including compat versions. 2384 */ 2385 int 2386 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2387 { 2388 int error; 2389 struct nameidata nd; 2390 2391 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2392 UIO_USERSPACE, path); 2393 error = namei(&nd); 2394 if (error != 0) 2395 return error; 2396 error = vn_stat(nd.ni_vp, sb); 2397 vput(nd.ni_vp); 2398 return error; 2399 } 2400 2401 /* 2402 * Get file status; this version follows links. 2403 */ 2404 /* ARGSUSED */ 2405 int 2406 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval) 2407 { 2408 /* { 2409 syscallarg(const char *) path; 2410 syscallarg(struct stat *) ub; 2411 } */ 2412 struct stat sb; 2413 int error; 2414 2415 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2416 if (error) 2417 return error; 2418 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2419 } 2420 2421 /* 2422 * Get file status; this version does not follow links. 2423 */ 2424 /* ARGSUSED */ 2425 int 2426 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval) 2427 { 2428 /* { 2429 syscallarg(const char *) path; 2430 syscallarg(struct stat *) ub; 2431 } */ 2432 struct stat sb; 2433 int error; 2434 2435 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2436 if (error) 2437 return error; 2438 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2439 } 2440 2441 /* 2442 * Get configurable pathname variables. 2443 */ 2444 /* ARGSUSED */ 2445 int 2446 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2447 { 2448 /* { 2449 syscallarg(const char *) path; 2450 syscallarg(int) name; 2451 } */ 2452 int error; 2453 struct nameidata nd; 2454 2455 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2456 SCARG(uap, path)); 2457 if ((error = namei(&nd)) != 0) 2458 return (error); 2459 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2460 vput(nd.ni_vp); 2461 return (error); 2462 } 2463 2464 /* 2465 * Return target name of a symbolic link. 2466 */ 2467 /* ARGSUSED */ 2468 int 2469 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2470 { 2471 /* { 2472 syscallarg(const char *) path; 2473 syscallarg(char *) buf; 2474 syscallarg(size_t) count; 2475 } */ 2476 struct vnode *vp; 2477 struct iovec aiov; 2478 struct uio auio; 2479 int error; 2480 struct nameidata nd; 2481 2482 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2483 SCARG(uap, path)); 2484 if ((error = namei(&nd)) != 0) 2485 return (error); 2486 vp = nd.ni_vp; 2487 if (vp->v_type != VLNK) 2488 error = EINVAL; 2489 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2490 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2491 aiov.iov_base = SCARG(uap, buf); 2492 aiov.iov_len = SCARG(uap, count); 2493 auio.uio_iov = &aiov; 2494 auio.uio_iovcnt = 1; 2495 auio.uio_offset = 0; 2496 auio.uio_rw = UIO_READ; 2497 KASSERT(l == curlwp); 2498 auio.uio_vmspace = l->l_proc->p_vmspace; 2499 auio.uio_resid = SCARG(uap, count); 2500 error = VOP_READLINK(vp, &auio, l->l_cred); 2501 } 2502 vput(vp); 2503 *retval = SCARG(uap, count) - auio.uio_resid; 2504 return (error); 2505 } 2506 2507 /* 2508 * Change flags of a file given a path name. 2509 */ 2510 /* ARGSUSED */ 2511 int 2512 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2513 { 2514 /* { 2515 syscallarg(const char *) path; 2516 syscallarg(u_long) flags; 2517 } */ 2518 struct vnode *vp; 2519 int error; 2520 struct nameidata nd; 2521 2522 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2523 SCARG(uap, path)); 2524 if ((error = namei(&nd)) != 0) 2525 return (error); 2526 vp = nd.ni_vp; 2527 error = change_flags(vp, SCARG(uap, flags), l); 2528 vput(vp); 2529 return (error); 2530 } 2531 2532 /* 2533 * Change flags of a file given a file descriptor. 2534 */ 2535 /* ARGSUSED */ 2536 int 2537 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2538 { 2539 /* { 2540 syscallarg(int) fd; 2541 syscallarg(u_long) flags; 2542 } */ 2543 struct vnode *vp; 2544 file_t *fp; 2545 int error; 2546 2547 /* fd_getvnode() will use the descriptor for us */ 2548 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2549 return (error); 2550 vp = fp->f_data; 2551 error = change_flags(vp, SCARG(uap, flags), l); 2552 VOP_UNLOCK(vp, 0); 2553 fd_putfile(SCARG(uap, fd)); 2554 return (error); 2555 } 2556 2557 /* 2558 * Change flags of a file given a path name; this version does 2559 * not follow links. 2560 */ 2561 int 2562 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2563 { 2564 /* { 2565 syscallarg(const char *) path; 2566 syscallarg(u_long) flags; 2567 } */ 2568 struct vnode *vp; 2569 int error; 2570 struct nameidata nd; 2571 2572 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2573 SCARG(uap, path)); 2574 if ((error = namei(&nd)) != 0) 2575 return (error); 2576 vp = nd.ni_vp; 2577 error = change_flags(vp, SCARG(uap, flags), l); 2578 vput(vp); 2579 return (error); 2580 } 2581 2582 /* 2583 * Common routine to change flags of a file. 2584 */ 2585 int 2586 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2587 { 2588 struct vattr vattr; 2589 int error; 2590 2591 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2592 /* 2593 * Non-superusers cannot change the flags on devices, even if they 2594 * own them. 2595 */ 2596 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2597 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2598 goto out; 2599 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2600 error = EINVAL; 2601 goto out; 2602 } 2603 } 2604 VATTR_NULL(&vattr); 2605 vattr.va_flags = flags; 2606 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2607 out: 2608 return (error); 2609 } 2610 2611 /* 2612 * Change mode of a file given path name; this version follows links. 2613 */ 2614 /* ARGSUSED */ 2615 int 2616 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2617 { 2618 /* { 2619 syscallarg(const char *) path; 2620 syscallarg(int) mode; 2621 } */ 2622 int error; 2623 struct nameidata nd; 2624 2625 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2626 SCARG(uap, path)); 2627 if ((error = namei(&nd)) != 0) 2628 return (error); 2629 2630 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2631 2632 vrele(nd.ni_vp); 2633 return (error); 2634 } 2635 2636 /* 2637 * Change mode of a file given a file descriptor. 2638 */ 2639 /* ARGSUSED */ 2640 int 2641 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2642 { 2643 /* { 2644 syscallarg(int) fd; 2645 syscallarg(int) mode; 2646 } */ 2647 file_t *fp; 2648 int error; 2649 2650 /* fd_getvnode() will use the descriptor for us */ 2651 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2652 return (error); 2653 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2654 fd_putfile(SCARG(uap, fd)); 2655 return (error); 2656 } 2657 2658 /* 2659 * Change mode of a file given path name; this version does not follow links. 2660 */ 2661 /* ARGSUSED */ 2662 int 2663 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2664 { 2665 /* { 2666 syscallarg(const char *) path; 2667 syscallarg(int) mode; 2668 } */ 2669 int error; 2670 struct nameidata nd; 2671 2672 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2673 SCARG(uap, path)); 2674 if ((error = namei(&nd)) != 0) 2675 return (error); 2676 2677 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2678 2679 vrele(nd.ni_vp); 2680 return (error); 2681 } 2682 2683 /* 2684 * Common routine to set mode given a vnode. 2685 */ 2686 static int 2687 change_mode(struct vnode *vp, int mode, struct lwp *l) 2688 { 2689 struct vattr vattr; 2690 int error; 2691 2692 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2693 VATTR_NULL(&vattr); 2694 vattr.va_mode = mode & ALLPERMS; 2695 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2696 VOP_UNLOCK(vp, 0); 2697 return (error); 2698 } 2699 2700 /* 2701 * Set ownership given a path name; this version follows links. 2702 */ 2703 /* ARGSUSED */ 2704 int 2705 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2706 { 2707 /* { 2708 syscallarg(const char *) path; 2709 syscallarg(uid_t) uid; 2710 syscallarg(gid_t) gid; 2711 } */ 2712 int error; 2713 struct nameidata nd; 2714 2715 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2716 SCARG(uap, path)); 2717 if ((error = namei(&nd)) != 0) 2718 return (error); 2719 2720 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2721 2722 vrele(nd.ni_vp); 2723 return (error); 2724 } 2725 2726 /* 2727 * Set ownership given a path name; this version follows links. 2728 * Provides POSIX semantics. 2729 */ 2730 /* ARGSUSED */ 2731 int 2732 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2733 { 2734 /* { 2735 syscallarg(const char *) path; 2736 syscallarg(uid_t) uid; 2737 syscallarg(gid_t) gid; 2738 } */ 2739 int error; 2740 struct nameidata nd; 2741 2742 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2743 SCARG(uap, path)); 2744 if ((error = namei(&nd)) != 0) 2745 return (error); 2746 2747 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2748 2749 vrele(nd.ni_vp); 2750 return (error); 2751 } 2752 2753 /* 2754 * Set ownership given a file descriptor. 2755 */ 2756 /* ARGSUSED */ 2757 int 2758 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2759 { 2760 /* { 2761 syscallarg(int) fd; 2762 syscallarg(uid_t) uid; 2763 syscallarg(gid_t) gid; 2764 } */ 2765 int error; 2766 file_t *fp; 2767 2768 /* fd_getvnode() will use the descriptor for us */ 2769 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2770 return (error); 2771 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2772 l, 0); 2773 fd_putfile(SCARG(uap, fd)); 2774 return (error); 2775 } 2776 2777 /* 2778 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2779 */ 2780 /* ARGSUSED */ 2781 int 2782 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2783 { 2784 /* { 2785 syscallarg(int) fd; 2786 syscallarg(uid_t) uid; 2787 syscallarg(gid_t) gid; 2788 } */ 2789 int error; 2790 file_t *fp; 2791 2792 /* fd_getvnode() will use the descriptor for us */ 2793 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2794 return (error); 2795 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2796 l, 1); 2797 fd_putfile(SCARG(uap, fd)); 2798 return (error); 2799 } 2800 2801 /* 2802 * Set ownership given a path name; this version does not follow links. 2803 */ 2804 /* ARGSUSED */ 2805 int 2806 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2807 { 2808 /* { 2809 syscallarg(const char *) path; 2810 syscallarg(uid_t) uid; 2811 syscallarg(gid_t) gid; 2812 } */ 2813 int error; 2814 struct nameidata nd; 2815 2816 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2817 SCARG(uap, path)); 2818 if ((error = namei(&nd)) != 0) 2819 return (error); 2820 2821 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2822 2823 vrele(nd.ni_vp); 2824 return (error); 2825 } 2826 2827 /* 2828 * Set ownership given a path name; this version does not follow links. 2829 * Provides POSIX/XPG semantics. 2830 */ 2831 /* ARGSUSED */ 2832 int 2833 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2834 { 2835 /* { 2836 syscallarg(const char *) path; 2837 syscallarg(uid_t) uid; 2838 syscallarg(gid_t) gid; 2839 } */ 2840 int error; 2841 struct nameidata nd; 2842 2843 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2844 SCARG(uap, path)); 2845 if ((error = namei(&nd)) != 0) 2846 return (error); 2847 2848 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2849 2850 vrele(nd.ni_vp); 2851 return (error); 2852 } 2853 2854 /* 2855 * Common routine to set ownership given a vnode. 2856 */ 2857 static int 2858 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2859 int posix_semantics) 2860 { 2861 struct vattr vattr; 2862 mode_t newmode; 2863 int error; 2864 2865 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2866 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2867 goto out; 2868 2869 #define CHANGED(x) ((int)(x) != -1) 2870 newmode = vattr.va_mode; 2871 if (posix_semantics) { 2872 /* 2873 * POSIX/XPG semantics: if the caller is not the super-user, 2874 * clear set-user-id and set-group-id bits. Both POSIX and 2875 * the XPG consider the behaviour for calls by the super-user 2876 * implementation-defined; we leave the set-user-id and set- 2877 * group-id settings intact in that case. 2878 */ 2879 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2880 NULL) != 0) 2881 newmode &= ~(S_ISUID | S_ISGID); 2882 } else { 2883 /* 2884 * NetBSD semantics: when changing owner and/or group, 2885 * clear the respective bit(s). 2886 */ 2887 if (CHANGED(uid)) 2888 newmode &= ~S_ISUID; 2889 if (CHANGED(gid)) 2890 newmode &= ~S_ISGID; 2891 } 2892 /* Update va_mode iff altered. */ 2893 if (vattr.va_mode == newmode) 2894 newmode = VNOVAL; 2895 2896 VATTR_NULL(&vattr); 2897 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2898 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2899 vattr.va_mode = newmode; 2900 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2901 #undef CHANGED 2902 2903 out: 2904 VOP_UNLOCK(vp, 0); 2905 return (error); 2906 } 2907 2908 /* 2909 * Set the access and modification times given a path name; this 2910 * version follows links. 2911 */ 2912 /* ARGSUSED */ 2913 int 2914 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval) 2915 { 2916 /* { 2917 syscallarg(const char *) path; 2918 syscallarg(const struct timeval *) tptr; 2919 } */ 2920 2921 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2922 SCARG(uap, tptr), UIO_USERSPACE); 2923 } 2924 2925 /* 2926 * Set the access and modification times given a file descriptor. 2927 */ 2928 /* ARGSUSED */ 2929 int 2930 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval) 2931 { 2932 /* { 2933 syscallarg(int) fd; 2934 syscallarg(const struct timeval *) tptr; 2935 } */ 2936 int error; 2937 file_t *fp; 2938 2939 /* fd_getvnode() will use the descriptor for us */ 2940 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2941 return (error); 2942 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2943 UIO_USERSPACE); 2944 fd_putfile(SCARG(uap, fd)); 2945 return (error); 2946 } 2947 2948 /* 2949 * Set the access and modification times given a path name; this 2950 * version does not follow links. 2951 */ 2952 int 2953 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval) 2954 { 2955 /* { 2956 syscallarg(const char *) path; 2957 syscallarg(const struct timeval *) tptr; 2958 } */ 2959 2960 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2961 SCARG(uap, tptr), UIO_USERSPACE); 2962 } 2963 2964 /* 2965 * Common routine to set access and modification times given a vnode. 2966 */ 2967 int 2968 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2969 const struct timeval *tptr, enum uio_seg seg) 2970 { 2971 struct vattr vattr; 2972 struct nameidata nd; 2973 int error; 2974 2975 VATTR_NULL(&vattr); 2976 if (tptr == NULL) { 2977 nanotime(&vattr.va_atime); 2978 vattr.va_mtime = vattr.va_atime; 2979 vattr.va_vaflags |= VA_UTIMES_NULL; 2980 } else { 2981 struct timeval tv[2]; 2982 2983 if (seg != UIO_SYSSPACE) { 2984 error = copyin(tptr, &tv, sizeof (tv)); 2985 if (error != 0) 2986 return error; 2987 tptr = tv; 2988 } 2989 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime); 2990 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime); 2991 } 2992 2993 if (vp == NULL) { 2994 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 2995 if ((error = namei(&nd)) != 0) 2996 return (error); 2997 vp = nd.ni_vp; 2998 } else 2999 nd.ni_vp = NULL; 3000 3001 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3002 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3003 VOP_UNLOCK(vp, 0); 3004 3005 if (nd.ni_vp != NULL) 3006 vrele(nd.ni_vp); 3007 3008 return (error); 3009 } 3010 3011 /* 3012 * Truncate a file given its path name. 3013 */ 3014 /* ARGSUSED */ 3015 int 3016 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3017 { 3018 /* { 3019 syscallarg(const char *) path; 3020 syscallarg(int) pad; 3021 syscallarg(off_t) length; 3022 } */ 3023 struct vnode *vp; 3024 struct vattr vattr; 3025 int error; 3026 struct nameidata nd; 3027 3028 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3029 SCARG(uap, path)); 3030 if ((error = namei(&nd)) != 0) 3031 return (error); 3032 vp = nd.ni_vp; 3033 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3034 if (vp->v_type == VDIR) 3035 error = EISDIR; 3036 else if ((error = vn_writechk(vp)) == 0 && 3037 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3038 VATTR_NULL(&vattr); 3039 vattr.va_size = SCARG(uap, length); 3040 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3041 } 3042 vput(vp); 3043 return (error); 3044 } 3045 3046 /* 3047 * Truncate a file given a file descriptor. 3048 */ 3049 /* ARGSUSED */ 3050 int 3051 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3052 { 3053 /* { 3054 syscallarg(int) fd; 3055 syscallarg(int) pad; 3056 syscallarg(off_t) length; 3057 } */ 3058 struct vattr vattr; 3059 struct vnode *vp; 3060 file_t *fp; 3061 int error; 3062 3063 /* fd_getvnode() will use the descriptor for us */ 3064 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3065 return (error); 3066 if ((fp->f_flag & FWRITE) == 0) { 3067 error = EINVAL; 3068 goto out; 3069 } 3070 vp = fp->f_data; 3071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3072 if (vp->v_type == VDIR) 3073 error = EISDIR; 3074 else if ((error = vn_writechk(vp)) == 0) { 3075 VATTR_NULL(&vattr); 3076 vattr.va_size = SCARG(uap, length); 3077 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3078 } 3079 VOP_UNLOCK(vp, 0); 3080 out: 3081 fd_putfile(SCARG(uap, fd)); 3082 return (error); 3083 } 3084 3085 /* 3086 * Sync an open file. 3087 */ 3088 /* ARGSUSED */ 3089 int 3090 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3091 { 3092 /* { 3093 syscallarg(int) fd; 3094 } */ 3095 struct vnode *vp; 3096 file_t *fp; 3097 int error; 3098 3099 /* fd_getvnode() will use the descriptor for us */ 3100 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3101 return (error); 3102 vp = fp->f_data; 3103 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3104 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3105 if (error == 0 && bioopsp != NULL && 3106 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3107 (*bioopsp->io_fsync)(vp, 0); 3108 VOP_UNLOCK(vp, 0); 3109 fd_putfile(SCARG(uap, fd)); 3110 return (error); 3111 } 3112 3113 /* 3114 * Sync a range of file data. API modeled after that found in AIX. 3115 * 3116 * FDATASYNC indicates that we need only save enough metadata to be able 3117 * to re-read the written data. Note we duplicate AIX's requirement that 3118 * the file be open for writing. 3119 */ 3120 /* ARGSUSED */ 3121 int 3122 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3123 { 3124 /* { 3125 syscallarg(int) fd; 3126 syscallarg(int) flags; 3127 syscallarg(off_t) start; 3128 syscallarg(off_t) length; 3129 } */ 3130 struct vnode *vp; 3131 file_t *fp; 3132 int flags, nflags; 3133 off_t s, e, len; 3134 int error; 3135 3136 /* fd_getvnode() will use the descriptor for us */ 3137 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3138 return (error); 3139 3140 if ((fp->f_flag & FWRITE) == 0) { 3141 error = EBADF; 3142 goto out; 3143 } 3144 3145 flags = SCARG(uap, flags); 3146 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3147 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3148 error = EINVAL; 3149 goto out; 3150 } 3151 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3152 if (flags & FDATASYNC) 3153 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3154 else 3155 nflags = FSYNC_WAIT; 3156 if (flags & FDISKSYNC) 3157 nflags |= FSYNC_CACHE; 3158 3159 len = SCARG(uap, length); 3160 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3161 if (len) { 3162 s = SCARG(uap, start); 3163 e = s + len; 3164 if (e < s) { 3165 error = EINVAL; 3166 goto out; 3167 } 3168 } else { 3169 e = 0; 3170 s = 0; 3171 } 3172 3173 vp = fp->f_data; 3174 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3175 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3176 3177 if (error == 0 && bioopsp != NULL && 3178 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3179 (*bioopsp->io_fsync)(vp, nflags); 3180 3181 VOP_UNLOCK(vp, 0); 3182 out: 3183 fd_putfile(SCARG(uap, fd)); 3184 return (error); 3185 } 3186 3187 /* 3188 * Sync the data of an open file. 3189 */ 3190 /* ARGSUSED */ 3191 int 3192 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3193 { 3194 /* { 3195 syscallarg(int) fd; 3196 } */ 3197 struct vnode *vp; 3198 file_t *fp; 3199 int error; 3200 3201 /* fd_getvnode() will use the descriptor for us */ 3202 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3203 return (error); 3204 if ((fp->f_flag & FWRITE) == 0) { 3205 fd_putfile(SCARG(uap, fd)); 3206 return (EBADF); 3207 } 3208 vp = fp->f_data; 3209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3210 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3211 VOP_UNLOCK(vp, 0); 3212 fd_putfile(SCARG(uap, fd)); 3213 return (error); 3214 } 3215 3216 /* 3217 * Rename files, (standard) BSD semantics frontend. 3218 */ 3219 /* ARGSUSED */ 3220 int 3221 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3222 { 3223 /* { 3224 syscallarg(const char *) from; 3225 syscallarg(const char *) to; 3226 } */ 3227 3228 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3229 } 3230 3231 /* 3232 * Rename files, POSIX semantics frontend. 3233 */ 3234 /* ARGSUSED */ 3235 int 3236 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3237 { 3238 /* { 3239 syscallarg(const char *) from; 3240 syscallarg(const char *) to; 3241 } */ 3242 3243 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3244 } 3245 3246 /* 3247 * Rename files. Source and destination must either both be directories, 3248 * or both not be directories. If target is a directory, it must be empty. 3249 * If `from' and `to' refer to the same object, the value of the `retain' 3250 * argument is used to determine whether `from' will be 3251 * 3252 * (retain == 0) deleted unless `from' and `to' refer to the same 3253 * object in the file system's name space (BSD). 3254 * (retain == 1) always retained (POSIX). 3255 */ 3256 int 3257 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3258 { 3259 struct vnode *tvp, *fvp, *tdvp; 3260 struct nameidata fromnd, tond; 3261 struct mount *fs; 3262 struct lwp *l = curlwp; 3263 struct proc *p; 3264 uint32_t saveflag; 3265 int error; 3266 3267 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3268 seg, from); 3269 if ((error = namei(&fromnd)) != 0) 3270 return (error); 3271 if (fromnd.ni_dvp != fromnd.ni_vp) 3272 VOP_UNLOCK(fromnd.ni_dvp, 0); 3273 fvp = fromnd.ni_vp; 3274 3275 fs = fvp->v_mount; 3276 error = VFS_RENAMELOCK_ENTER(fs); 3277 if (error) { 3278 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3279 vrele(fromnd.ni_dvp); 3280 vrele(fvp); 3281 goto out1; 3282 } 3283 3284 /* 3285 * close, partially, yet another race - ideally we should only 3286 * go as far as getting fromnd.ni_dvp before getting the per-fs 3287 * lock, and then continue to get fromnd.ni_vp, but we can't do 3288 * that with namei as it stands. 3289 * 3290 * This still won't prevent rmdir from nuking fromnd.ni_vp 3291 * under us. The real fix is to get the locks in the right 3292 * order and do the lookups in the right places, but that's a 3293 * major rototill. 3294 * 3295 * Preserve the SAVESTART in cn_flags, because who knows what 3296 * might happen if we don't. 3297 * 3298 * Note: this logic (as well as this whole function) is cloned 3299 * in nfs_serv.c. Proceed accordingly. 3300 */ 3301 vrele(fvp); 3302 if ((fromnd.ni_cnd.cn_namelen == 1 && 3303 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3304 (fromnd.ni_cnd.cn_namelen == 2 && 3305 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3306 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3307 error = EINVAL; 3308 VFS_RENAMELOCK_EXIT(fs); 3309 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3310 vrele(fromnd.ni_dvp); 3311 goto out1; 3312 } 3313 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3314 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3315 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3316 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3317 fromnd.ni_cnd.cn_flags |= saveflag; 3318 if (error) { 3319 VOP_UNLOCK(fromnd.ni_dvp, 0); 3320 VFS_RENAMELOCK_EXIT(fs); 3321 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3322 vrele(fromnd.ni_dvp); 3323 goto out1; 3324 } 3325 VOP_UNLOCK(fromnd.ni_vp, 0); 3326 if (fromnd.ni_dvp != fromnd.ni_vp) 3327 VOP_UNLOCK(fromnd.ni_dvp, 0); 3328 fvp = fromnd.ni_vp; 3329 3330 NDINIT(&tond, RENAME, 3331 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3332 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3333 seg, to); 3334 if ((error = namei(&tond)) != 0) { 3335 VFS_RENAMELOCK_EXIT(fs); 3336 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3337 vrele(fromnd.ni_dvp); 3338 vrele(fvp); 3339 goto out1; 3340 } 3341 tdvp = tond.ni_dvp; 3342 tvp = tond.ni_vp; 3343 3344 if (tvp != NULL) { 3345 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3346 error = ENOTDIR; 3347 goto out; 3348 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3349 error = EISDIR; 3350 goto out; 3351 } 3352 } 3353 3354 if (fvp == tdvp) 3355 error = EINVAL; 3356 3357 /* 3358 * Source and destination refer to the same object. 3359 */ 3360 if (fvp == tvp) { 3361 if (retain) 3362 error = -1; 3363 else if (fromnd.ni_dvp == tdvp && 3364 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3365 !memcmp(fromnd.ni_cnd.cn_nameptr, 3366 tond.ni_cnd.cn_nameptr, 3367 fromnd.ni_cnd.cn_namelen)) 3368 error = -1; 3369 } 3370 3371 #if NVERIEXEC > 0 3372 if (!error) { 3373 char *f1, *f2; 3374 3375 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3376 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3377 3378 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3379 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3380 3381 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3382 3383 free(f1, M_TEMP); 3384 free(f2, M_TEMP); 3385 } 3386 #endif /* NVERIEXEC > 0 */ 3387 3388 out: 3389 p = l->l_proc; 3390 if (!error) { 3391 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3392 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3393 VFS_RENAMELOCK_EXIT(fs); 3394 } else { 3395 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3396 if (tdvp == tvp) 3397 vrele(tdvp); 3398 else 3399 vput(tdvp); 3400 if (tvp) 3401 vput(tvp); 3402 VFS_RENAMELOCK_EXIT(fs); 3403 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3404 vrele(fromnd.ni_dvp); 3405 vrele(fvp); 3406 } 3407 vrele(tond.ni_startdir); 3408 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3409 out1: 3410 if (fromnd.ni_startdir) 3411 vrele(fromnd.ni_startdir); 3412 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3413 return (error == -1 ? 0 : error); 3414 } 3415 3416 /* 3417 * Make a directory file. 3418 */ 3419 /* ARGSUSED */ 3420 int 3421 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3422 { 3423 /* { 3424 syscallarg(const char *) path; 3425 syscallarg(int) mode; 3426 } */ 3427 struct proc *p = l->l_proc; 3428 struct vnode *vp; 3429 struct vattr vattr; 3430 int error; 3431 struct nameidata nd; 3432 3433 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3434 SCARG(uap, path)); 3435 if ((error = namei(&nd)) != 0) 3436 return (error); 3437 vp = nd.ni_vp; 3438 if (vp != NULL) { 3439 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3440 if (nd.ni_dvp == vp) 3441 vrele(nd.ni_dvp); 3442 else 3443 vput(nd.ni_dvp); 3444 vrele(vp); 3445 return (EEXIST); 3446 } 3447 VATTR_NULL(&vattr); 3448 vattr.va_type = VDIR; 3449 /* We will read cwdi->cwdi_cmask unlocked. */ 3450 vattr.va_mode = 3451 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3452 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3453 if (!error) 3454 vput(nd.ni_vp); 3455 return (error); 3456 } 3457 3458 /* 3459 * Remove a directory file. 3460 */ 3461 /* ARGSUSED */ 3462 int 3463 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3464 { 3465 /* { 3466 syscallarg(const char *) path; 3467 } */ 3468 struct vnode *vp; 3469 int error; 3470 struct nameidata nd; 3471 3472 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3473 SCARG(uap, path)); 3474 if ((error = namei(&nd)) != 0) 3475 return (error); 3476 vp = nd.ni_vp; 3477 if (vp->v_type != VDIR) { 3478 error = ENOTDIR; 3479 goto out; 3480 } 3481 /* 3482 * No rmdir "." please. 3483 */ 3484 if (nd.ni_dvp == vp) { 3485 error = EINVAL; 3486 goto out; 3487 } 3488 /* 3489 * The root of a mounted filesystem cannot be deleted. 3490 */ 3491 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3492 error = EBUSY; 3493 goto out; 3494 } 3495 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3496 return (error); 3497 3498 out: 3499 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3500 if (nd.ni_dvp == vp) 3501 vrele(nd.ni_dvp); 3502 else 3503 vput(nd.ni_dvp); 3504 vput(vp); 3505 return (error); 3506 } 3507 3508 /* 3509 * Read a block of directory entries in a file system independent format. 3510 */ 3511 int 3512 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3513 { 3514 /* { 3515 syscallarg(int) fd; 3516 syscallarg(char *) buf; 3517 syscallarg(size_t) count; 3518 } */ 3519 file_t *fp; 3520 int error, done; 3521 3522 /* fd_getvnode() will use the descriptor for us */ 3523 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3524 return (error); 3525 if ((fp->f_flag & FREAD) == 0) { 3526 error = EBADF; 3527 goto out; 3528 } 3529 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3530 SCARG(uap, count), &done, l, 0, 0); 3531 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3532 *retval = done; 3533 out: 3534 fd_putfile(SCARG(uap, fd)); 3535 return (error); 3536 } 3537 3538 /* 3539 * Set the mode mask for creation of filesystem nodes. 3540 */ 3541 int 3542 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3543 { 3544 /* { 3545 syscallarg(mode_t) newmask; 3546 } */ 3547 struct proc *p = l->l_proc; 3548 struct cwdinfo *cwdi; 3549 3550 /* 3551 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3552 * important is that we serialize changes to the mask. The 3553 * rw_exit() will issue a write memory barrier on our behalf, 3554 * and force the changes out to other CPUs (as it must use an 3555 * atomic operation, draining the local CPU's store buffers). 3556 */ 3557 cwdi = p->p_cwdi; 3558 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3559 *retval = cwdi->cwdi_cmask; 3560 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3561 rw_exit(&cwdi->cwdi_lock); 3562 3563 return (0); 3564 } 3565 3566 int 3567 dorevoke(struct vnode *vp, kauth_cred_t cred) 3568 { 3569 struct vattr vattr; 3570 int error; 3571 3572 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3573 return error; 3574 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3575 (error = kauth_authorize_generic(cred, 3576 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3577 VOP_REVOKE(vp, REVOKEALL); 3578 return (error); 3579 } 3580 3581 /* 3582 * Void all references to file by ripping underlying filesystem 3583 * away from vnode. 3584 */ 3585 /* ARGSUSED */ 3586 int 3587 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3588 { 3589 /* { 3590 syscallarg(const char *) path; 3591 } */ 3592 struct vnode *vp; 3593 int error; 3594 struct nameidata nd; 3595 3596 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3597 SCARG(uap, path)); 3598 if ((error = namei(&nd)) != 0) 3599 return (error); 3600 vp = nd.ni_vp; 3601 error = dorevoke(vp, l->l_cred); 3602 vrele(vp); 3603 return (error); 3604 } 3605 3606 /* 3607 * Convert a user file descriptor to a kernel file entry. 3608 */ 3609 int 3610 getvnode(int fd, file_t **fpp) 3611 { 3612 struct vnode *vp; 3613 file_t *fp; 3614 3615 if ((fp = fd_getfile(fd)) == NULL) 3616 return (EBADF); 3617 3618 if (fp->f_type != DTYPE_VNODE) { 3619 fd_putfile(fd); 3620 return (EINVAL); 3621 } 3622 3623 vp = fp->f_data; 3624 if (vp->v_type == VBAD) { 3625 fd_putfile(fd); 3626 return (EBADF); 3627 } 3628 3629 *fpp = fp; 3630 return (0); 3631 } 3632