1 /* $NetBSD: vfs_syscalls.c,v 1.375 2008/09/25 14:37:30 wiz Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 63 */ 64 65 #include <sys/cdefs.h> 66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.375 2008/09/25 14:37:30 wiz Exp $"); 67 68 #include "opt_compat_netbsd.h" 69 #include "opt_compat_43.h" 70 #include "opt_fileassoc.h" 71 #include "veriexec.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/namei.h> 76 #include <sys/filedesc.h> 77 #include <sys/kernel.h> 78 #include <sys/file.h> 79 #include <sys/stat.h> 80 #include <sys/vnode.h> 81 #include <sys/mount.h> 82 #include <sys/proc.h> 83 #include <sys/uio.h> 84 #include <sys/malloc.h> 85 #include <sys/kmem.h> 86 #include <sys/dirent.h> 87 #include <sys/sysctl.h> 88 #include <sys/syscallargs.h> 89 #include <sys/vfs_syscalls.h> 90 #include <sys/ktrace.h> 91 #ifdef FILEASSOC 92 #include <sys/fileassoc.h> 93 #endif /* FILEASSOC */ 94 #include <sys/verified_exec.h> 95 #include <sys/kauth.h> 96 #include <sys/atomic.h> 97 #include <sys/module.h> 98 99 #include <miscfs/genfs/genfs.h> 100 #include <miscfs/syncfs/syncfs.h> 101 #include <miscfs/specfs/specdev.h> 102 103 #ifdef COMPAT_30 104 #include "opt_nfsserver.h" 105 #include <nfs/rpcv2.h> 106 #endif 107 #include <nfs/nfsproto.h> 108 #ifdef COMPAT_30 109 #include <nfs/nfs.h> 110 #include <nfs/nfs_var.h> 111 #endif 112 113 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 114 115 static int change_dir(struct nameidata *, struct lwp *); 116 static int change_flags(struct vnode *, u_long, struct lwp *); 117 static int change_mode(struct vnode *, int, struct lwp *l); 118 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 119 120 void checkdirs(struct vnode *); 121 122 int dovfsusermount = 0; 123 124 /* 125 * Virtual File System System Calls 126 */ 127 128 /* 129 * Mount a file system. 130 */ 131 132 #if defined(COMPAT_09) || defined(COMPAT_43) 133 /* 134 * This table is used to maintain compatibility with 4.3BSD 135 * and NetBSD 0.9 mount syscalls. Note, the order is important! 136 * 137 * Do not modify this table. It should only contain filesystems 138 * supported by NetBSD 0.9 and 4.3BSD. 139 */ 140 const char * const mountcompatnames[] = { 141 NULL, /* 0 = MOUNT_NONE */ 142 MOUNT_FFS, /* 1 = MOUNT_UFS */ 143 MOUNT_NFS, /* 2 */ 144 MOUNT_MFS, /* 3 */ 145 MOUNT_MSDOS, /* 4 */ 146 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 147 MOUNT_FDESC, /* 6 */ 148 MOUNT_KERNFS, /* 7 */ 149 NULL, /* 8 = MOUNT_DEVFS */ 150 MOUNT_AFS, /* 9 */ 151 }; 152 const int nmountcompatnames = sizeof(mountcompatnames) / 153 sizeof(mountcompatnames[0]); 154 #endif /* COMPAT_09 || COMPAT_43 */ 155 156 static int 157 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 158 void *data, size_t *data_len) 159 { 160 struct mount *mp; 161 int error = 0, saved_flags; 162 163 mp = vp->v_mount; 164 saved_flags = mp->mnt_flag; 165 166 /* We can operate only on VV_ROOT nodes. */ 167 if ((vp->v_vflag & VV_ROOT) == 0) { 168 error = EINVAL; 169 goto out; 170 } 171 172 /* 173 * We only allow the filesystem to be reloaded if it 174 * is currently mounted read-only. Additionally, we 175 * prevent read-write to read-only downgrades. 176 */ 177 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 178 (mp->mnt_flag & MNT_RDONLY) == 0) { 179 error = EOPNOTSUPP; /* Needs translation */ 180 goto out; 181 } 182 183 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 184 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 185 if (error) 186 goto out; 187 188 if (vfs_busy(mp, NULL)) { 189 error = EPERM; 190 goto out; 191 } 192 193 mutex_enter(&mp->mnt_updating); 194 195 mp->mnt_flag &= ~MNT_OP_FLAGS; 196 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 197 198 /* 199 * Set the mount level flags. 200 */ 201 if (flags & MNT_RDONLY) 202 mp->mnt_flag |= MNT_RDONLY; 203 else if (mp->mnt_flag & MNT_RDONLY) 204 mp->mnt_iflag |= IMNT_WANTRDWR; 205 mp->mnt_flag &= 206 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 209 MNT_LOG); 210 mp->mnt_flag |= flags & 211 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 212 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 213 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 214 MNT_LOG | MNT_IGNORE); 215 216 error = VFS_MOUNT(mp, path, data, data_len); 217 218 #if defined(COMPAT_30) && defined(NFSSERVER) 219 if (error && data != NULL) { 220 int error2; 221 222 /* Update failed; let's try and see if it was an 223 * export request. */ 224 error2 = nfs_update_exports_30(mp, path, data, l); 225 226 /* Only update error code if the export request was 227 * understood but some problem occurred while 228 * processing it. */ 229 if (error2 != EJUSTRETURN) 230 error = error2; 231 } 232 #endif 233 if (mp->mnt_iflag & IMNT_WANTRDWR) 234 mp->mnt_flag &= ~MNT_RDONLY; 235 if (error) 236 mp->mnt_flag = saved_flags; 237 mp->mnt_flag &= ~MNT_OP_FLAGS; 238 mp->mnt_iflag &= ~IMNT_WANTRDWR; 239 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 240 if (mp->mnt_syncer == NULL) 241 error = vfs_allocate_syncvnode(mp); 242 } else { 243 if (mp->mnt_syncer != NULL) 244 vfs_deallocate_syncvnode(mp); 245 } 246 mutex_exit(&mp->mnt_updating); 247 vfs_unbusy(mp, false, NULL); 248 249 out: 250 return (error); 251 } 252 253 static int 254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 255 { 256 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 257 int error; 258 259 /* Copy file-system type from userspace. */ 260 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 261 if (error) { 262 #if defined(COMPAT_09) || defined(COMPAT_43) 263 /* 264 * Historically, filesystem types were identified by numbers. 265 * If we get an integer for the filesystem type instead of a 266 * string, we check to see if it matches one of the historic 267 * filesystem types. 268 */ 269 u_long fsindex = (u_long)fstype; 270 if (fsindex >= nmountcompatnames || 271 mountcompatnames[fsindex] == NULL) 272 return ENODEV; 273 strlcpy(fstypename, mountcompatnames[fsindex], 274 sizeof(fstypename)); 275 #else 276 return error; 277 #endif 278 } 279 280 #ifdef COMPAT_10 281 /* Accept `ufs' as an alias for `ffs'. */ 282 if (strcmp(fstypename, "ufs") == 0) 283 fstypename[0] = 'f'; 284 #endif 285 286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 287 return 0; 288 289 /* If we can autoload a vfs module, try again */ 290 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true); 291 292 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 293 return 0; 294 295 return ENODEV; 296 } 297 298 static int 299 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 300 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 301 { 302 struct mount *mp; 303 struct vnode *vp = *vpp; 304 struct vattr va; 305 int error; 306 307 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 308 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 309 if (error) 310 return error; 311 312 /* Can't make a non-dir a mount-point (from here anyway). */ 313 if (vp->v_type != VDIR) 314 return ENOTDIR; 315 316 /* 317 * If the user is not root, ensure that they own the directory 318 * onto which we are attempting to mount. 319 */ 320 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 321 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 322 (error = kauth_authorize_generic(l->l_cred, 323 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 324 return error; 325 } 326 327 if (flags & MNT_EXPORTED) 328 return EINVAL; 329 330 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 331 return error; 332 333 /* 334 * Check if a file-system is not already mounted on this vnode. 335 */ 336 if (vp->v_mountedhere != NULL) 337 return EBUSY; 338 339 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 340 if (mp == NULL) 341 return ENOMEM; 342 343 mp->mnt_op = vfsops; 344 mp->mnt_refcnt = 1; 345 346 TAILQ_INIT(&mp->mnt_vnodelist); 347 rw_init(&mp->mnt_unmounting); 348 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 349 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 350 error = vfs_busy(mp, NULL); 351 KASSERT(error == 0); 352 mutex_enter(&mp->mnt_updating); 353 354 mp->mnt_vnodecovered = vp; 355 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 356 mount_initspecific(mp); 357 358 /* 359 * The underlying file system may refuse the mount for 360 * various reasons. Allow the user to force it to happen. 361 * 362 * Set the mount level flags. 363 */ 364 mp->mnt_flag = flags & 365 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 366 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 367 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 368 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 369 370 error = VFS_MOUNT(mp, path, data, data_len); 371 mp->mnt_flag &= ~MNT_OP_FLAGS; 372 373 /* 374 * Put the new filesystem on the mount list after root. 375 */ 376 cache_purge(vp); 377 if (error != 0) { 378 vp->v_mountedhere = NULL; 379 mutex_exit(&mp->mnt_updating); 380 vfs_unbusy(mp, false, NULL); 381 vfs_destroy(mp); 382 return error; 383 } 384 385 mp->mnt_iflag &= ~IMNT_WANTRDWR; 386 mutex_enter(&mountlist_lock); 387 vp->v_mountedhere = mp; 388 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 389 mutex_exit(&mountlist_lock); 390 vn_restorerecurse(vp, recurse); 391 VOP_UNLOCK(vp, 0); 392 checkdirs(vp); 393 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 394 error = vfs_allocate_syncvnode(mp); 395 /* Hold an additional reference to the mount across VFS_START(). */ 396 mutex_exit(&mp->mnt_updating); 397 vfs_unbusy(mp, true, NULL); 398 (void) VFS_STATVFS(mp, &mp->mnt_stat); 399 error = VFS_START(mp, 0); 400 if (error) 401 vrele(vp); 402 /* Drop reference held for VFS_START(). */ 403 vfs_destroy(mp); 404 *vpp = NULL; 405 return error; 406 } 407 408 static int 409 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 410 void *data, size_t *data_len) 411 { 412 struct mount *mp; 413 int error; 414 415 /* If MNT_GETARGS is specified, it should be the only flag. */ 416 if (flags & ~MNT_GETARGS) 417 return EINVAL; 418 419 mp = vp->v_mount; 420 421 /* XXX: probably some notion of "can see" here if we want isolation. */ 422 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 423 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 424 if (error) 425 return error; 426 427 if ((vp->v_vflag & VV_ROOT) == 0) 428 return EINVAL; 429 430 if (vfs_busy(mp, NULL)) 431 return EPERM; 432 433 mutex_enter(&mp->mnt_updating); 434 mp->mnt_flag &= ~MNT_OP_FLAGS; 435 mp->mnt_flag |= MNT_GETARGS; 436 error = VFS_MOUNT(mp, path, data, data_len); 437 mp->mnt_flag &= ~MNT_OP_FLAGS; 438 mutex_exit(&mp->mnt_updating); 439 440 vfs_unbusy(mp, false, NULL); 441 return (error); 442 } 443 444 #ifdef COMPAT_40 445 /* ARGSUSED */ 446 int 447 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval) 448 { 449 /* { 450 syscallarg(const char *) type; 451 syscallarg(const char *) path; 452 syscallarg(int) flags; 453 syscallarg(void *) data; 454 } */ 455 register_t dummy; 456 457 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 458 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy); 459 } 460 #endif 461 462 int 463 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 464 { 465 /* { 466 syscallarg(const char *) type; 467 syscallarg(const char *) path; 468 syscallarg(int) flags; 469 syscallarg(void *) data; 470 syscallarg(size_t) data_len; 471 } */ 472 473 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 474 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 475 SCARG(uap, data_len), retval); 476 } 477 478 int 479 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 480 const char *path, int flags, void *data, enum uio_seg data_seg, 481 size_t data_len, register_t *retval) 482 { 483 struct vnode *vp; 484 struct nameidata nd; 485 void *data_buf = data; 486 u_int recurse; 487 int error; 488 489 /* 490 * Get vnode to be covered 491 */ 492 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 493 if ((error = namei(&nd)) != 0) 494 return (error); 495 vp = nd.ni_vp; 496 497 /* 498 * A lookup in VFS_MOUNT might result in an attempt to 499 * lock this vnode again, so make the lock recursive. 500 */ 501 if (vfsops == NULL) { 502 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 503 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 504 recurse = vn_setrecurse(vp); 505 vfsops = vp->v_mount->mnt_op; 506 } else { 507 /* 'type' is userspace */ 508 error = mount_get_vfsops(type, &vfsops); 509 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 510 recurse = vn_setrecurse(vp); 511 if (error != 0) 512 goto done; 513 } 514 } else { 515 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 516 recurse = vn_setrecurse(vp); 517 } 518 519 if (data != NULL && data_seg == UIO_USERSPACE) { 520 if (data_len == 0) { 521 /* No length supplied, use default for filesystem */ 522 data_len = vfsops->vfs_min_mount_data; 523 if (data_len > VFS_MAX_MOUNT_DATA) { 524 /* maybe a force loaded old LKM */ 525 error = EINVAL; 526 goto done; 527 } 528 #ifdef COMPAT_30 529 /* Hopefully a longer buffer won't make copyin() fail */ 530 if (flags & MNT_UPDATE 531 && data_len < sizeof (struct mnt_export_args30)) 532 data_len = sizeof (struct mnt_export_args30); 533 #endif 534 } 535 data_buf = malloc(data_len, M_TEMP, M_WAITOK); 536 537 /* NFS needs the buffer even for mnt_getargs .... */ 538 error = copyin(data, data_buf, data_len); 539 if (error != 0) 540 goto done; 541 } 542 543 if (flags & MNT_GETARGS) { 544 if (data_len == 0) { 545 error = EINVAL; 546 goto done; 547 } 548 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 549 if (error != 0) 550 goto done; 551 if (data_seg == UIO_USERSPACE) 552 error = copyout(data_buf, data, data_len); 553 *retval = data_len; 554 } else if (flags & MNT_UPDATE) { 555 error = mount_update(l, vp, path, flags, data_buf, &data_len); 556 } else { 557 /* Locking is handled internally in mount_domount(). */ 558 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 559 &data_len, recurse); 560 } 561 562 done: 563 if (vp != NULL) { 564 vn_restorerecurse(vp, recurse); 565 vput(vp); 566 } 567 if (data_buf != data) 568 free(data_buf, M_TEMP); 569 return (error); 570 } 571 572 /* 573 * Scan all active processes to see if any of them have a current 574 * or root directory onto which the new filesystem has just been 575 * mounted. If so, replace them with the new mount point. 576 */ 577 void 578 checkdirs(struct vnode *olddp) 579 { 580 struct cwdinfo *cwdi; 581 struct vnode *newdp, *rele1, *rele2; 582 struct proc *p; 583 bool retry; 584 585 if (olddp->v_usecount == 1) 586 return; 587 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 588 panic("mount: lost mount"); 589 590 do { 591 retry = false; 592 mutex_enter(proc_lock); 593 PROCLIST_FOREACH(p, &allproc) { 594 if ((p->p_flag & PK_MARKER) != 0) 595 continue; 596 if ((cwdi = p->p_cwdi) == NULL) 597 continue; 598 /* 599 * Can't change to the old directory any more, 600 * so even if we see a stale value it's not a 601 * problem. 602 */ 603 if (cwdi->cwdi_cdir != olddp && 604 cwdi->cwdi_rdir != olddp) 605 continue; 606 retry = true; 607 rele1 = NULL; 608 rele2 = NULL; 609 atomic_inc_uint(&cwdi->cwdi_refcnt); 610 mutex_exit(proc_lock); 611 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 612 if (cwdi->cwdi_cdir == olddp) { 613 rele1 = cwdi->cwdi_cdir; 614 VREF(newdp); 615 cwdi->cwdi_cdir = newdp; 616 } 617 if (cwdi->cwdi_rdir == olddp) { 618 rele2 = cwdi->cwdi_rdir; 619 VREF(newdp); 620 cwdi->cwdi_rdir = newdp; 621 } 622 rw_exit(&cwdi->cwdi_lock); 623 cwdfree(cwdi); 624 if (rele1 != NULL) 625 vrele(rele1); 626 if (rele2 != NULL) 627 vrele(rele2); 628 mutex_enter(proc_lock); 629 break; 630 } 631 mutex_exit(proc_lock); 632 } while (retry); 633 634 if (rootvnode == olddp) { 635 vrele(rootvnode); 636 VREF(newdp); 637 rootvnode = newdp; 638 } 639 vput(newdp); 640 } 641 642 /* 643 * Unmount a file system. 644 * 645 * Note: unmount takes a path to the vnode mounted on as argument, 646 * not special file (as before). 647 */ 648 /* ARGSUSED */ 649 int 650 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 651 { 652 /* { 653 syscallarg(const char *) path; 654 syscallarg(int) flags; 655 } */ 656 struct vnode *vp; 657 struct mount *mp; 658 int error; 659 struct nameidata nd; 660 661 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 662 SCARG(uap, path)); 663 if ((error = namei(&nd)) != 0) 664 return (error); 665 vp = nd.ni_vp; 666 mp = vp->v_mount; 667 atomic_inc_uint(&mp->mnt_refcnt); 668 VOP_UNLOCK(vp, 0); 669 670 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 671 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 672 if (error) { 673 vrele(vp); 674 vfs_destroy(mp); 675 return (error); 676 } 677 678 /* 679 * Don't allow unmounting the root file system. 680 */ 681 if (mp->mnt_flag & MNT_ROOTFS) { 682 vrele(vp); 683 vfs_destroy(mp); 684 return (EINVAL); 685 } 686 687 /* 688 * Must be the root of the filesystem 689 */ 690 if ((vp->v_vflag & VV_ROOT) == 0) { 691 vrele(vp); 692 vfs_destroy(mp); 693 return (EINVAL); 694 } 695 696 vrele(vp); 697 error = dounmount(mp, SCARG(uap, flags), l); 698 vfs_destroy(mp); 699 return error; 700 } 701 702 /* 703 * Do the actual file system unmount. File system is assumed to have 704 * been locked by the caller. 705 * 706 * => Caller hold reference to the mount, explicitly for dounmount(). 707 */ 708 int 709 dounmount(struct mount *mp, int flags, struct lwp *l) 710 { 711 struct vnode *coveredvp; 712 int error; 713 int async; 714 int used_syncer; 715 716 #if NVERIEXEC > 0 717 error = veriexec_unmountchk(mp); 718 if (error) 719 return (error); 720 #endif /* NVERIEXEC > 0 */ 721 722 /* 723 * XXX Freeze syncer. Must do this before locking the 724 * mount point. See dounmount() for details. 725 */ 726 mutex_enter(&syncer_mutex); 727 rw_enter(&mp->mnt_unmounting, RW_WRITER); 728 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 729 rw_exit(&mp->mnt_unmounting); 730 mutex_exit(&syncer_mutex); 731 return ENOENT; 732 } 733 734 used_syncer = (mp->mnt_syncer != NULL); 735 736 /* 737 * XXX Syncer must be frozen when we get here. This should really 738 * be done on a per-mountpoint basis, but especially the softdep 739 * code possibly called from the syncer doesn't exactly work on a 740 * per-mountpoint basis, so the softdep code would become a maze 741 * of vfs_busy() calls. 742 * 743 * The caller of dounmount() must acquire syncer_mutex because 744 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 745 * order, and we must preserve that order to avoid deadlock. 746 * 747 * So, if the file system did not use the syncer, now is 748 * the time to release the syncer_mutex. 749 */ 750 if (used_syncer == 0) 751 mutex_exit(&syncer_mutex); 752 753 mp->mnt_iflag |= IMNT_UNMOUNT; 754 async = mp->mnt_flag & MNT_ASYNC; 755 mp->mnt_flag &= ~MNT_ASYNC; 756 cache_purgevfs(mp); /* remove cache entries for this file sys */ 757 if (mp->mnt_syncer != NULL) 758 vfs_deallocate_syncvnode(mp); 759 error = 0; 760 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 761 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 762 } 763 vfs_scrubvnlist(mp); 764 if (error == 0 || (flags & MNT_FORCE)) 765 error = VFS_UNMOUNT(mp, flags); 766 if (error) { 767 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 768 (void) vfs_allocate_syncvnode(mp); 769 mp->mnt_iflag &= ~IMNT_UNMOUNT; 770 mp->mnt_flag |= async; 771 rw_exit(&mp->mnt_unmounting); 772 if (used_syncer) 773 mutex_exit(&syncer_mutex); 774 return (error); 775 } 776 vfs_scrubvnlist(mp); 777 mutex_enter(&mountlist_lock); 778 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 779 coveredvp->v_mountedhere = NULL; 780 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 781 mp->mnt_iflag |= IMNT_GONE; 782 mutex_exit(&mountlist_lock); 783 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 784 panic("unmount: dangling vnode"); 785 if (used_syncer) 786 mutex_exit(&syncer_mutex); 787 vfs_hooks_unmount(mp); 788 rw_exit(&mp->mnt_unmounting); 789 vfs_destroy(mp); /* reference from mount() */ 790 if (coveredvp != NULLVP) 791 vrele(coveredvp); 792 return (0); 793 } 794 795 /* 796 * Sync each mounted filesystem. 797 */ 798 #ifdef DEBUG 799 int syncprt = 0; 800 struct ctldebug debug0 = { "syncprt", &syncprt }; 801 #endif 802 803 /* ARGSUSED */ 804 int 805 sys_sync(struct lwp *l, const void *v, register_t *retval) 806 { 807 struct mount *mp, *nmp; 808 int asyncflag; 809 810 if (l == NULL) 811 l = &lwp0; 812 813 mutex_enter(&mountlist_lock); 814 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 815 mp = nmp) { 816 if (vfs_busy(mp, &nmp)) { 817 continue; 818 } 819 mutex_enter(&mp->mnt_updating); 820 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 821 asyncflag = mp->mnt_flag & MNT_ASYNC; 822 mp->mnt_flag &= ~MNT_ASYNC; 823 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 824 if (asyncflag) 825 mp->mnt_flag |= MNT_ASYNC; 826 } 827 mutex_exit(&mp->mnt_updating); 828 vfs_unbusy(mp, false, &nmp); 829 } 830 mutex_exit(&mountlist_lock); 831 #ifdef DEBUG 832 if (syncprt) 833 vfs_bufstats(); 834 #endif /* DEBUG */ 835 return (0); 836 } 837 838 /* 839 * Change filesystem quotas. 840 */ 841 /* ARGSUSED */ 842 int 843 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 844 { 845 /* { 846 syscallarg(const char *) path; 847 syscallarg(int) cmd; 848 syscallarg(int) uid; 849 syscallarg(void *) arg; 850 } */ 851 struct mount *mp; 852 int error; 853 struct nameidata nd; 854 855 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 856 SCARG(uap, path)); 857 if ((error = namei(&nd)) != 0) 858 return (error); 859 mp = nd.ni_vp->v_mount; 860 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 861 SCARG(uap, arg)); 862 vrele(nd.ni_vp); 863 return (error); 864 } 865 866 int 867 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 868 int root) 869 { 870 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 871 int error = 0; 872 873 /* 874 * If MNT_NOWAIT or MNT_LAZY is specified, do not 875 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 876 * overrides MNT_NOWAIT. 877 */ 878 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 879 (flags != MNT_WAIT && flags != 0)) { 880 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 881 goto done; 882 } 883 884 /* Get the filesystem stats now */ 885 memset(sp, 0, sizeof(*sp)); 886 if ((error = VFS_STATVFS(mp, sp)) != 0) { 887 return error; 888 } 889 890 if (cwdi->cwdi_rdir == NULL) 891 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 892 done: 893 if (cwdi->cwdi_rdir != NULL) { 894 size_t len; 895 char *bp; 896 char c; 897 char *path = PNBUF_GET(); 898 899 bp = path + MAXPATHLEN; 900 *--bp = '\0'; 901 rw_enter(&cwdi->cwdi_lock, RW_READER); 902 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 903 MAXPATHLEN / 2, 0, l); 904 rw_exit(&cwdi->cwdi_lock); 905 if (error) { 906 PNBUF_PUT(path); 907 return error; 908 } 909 len = strlen(bp); 910 /* 911 * for mount points that are below our root, we can see 912 * them, so we fix up the pathname and return them. The 913 * rest we cannot see, so we don't allow viewing the 914 * data. 915 */ 916 if (strncmp(bp, sp->f_mntonname, len) == 0 && 917 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 918 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 919 sizeof(sp->f_mntonname)); 920 if (sp->f_mntonname[0] == '\0') 921 (void)strlcpy(sp->f_mntonname, "/", 922 sizeof(sp->f_mntonname)); 923 } else { 924 if (root) 925 (void)strlcpy(sp->f_mntonname, "/", 926 sizeof(sp->f_mntonname)); 927 else 928 error = EPERM; 929 } 930 PNBUF_PUT(path); 931 } 932 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 933 return error; 934 } 935 936 /* 937 * Get filesystem statistics by path. 938 */ 939 int 940 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 941 { 942 struct mount *mp; 943 int error; 944 struct nameidata nd; 945 946 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path); 947 if ((error = namei(&nd)) != 0) 948 return error; 949 mp = nd.ni_vp->v_mount; 950 error = dostatvfs(mp, sb, l, flags, 1); 951 vrele(nd.ni_vp); 952 return error; 953 } 954 955 /* ARGSUSED */ 956 int 957 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 958 { 959 /* { 960 syscallarg(const char *) path; 961 syscallarg(struct statvfs *) buf; 962 syscallarg(int) flags; 963 } */ 964 struct statvfs *sb; 965 int error; 966 967 sb = STATVFSBUF_GET(); 968 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 969 if (error == 0) 970 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 971 STATVFSBUF_PUT(sb); 972 return error; 973 } 974 975 /* 976 * Get filesystem statistics by fd. 977 */ 978 int 979 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 980 { 981 file_t *fp; 982 struct mount *mp; 983 int error; 984 985 /* fd_getvnode() will use the descriptor for us */ 986 if ((error = fd_getvnode(fd, &fp)) != 0) 987 return (error); 988 mp = ((struct vnode *)fp->f_data)->v_mount; 989 error = dostatvfs(mp, sb, curlwp, flags, 1); 990 fd_putfile(fd); 991 return error; 992 } 993 994 /* ARGSUSED */ 995 int 996 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 997 { 998 /* { 999 syscallarg(int) fd; 1000 syscallarg(struct statvfs *) buf; 1001 syscallarg(int) flags; 1002 } */ 1003 struct statvfs *sb; 1004 int error; 1005 1006 sb = STATVFSBUF_GET(); 1007 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1008 if (error == 0) 1009 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1010 STATVFSBUF_PUT(sb); 1011 return error; 1012 } 1013 1014 1015 /* 1016 * Get statistics on all filesystems. 1017 */ 1018 int 1019 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1020 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1021 register_t *retval) 1022 { 1023 int root = 0; 1024 struct proc *p = l->l_proc; 1025 struct mount *mp, *nmp; 1026 struct statvfs *sb; 1027 size_t count, maxcount; 1028 int error = 0; 1029 1030 sb = STATVFSBUF_GET(); 1031 maxcount = bufsize / entry_sz; 1032 mutex_enter(&mountlist_lock); 1033 count = 0; 1034 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1035 mp = nmp) { 1036 if (vfs_busy(mp, &nmp)) { 1037 continue; 1038 } 1039 if (sfsp && count < maxcount) { 1040 error = dostatvfs(mp, sb, l, flags, 0); 1041 if (error) { 1042 vfs_unbusy(mp, false, &nmp); 1043 error = 0; 1044 continue; 1045 } 1046 error = copyfn(sb, sfsp, entry_sz); 1047 if (error) { 1048 vfs_unbusy(mp, false, NULL); 1049 goto out; 1050 } 1051 sfsp = (char *)sfsp + entry_sz; 1052 root |= strcmp(sb->f_mntonname, "/") == 0; 1053 } 1054 count++; 1055 vfs_unbusy(mp, false, &nmp); 1056 } 1057 mutex_exit(&mountlist_lock); 1058 1059 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1060 /* 1061 * fake a root entry 1062 */ 1063 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1064 sb, l, flags, 1); 1065 if (error != 0) 1066 goto out; 1067 if (sfsp) { 1068 error = copyfn(sb, sfsp, entry_sz); 1069 if (error != 0) 1070 goto out; 1071 } 1072 count++; 1073 } 1074 if (sfsp && count > maxcount) 1075 *retval = maxcount; 1076 else 1077 *retval = count; 1078 out: 1079 STATVFSBUF_PUT(sb); 1080 return error; 1081 } 1082 1083 int 1084 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1085 { 1086 /* { 1087 syscallarg(struct statvfs *) buf; 1088 syscallarg(size_t) bufsize; 1089 syscallarg(int) flags; 1090 } */ 1091 1092 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1093 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1094 } 1095 1096 /* 1097 * Change current working directory to a given file descriptor. 1098 */ 1099 /* ARGSUSED */ 1100 int 1101 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1102 { 1103 /* { 1104 syscallarg(int) fd; 1105 } */ 1106 struct proc *p = l->l_proc; 1107 struct cwdinfo *cwdi; 1108 struct vnode *vp, *tdp; 1109 struct mount *mp; 1110 file_t *fp; 1111 int error, fd; 1112 1113 /* fd_getvnode() will use the descriptor for us */ 1114 fd = SCARG(uap, fd); 1115 if ((error = fd_getvnode(fd, &fp)) != 0) 1116 return (error); 1117 vp = fp->f_data; 1118 1119 VREF(vp); 1120 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1121 if (vp->v_type != VDIR) 1122 error = ENOTDIR; 1123 else 1124 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1125 if (error) { 1126 vput(vp); 1127 goto out; 1128 } 1129 while ((mp = vp->v_mountedhere) != NULL) { 1130 error = vfs_busy(mp, NULL); 1131 vput(vp); 1132 if (error != 0) 1133 goto out; 1134 error = VFS_ROOT(mp, &tdp); 1135 vfs_unbusy(mp, false, NULL); 1136 if (error) 1137 goto out; 1138 vp = tdp; 1139 } 1140 VOP_UNLOCK(vp, 0); 1141 1142 /* 1143 * Disallow changing to a directory not under the process's 1144 * current root directory (if there is one). 1145 */ 1146 cwdi = p->p_cwdi; 1147 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1148 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1149 vrele(vp); 1150 error = EPERM; /* operation not permitted */ 1151 } else { 1152 vrele(cwdi->cwdi_cdir); 1153 cwdi->cwdi_cdir = vp; 1154 } 1155 rw_exit(&cwdi->cwdi_lock); 1156 1157 out: 1158 fd_putfile(fd); 1159 return (error); 1160 } 1161 1162 /* 1163 * Change this process's notion of the root directory to a given file 1164 * descriptor. 1165 */ 1166 int 1167 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1168 { 1169 struct proc *p = l->l_proc; 1170 struct cwdinfo *cwdi; 1171 struct vnode *vp; 1172 file_t *fp; 1173 int error, fd = SCARG(uap, fd); 1174 1175 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1176 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1177 return error; 1178 /* fd_getvnode() will use the descriptor for us */ 1179 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1180 return error; 1181 vp = fp->f_data; 1182 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1183 if (vp->v_type != VDIR) 1184 error = ENOTDIR; 1185 else 1186 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1187 VOP_UNLOCK(vp, 0); 1188 if (error) 1189 goto out; 1190 VREF(vp); 1191 1192 /* 1193 * Prevent escaping from chroot by putting the root under 1194 * the working directory. Silently chdir to / if we aren't 1195 * already there. 1196 */ 1197 cwdi = p->p_cwdi; 1198 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1199 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1200 /* 1201 * XXX would be more failsafe to change directory to a 1202 * deadfs node here instead 1203 */ 1204 vrele(cwdi->cwdi_cdir); 1205 VREF(vp); 1206 cwdi->cwdi_cdir = vp; 1207 } 1208 1209 if (cwdi->cwdi_rdir != NULL) 1210 vrele(cwdi->cwdi_rdir); 1211 cwdi->cwdi_rdir = vp; 1212 rw_exit(&cwdi->cwdi_lock); 1213 1214 out: 1215 fd_putfile(fd); 1216 return (error); 1217 } 1218 1219 /* 1220 * Change current working directory (``.''). 1221 */ 1222 /* ARGSUSED */ 1223 int 1224 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1225 { 1226 /* { 1227 syscallarg(const char *) path; 1228 } */ 1229 struct proc *p = l->l_proc; 1230 struct cwdinfo *cwdi; 1231 int error; 1232 struct nameidata nd; 1233 1234 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1235 SCARG(uap, path)); 1236 if ((error = change_dir(&nd, l)) != 0) 1237 return (error); 1238 cwdi = p->p_cwdi; 1239 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1240 vrele(cwdi->cwdi_cdir); 1241 cwdi->cwdi_cdir = nd.ni_vp; 1242 rw_exit(&cwdi->cwdi_lock); 1243 return (0); 1244 } 1245 1246 /* 1247 * Change notion of root (``/'') directory. 1248 */ 1249 /* ARGSUSED */ 1250 int 1251 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1252 { 1253 /* { 1254 syscallarg(const char *) path; 1255 } */ 1256 struct proc *p = l->l_proc; 1257 struct cwdinfo *cwdi; 1258 struct vnode *vp; 1259 int error; 1260 struct nameidata nd; 1261 1262 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1263 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1264 return (error); 1265 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1266 SCARG(uap, path)); 1267 if ((error = change_dir(&nd, l)) != 0) 1268 return (error); 1269 1270 cwdi = p->p_cwdi; 1271 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1272 if (cwdi->cwdi_rdir != NULL) 1273 vrele(cwdi->cwdi_rdir); 1274 vp = nd.ni_vp; 1275 cwdi->cwdi_rdir = vp; 1276 1277 /* 1278 * Prevent escaping from chroot by putting the root under 1279 * the working directory. Silently chdir to / if we aren't 1280 * already there. 1281 */ 1282 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1283 /* 1284 * XXX would be more failsafe to change directory to a 1285 * deadfs node here instead 1286 */ 1287 vrele(cwdi->cwdi_cdir); 1288 VREF(vp); 1289 cwdi->cwdi_cdir = vp; 1290 } 1291 rw_exit(&cwdi->cwdi_lock); 1292 1293 return (0); 1294 } 1295 1296 /* 1297 * Common routine for chroot and chdir. 1298 */ 1299 static int 1300 change_dir(struct nameidata *ndp, struct lwp *l) 1301 { 1302 struct vnode *vp; 1303 int error; 1304 1305 if ((error = namei(ndp)) != 0) 1306 return (error); 1307 vp = ndp->ni_vp; 1308 if (vp->v_type != VDIR) 1309 error = ENOTDIR; 1310 else 1311 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1312 1313 if (error) 1314 vput(vp); 1315 else 1316 VOP_UNLOCK(vp, 0); 1317 return (error); 1318 } 1319 1320 /* 1321 * Check permissions, allocate an open file structure, 1322 * and call the device open routine if any. 1323 */ 1324 int 1325 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1326 { 1327 /* { 1328 syscallarg(const char *) path; 1329 syscallarg(int) flags; 1330 syscallarg(int) mode; 1331 } */ 1332 struct proc *p = l->l_proc; 1333 struct cwdinfo *cwdi = p->p_cwdi; 1334 file_t *fp; 1335 struct vnode *vp; 1336 int flags, cmode; 1337 int type, indx, error; 1338 struct flock lf; 1339 struct nameidata nd; 1340 1341 flags = FFLAGS(SCARG(uap, flags)); 1342 if ((flags & (FREAD | FWRITE)) == 0) 1343 return (EINVAL); 1344 if ((error = fd_allocfile(&fp, &indx)) != 0) 1345 return (error); 1346 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1347 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1348 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1349 SCARG(uap, path)); 1350 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1351 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1352 fd_abort(p, fp, indx); 1353 if ((error == EDUPFD || error == EMOVEFD) && 1354 l->l_dupfd >= 0 && /* XXX from fdopen */ 1355 (error = 1356 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1357 *retval = indx; 1358 return (0); 1359 } 1360 if (error == ERESTART) 1361 error = EINTR; 1362 return (error); 1363 } 1364 1365 l->l_dupfd = 0; 1366 vp = nd.ni_vp; 1367 fp->f_flag = flags & FMASK; 1368 fp->f_type = DTYPE_VNODE; 1369 fp->f_ops = &vnops; 1370 fp->f_data = vp; 1371 if (flags & (O_EXLOCK | O_SHLOCK)) { 1372 lf.l_whence = SEEK_SET; 1373 lf.l_start = 0; 1374 lf.l_len = 0; 1375 if (flags & O_EXLOCK) 1376 lf.l_type = F_WRLCK; 1377 else 1378 lf.l_type = F_RDLCK; 1379 type = F_FLOCK; 1380 if ((flags & FNONBLOCK) == 0) 1381 type |= F_WAIT; 1382 VOP_UNLOCK(vp, 0); 1383 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1384 if (error) { 1385 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1386 fd_abort(p, fp, indx); 1387 return (error); 1388 } 1389 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1390 atomic_or_uint(&fp->f_flag, FHASLOCK); 1391 } 1392 VOP_UNLOCK(vp, 0); 1393 *retval = indx; 1394 fd_affix(p, fp, indx); 1395 return (0); 1396 } 1397 1398 static void 1399 vfs__fhfree(fhandle_t *fhp) 1400 { 1401 size_t fhsize; 1402 1403 if (fhp == NULL) { 1404 return; 1405 } 1406 fhsize = FHANDLE_SIZE(fhp); 1407 kmem_free(fhp, fhsize); 1408 } 1409 1410 /* 1411 * vfs_composefh: compose a filehandle. 1412 */ 1413 1414 int 1415 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1416 { 1417 struct mount *mp; 1418 struct fid *fidp; 1419 int error; 1420 size_t needfhsize; 1421 size_t fidsize; 1422 1423 mp = vp->v_mount; 1424 fidp = NULL; 1425 if (*fh_size < FHANDLE_SIZE_MIN) { 1426 fidsize = 0; 1427 } else { 1428 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1429 if (fhp != NULL) { 1430 memset(fhp, 0, *fh_size); 1431 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1432 fidp = &fhp->fh_fid; 1433 } 1434 } 1435 error = VFS_VPTOFH(vp, fidp, &fidsize); 1436 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1437 if (error == 0 && *fh_size < needfhsize) { 1438 error = E2BIG; 1439 } 1440 *fh_size = needfhsize; 1441 return error; 1442 } 1443 1444 int 1445 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1446 { 1447 struct mount *mp; 1448 fhandle_t *fhp; 1449 size_t fhsize; 1450 size_t fidsize; 1451 int error; 1452 1453 *fhpp = NULL; 1454 mp = vp->v_mount; 1455 fidsize = 0; 1456 error = VFS_VPTOFH(vp, NULL, &fidsize); 1457 KASSERT(error != 0); 1458 if (error != E2BIG) { 1459 goto out; 1460 } 1461 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1462 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1463 if (fhp == NULL) { 1464 error = ENOMEM; 1465 goto out; 1466 } 1467 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1468 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1469 if (error == 0) { 1470 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1471 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1472 *fhpp = fhp; 1473 } else { 1474 kmem_free(fhp, fhsize); 1475 } 1476 out: 1477 return error; 1478 } 1479 1480 void 1481 vfs_composefh_free(fhandle_t *fhp) 1482 { 1483 1484 vfs__fhfree(fhp); 1485 } 1486 1487 /* 1488 * vfs_fhtovp: lookup a vnode by a filehandle. 1489 */ 1490 1491 int 1492 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1493 { 1494 struct mount *mp; 1495 int error; 1496 1497 *vpp = NULL; 1498 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1499 if (mp == NULL) { 1500 error = ESTALE; 1501 goto out; 1502 } 1503 if (mp->mnt_op->vfs_fhtovp == NULL) { 1504 error = EOPNOTSUPP; 1505 goto out; 1506 } 1507 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1508 out: 1509 return error; 1510 } 1511 1512 /* 1513 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1514 * the needed size. 1515 */ 1516 1517 int 1518 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1519 { 1520 fhandle_t *fhp; 1521 int error; 1522 1523 *fhpp = NULL; 1524 if (fhsize > FHANDLE_SIZE_MAX) { 1525 return EINVAL; 1526 } 1527 if (fhsize < FHANDLE_SIZE_MIN) { 1528 return EINVAL; 1529 } 1530 again: 1531 fhp = kmem_alloc(fhsize, KM_SLEEP); 1532 if (fhp == NULL) { 1533 return ENOMEM; 1534 } 1535 error = copyin(ufhp, fhp, fhsize); 1536 if (error == 0) { 1537 /* XXX this check shouldn't be here */ 1538 if (FHANDLE_SIZE(fhp) == fhsize) { 1539 *fhpp = fhp; 1540 return 0; 1541 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1542 /* 1543 * a kludge for nfsv2 padded handles. 1544 */ 1545 size_t sz; 1546 1547 sz = FHANDLE_SIZE(fhp); 1548 kmem_free(fhp, fhsize); 1549 fhsize = sz; 1550 goto again; 1551 } else { 1552 /* 1553 * userland told us wrong size. 1554 */ 1555 error = EINVAL; 1556 } 1557 } 1558 kmem_free(fhp, fhsize); 1559 return error; 1560 } 1561 1562 void 1563 vfs_copyinfh_free(fhandle_t *fhp) 1564 { 1565 1566 vfs__fhfree(fhp); 1567 } 1568 1569 /* 1570 * Get file handle system call 1571 */ 1572 int 1573 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1574 { 1575 /* { 1576 syscallarg(char *) fname; 1577 syscallarg(fhandle_t *) fhp; 1578 syscallarg(size_t *) fh_size; 1579 } */ 1580 struct vnode *vp; 1581 fhandle_t *fh; 1582 int error; 1583 struct nameidata nd; 1584 size_t sz; 1585 size_t usz; 1586 1587 /* 1588 * Must be super user 1589 */ 1590 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1591 0, NULL, NULL, NULL); 1592 if (error) 1593 return (error); 1594 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1595 SCARG(uap, fname)); 1596 error = namei(&nd); 1597 if (error) 1598 return (error); 1599 vp = nd.ni_vp; 1600 error = vfs_composefh_alloc(vp, &fh); 1601 vput(vp); 1602 if (error != 0) { 1603 goto out; 1604 } 1605 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1606 if (error != 0) { 1607 goto out; 1608 } 1609 sz = FHANDLE_SIZE(fh); 1610 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1611 if (error != 0) { 1612 goto out; 1613 } 1614 if (usz >= sz) { 1615 error = copyout(fh, SCARG(uap, fhp), sz); 1616 } else { 1617 error = E2BIG; 1618 } 1619 out: 1620 vfs_composefh_free(fh); 1621 return (error); 1622 } 1623 1624 /* 1625 * Open a file given a file handle. 1626 * 1627 * Check permissions, allocate an open file structure, 1628 * and call the device open routine if any. 1629 */ 1630 1631 int 1632 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1633 register_t *retval) 1634 { 1635 file_t *fp; 1636 struct vnode *vp = NULL; 1637 kauth_cred_t cred = l->l_cred; 1638 file_t *nfp; 1639 int type, indx, error=0; 1640 struct flock lf; 1641 struct vattr va; 1642 fhandle_t *fh; 1643 int flags; 1644 proc_t *p; 1645 1646 p = curproc; 1647 1648 /* 1649 * Must be super user 1650 */ 1651 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1652 0, NULL, NULL, NULL))) 1653 return (error); 1654 1655 flags = FFLAGS(oflags); 1656 if ((flags & (FREAD | FWRITE)) == 0) 1657 return (EINVAL); 1658 if ((flags & O_CREAT)) 1659 return (EINVAL); 1660 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1661 return (error); 1662 fp = nfp; 1663 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1664 if (error != 0) { 1665 goto bad; 1666 } 1667 error = vfs_fhtovp(fh, &vp); 1668 if (error != 0) { 1669 goto bad; 1670 } 1671 1672 /* Now do an effective vn_open */ 1673 1674 if (vp->v_type == VSOCK) { 1675 error = EOPNOTSUPP; 1676 goto bad; 1677 } 1678 error = vn_openchk(vp, cred, flags); 1679 if (error != 0) 1680 goto bad; 1681 if (flags & O_TRUNC) { 1682 VOP_UNLOCK(vp, 0); /* XXX */ 1683 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1684 VATTR_NULL(&va); 1685 va.va_size = 0; 1686 error = VOP_SETATTR(vp, &va, cred); 1687 if (error) 1688 goto bad; 1689 } 1690 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1691 goto bad; 1692 if (flags & FWRITE) { 1693 mutex_enter(&vp->v_interlock); 1694 vp->v_writecount++; 1695 mutex_exit(&vp->v_interlock); 1696 } 1697 1698 /* done with modified vn_open, now finish what sys_open does. */ 1699 1700 fp->f_flag = flags & FMASK; 1701 fp->f_type = DTYPE_VNODE; 1702 fp->f_ops = &vnops; 1703 fp->f_data = vp; 1704 if (flags & (O_EXLOCK | O_SHLOCK)) { 1705 lf.l_whence = SEEK_SET; 1706 lf.l_start = 0; 1707 lf.l_len = 0; 1708 if (flags & O_EXLOCK) 1709 lf.l_type = F_WRLCK; 1710 else 1711 lf.l_type = F_RDLCK; 1712 type = F_FLOCK; 1713 if ((flags & FNONBLOCK) == 0) 1714 type |= F_WAIT; 1715 VOP_UNLOCK(vp, 0); 1716 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1717 if (error) { 1718 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1719 fd_abort(p, fp, indx); 1720 return (error); 1721 } 1722 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1723 atomic_or_uint(&fp->f_flag, FHASLOCK); 1724 } 1725 VOP_UNLOCK(vp, 0); 1726 *retval = indx; 1727 fd_affix(p, fp, indx); 1728 vfs_copyinfh_free(fh); 1729 return (0); 1730 1731 bad: 1732 fd_abort(p, fp, indx); 1733 if (vp != NULL) 1734 vput(vp); 1735 vfs_copyinfh_free(fh); 1736 return (error); 1737 } 1738 1739 int 1740 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1741 { 1742 /* { 1743 syscallarg(const void *) fhp; 1744 syscallarg(size_t) fh_size; 1745 syscallarg(int) flags; 1746 } */ 1747 1748 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1749 SCARG(uap, flags), retval); 1750 } 1751 1752 int 1753 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1754 { 1755 int error; 1756 fhandle_t *fh; 1757 struct vnode *vp; 1758 1759 /* 1760 * Must be super user 1761 */ 1762 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1763 0, NULL, NULL, NULL))) 1764 return (error); 1765 1766 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1767 if (error != 0) 1768 return error; 1769 1770 error = vfs_fhtovp(fh, &vp); 1771 vfs_copyinfh_free(fh); 1772 if (error != 0) 1773 return error; 1774 1775 error = vn_stat(vp, sb); 1776 vput(vp); 1777 return error; 1778 } 1779 1780 1781 /* ARGSUSED */ 1782 int 1783 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval) 1784 { 1785 /* { 1786 syscallarg(const void *) fhp; 1787 syscallarg(size_t) fh_size; 1788 syscallarg(struct stat *) sb; 1789 } */ 1790 struct stat sb; 1791 int error; 1792 1793 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1794 if (error) 1795 return error; 1796 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1797 } 1798 1799 int 1800 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1801 int flags) 1802 { 1803 fhandle_t *fh; 1804 struct mount *mp; 1805 struct vnode *vp; 1806 int error; 1807 1808 /* 1809 * Must be super user 1810 */ 1811 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1812 0, NULL, NULL, NULL))) 1813 return error; 1814 1815 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1816 if (error != 0) 1817 return error; 1818 1819 error = vfs_fhtovp(fh, &vp); 1820 vfs_copyinfh_free(fh); 1821 if (error != 0) 1822 return error; 1823 1824 mp = vp->v_mount; 1825 error = dostatvfs(mp, sb, l, flags, 1); 1826 vput(vp); 1827 return error; 1828 } 1829 1830 /* ARGSUSED */ 1831 int 1832 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1833 { 1834 /* { 1835 syscallarg(const void *) fhp; 1836 syscallarg(size_t) fh_size; 1837 syscallarg(struct statvfs *) buf; 1838 syscallarg(int) flags; 1839 } */ 1840 struct statvfs *sb = STATVFSBUF_GET(); 1841 int error; 1842 1843 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1844 SCARG(uap, flags)); 1845 if (error == 0) 1846 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1847 STATVFSBUF_PUT(sb); 1848 return error; 1849 } 1850 1851 /* 1852 * Create a special file. 1853 */ 1854 /* ARGSUSED */ 1855 int 1856 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval) 1857 { 1858 /* { 1859 syscallarg(const char *) path; 1860 syscallarg(int) mode; 1861 syscallarg(int) dev; 1862 } */ 1863 struct proc *p = l->l_proc; 1864 struct vnode *vp; 1865 struct vattr vattr; 1866 int error, optype; 1867 struct nameidata nd; 1868 char *path; 1869 const char *cpath; 1870 enum uio_seg seg = UIO_USERSPACE; 1871 1872 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1873 0, NULL, NULL, NULL)) != 0) 1874 return (error); 1875 1876 optype = VOP_MKNOD_DESCOFFSET; 1877 1878 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path); 1879 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1880 1881 if ((error = namei(&nd)) != 0) 1882 goto out; 1883 vp = nd.ni_vp; 1884 if (vp != NULL) 1885 error = EEXIST; 1886 else { 1887 VATTR_NULL(&vattr); 1888 /* We will read cwdi->cwdi_cmask unlocked. */ 1889 vattr.va_mode = 1890 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1891 vattr.va_rdev = SCARG(uap, dev); 1892 1893 switch (SCARG(uap, mode) & S_IFMT) { 1894 case S_IFMT: /* used by badsect to flag bad sectors */ 1895 vattr.va_type = VBAD; 1896 break; 1897 case S_IFCHR: 1898 vattr.va_type = VCHR; 1899 break; 1900 case S_IFBLK: 1901 vattr.va_type = VBLK; 1902 break; 1903 case S_IFWHT: 1904 optype = VOP_WHITEOUT_DESCOFFSET; 1905 break; 1906 case S_IFREG: 1907 #if NVERIEXEC > 0 1908 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1909 O_CREAT); 1910 #endif /* NVERIEXEC > 0 */ 1911 vattr.va_type = VREG; 1912 vattr.va_rdev = VNOVAL; 1913 optype = VOP_CREATE_DESCOFFSET; 1914 break; 1915 default: 1916 error = EINVAL; 1917 break; 1918 } 1919 } 1920 if (!error) { 1921 switch (optype) { 1922 case VOP_WHITEOUT_DESCOFFSET: 1923 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1924 if (error) 1925 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1926 vput(nd.ni_dvp); 1927 break; 1928 1929 case VOP_MKNOD_DESCOFFSET: 1930 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1931 &nd.ni_cnd, &vattr); 1932 if (error == 0) 1933 vput(nd.ni_vp); 1934 break; 1935 1936 case VOP_CREATE_DESCOFFSET: 1937 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1938 &nd.ni_cnd, &vattr); 1939 if (error == 0) 1940 vput(nd.ni_vp); 1941 break; 1942 } 1943 } else { 1944 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1945 if (nd.ni_dvp == vp) 1946 vrele(nd.ni_dvp); 1947 else 1948 vput(nd.ni_dvp); 1949 if (vp) 1950 vrele(vp); 1951 } 1952 out: 1953 VERIEXEC_PATH_PUT(path); 1954 return (error); 1955 } 1956 1957 /* 1958 * Create a named pipe. 1959 */ 1960 /* ARGSUSED */ 1961 int 1962 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1963 { 1964 /* { 1965 syscallarg(const char *) path; 1966 syscallarg(int) mode; 1967 } */ 1968 struct proc *p = l->l_proc; 1969 struct vattr vattr; 1970 int error; 1971 struct nameidata nd; 1972 1973 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1974 SCARG(uap, path)); 1975 if ((error = namei(&nd)) != 0) 1976 return (error); 1977 if (nd.ni_vp != NULL) { 1978 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1979 if (nd.ni_dvp == nd.ni_vp) 1980 vrele(nd.ni_dvp); 1981 else 1982 vput(nd.ni_dvp); 1983 vrele(nd.ni_vp); 1984 return (EEXIST); 1985 } 1986 VATTR_NULL(&vattr); 1987 vattr.va_type = VFIFO; 1988 /* We will read cwdi->cwdi_cmask unlocked. */ 1989 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1990 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1991 if (error == 0) 1992 vput(nd.ni_vp); 1993 return (error); 1994 } 1995 1996 /* 1997 * Make a hard file link. 1998 */ 1999 /* ARGSUSED */ 2000 int 2001 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2002 { 2003 /* { 2004 syscallarg(const char *) path; 2005 syscallarg(const char *) link; 2006 } */ 2007 struct vnode *vp; 2008 struct nameidata nd; 2009 int error; 2010 2011 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2012 SCARG(uap, path)); 2013 if ((error = namei(&nd)) != 0) 2014 return (error); 2015 vp = nd.ni_vp; 2016 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2017 SCARG(uap, link)); 2018 if ((error = namei(&nd)) != 0) 2019 goto out; 2020 if (nd.ni_vp) { 2021 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2022 if (nd.ni_dvp == nd.ni_vp) 2023 vrele(nd.ni_dvp); 2024 else 2025 vput(nd.ni_dvp); 2026 vrele(nd.ni_vp); 2027 error = EEXIST; 2028 goto out; 2029 } 2030 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2031 out: 2032 vrele(vp); 2033 return (error); 2034 } 2035 2036 /* 2037 * Make a symbolic link. 2038 */ 2039 /* ARGSUSED */ 2040 int 2041 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2042 { 2043 /* { 2044 syscallarg(const char *) path; 2045 syscallarg(const char *) link; 2046 } */ 2047 struct proc *p = l->l_proc; 2048 struct vattr vattr; 2049 char *path; 2050 int error; 2051 struct nameidata nd; 2052 2053 path = PNBUF_GET(); 2054 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2055 if (error) 2056 goto out; 2057 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2058 SCARG(uap, link)); 2059 if ((error = namei(&nd)) != 0) 2060 goto out; 2061 if (nd.ni_vp) { 2062 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2063 if (nd.ni_dvp == nd.ni_vp) 2064 vrele(nd.ni_dvp); 2065 else 2066 vput(nd.ni_dvp); 2067 vrele(nd.ni_vp); 2068 error = EEXIST; 2069 goto out; 2070 } 2071 VATTR_NULL(&vattr); 2072 vattr.va_type = VLNK; 2073 /* We will read cwdi->cwdi_cmask unlocked. */ 2074 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2075 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2076 if (error == 0) 2077 vput(nd.ni_vp); 2078 out: 2079 PNBUF_PUT(path); 2080 return (error); 2081 } 2082 2083 /* 2084 * Delete a whiteout from the filesystem. 2085 */ 2086 /* ARGSUSED */ 2087 int 2088 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2089 { 2090 /* { 2091 syscallarg(const char *) path; 2092 } */ 2093 int error; 2094 struct nameidata nd; 2095 2096 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2097 UIO_USERSPACE, SCARG(uap, path)); 2098 error = namei(&nd); 2099 if (error) 2100 return (error); 2101 2102 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2103 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2104 if (nd.ni_dvp == nd.ni_vp) 2105 vrele(nd.ni_dvp); 2106 else 2107 vput(nd.ni_dvp); 2108 if (nd.ni_vp) 2109 vrele(nd.ni_vp); 2110 return (EEXIST); 2111 } 2112 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2114 vput(nd.ni_dvp); 2115 return (error); 2116 } 2117 2118 /* 2119 * Delete a name from the filesystem. 2120 */ 2121 /* ARGSUSED */ 2122 int 2123 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2124 { 2125 /* { 2126 syscallarg(const char *) path; 2127 } */ 2128 2129 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2130 } 2131 2132 int 2133 do_sys_unlink(const char *arg, enum uio_seg seg) 2134 { 2135 struct vnode *vp; 2136 int error; 2137 struct nameidata nd; 2138 kauth_cred_t cred; 2139 char *path; 2140 const char *cpath; 2141 2142 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2143 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2144 2145 if ((error = namei(&nd)) != 0) 2146 goto out; 2147 vp = nd.ni_vp; 2148 2149 /* 2150 * The root of a mounted filesystem cannot be deleted. 2151 */ 2152 if (vp->v_vflag & VV_ROOT) { 2153 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2154 if (nd.ni_dvp == vp) 2155 vrele(nd.ni_dvp); 2156 else 2157 vput(nd.ni_dvp); 2158 vput(vp); 2159 error = EBUSY; 2160 goto out; 2161 } 2162 2163 #if NVERIEXEC > 0 2164 /* Handle remove requests for veriexec entries. */ 2165 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2166 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2167 if (nd.ni_dvp == vp) 2168 vrele(nd.ni_dvp); 2169 else 2170 vput(nd.ni_dvp); 2171 vput(vp); 2172 goto out; 2173 } 2174 #endif /* NVERIEXEC > 0 */ 2175 2176 cred = kauth_cred_get(); 2177 #ifdef FILEASSOC 2178 (void)fileassoc_file_delete(vp); 2179 #endif /* FILEASSOC */ 2180 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2181 out: 2182 VERIEXEC_PATH_PUT(path); 2183 return (error); 2184 } 2185 2186 /* 2187 * Reposition read/write file offset. 2188 */ 2189 int 2190 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2191 { 2192 /* { 2193 syscallarg(int) fd; 2194 syscallarg(int) pad; 2195 syscallarg(off_t) offset; 2196 syscallarg(int) whence; 2197 } */ 2198 kauth_cred_t cred = l->l_cred; 2199 file_t *fp; 2200 struct vnode *vp; 2201 struct vattr vattr; 2202 off_t newoff; 2203 int error, fd; 2204 2205 fd = SCARG(uap, fd); 2206 2207 if ((fp = fd_getfile(fd)) == NULL) 2208 return (EBADF); 2209 2210 vp = fp->f_data; 2211 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2212 error = ESPIPE; 2213 goto out; 2214 } 2215 2216 switch (SCARG(uap, whence)) { 2217 case SEEK_CUR: 2218 newoff = fp->f_offset + SCARG(uap, offset); 2219 break; 2220 case SEEK_END: 2221 error = VOP_GETATTR(vp, &vattr, cred); 2222 if (error) { 2223 goto out; 2224 } 2225 newoff = SCARG(uap, offset) + vattr.va_size; 2226 break; 2227 case SEEK_SET: 2228 newoff = SCARG(uap, offset); 2229 break; 2230 default: 2231 error = EINVAL; 2232 goto out; 2233 } 2234 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2235 *(off_t *)retval = fp->f_offset = newoff; 2236 } 2237 out: 2238 fd_putfile(fd); 2239 return (error); 2240 } 2241 2242 /* 2243 * Positional read system call. 2244 */ 2245 int 2246 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2247 { 2248 /* { 2249 syscallarg(int) fd; 2250 syscallarg(void *) buf; 2251 syscallarg(size_t) nbyte; 2252 syscallarg(off_t) offset; 2253 } */ 2254 file_t *fp; 2255 struct vnode *vp; 2256 off_t offset; 2257 int error, fd = SCARG(uap, fd); 2258 2259 if ((fp = fd_getfile(fd)) == NULL) 2260 return (EBADF); 2261 2262 if ((fp->f_flag & FREAD) == 0) { 2263 fd_putfile(fd); 2264 return (EBADF); 2265 } 2266 2267 vp = fp->f_data; 2268 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2269 error = ESPIPE; 2270 goto out; 2271 } 2272 2273 offset = SCARG(uap, offset); 2274 2275 /* 2276 * XXX This works because no file systems actually 2277 * XXX take any action on the seek operation. 2278 */ 2279 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2280 goto out; 2281 2282 /* dofileread() will unuse the descriptor for us */ 2283 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2284 &offset, 0, retval)); 2285 2286 out: 2287 fd_putfile(fd); 2288 return (error); 2289 } 2290 2291 /* 2292 * Positional scatter read system call. 2293 */ 2294 int 2295 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2296 { 2297 /* { 2298 syscallarg(int) fd; 2299 syscallarg(const struct iovec *) iovp; 2300 syscallarg(int) iovcnt; 2301 syscallarg(off_t) offset; 2302 } */ 2303 off_t offset = SCARG(uap, offset); 2304 2305 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2306 SCARG(uap, iovcnt), &offset, 0, retval); 2307 } 2308 2309 /* 2310 * Positional write system call. 2311 */ 2312 int 2313 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2314 { 2315 /* { 2316 syscallarg(int) fd; 2317 syscallarg(const void *) buf; 2318 syscallarg(size_t) nbyte; 2319 syscallarg(off_t) offset; 2320 } */ 2321 file_t *fp; 2322 struct vnode *vp; 2323 off_t offset; 2324 int error, fd = SCARG(uap, fd); 2325 2326 if ((fp = fd_getfile(fd)) == NULL) 2327 return (EBADF); 2328 2329 if ((fp->f_flag & FWRITE) == 0) { 2330 fd_putfile(fd); 2331 return (EBADF); 2332 } 2333 2334 vp = fp->f_data; 2335 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2336 error = ESPIPE; 2337 goto out; 2338 } 2339 2340 offset = SCARG(uap, offset); 2341 2342 /* 2343 * XXX This works because no file systems actually 2344 * XXX take any action on the seek operation. 2345 */ 2346 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2347 goto out; 2348 2349 /* dofilewrite() will unuse the descriptor for us */ 2350 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2351 &offset, 0, retval)); 2352 2353 out: 2354 fd_putfile(fd); 2355 return (error); 2356 } 2357 2358 /* 2359 * Positional gather write system call. 2360 */ 2361 int 2362 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2363 { 2364 /* { 2365 syscallarg(int) fd; 2366 syscallarg(const struct iovec *) iovp; 2367 syscallarg(int) iovcnt; 2368 syscallarg(off_t) offset; 2369 } */ 2370 off_t offset = SCARG(uap, offset); 2371 2372 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2373 SCARG(uap, iovcnt), &offset, 0, retval); 2374 } 2375 2376 /* 2377 * Check access permissions. 2378 */ 2379 int 2380 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2381 { 2382 /* { 2383 syscallarg(const char *) path; 2384 syscallarg(int) flags; 2385 } */ 2386 kauth_cred_t cred; 2387 struct vnode *vp; 2388 int error, flags; 2389 struct nameidata nd; 2390 2391 cred = kauth_cred_dup(l->l_cred); 2392 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2393 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2394 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2395 SCARG(uap, path)); 2396 /* Override default credentials */ 2397 nd.ni_cnd.cn_cred = cred; 2398 if ((error = namei(&nd)) != 0) 2399 goto out; 2400 vp = nd.ni_vp; 2401 2402 /* Flags == 0 means only check for existence. */ 2403 if (SCARG(uap, flags)) { 2404 flags = 0; 2405 if (SCARG(uap, flags) & R_OK) 2406 flags |= VREAD; 2407 if (SCARG(uap, flags) & W_OK) 2408 flags |= VWRITE; 2409 if (SCARG(uap, flags) & X_OK) 2410 flags |= VEXEC; 2411 2412 error = VOP_ACCESS(vp, flags, cred); 2413 if (!error && (flags & VWRITE)) 2414 error = vn_writechk(vp); 2415 } 2416 vput(vp); 2417 out: 2418 kauth_cred_free(cred); 2419 return (error); 2420 } 2421 2422 /* 2423 * Common code for all sys_stat functions, including compat versions. 2424 */ 2425 int 2426 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2427 { 2428 int error; 2429 struct nameidata nd; 2430 2431 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2432 UIO_USERSPACE, path); 2433 error = namei(&nd); 2434 if (error != 0) 2435 return error; 2436 error = vn_stat(nd.ni_vp, sb); 2437 vput(nd.ni_vp); 2438 return error; 2439 } 2440 2441 /* 2442 * Get file status; this version follows links. 2443 */ 2444 /* ARGSUSED */ 2445 int 2446 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval) 2447 { 2448 /* { 2449 syscallarg(const char *) path; 2450 syscallarg(struct stat *) ub; 2451 } */ 2452 struct stat sb; 2453 int error; 2454 2455 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2456 if (error) 2457 return error; 2458 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2459 } 2460 2461 /* 2462 * Get file status; this version does not follow links. 2463 */ 2464 /* ARGSUSED */ 2465 int 2466 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval) 2467 { 2468 /* { 2469 syscallarg(const char *) path; 2470 syscallarg(struct stat *) ub; 2471 } */ 2472 struct stat sb; 2473 int error; 2474 2475 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2476 if (error) 2477 return error; 2478 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2479 } 2480 2481 /* 2482 * Get configurable pathname variables. 2483 */ 2484 /* ARGSUSED */ 2485 int 2486 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2487 { 2488 /* { 2489 syscallarg(const char *) path; 2490 syscallarg(int) name; 2491 } */ 2492 int error; 2493 struct nameidata nd; 2494 2495 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2496 SCARG(uap, path)); 2497 if ((error = namei(&nd)) != 0) 2498 return (error); 2499 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2500 vput(nd.ni_vp); 2501 return (error); 2502 } 2503 2504 /* 2505 * Return target name of a symbolic link. 2506 */ 2507 /* ARGSUSED */ 2508 int 2509 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2510 { 2511 /* { 2512 syscallarg(const char *) path; 2513 syscallarg(char *) buf; 2514 syscallarg(size_t) count; 2515 } */ 2516 struct vnode *vp; 2517 struct iovec aiov; 2518 struct uio auio; 2519 int error; 2520 struct nameidata nd; 2521 2522 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2523 SCARG(uap, path)); 2524 if ((error = namei(&nd)) != 0) 2525 return (error); 2526 vp = nd.ni_vp; 2527 if (vp->v_type != VLNK) 2528 error = EINVAL; 2529 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2530 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2531 aiov.iov_base = SCARG(uap, buf); 2532 aiov.iov_len = SCARG(uap, count); 2533 auio.uio_iov = &aiov; 2534 auio.uio_iovcnt = 1; 2535 auio.uio_offset = 0; 2536 auio.uio_rw = UIO_READ; 2537 KASSERT(l == curlwp); 2538 auio.uio_vmspace = l->l_proc->p_vmspace; 2539 auio.uio_resid = SCARG(uap, count); 2540 error = VOP_READLINK(vp, &auio, l->l_cred); 2541 } 2542 vput(vp); 2543 *retval = SCARG(uap, count) - auio.uio_resid; 2544 return (error); 2545 } 2546 2547 /* 2548 * Change flags of a file given a path name. 2549 */ 2550 /* ARGSUSED */ 2551 int 2552 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2553 { 2554 /* { 2555 syscallarg(const char *) path; 2556 syscallarg(u_long) flags; 2557 } */ 2558 struct vnode *vp; 2559 int error; 2560 struct nameidata nd; 2561 2562 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2563 SCARG(uap, path)); 2564 if ((error = namei(&nd)) != 0) 2565 return (error); 2566 vp = nd.ni_vp; 2567 error = change_flags(vp, SCARG(uap, flags), l); 2568 vput(vp); 2569 return (error); 2570 } 2571 2572 /* 2573 * Change flags of a file given a file descriptor. 2574 */ 2575 /* ARGSUSED */ 2576 int 2577 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2578 { 2579 /* { 2580 syscallarg(int) fd; 2581 syscallarg(u_long) flags; 2582 } */ 2583 struct vnode *vp; 2584 file_t *fp; 2585 int error; 2586 2587 /* fd_getvnode() will use the descriptor for us */ 2588 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2589 return (error); 2590 vp = fp->f_data; 2591 error = change_flags(vp, SCARG(uap, flags), l); 2592 VOP_UNLOCK(vp, 0); 2593 fd_putfile(SCARG(uap, fd)); 2594 return (error); 2595 } 2596 2597 /* 2598 * Change flags of a file given a path name; this version does 2599 * not follow links. 2600 */ 2601 int 2602 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2603 { 2604 /* { 2605 syscallarg(const char *) path; 2606 syscallarg(u_long) flags; 2607 } */ 2608 struct vnode *vp; 2609 int error; 2610 struct nameidata nd; 2611 2612 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2613 SCARG(uap, path)); 2614 if ((error = namei(&nd)) != 0) 2615 return (error); 2616 vp = nd.ni_vp; 2617 error = change_flags(vp, SCARG(uap, flags), l); 2618 vput(vp); 2619 return (error); 2620 } 2621 2622 /* 2623 * Common routine to change flags of a file. 2624 */ 2625 int 2626 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2627 { 2628 struct vattr vattr; 2629 int error; 2630 2631 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2632 /* 2633 * Non-superusers cannot change the flags on devices, even if they 2634 * own them. 2635 */ 2636 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2637 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2638 goto out; 2639 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2640 error = EINVAL; 2641 goto out; 2642 } 2643 } 2644 VATTR_NULL(&vattr); 2645 vattr.va_flags = flags; 2646 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2647 out: 2648 return (error); 2649 } 2650 2651 /* 2652 * Change mode of a file given path name; this version follows links. 2653 */ 2654 /* ARGSUSED */ 2655 int 2656 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2657 { 2658 /* { 2659 syscallarg(const char *) path; 2660 syscallarg(int) mode; 2661 } */ 2662 int error; 2663 struct nameidata nd; 2664 2665 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2666 SCARG(uap, path)); 2667 if ((error = namei(&nd)) != 0) 2668 return (error); 2669 2670 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2671 2672 vrele(nd.ni_vp); 2673 return (error); 2674 } 2675 2676 /* 2677 * Change mode of a file given a file descriptor. 2678 */ 2679 /* ARGSUSED */ 2680 int 2681 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2682 { 2683 /* { 2684 syscallarg(int) fd; 2685 syscallarg(int) mode; 2686 } */ 2687 file_t *fp; 2688 int error; 2689 2690 /* fd_getvnode() will use the descriptor for us */ 2691 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2692 return (error); 2693 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2694 fd_putfile(SCARG(uap, fd)); 2695 return (error); 2696 } 2697 2698 /* 2699 * Change mode of a file given path name; this version does not follow links. 2700 */ 2701 /* ARGSUSED */ 2702 int 2703 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2704 { 2705 /* { 2706 syscallarg(const char *) path; 2707 syscallarg(int) mode; 2708 } */ 2709 int error; 2710 struct nameidata nd; 2711 2712 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2713 SCARG(uap, path)); 2714 if ((error = namei(&nd)) != 0) 2715 return (error); 2716 2717 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2718 2719 vrele(nd.ni_vp); 2720 return (error); 2721 } 2722 2723 /* 2724 * Common routine to set mode given a vnode. 2725 */ 2726 static int 2727 change_mode(struct vnode *vp, int mode, struct lwp *l) 2728 { 2729 struct vattr vattr; 2730 int error; 2731 2732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2733 VATTR_NULL(&vattr); 2734 vattr.va_mode = mode & ALLPERMS; 2735 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2736 VOP_UNLOCK(vp, 0); 2737 return (error); 2738 } 2739 2740 /* 2741 * Set ownership given a path name; this version follows links. 2742 */ 2743 /* ARGSUSED */ 2744 int 2745 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2746 { 2747 /* { 2748 syscallarg(const char *) path; 2749 syscallarg(uid_t) uid; 2750 syscallarg(gid_t) gid; 2751 } */ 2752 int error; 2753 struct nameidata nd; 2754 2755 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2756 SCARG(uap, path)); 2757 if ((error = namei(&nd)) != 0) 2758 return (error); 2759 2760 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2761 2762 vrele(nd.ni_vp); 2763 return (error); 2764 } 2765 2766 /* 2767 * Set ownership given a path name; this version follows links. 2768 * Provides POSIX semantics. 2769 */ 2770 /* ARGSUSED */ 2771 int 2772 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2773 { 2774 /* { 2775 syscallarg(const char *) path; 2776 syscallarg(uid_t) uid; 2777 syscallarg(gid_t) gid; 2778 } */ 2779 int error; 2780 struct nameidata nd; 2781 2782 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 2783 SCARG(uap, path)); 2784 if ((error = namei(&nd)) != 0) 2785 return (error); 2786 2787 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2788 2789 vrele(nd.ni_vp); 2790 return (error); 2791 } 2792 2793 /* 2794 * Set ownership given a file descriptor. 2795 */ 2796 /* ARGSUSED */ 2797 int 2798 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2799 { 2800 /* { 2801 syscallarg(int) fd; 2802 syscallarg(uid_t) uid; 2803 syscallarg(gid_t) gid; 2804 } */ 2805 int error; 2806 file_t *fp; 2807 2808 /* fd_getvnode() will use the descriptor for us */ 2809 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2810 return (error); 2811 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2812 l, 0); 2813 fd_putfile(SCARG(uap, fd)); 2814 return (error); 2815 } 2816 2817 /* 2818 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2819 */ 2820 /* ARGSUSED */ 2821 int 2822 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2823 { 2824 /* { 2825 syscallarg(int) fd; 2826 syscallarg(uid_t) uid; 2827 syscallarg(gid_t) gid; 2828 } */ 2829 int error; 2830 file_t *fp; 2831 2832 /* fd_getvnode() will use the descriptor for us */ 2833 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2834 return (error); 2835 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2836 l, 1); 2837 fd_putfile(SCARG(uap, fd)); 2838 return (error); 2839 } 2840 2841 /* 2842 * Set ownership given a path name; this version does not follow links. 2843 */ 2844 /* ARGSUSED */ 2845 int 2846 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2847 { 2848 /* { 2849 syscallarg(const char *) path; 2850 syscallarg(uid_t) uid; 2851 syscallarg(gid_t) gid; 2852 } */ 2853 int error; 2854 struct nameidata nd; 2855 2856 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2857 SCARG(uap, path)); 2858 if ((error = namei(&nd)) != 0) 2859 return (error); 2860 2861 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2862 2863 vrele(nd.ni_vp); 2864 return (error); 2865 } 2866 2867 /* 2868 * Set ownership given a path name; this version does not follow links. 2869 * Provides POSIX/XPG semantics. 2870 */ 2871 /* ARGSUSED */ 2872 int 2873 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2874 { 2875 /* { 2876 syscallarg(const char *) path; 2877 syscallarg(uid_t) uid; 2878 syscallarg(gid_t) gid; 2879 } */ 2880 int error; 2881 struct nameidata nd; 2882 2883 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, 2884 SCARG(uap, path)); 2885 if ((error = namei(&nd)) != 0) 2886 return (error); 2887 2888 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2889 2890 vrele(nd.ni_vp); 2891 return (error); 2892 } 2893 2894 /* 2895 * Common routine to set ownership given a vnode. 2896 */ 2897 static int 2898 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2899 int posix_semantics) 2900 { 2901 struct vattr vattr; 2902 mode_t newmode; 2903 int error; 2904 2905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2906 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2907 goto out; 2908 2909 #define CHANGED(x) ((int)(x) != -1) 2910 newmode = vattr.va_mode; 2911 if (posix_semantics) { 2912 /* 2913 * POSIX/XPG semantics: if the caller is not the super-user, 2914 * clear set-user-id and set-group-id bits. Both POSIX and 2915 * the XPG consider the behaviour for calls by the super-user 2916 * implementation-defined; we leave the set-user-id and set- 2917 * group-id settings intact in that case. 2918 */ 2919 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2920 NULL) != 0) 2921 newmode &= ~(S_ISUID | S_ISGID); 2922 } else { 2923 /* 2924 * NetBSD semantics: when changing owner and/or group, 2925 * clear the respective bit(s). 2926 */ 2927 if (CHANGED(uid)) 2928 newmode &= ~S_ISUID; 2929 if (CHANGED(gid)) 2930 newmode &= ~S_ISGID; 2931 } 2932 /* Update va_mode iff altered. */ 2933 if (vattr.va_mode == newmode) 2934 newmode = VNOVAL; 2935 2936 VATTR_NULL(&vattr); 2937 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2938 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2939 vattr.va_mode = newmode; 2940 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2941 #undef CHANGED 2942 2943 out: 2944 VOP_UNLOCK(vp, 0); 2945 return (error); 2946 } 2947 2948 /* 2949 * Set the access and modification times given a path name; this 2950 * version follows links. 2951 */ 2952 /* ARGSUSED */ 2953 int 2954 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval) 2955 { 2956 /* { 2957 syscallarg(const char *) path; 2958 syscallarg(const struct timeval *) tptr; 2959 } */ 2960 2961 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2962 SCARG(uap, tptr), UIO_USERSPACE); 2963 } 2964 2965 /* 2966 * Set the access and modification times given a file descriptor. 2967 */ 2968 /* ARGSUSED */ 2969 int 2970 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval) 2971 { 2972 /* { 2973 syscallarg(int) fd; 2974 syscallarg(const struct timeval *) tptr; 2975 } */ 2976 int error; 2977 file_t *fp; 2978 2979 /* fd_getvnode() will use the descriptor for us */ 2980 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2981 return (error); 2982 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2983 UIO_USERSPACE); 2984 fd_putfile(SCARG(uap, fd)); 2985 return (error); 2986 } 2987 2988 /* 2989 * Set the access and modification times given a path name; this 2990 * version does not follow links. 2991 */ 2992 int 2993 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval) 2994 { 2995 /* { 2996 syscallarg(const char *) path; 2997 syscallarg(const struct timeval *) tptr; 2998 } */ 2999 3000 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3001 SCARG(uap, tptr), UIO_USERSPACE); 3002 } 3003 3004 /* 3005 * Common routine to set access and modification times given a vnode. 3006 */ 3007 int 3008 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3009 const struct timeval *tptr, enum uio_seg seg) 3010 { 3011 struct vattr vattr; 3012 struct nameidata nd; 3013 int error; 3014 bool vanull, setbirthtime; 3015 struct timespec ts[2]; 3016 3017 if (tptr == NULL) { 3018 vanull = true; 3019 nanotime(&ts[0]); 3020 ts[1] = ts[0]; 3021 } else { 3022 struct timeval tv[2]; 3023 3024 vanull = false; 3025 if (seg != UIO_SYSSPACE) { 3026 error = copyin(tptr, &tv, sizeof (tv)); 3027 if (error != 0) 3028 return error; 3029 tptr = tv; 3030 } 3031 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3032 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3033 } 3034 3035 if (vp == NULL) { 3036 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path); 3037 if ((error = namei(&nd)) != 0) 3038 return error; 3039 vp = nd.ni_vp; 3040 } else 3041 nd.ni_vp = NULL; 3042 3043 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3044 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3045 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3046 VATTR_NULL(&vattr); 3047 vattr.va_atime = ts[0]; 3048 vattr.va_mtime = ts[1]; 3049 if (setbirthtime) 3050 vattr.va_birthtime = ts[1]; 3051 if (vanull) 3052 vattr.va_flags |= VA_UTIMES_NULL; 3053 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3054 VOP_UNLOCK(vp, 0); 3055 3056 if (nd.ni_vp != NULL) 3057 vrele(nd.ni_vp); 3058 3059 return error; 3060 } 3061 3062 /* 3063 * Truncate a file given its path name. 3064 */ 3065 /* ARGSUSED */ 3066 int 3067 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3068 { 3069 /* { 3070 syscallarg(const char *) path; 3071 syscallarg(int) pad; 3072 syscallarg(off_t) length; 3073 } */ 3074 struct vnode *vp; 3075 struct vattr vattr; 3076 int error; 3077 struct nameidata nd; 3078 3079 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3080 SCARG(uap, path)); 3081 if ((error = namei(&nd)) != 0) 3082 return (error); 3083 vp = nd.ni_vp; 3084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3085 if (vp->v_type == VDIR) 3086 error = EISDIR; 3087 else if ((error = vn_writechk(vp)) == 0 && 3088 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3089 VATTR_NULL(&vattr); 3090 vattr.va_size = SCARG(uap, length); 3091 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3092 } 3093 vput(vp); 3094 return (error); 3095 } 3096 3097 /* 3098 * Truncate a file given a file descriptor. 3099 */ 3100 /* ARGSUSED */ 3101 int 3102 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3103 { 3104 /* { 3105 syscallarg(int) fd; 3106 syscallarg(int) pad; 3107 syscallarg(off_t) length; 3108 } */ 3109 struct vattr vattr; 3110 struct vnode *vp; 3111 file_t *fp; 3112 int error; 3113 3114 /* fd_getvnode() will use the descriptor for us */ 3115 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3116 return (error); 3117 if ((fp->f_flag & FWRITE) == 0) { 3118 error = EINVAL; 3119 goto out; 3120 } 3121 vp = fp->f_data; 3122 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3123 if (vp->v_type == VDIR) 3124 error = EISDIR; 3125 else if ((error = vn_writechk(vp)) == 0) { 3126 VATTR_NULL(&vattr); 3127 vattr.va_size = SCARG(uap, length); 3128 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3129 } 3130 VOP_UNLOCK(vp, 0); 3131 out: 3132 fd_putfile(SCARG(uap, fd)); 3133 return (error); 3134 } 3135 3136 /* 3137 * Sync an open file. 3138 */ 3139 /* ARGSUSED */ 3140 int 3141 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3142 { 3143 /* { 3144 syscallarg(int) fd; 3145 } */ 3146 struct vnode *vp; 3147 file_t *fp; 3148 int error; 3149 3150 /* fd_getvnode() will use the descriptor for us */ 3151 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3152 return (error); 3153 vp = fp->f_data; 3154 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3155 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3156 if (error == 0 && bioopsp != NULL && 3157 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3158 (*bioopsp->io_fsync)(vp, 0); 3159 VOP_UNLOCK(vp, 0); 3160 fd_putfile(SCARG(uap, fd)); 3161 return (error); 3162 } 3163 3164 /* 3165 * Sync a range of file data. API modeled after that found in AIX. 3166 * 3167 * FDATASYNC indicates that we need only save enough metadata to be able 3168 * to re-read the written data. Note we duplicate AIX's requirement that 3169 * the file be open for writing. 3170 */ 3171 /* ARGSUSED */ 3172 int 3173 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3174 { 3175 /* { 3176 syscallarg(int) fd; 3177 syscallarg(int) flags; 3178 syscallarg(off_t) start; 3179 syscallarg(off_t) length; 3180 } */ 3181 struct vnode *vp; 3182 file_t *fp; 3183 int flags, nflags; 3184 off_t s, e, len; 3185 int error; 3186 3187 /* fd_getvnode() will use the descriptor for us */ 3188 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3189 return (error); 3190 3191 if ((fp->f_flag & FWRITE) == 0) { 3192 error = EBADF; 3193 goto out; 3194 } 3195 3196 flags = SCARG(uap, flags); 3197 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3198 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3199 error = EINVAL; 3200 goto out; 3201 } 3202 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3203 if (flags & FDATASYNC) 3204 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3205 else 3206 nflags = FSYNC_WAIT; 3207 if (flags & FDISKSYNC) 3208 nflags |= FSYNC_CACHE; 3209 3210 len = SCARG(uap, length); 3211 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3212 if (len) { 3213 s = SCARG(uap, start); 3214 e = s + len; 3215 if (e < s) { 3216 error = EINVAL; 3217 goto out; 3218 } 3219 } else { 3220 e = 0; 3221 s = 0; 3222 } 3223 3224 vp = fp->f_data; 3225 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3226 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3227 3228 if (error == 0 && bioopsp != NULL && 3229 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3230 (*bioopsp->io_fsync)(vp, nflags); 3231 3232 VOP_UNLOCK(vp, 0); 3233 out: 3234 fd_putfile(SCARG(uap, fd)); 3235 return (error); 3236 } 3237 3238 /* 3239 * Sync the data of an open file. 3240 */ 3241 /* ARGSUSED */ 3242 int 3243 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3244 { 3245 /* { 3246 syscallarg(int) fd; 3247 } */ 3248 struct vnode *vp; 3249 file_t *fp; 3250 int error; 3251 3252 /* fd_getvnode() will use the descriptor for us */ 3253 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3254 return (error); 3255 if ((fp->f_flag & FWRITE) == 0) { 3256 fd_putfile(SCARG(uap, fd)); 3257 return (EBADF); 3258 } 3259 vp = fp->f_data; 3260 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3261 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3262 VOP_UNLOCK(vp, 0); 3263 fd_putfile(SCARG(uap, fd)); 3264 return (error); 3265 } 3266 3267 /* 3268 * Rename files, (standard) BSD semantics frontend. 3269 */ 3270 /* ARGSUSED */ 3271 int 3272 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3273 { 3274 /* { 3275 syscallarg(const char *) from; 3276 syscallarg(const char *) to; 3277 } */ 3278 3279 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3280 } 3281 3282 /* 3283 * Rename files, POSIX semantics frontend. 3284 */ 3285 /* ARGSUSED */ 3286 int 3287 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3288 { 3289 /* { 3290 syscallarg(const char *) from; 3291 syscallarg(const char *) to; 3292 } */ 3293 3294 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3295 } 3296 3297 /* 3298 * Rename files. Source and destination must either both be directories, 3299 * or both not be directories. If target is a directory, it must be empty. 3300 * If `from' and `to' refer to the same object, the value of the `retain' 3301 * argument is used to determine whether `from' will be 3302 * 3303 * (retain == 0) deleted unless `from' and `to' refer to the same 3304 * object in the file system's name space (BSD). 3305 * (retain == 1) always retained (POSIX). 3306 */ 3307 int 3308 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3309 { 3310 struct vnode *tvp, *fvp, *tdvp; 3311 struct nameidata fromnd, tond; 3312 struct mount *fs; 3313 struct lwp *l = curlwp; 3314 struct proc *p; 3315 uint32_t saveflag; 3316 int error; 3317 3318 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3319 seg, from); 3320 if ((error = namei(&fromnd)) != 0) 3321 return (error); 3322 if (fromnd.ni_dvp != fromnd.ni_vp) 3323 VOP_UNLOCK(fromnd.ni_dvp, 0); 3324 fvp = fromnd.ni_vp; 3325 3326 fs = fvp->v_mount; 3327 error = VFS_RENAMELOCK_ENTER(fs); 3328 if (error) { 3329 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3330 vrele(fromnd.ni_dvp); 3331 vrele(fvp); 3332 goto out1; 3333 } 3334 3335 /* 3336 * close, partially, yet another race - ideally we should only 3337 * go as far as getting fromnd.ni_dvp before getting the per-fs 3338 * lock, and then continue to get fromnd.ni_vp, but we can't do 3339 * that with namei as it stands. 3340 * 3341 * This still won't prevent rmdir from nuking fromnd.ni_vp 3342 * under us. The real fix is to get the locks in the right 3343 * order and do the lookups in the right places, but that's a 3344 * major rototill. 3345 * 3346 * Preserve the SAVESTART in cn_flags, because who knows what 3347 * might happen if we don't. 3348 * 3349 * Note: this logic (as well as this whole function) is cloned 3350 * in nfs_serv.c. Proceed accordingly. 3351 */ 3352 vrele(fvp); 3353 if ((fromnd.ni_cnd.cn_namelen == 1 && 3354 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3355 (fromnd.ni_cnd.cn_namelen == 2 && 3356 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3357 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3358 error = EINVAL; 3359 VFS_RENAMELOCK_EXIT(fs); 3360 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3361 vrele(fromnd.ni_dvp); 3362 goto out1; 3363 } 3364 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3365 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3366 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3367 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3368 fromnd.ni_cnd.cn_flags |= saveflag; 3369 if (error) { 3370 VOP_UNLOCK(fromnd.ni_dvp, 0); 3371 VFS_RENAMELOCK_EXIT(fs); 3372 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3373 vrele(fromnd.ni_dvp); 3374 goto out1; 3375 } 3376 VOP_UNLOCK(fromnd.ni_vp, 0); 3377 if (fromnd.ni_dvp != fromnd.ni_vp) 3378 VOP_UNLOCK(fromnd.ni_dvp, 0); 3379 fvp = fromnd.ni_vp; 3380 3381 NDINIT(&tond, RENAME, 3382 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3383 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3384 seg, to); 3385 if ((error = namei(&tond)) != 0) { 3386 VFS_RENAMELOCK_EXIT(fs); 3387 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3388 vrele(fromnd.ni_dvp); 3389 vrele(fvp); 3390 goto out1; 3391 } 3392 tdvp = tond.ni_dvp; 3393 tvp = tond.ni_vp; 3394 3395 if (tvp != NULL) { 3396 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3397 error = ENOTDIR; 3398 goto out; 3399 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3400 error = EISDIR; 3401 goto out; 3402 } 3403 } 3404 3405 if (fvp == tdvp) 3406 error = EINVAL; 3407 3408 /* 3409 * Source and destination refer to the same object. 3410 */ 3411 if (fvp == tvp) { 3412 if (retain) 3413 error = -1; 3414 else if (fromnd.ni_dvp == tdvp && 3415 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3416 !memcmp(fromnd.ni_cnd.cn_nameptr, 3417 tond.ni_cnd.cn_nameptr, 3418 fromnd.ni_cnd.cn_namelen)) 3419 error = -1; 3420 } 3421 3422 #if NVERIEXEC > 0 3423 if (!error) { 3424 char *f1, *f2; 3425 3426 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3427 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen); 3428 3429 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK); 3430 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen); 3431 3432 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3433 3434 free(f1, M_TEMP); 3435 free(f2, M_TEMP); 3436 } 3437 #endif /* NVERIEXEC > 0 */ 3438 3439 out: 3440 p = l->l_proc; 3441 if (!error) { 3442 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3443 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3444 VFS_RENAMELOCK_EXIT(fs); 3445 } else { 3446 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3447 if (tdvp == tvp) 3448 vrele(tdvp); 3449 else 3450 vput(tdvp); 3451 if (tvp) 3452 vput(tvp); 3453 VFS_RENAMELOCK_EXIT(fs); 3454 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3455 vrele(fromnd.ni_dvp); 3456 vrele(fvp); 3457 } 3458 vrele(tond.ni_startdir); 3459 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3460 out1: 3461 if (fromnd.ni_startdir) 3462 vrele(fromnd.ni_startdir); 3463 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3464 return (error == -1 ? 0 : error); 3465 } 3466 3467 /* 3468 * Make a directory file. 3469 */ 3470 /* ARGSUSED */ 3471 int 3472 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3473 { 3474 /* { 3475 syscallarg(const char *) path; 3476 syscallarg(int) mode; 3477 } */ 3478 struct proc *p = l->l_proc; 3479 struct vnode *vp; 3480 struct vattr vattr; 3481 int error; 3482 struct nameidata nd; 3483 3484 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE, 3485 SCARG(uap, path)); 3486 if ((error = namei(&nd)) != 0) 3487 return (error); 3488 vp = nd.ni_vp; 3489 if (vp != NULL) { 3490 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3491 if (nd.ni_dvp == vp) 3492 vrele(nd.ni_dvp); 3493 else 3494 vput(nd.ni_dvp); 3495 vrele(vp); 3496 return (EEXIST); 3497 } 3498 VATTR_NULL(&vattr); 3499 vattr.va_type = VDIR; 3500 /* We will read cwdi->cwdi_cmask unlocked. */ 3501 vattr.va_mode = 3502 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3503 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3504 if (!error) 3505 vput(nd.ni_vp); 3506 return (error); 3507 } 3508 3509 /* 3510 * Remove a directory file. 3511 */ 3512 /* ARGSUSED */ 3513 int 3514 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3515 { 3516 /* { 3517 syscallarg(const char *) path; 3518 } */ 3519 struct vnode *vp; 3520 int error; 3521 struct nameidata nd; 3522 3523 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3524 SCARG(uap, path)); 3525 if ((error = namei(&nd)) != 0) 3526 return (error); 3527 vp = nd.ni_vp; 3528 if (vp->v_type != VDIR) { 3529 error = ENOTDIR; 3530 goto out; 3531 } 3532 /* 3533 * No rmdir "." please. 3534 */ 3535 if (nd.ni_dvp == vp) { 3536 error = EINVAL; 3537 goto out; 3538 } 3539 /* 3540 * The root of a mounted filesystem cannot be deleted. 3541 */ 3542 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3543 error = EBUSY; 3544 goto out; 3545 } 3546 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3547 return (error); 3548 3549 out: 3550 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3551 if (nd.ni_dvp == vp) 3552 vrele(nd.ni_dvp); 3553 else 3554 vput(nd.ni_dvp); 3555 vput(vp); 3556 return (error); 3557 } 3558 3559 /* 3560 * Read a block of directory entries in a file system independent format. 3561 */ 3562 int 3563 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3564 { 3565 /* { 3566 syscallarg(int) fd; 3567 syscallarg(char *) buf; 3568 syscallarg(size_t) count; 3569 } */ 3570 file_t *fp; 3571 int error, done; 3572 3573 /* fd_getvnode() will use the descriptor for us */ 3574 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3575 return (error); 3576 if ((fp->f_flag & FREAD) == 0) { 3577 error = EBADF; 3578 goto out; 3579 } 3580 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3581 SCARG(uap, count), &done, l, 0, 0); 3582 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3583 *retval = done; 3584 out: 3585 fd_putfile(SCARG(uap, fd)); 3586 return (error); 3587 } 3588 3589 /* 3590 * Set the mode mask for creation of filesystem nodes. 3591 */ 3592 int 3593 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3594 { 3595 /* { 3596 syscallarg(mode_t) newmask; 3597 } */ 3598 struct proc *p = l->l_proc; 3599 struct cwdinfo *cwdi; 3600 3601 /* 3602 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3603 * important is that we serialize changes to the mask. The 3604 * rw_exit() will issue a write memory barrier on our behalf, 3605 * and force the changes out to other CPUs (as it must use an 3606 * atomic operation, draining the local CPU's store buffers). 3607 */ 3608 cwdi = p->p_cwdi; 3609 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3610 *retval = cwdi->cwdi_cmask; 3611 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3612 rw_exit(&cwdi->cwdi_lock); 3613 3614 return (0); 3615 } 3616 3617 int 3618 dorevoke(struct vnode *vp, kauth_cred_t cred) 3619 { 3620 struct vattr vattr; 3621 int error; 3622 3623 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3624 return error; 3625 if (kauth_cred_geteuid(cred) != vattr.va_uid && 3626 (error = kauth_authorize_generic(cred, 3627 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3628 VOP_REVOKE(vp, REVOKEALL); 3629 return (error); 3630 } 3631 3632 /* 3633 * Void all references to file by ripping underlying filesystem 3634 * away from vnode. 3635 */ 3636 /* ARGSUSED */ 3637 int 3638 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3639 { 3640 /* { 3641 syscallarg(const char *) path; 3642 } */ 3643 struct vnode *vp; 3644 int error; 3645 struct nameidata nd; 3646 3647 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 3648 SCARG(uap, path)); 3649 if ((error = namei(&nd)) != 0) 3650 return (error); 3651 vp = nd.ni_vp; 3652 error = dorevoke(vp, l->l_cred); 3653 vrele(vp); 3654 return (error); 3655 } 3656