1 /* $NetBSD: vfs_syscalls.c,v 1.500 2015/07/24 13:02:52 maxv Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.500 2015/07/24 13:02:52 maxv Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/specfs/specdev.h> 112 113 #include <nfs/rpcv2.h> 114 #include <nfs/nfsproto.h> 115 #include <nfs/nfs.h> 116 #include <nfs/nfs_var.h> 117 118 /* XXX this shouldn't be here */ 119 #ifndef OFF_T_MAX 120 #define OFF_T_MAX __type_max(off_t) 121 #endif 122 123 static int change_flags(struct vnode *, u_long, struct lwp *); 124 static int change_mode(struct vnode *, int, struct lwp *); 125 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 126 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 127 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 128 enum uio_seg); 129 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 130 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 131 enum uio_seg); 132 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 133 enum uio_seg, int); 134 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 135 size_t, register_t *); 136 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 137 138 static int fd_nameiat(struct lwp *, int, struct nameidata *); 139 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 140 namei_simple_flags_t, struct vnode **); 141 142 143 /* 144 * This table is used to maintain compatibility with 4.3BSD 145 * and NetBSD 0.9 mount syscalls - and possibly other systems. 146 * Note, the order is important! 147 * 148 * Do not modify this table. It should only contain filesystems 149 * supported by NetBSD 0.9 and 4.3BSD. 150 */ 151 const char * const mountcompatnames[] = { 152 NULL, /* 0 = MOUNT_NONE */ 153 MOUNT_FFS, /* 1 = MOUNT_UFS */ 154 MOUNT_NFS, /* 2 */ 155 MOUNT_MFS, /* 3 */ 156 MOUNT_MSDOS, /* 4 */ 157 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 158 MOUNT_FDESC, /* 6 */ 159 MOUNT_KERNFS, /* 7 */ 160 NULL, /* 8 = MOUNT_DEVFS */ 161 MOUNT_AFS, /* 9 */ 162 }; 163 164 const int nmountcompatnames = __arraycount(mountcompatnames); 165 166 static int 167 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 168 { 169 file_t *dfp; 170 int error; 171 172 if (fdat != AT_FDCWD) { 173 if ((error = fd_getvnode(fdat, &dfp)) != 0) 174 goto out; 175 176 NDAT(ndp, dfp->f_vnode); 177 } 178 179 error = namei(ndp); 180 181 if (fdat != AT_FDCWD) 182 fd_putfile(fdat); 183 out: 184 return error; 185 } 186 187 static int 188 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 189 namei_simple_flags_t sflags, struct vnode **vp_ret) 190 { 191 file_t *dfp; 192 struct vnode *dvp; 193 int error; 194 195 if (fdat != AT_FDCWD) { 196 if ((error = fd_getvnode(fdat, &dfp)) != 0) 197 goto out; 198 199 dvp = dfp->f_vnode; 200 } else { 201 dvp = NULL; 202 } 203 204 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 205 206 if (fdat != AT_FDCWD) 207 fd_putfile(fdat); 208 out: 209 return error; 210 } 211 212 static int 213 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 214 { 215 int error; 216 217 fp->f_flag = flags & FMASK; 218 fp->f_type = DTYPE_VNODE; 219 fp->f_ops = &vnops; 220 fp->f_vnode = vp; 221 222 if (flags & (O_EXLOCK | O_SHLOCK)) { 223 struct flock lf; 224 int type; 225 226 lf.l_whence = SEEK_SET; 227 lf.l_start = 0; 228 lf.l_len = 0; 229 if (flags & O_EXLOCK) 230 lf.l_type = F_WRLCK; 231 else 232 lf.l_type = F_RDLCK; 233 type = F_FLOCK; 234 if ((flags & FNONBLOCK) == 0) 235 type |= F_WAIT; 236 VOP_UNLOCK(vp); 237 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 238 if (error) { 239 (void) vn_close(vp, fp->f_flag, fp->f_cred); 240 fd_abort(l->l_proc, fp, indx); 241 return error; 242 } 243 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 244 atomic_or_uint(&fp->f_flag, FHASLOCK); 245 } 246 if (flags & O_CLOEXEC) 247 fd_set_exclose(l, indx, true); 248 return 0; 249 } 250 251 static int 252 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 253 void *data, size_t *data_len) 254 { 255 struct mount *mp; 256 int error = 0, saved_flags; 257 258 mp = vp->v_mount; 259 saved_flags = mp->mnt_flag; 260 261 /* We can operate only on VV_ROOT nodes. */ 262 if ((vp->v_vflag & VV_ROOT) == 0) { 263 error = EINVAL; 264 goto out; 265 } 266 267 /* 268 * We only allow the filesystem to be reloaded if it 269 * is currently mounted read-only. Additionally, we 270 * prevent read-write to read-only downgrades. 271 */ 272 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 273 (mp->mnt_flag & MNT_RDONLY) == 0 && 274 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 275 error = EOPNOTSUPP; /* Needs translation */ 276 goto out; 277 } 278 279 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 280 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 281 if (error) 282 goto out; 283 284 if (vfs_busy(mp, NULL)) { 285 error = EPERM; 286 goto out; 287 } 288 289 mutex_enter(&mp->mnt_updating); 290 291 mp->mnt_flag &= ~MNT_OP_FLAGS; 292 mp->mnt_flag |= flags & MNT_OP_FLAGS; 293 294 /* 295 * Set the mount level flags. 296 */ 297 if (flags & MNT_RDONLY) 298 mp->mnt_flag |= MNT_RDONLY; 299 else if (mp->mnt_flag & MNT_RDONLY) 300 mp->mnt_iflag |= IMNT_WANTRDWR; 301 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 302 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 303 error = VFS_MOUNT(mp, path, data, data_len); 304 305 if (error && data != NULL) { 306 int error2; 307 308 /* 309 * Update failed; let's try and see if it was an 310 * export request. For compat with 3.0 and earlier. 311 */ 312 error2 = vfs_hooks_reexport(mp, path, data); 313 314 /* 315 * Only update error code if the export request was 316 * understood but some problem occurred while 317 * processing it. 318 */ 319 if (error2 != EJUSTRETURN) 320 error = error2; 321 } 322 323 if (mp->mnt_iflag & IMNT_WANTRDWR) 324 mp->mnt_flag &= ~MNT_RDONLY; 325 if (error) 326 mp->mnt_flag = saved_flags; 327 mp->mnt_flag &= ~MNT_OP_FLAGS; 328 mp->mnt_iflag &= ~IMNT_WANTRDWR; 329 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 330 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 331 vfs_syncer_add_to_worklist(mp); 332 } else { 333 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 334 vfs_syncer_remove_from_worklist(mp); 335 } 336 mutex_exit(&mp->mnt_updating); 337 vfs_unbusy(mp, false, NULL); 338 339 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 340 (flags & MNT_EXTATTR)) { 341 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 342 NULL, 0, NULL) != 0) { 343 printf("%s: failed to start extattr, error = %d", 344 mp->mnt_stat.f_mntonname, error); 345 mp->mnt_flag &= ~MNT_EXTATTR; 346 } 347 } 348 349 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 350 !(flags & MNT_EXTATTR)) { 351 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 352 NULL, 0, NULL) != 0) { 353 printf("%s: failed to stop extattr, error = %d", 354 mp->mnt_stat.f_mntonname, error); 355 mp->mnt_flag |= MNT_RDONLY; 356 } 357 } 358 out: 359 return (error); 360 } 361 362 static int 363 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 364 { 365 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 366 int error; 367 368 /* Copy file-system type from userspace. */ 369 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 370 if (error) { 371 /* 372 * Historically, filesystem types were identified by numbers. 373 * If we get an integer for the filesystem type instead of a 374 * string, we check to see if it matches one of the historic 375 * filesystem types. 376 */ 377 u_long fsindex = (u_long)fstype; 378 if (fsindex >= nmountcompatnames || 379 mountcompatnames[fsindex] == NULL) 380 return ENODEV; 381 strlcpy(fstypename, mountcompatnames[fsindex], 382 sizeof(fstypename)); 383 } 384 385 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 386 if (strcmp(fstypename, "ufs") == 0) 387 fstypename[0] = 'f'; 388 389 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 390 return 0; 391 392 /* If we can autoload a vfs module, try again */ 393 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 394 395 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 396 return 0; 397 398 return ENODEV; 399 } 400 401 static int 402 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 403 void *data, size_t *data_len) 404 { 405 struct mount *mp; 406 int error; 407 408 /* If MNT_GETARGS is specified, it should be the only flag. */ 409 if (flags & ~MNT_GETARGS) 410 return EINVAL; 411 412 mp = vp->v_mount; 413 414 /* XXX: probably some notion of "can see" here if we want isolation. */ 415 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 416 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 417 if (error) 418 return error; 419 420 if ((vp->v_vflag & VV_ROOT) == 0) 421 return EINVAL; 422 423 if (vfs_busy(mp, NULL)) 424 return EPERM; 425 426 mutex_enter(&mp->mnt_updating); 427 mp->mnt_flag &= ~MNT_OP_FLAGS; 428 mp->mnt_flag |= MNT_GETARGS; 429 error = VFS_MOUNT(mp, path, data, data_len); 430 mp->mnt_flag &= ~MNT_OP_FLAGS; 431 mutex_exit(&mp->mnt_updating); 432 433 vfs_unbusy(mp, false, NULL); 434 return (error); 435 } 436 437 int 438 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 439 { 440 /* { 441 syscallarg(const char *) type; 442 syscallarg(const char *) path; 443 syscallarg(int) flags; 444 syscallarg(void *) data; 445 syscallarg(size_t) data_len; 446 } */ 447 448 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 449 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 450 SCARG(uap, data_len), retval); 451 } 452 453 int 454 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 455 const char *path, int flags, void *data, enum uio_seg data_seg, 456 size_t data_len, register_t *retval) 457 { 458 struct vnode *vp; 459 void *data_buf = data; 460 bool vfsopsrele = false; 461 size_t alloc_sz = 0; 462 int error; 463 464 /* XXX: The calling convention of this routine is totally bizarre */ 465 if (vfsops) 466 vfsopsrele = true; 467 468 /* 469 * Get vnode to be covered 470 */ 471 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 472 if (error != 0) { 473 vp = NULL; 474 goto done; 475 } 476 477 if (vfsops == NULL) { 478 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 479 vfsops = vp->v_mount->mnt_op; 480 } else { 481 /* 'type' is userspace */ 482 error = mount_get_vfsops(type, &vfsops); 483 if (error != 0) 484 goto done; 485 vfsopsrele = true; 486 } 487 } 488 489 /* 490 * We allow data to be NULL, even for userspace. Some fs's don't need 491 * it. The others will handle NULL. 492 */ 493 if (data != NULL && data_seg == UIO_USERSPACE) { 494 if (data_len == 0) { 495 /* No length supplied, use default for filesystem */ 496 data_len = vfsops->vfs_min_mount_data; 497 498 /* 499 * Hopefully a longer buffer won't make copyin() fail. 500 * For compatibility with 3.0 and earlier. 501 */ 502 if (flags & MNT_UPDATE 503 && data_len < sizeof (struct mnt_export_args30)) 504 data_len = sizeof (struct mnt_export_args30); 505 } 506 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 507 error = EINVAL; 508 goto done; 509 } 510 alloc_sz = data_len; 511 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 512 513 /* NFS needs the buffer even for mnt_getargs .... */ 514 error = copyin(data, data_buf, data_len); 515 if (error != 0) 516 goto done; 517 } 518 519 if (flags & MNT_GETARGS) { 520 if (data_len == 0) { 521 error = EINVAL; 522 goto done; 523 } 524 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 525 if (error != 0) 526 goto done; 527 if (data_seg == UIO_USERSPACE) 528 error = copyout(data_buf, data, data_len); 529 *retval = data_len; 530 } else if (flags & MNT_UPDATE) { 531 error = mount_update(l, vp, path, flags, data_buf, &data_len); 532 } else { 533 /* Locking is handled internally in mount_domount(). */ 534 KASSERT(vfsopsrele == true); 535 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 536 &data_len); 537 vfsopsrele = false; 538 } 539 540 done: 541 if (vfsopsrele) 542 vfs_delref(vfsops); 543 if (vp != NULL) { 544 vrele(vp); 545 } 546 if (data_buf != data) 547 kmem_free(data_buf, alloc_sz); 548 return (error); 549 } 550 551 /* 552 * Unmount a file system. 553 * 554 * Note: unmount takes a path to the vnode mounted on as argument, 555 * not special file (as before). 556 */ 557 /* ARGSUSED */ 558 int 559 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 560 { 561 /* { 562 syscallarg(const char *) path; 563 syscallarg(int) flags; 564 } */ 565 struct vnode *vp; 566 struct mount *mp; 567 int error; 568 struct pathbuf *pb; 569 struct nameidata nd; 570 571 error = pathbuf_copyin(SCARG(uap, path), &pb); 572 if (error) { 573 return error; 574 } 575 576 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 577 if ((error = namei(&nd)) != 0) { 578 pathbuf_destroy(pb); 579 return error; 580 } 581 vp = nd.ni_vp; 582 pathbuf_destroy(pb); 583 584 mp = vp->v_mount; 585 atomic_inc_uint(&mp->mnt_refcnt); 586 VOP_UNLOCK(vp); 587 588 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 589 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 590 if (error) { 591 vrele(vp); 592 vfs_destroy(mp); 593 return (error); 594 } 595 596 /* 597 * Don't allow unmounting the root file system. 598 */ 599 if (mp->mnt_flag & MNT_ROOTFS) { 600 vrele(vp); 601 vfs_destroy(mp); 602 return (EINVAL); 603 } 604 605 /* 606 * Must be the root of the filesystem 607 */ 608 if ((vp->v_vflag & VV_ROOT) == 0) { 609 vrele(vp); 610 vfs_destroy(mp); 611 return (EINVAL); 612 } 613 614 vrele(vp); 615 error = dounmount(mp, SCARG(uap, flags), l); 616 vfs_destroy(mp); 617 return error; 618 } 619 620 /* 621 * Sync each mounted filesystem. 622 */ 623 #ifdef DEBUG 624 int syncprt = 0; 625 struct ctldebug debug0 = { "syncprt", &syncprt }; 626 #endif 627 628 void 629 do_sys_sync(struct lwp *l) 630 { 631 struct mount *mp, *nmp; 632 int asyncflag; 633 634 mutex_enter(&mountlist_lock); 635 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 636 if (vfs_busy(mp, &nmp)) { 637 continue; 638 } 639 mutex_enter(&mp->mnt_updating); 640 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 641 asyncflag = mp->mnt_flag & MNT_ASYNC; 642 mp->mnt_flag &= ~MNT_ASYNC; 643 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 644 if (asyncflag) 645 mp->mnt_flag |= MNT_ASYNC; 646 } 647 mutex_exit(&mp->mnt_updating); 648 vfs_unbusy(mp, false, &nmp); 649 } 650 mutex_exit(&mountlist_lock); 651 #ifdef DEBUG 652 if (syncprt) 653 vfs_bufstats(); 654 #endif /* DEBUG */ 655 } 656 657 /* ARGSUSED */ 658 int 659 sys_sync(struct lwp *l, const void *v, register_t *retval) 660 { 661 do_sys_sync(l); 662 return (0); 663 } 664 665 666 /* 667 * Access or change filesystem quotas. 668 * 669 * (this is really 14 different calls bundled into one) 670 */ 671 672 static int 673 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 674 { 675 struct quotastat info_k; 676 int error; 677 678 /* ensure any padding bytes are cleared */ 679 memset(&info_k, 0, sizeof(info_k)); 680 681 error = vfs_quotactl_stat(mp, &info_k); 682 if (error) { 683 return error; 684 } 685 686 return copyout(&info_k, info_u, sizeof(info_k)); 687 } 688 689 static int 690 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 691 struct quotaidtypestat *info_u) 692 { 693 struct quotaidtypestat info_k; 694 int error; 695 696 /* ensure any padding bytes are cleared */ 697 memset(&info_k, 0, sizeof(info_k)); 698 699 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 700 if (error) { 701 return error; 702 } 703 704 return copyout(&info_k, info_u, sizeof(info_k)); 705 } 706 707 static int 708 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 709 struct quotaobjtypestat *info_u) 710 { 711 struct quotaobjtypestat info_k; 712 int error; 713 714 /* ensure any padding bytes are cleared */ 715 memset(&info_k, 0, sizeof(info_k)); 716 717 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 718 if (error) { 719 return error; 720 } 721 722 return copyout(&info_k, info_u, sizeof(info_k)); 723 } 724 725 static int 726 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 727 struct quotaval *val_u) 728 { 729 struct quotakey key_k; 730 struct quotaval val_k; 731 int error; 732 733 /* ensure any padding bytes are cleared */ 734 memset(&val_k, 0, sizeof(val_k)); 735 736 error = copyin(key_u, &key_k, sizeof(key_k)); 737 if (error) { 738 return error; 739 } 740 741 error = vfs_quotactl_get(mp, &key_k, &val_k); 742 if (error) { 743 return error; 744 } 745 746 return copyout(&val_k, val_u, sizeof(val_k)); 747 } 748 749 static int 750 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 751 const struct quotaval *val_u) 752 { 753 struct quotakey key_k; 754 struct quotaval val_k; 755 int error; 756 757 error = copyin(key_u, &key_k, sizeof(key_k)); 758 if (error) { 759 return error; 760 } 761 762 error = copyin(val_u, &val_k, sizeof(val_k)); 763 if (error) { 764 return error; 765 } 766 767 return vfs_quotactl_put(mp, &key_k, &val_k); 768 } 769 770 static int 771 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 772 { 773 struct quotakey key_k; 774 int error; 775 776 error = copyin(key_u, &key_k, sizeof(key_k)); 777 if (error) { 778 return error; 779 } 780 781 return vfs_quotactl_del(mp, &key_k); 782 } 783 784 static int 785 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 786 { 787 struct quotakcursor cursor_k; 788 int error; 789 790 /* ensure any padding bytes are cleared */ 791 memset(&cursor_k, 0, sizeof(cursor_k)); 792 793 error = vfs_quotactl_cursoropen(mp, &cursor_k); 794 if (error) { 795 return error; 796 } 797 798 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 799 } 800 801 static int 802 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 803 { 804 struct quotakcursor cursor_k; 805 int error; 806 807 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 808 if (error) { 809 return error; 810 } 811 812 return vfs_quotactl_cursorclose(mp, &cursor_k); 813 } 814 815 static int 816 do_sys_quotactl_cursorskipidtype(struct mount *mp, 817 struct quotakcursor *cursor_u, int idtype) 818 { 819 struct quotakcursor cursor_k; 820 int error; 821 822 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 823 if (error) { 824 return error; 825 } 826 827 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 828 if (error) { 829 return error; 830 } 831 832 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 833 } 834 835 static int 836 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 837 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 838 unsigned *ret_u) 839 { 840 #define CGET_STACK_MAX 8 841 struct quotakcursor cursor_k; 842 struct quotakey stackkeys[CGET_STACK_MAX]; 843 struct quotaval stackvals[CGET_STACK_MAX]; 844 struct quotakey *keys_k; 845 struct quotaval *vals_k; 846 unsigned ret_k; 847 int error; 848 849 if (maxnum > 128) { 850 maxnum = 128; 851 } 852 853 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 854 if (error) { 855 return error; 856 } 857 858 if (maxnum <= CGET_STACK_MAX) { 859 keys_k = stackkeys; 860 vals_k = stackvals; 861 /* ensure any padding bytes are cleared */ 862 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 863 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 864 } else { 865 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 866 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 867 } 868 869 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 870 &ret_k); 871 if (error) { 872 goto fail; 873 } 874 875 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 876 if (error) { 877 goto fail; 878 } 879 880 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 881 if (error) { 882 goto fail; 883 } 884 885 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 886 if (error) { 887 goto fail; 888 } 889 890 /* do last to maximize the chance of being able to recover a failure */ 891 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 892 893 fail: 894 if (keys_k != stackkeys) { 895 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 896 } 897 if (vals_k != stackvals) { 898 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 899 } 900 return error; 901 } 902 903 static int 904 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 905 int *ret_u) 906 { 907 struct quotakcursor cursor_k; 908 int ret_k; 909 int error; 910 911 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 912 if (error) { 913 return error; 914 } 915 916 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 917 if (error) { 918 return error; 919 } 920 921 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 922 if (error) { 923 return error; 924 } 925 926 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 927 } 928 929 static int 930 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 931 { 932 struct quotakcursor cursor_k; 933 int error; 934 935 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 936 if (error) { 937 return error; 938 } 939 940 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 941 if (error) { 942 return error; 943 } 944 945 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 946 } 947 948 static int 949 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 950 { 951 char *path_k; 952 int error; 953 954 /* XXX this should probably be a struct pathbuf */ 955 path_k = PNBUF_GET(); 956 error = copyin(path_u, path_k, PATH_MAX); 957 if (error) { 958 PNBUF_PUT(path_k); 959 return error; 960 } 961 962 error = vfs_quotactl_quotaon(mp, idtype, path_k); 963 964 PNBUF_PUT(path_k); 965 return error; 966 } 967 968 static int 969 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 970 { 971 return vfs_quotactl_quotaoff(mp, idtype); 972 } 973 974 int 975 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 976 { 977 struct mount *mp; 978 struct vnode *vp; 979 int error; 980 981 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 982 if (error != 0) 983 return (error); 984 mp = vp->v_mount; 985 986 switch (args->qc_op) { 987 case QUOTACTL_STAT: 988 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 989 break; 990 case QUOTACTL_IDTYPESTAT: 991 error = do_sys_quotactl_idtypestat(mp, 992 args->u.idtypestat.qc_idtype, 993 args->u.idtypestat.qc_info); 994 break; 995 case QUOTACTL_OBJTYPESTAT: 996 error = do_sys_quotactl_objtypestat(mp, 997 args->u.objtypestat.qc_objtype, 998 args->u.objtypestat.qc_info); 999 break; 1000 case QUOTACTL_GET: 1001 error = do_sys_quotactl_get(mp, 1002 args->u.get.qc_key, 1003 args->u.get.qc_val); 1004 break; 1005 case QUOTACTL_PUT: 1006 error = do_sys_quotactl_put(mp, 1007 args->u.put.qc_key, 1008 args->u.put.qc_val); 1009 break; 1010 case QUOTACTL_DEL: 1011 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1012 break; 1013 case QUOTACTL_CURSOROPEN: 1014 error = do_sys_quotactl_cursoropen(mp, 1015 args->u.cursoropen.qc_cursor); 1016 break; 1017 case QUOTACTL_CURSORCLOSE: 1018 error = do_sys_quotactl_cursorclose(mp, 1019 args->u.cursorclose.qc_cursor); 1020 break; 1021 case QUOTACTL_CURSORSKIPIDTYPE: 1022 error = do_sys_quotactl_cursorskipidtype(mp, 1023 args->u.cursorskipidtype.qc_cursor, 1024 args->u.cursorskipidtype.qc_idtype); 1025 break; 1026 case QUOTACTL_CURSORGET: 1027 error = do_sys_quotactl_cursorget(mp, 1028 args->u.cursorget.qc_cursor, 1029 args->u.cursorget.qc_keys, 1030 args->u.cursorget.qc_vals, 1031 args->u.cursorget.qc_maxnum, 1032 args->u.cursorget.qc_ret); 1033 break; 1034 case QUOTACTL_CURSORATEND: 1035 error = do_sys_quotactl_cursoratend(mp, 1036 args->u.cursoratend.qc_cursor, 1037 args->u.cursoratend.qc_ret); 1038 break; 1039 case QUOTACTL_CURSORREWIND: 1040 error = do_sys_quotactl_cursorrewind(mp, 1041 args->u.cursorrewind.qc_cursor); 1042 break; 1043 case QUOTACTL_QUOTAON: 1044 error = do_sys_quotactl_quotaon(mp, 1045 args->u.quotaon.qc_idtype, 1046 args->u.quotaon.qc_quotafile); 1047 break; 1048 case QUOTACTL_QUOTAOFF: 1049 error = do_sys_quotactl_quotaoff(mp, 1050 args->u.quotaoff.qc_idtype); 1051 break; 1052 default: 1053 error = EINVAL; 1054 break; 1055 } 1056 1057 vrele(vp); 1058 return error; 1059 } 1060 1061 /* ARGSUSED */ 1062 int 1063 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1064 register_t *retval) 1065 { 1066 /* { 1067 syscallarg(const char *) path; 1068 syscallarg(struct quotactl_args *) args; 1069 } */ 1070 struct quotactl_args args; 1071 int error; 1072 1073 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1074 if (error) { 1075 return error; 1076 } 1077 1078 return do_sys_quotactl(SCARG(uap, path), &args); 1079 } 1080 1081 int 1082 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1083 int root) 1084 { 1085 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1086 int error = 0; 1087 1088 /* 1089 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1090 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1091 * overrides MNT_NOWAIT. 1092 */ 1093 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1094 (flags != MNT_WAIT && flags != 0)) { 1095 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1096 goto done; 1097 } 1098 1099 /* Get the filesystem stats now */ 1100 memset(sp, 0, sizeof(*sp)); 1101 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1102 return error; 1103 } 1104 1105 if (cwdi->cwdi_rdir == NULL) 1106 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1107 done: 1108 if (cwdi->cwdi_rdir != NULL) { 1109 size_t len; 1110 char *bp; 1111 char c; 1112 char *path = PNBUF_GET(); 1113 1114 bp = path + MAXPATHLEN; 1115 *--bp = '\0'; 1116 rw_enter(&cwdi->cwdi_lock, RW_READER); 1117 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1118 MAXPATHLEN / 2, 0, l); 1119 rw_exit(&cwdi->cwdi_lock); 1120 if (error) { 1121 PNBUF_PUT(path); 1122 return error; 1123 } 1124 len = strlen(bp); 1125 if (len != 1) { 1126 /* 1127 * for mount points that are below our root, we can see 1128 * them, so we fix up the pathname and return them. The 1129 * rest we cannot see, so we don't allow viewing the 1130 * data. 1131 */ 1132 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1133 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1134 (void)strlcpy(sp->f_mntonname, 1135 c == '\0' ? "/" : &sp->f_mntonname[len], 1136 sizeof(sp->f_mntonname)); 1137 } else { 1138 if (root) 1139 (void)strlcpy(sp->f_mntonname, "/", 1140 sizeof(sp->f_mntonname)); 1141 else 1142 error = EPERM; 1143 } 1144 } 1145 PNBUF_PUT(path); 1146 } 1147 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1148 return error; 1149 } 1150 1151 /* 1152 * Get filesystem statistics by path. 1153 */ 1154 int 1155 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1156 { 1157 struct mount *mp; 1158 int error; 1159 struct vnode *vp; 1160 1161 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1162 if (error != 0) 1163 return error; 1164 mp = vp->v_mount; 1165 error = dostatvfs(mp, sb, l, flags, 1); 1166 vrele(vp); 1167 return error; 1168 } 1169 1170 /* ARGSUSED */ 1171 int 1172 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1173 { 1174 /* { 1175 syscallarg(const char *) path; 1176 syscallarg(struct statvfs *) buf; 1177 syscallarg(int) flags; 1178 } */ 1179 struct statvfs *sb; 1180 int error; 1181 1182 sb = STATVFSBUF_GET(); 1183 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1184 if (error == 0) 1185 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1186 STATVFSBUF_PUT(sb); 1187 return error; 1188 } 1189 1190 /* 1191 * Get filesystem statistics by fd. 1192 */ 1193 int 1194 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1195 { 1196 file_t *fp; 1197 struct mount *mp; 1198 int error; 1199 1200 /* fd_getvnode() will use the descriptor for us */ 1201 if ((error = fd_getvnode(fd, &fp)) != 0) 1202 return (error); 1203 mp = fp->f_vnode->v_mount; 1204 error = dostatvfs(mp, sb, curlwp, flags, 1); 1205 fd_putfile(fd); 1206 return error; 1207 } 1208 1209 /* ARGSUSED */ 1210 int 1211 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1212 { 1213 /* { 1214 syscallarg(int) fd; 1215 syscallarg(struct statvfs *) buf; 1216 syscallarg(int) flags; 1217 } */ 1218 struct statvfs *sb; 1219 int error; 1220 1221 sb = STATVFSBUF_GET(); 1222 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1223 if (error == 0) 1224 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1225 STATVFSBUF_PUT(sb); 1226 return error; 1227 } 1228 1229 1230 /* 1231 * Get statistics on all filesystems. 1232 */ 1233 int 1234 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1235 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1236 register_t *retval) 1237 { 1238 int root = 0; 1239 struct proc *p = l->l_proc; 1240 struct mount *mp, *nmp; 1241 struct statvfs *sb; 1242 size_t count, maxcount; 1243 int error = 0; 1244 1245 sb = STATVFSBUF_GET(); 1246 maxcount = bufsize / entry_sz; 1247 mutex_enter(&mountlist_lock); 1248 count = 0; 1249 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1250 if (vfs_busy(mp, &nmp)) { 1251 continue; 1252 } 1253 if (sfsp && count < maxcount) { 1254 error = dostatvfs(mp, sb, l, flags, 0); 1255 if (error) { 1256 vfs_unbusy(mp, false, &nmp); 1257 error = 0; 1258 continue; 1259 } 1260 error = copyfn(sb, sfsp, entry_sz); 1261 if (error) { 1262 vfs_unbusy(mp, false, NULL); 1263 goto out; 1264 } 1265 sfsp = (char *)sfsp + entry_sz; 1266 root |= strcmp(sb->f_mntonname, "/") == 0; 1267 } 1268 count++; 1269 vfs_unbusy(mp, false, &nmp); 1270 } 1271 mutex_exit(&mountlist_lock); 1272 1273 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1274 /* 1275 * fake a root entry 1276 */ 1277 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1278 sb, l, flags, 1); 1279 if (error != 0) 1280 goto out; 1281 if (sfsp) { 1282 error = copyfn(sb, sfsp, entry_sz); 1283 if (error != 0) 1284 goto out; 1285 } 1286 count++; 1287 } 1288 if (sfsp && count > maxcount) 1289 *retval = maxcount; 1290 else 1291 *retval = count; 1292 out: 1293 STATVFSBUF_PUT(sb); 1294 return error; 1295 } 1296 1297 int 1298 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1299 { 1300 /* { 1301 syscallarg(struct statvfs *) buf; 1302 syscallarg(size_t) bufsize; 1303 syscallarg(int) flags; 1304 } */ 1305 1306 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1307 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1308 } 1309 1310 /* 1311 * Change current working directory to a given file descriptor. 1312 */ 1313 /* ARGSUSED */ 1314 int 1315 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1316 { 1317 /* { 1318 syscallarg(int) fd; 1319 } */ 1320 struct proc *p = l->l_proc; 1321 struct cwdinfo *cwdi; 1322 struct vnode *vp, *tdp; 1323 struct mount *mp; 1324 file_t *fp; 1325 int error, fd; 1326 1327 /* fd_getvnode() will use the descriptor for us */ 1328 fd = SCARG(uap, fd); 1329 if ((error = fd_getvnode(fd, &fp)) != 0) 1330 return (error); 1331 vp = fp->f_vnode; 1332 1333 vref(vp); 1334 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1335 if (vp->v_type != VDIR) 1336 error = ENOTDIR; 1337 else 1338 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1339 if (error) { 1340 vput(vp); 1341 goto out; 1342 } 1343 while ((mp = vp->v_mountedhere) != NULL) { 1344 error = vfs_busy(mp, NULL); 1345 vput(vp); 1346 if (error != 0) 1347 goto out; 1348 error = VFS_ROOT(mp, &tdp); 1349 vfs_unbusy(mp, false, NULL); 1350 if (error) 1351 goto out; 1352 vp = tdp; 1353 } 1354 VOP_UNLOCK(vp); 1355 1356 /* 1357 * Disallow changing to a directory not under the process's 1358 * current root directory (if there is one). 1359 */ 1360 cwdi = p->p_cwdi; 1361 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1362 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1363 vrele(vp); 1364 error = EPERM; /* operation not permitted */ 1365 } else { 1366 vrele(cwdi->cwdi_cdir); 1367 cwdi->cwdi_cdir = vp; 1368 } 1369 rw_exit(&cwdi->cwdi_lock); 1370 1371 out: 1372 fd_putfile(fd); 1373 return (error); 1374 } 1375 1376 /* 1377 * Change this process's notion of the root directory to a given file 1378 * descriptor. 1379 */ 1380 int 1381 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1382 { 1383 struct proc *p = l->l_proc; 1384 struct vnode *vp; 1385 file_t *fp; 1386 int error, fd = SCARG(uap, fd); 1387 1388 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1389 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1390 return error; 1391 /* fd_getvnode() will use the descriptor for us */ 1392 if ((error = fd_getvnode(fd, &fp)) != 0) 1393 return error; 1394 vp = fp->f_vnode; 1395 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1396 if (vp->v_type != VDIR) 1397 error = ENOTDIR; 1398 else 1399 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1400 VOP_UNLOCK(vp); 1401 if (error) 1402 goto out; 1403 vref(vp); 1404 1405 change_root(p->p_cwdi, vp, l); 1406 1407 out: 1408 fd_putfile(fd); 1409 return (error); 1410 } 1411 1412 /* 1413 * Change current working directory (``.''). 1414 */ 1415 /* ARGSUSED */ 1416 int 1417 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1418 { 1419 /* { 1420 syscallarg(const char *) path; 1421 } */ 1422 struct proc *p = l->l_proc; 1423 struct cwdinfo *cwdi; 1424 int error; 1425 struct vnode *vp; 1426 1427 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1428 &vp, l)) != 0) 1429 return (error); 1430 cwdi = p->p_cwdi; 1431 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1432 vrele(cwdi->cwdi_cdir); 1433 cwdi->cwdi_cdir = vp; 1434 rw_exit(&cwdi->cwdi_lock); 1435 return (0); 1436 } 1437 1438 /* 1439 * Change notion of root (``/'') directory. 1440 */ 1441 /* ARGSUSED */ 1442 int 1443 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1444 { 1445 /* { 1446 syscallarg(const char *) path; 1447 } */ 1448 struct proc *p = l->l_proc; 1449 int error; 1450 struct vnode *vp; 1451 1452 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1453 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1454 return (error); 1455 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1456 &vp, l)) != 0) 1457 return (error); 1458 1459 change_root(p->p_cwdi, vp, l); 1460 1461 return (0); 1462 } 1463 1464 /* 1465 * Common routine for chroot and fchroot. 1466 * NB: callers need to properly authorize the change root operation. 1467 */ 1468 void 1469 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1470 { 1471 struct proc *p = l->l_proc; 1472 kauth_cred_t ncred; 1473 1474 ncred = kauth_cred_alloc(); 1475 1476 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1477 if (cwdi->cwdi_rdir != NULL) 1478 vrele(cwdi->cwdi_rdir); 1479 cwdi->cwdi_rdir = vp; 1480 1481 /* 1482 * Prevent escaping from chroot by putting the root under 1483 * the working directory. Silently chdir to / if we aren't 1484 * already there. 1485 */ 1486 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1487 /* 1488 * XXX would be more failsafe to change directory to a 1489 * deadfs node here instead 1490 */ 1491 vrele(cwdi->cwdi_cdir); 1492 vref(vp); 1493 cwdi->cwdi_cdir = vp; 1494 } 1495 rw_exit(&cwdi->cwdi_lock); 1496 1497 /* Get a write lock on the process credential. */ 1498 proc_crmod_enter(); 1499 1500 kauth_cred_clone(p->p_cred, ncred); 1501 kauth_proc_chroot(ncred, p->p_cwdi); 1502 1503 /* Broadcast our credentials to the process and other LWPs. */ 1504 proc_crmod_leave(ncred, p->p_cred, true); 1505 } 1506 1507 /* 1508 * Common routine for chroot and chdir. 1509 * XXX "where" should be enum uio_seg 1510 */ 1511 int 1512 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1513 { 1514 struct pathbuf *pb; 1515 struct nameidata nd; 1516 int error; 1517 1518 error = pathbuf_maybe_copyin(path, where, &pb); 1519 if (error) { 1520 return error; 1521 } 1522 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1523 if ((error = namei(&nd)) != 0) { 1524 pathbuf_destroy(pb); 1525 return error; 1526 } 1527 *vpp = nd.ni_vp; 1528 pathbuf_destroy(pb); 1529 1530 if ((*vpp)->v_type != VDIR) 1531 error = ENOTDIR; 1532 else 1533 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1534 1535 if (error) 1536 vput(*vpp); 1537 else 1538 VOP_UNLOCK(*vpp); 1539 return (error); 1540 } 1541 1542 /* 1543 * Internals of sys_open - path has already been converted into a pathbuf 1544 * (so we can easily reuse this function from other parts of the kernel, 1545 * like posix_spawn post-processing). 1546 */ 1547 int 1548 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1549 int open_mode, int *fd) 1550 { 1551 struct proc *p = l->l_proc; 1552 struct cwdinfo *cwdi = p->p_cwdi; 1553 file_t *fp; 1554 struct vnode *vp; 1555 int flags, cmode; 1556 int indx, error; 1557 struct nameidata nd; 1558 1559 if (open_flags & O_SEARCH) { 1560 open_flags &= ~(int)O_SEARCH; 1561 } 1562 1563 flags = FFLAGS(open_flags); 1564 if ((flags & (FREAD | FWRITE)) == 0) 1565 return EINVAL; 1566 1567 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1568 return error; 1569 } 1570 1571 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1572 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1573 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1574 if (dvp != NULL) 1575 NDAT(&nd, dvp); 1576 1577 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1578 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1579 fd_abort(p, fp, indx); 1580 if ((error == EDUPFD || error == EMOVEFD) && 1581 l->l_dupfd >= 0 && /* XXX from fdopen */ 1582 (error = 1583 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1584 *fd = indx; 1585 return 0; 1586 } 1587 if (error == ERESTART) 1588 error = EINTR; 1589 return error; 1590 } 1591 1592 l->l_dupfd = 0; 1593 vp = nd.ni_vp; 1594 1595 if ((error = open_setfp(l, fp, vp, indx, flags))) 1596 return error; 1597 1598 VOP_UNLOCK(vp); 1599 *fd = indx; 1600 fd_affix(p, fp, indx); 1601 return 0; 1602 } 1603 1604 int 1605 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1606 { 1607 struct pathbuf *pb; 1608 int error, oflags; 1609 1610 oflags = FFLAGS(open_flags); 1611 if ((oflags & (FREAD | FWRITE)) == 0) 1612 return EINVAL; 1613 1614 pb = pathbuf_create(path); 1615 if (pb == NULL) 1616 return ENOMEM; 1617 1618 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1619 pathbuf_destroy(pb); 1620 1621 return error; 1622 } 1623 1624 /* 1625 * Check permissions, allocate an open file structure, 1626 * and call the device open routine if any. 1627 */ 1628 static int 1629 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1630 int mode, int *fd) 1631 { 1632 file_t *dfp = NULL; 1633 struct vnode *dvp = NULL; 1634 struct pathbuf *pb; 1635 int error; 1636 1637 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1638 if (path == NULL) { 1639 pb = pathbuf_create("."); 1640 if (pb == NULL) 1641 return ENOMEM; 1642 } else 1643 #endif 1644 { 1645 error = pathbuf_copyin(path, &pb); 1646 if (error) 1647 return error; 1648 } 1649 1650 if (fdat != AT_FDCWD) { 1651 /* fd_getvnode() will use the descriptor for us */ 1652 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1653 goto out; 1654 1655 dvp = dfp->f_vnode; 1656 } 1657 1658 error = do_open(l, dvp, pb, flags, mode, fd); 1659 1660 if (dfp != NULL) 1661 fd_putfile(fdat); 1662 out: 1663 pathbuf_destroy(pb); 1664 return error; 1665 } 1666 1667 int 1668 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1669 { 1670 /* { 1671 syscallarg(const char *) path; 1672 syscallarg(int) flags; 1673 syscallarg(int) mode; 1674 } */ 1675 int error; 1676 int fd; 1677 1678 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1679 SCARG(uap, flags), SCARG(uap, mode), &fd); 1680 1681 if (error == 0) 1682 *retval = fd; 1683 1684 return error; 1685 } 1686 1687 int 1688 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1689 { 1690 /* { 1691 syscallarg(int) fd; 1692 syscallarg(const char *) path; 1693 syscallarg(int) oflags; 1694 syscallarg(int) mode; 1695 } */ 1696 int error; 1697 int fd; 1698 1699 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1700 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1701 1702 if (error == 0) 1703 *retval = fd; 1704 1705 return error; 1706 } 1707 1708 static void 1709 vfs__fhfree(fhandle_t *fhp) 1710 { 1711 size_t fhsize; 1712 1713 fhsize = FHANDLE_SIZE(fhp); 1714 kmem_free(fhp, fhsize); 1715 } 1716 1717 /* 1718 * vfs_composefh: compose a filehandle. 1719 */ 1720 1721 int 1722 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1723 { 1724 struct mount *mp; 1725 struct fid *fidp; 1726 int error; 1727 size_t needfhsize; 1728 size_t fidsize; 1729 1730 mp = vp->v_mount; 1731 fidp = NULL; 1732 if (*fh_size < FHANDLE_SIZE_MIN) { 1733 fidsize = 0; 1734 } else { 1735 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1736 if (fhp != NULL) { 1737 memset(fhp, 0, *fh_size); 1738 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1739 fidp = &fhp->fh_fid; 1740 } 1741 } 1742 error = VFS_VPTOFH(vp, fidp, &fidsize); 1743 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1744 if (error == 0 && *fh_size < needfhsize) { 1745 error = E2BIG; 1746 } 1747 *fh_size = needfhsize; 1748 return error; 1749 } 1750 1751 int 1752 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1753 { 1754 struct mount *mp; 1755 fhandle_t *fhp; 1756 size_t fhsize; 1757 size_t fidsize; 1758 int error; 1759 1760 mp = vp->v_mount; 1761 fidsize = 0; 1762 error = VFS_VPTOFH(vp, NULL, &fidsize); 1763 KASSERT(error != 0); 1764 if (error != E2BIG) { 1765 goto out; 1766 } 1767 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1768 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1769 if (fhp == NULL) { 1770 error = ENOMEM; 1771 goto out; 1772 } 1773 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1774 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1775 if (error == 0) { 1776 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1777 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1778 *fhpp = fhp; 1779 } else { 1780 kmem_free(fhp, fhsize); 1781 } 1782 out: 1783 return error; 1784 } 1785 1786 void 1787 vfs_composefh_free(fhandle_t *fhp) 1788 { 1789 1790 vfs__fhfree(fhp); 1791 } 1792 1793 /* 1794 * vfs_fhtovp: lookup a vnode by a filehandle. 1795 */ 1796 1797 int 1798 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1799 { 1800 struct mount *mp; 1801 int error; 1802 1803 *vpp = NULL; 1804 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1805 if (mp == NULL) { 1806 error = ESTALE; 1807 goto out; 1808 } 1809 if (mp->mnt_op->vfs_fhtovp == NULL) { 1810 error = EOPNOTSUPP; 1811 goto out; 1812 } 1813 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1814 out: 1815 return error; 1816 } 1817 1818 /* 1819 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1820 * the needed size. 1821 */ 1822 1823 int 1824 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1825 { 1826 fhandle_t *fhp; 1827 int error; 1828 1829 if (fhsize > FHANDLE_SIZE_MAX) { 1830 return EINVAL; 1831 } 1832 if (fhsize < FHANDLE_SIZE_MIN) { 1833 return EINVAL; 1834 } 1835 again: 1836 fhp = kmem_alloc(fhsize, KM_SLEEP); 1837 if (fhp == NULL) { 1838 return ENOMEM; 1839 } 1840 error = copyin(ufhp, fhp, fhsize); 1841 if (error == 0) { 1842 /* XXX this check shouldn't be here */ 1843 if (FHANDLE_SIZE(fhp) == fhsize) { 1844 *fhpp = fhp; 1845 return 0; 1846 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1847 /* 1848 * a kludge for nfsv2 padded handles. 1849 */ 1850 size_t sz; 1851 1852 sz = FHANDLE_SIZE(fhp); 1853 kmem_free(fhp, fhsize); 1854 fhsize = sz; 1855 goto again; 1856 } else { 1857 /* 1858 * userland told us wrong size. 1859 */ 1860 error = EINVAL; 1861 } 1862 } 1863 kmem_free(fhp, fhsize); 1864 return error; 1865 } 1866 1867 void 1868 vfs_copyinfh_free(fhandle_t *fhp) 1869 { 1870 1871 vfs__fhfree(fhp); 1872 } 1873 1874 /* 1875 * Get file handle system call 1876 */ 1877 int 1878 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1879 { 1880 /* { 1881 syscallarg(char *) fname; 1882 syscallarg(fhandle_t *) fhp; 1883 syscallarg(size_t *) fh_size; 1884 } */ 1885 struct vnode *vp; 1886 fhandle_t *fh; 1887 int error; 1888 struct pathbuf *pb; 1889 struct nameidata nd; 1890 size_t sz; 1891 size_t usz; 1892 1893 /* 1894 * Must be super user 1895 */ 1896 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1897 0, NULL, NULL, NULL); 1898 if (error) 1899 return (error); 1900 1901 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1902 if (error) { 1903 return error; 1904 } 1905 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1906 error = namei(&nd); 1907 if (error) { 1908 pathbuf_destroy(pb); 1909 return error; 1910 } 1911 vp = nd.ni_vp; 1912 pathbuf_destroy(pb); 1913 1914 error = vfs_composefh_alloc(vp, &fh); 1915 vput(vp); 1916 if (error != 0) { 1917 return error; 1918 } 1919 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1920 if (error != 0) { 1921 goto out; 1922 } 1923 sz = FHANDLE_SIZE(fh); 1924 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1925 if (error != 0) { 1926 goto out; 1927 } 1928 if (usz >= sz) { 1929 error = copyout(fh, SCARG(uap, fhp), sz); 1930 } else { 1931 error = E2BIG; 1932 } 1933 out: 1934 vfs_composefh_free(fh); 1935 return (error); 1936 } 1937 1938 /* 1939 * Open a file given a file handle. 1940 * 1941 * Check permissions, allocate an open file structure, 1942 * and call the device open routine if any. 1943 */ 1944 1945 int 1946 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1947 register_t *retval) 1948 { 1949 file_t *fp; 1950 struct vnode *vp = NULL; 1951 kauth_cred_t cred = l->l_cred; 1952 file_t *nfp; 1953 int indx, error; 1954 struct vattr va; 1955 fhandle_t *fh; 1956 int flags; 1957 proc_t *p; 1958 1959 p = curproc; 1960 1961 /* 1962 * Must be super user 1963 */ 1964 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1965 0, NULL, NULL, NULL))) 1966 return (error); 1967 1968 if (oflags & O_SEARCH) { 1969 oflags &= ~(int)O_SEARCH; 1970 } 1971 1972 flags = FFLAGS(oflags); 1973 if ((flags & (FREAD | FWRITE)) == 0) 1974 return (EINVAL); 1975 if ((flags & O_CREAT)) 1976 return (EINVAL); 1977 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1978 return (error); 1979 fp = nfp; 1980 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1981 if (error != 0) { 1982 goto bad; 1983 } 1984 error = vfs_fhtovp(fh, &vp); 1985 vfs_copyinfh_free(fh); 1986 if (error != 0) { 1987 goto bad; 1988 } 1989 1990 /* Now do an effective vn_open */ 1991 1992 if (vp->v_type == VSOCK) { 1993 error = EOPNOTSUPP; 1994 goto bad; 1995 } 1996 error = vn_openchk(vp, cred, flags); 1997 if (error != 0) 1998 goto bad; 1999 if (flags & O_TRUNC) { 2000 VOP_UNLOCK(vp); /* XXX */ 2001 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2002 vattr_null(&va); 2003 va.va_size = 0; 2004 error = VOP_SETATTR(vp, &va, cred); 2005 if (error) 2006 goto bad; 2007 } 2008 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2009 goto bad; 2010 if (flags & FWRITE) { 2011 mutex_enter(vp->v_interlock); 2012 vp->v_writecount++; 2013 mutex_exit(vp->v_interlock); 2014 } 2015 2016 /* done with modified vn_open, now finish what sys_open does. */ 2017 if ((error = open_setfp(l, fp, vp, indx, flags))) 2018 return error; 2019 2020 VOP_UNLOCK(vp); 2021 *retval = indx; 2022 fd_affix(p, fp, indx); 2023 return (0); 2024 2025 bad: 2026 fd_abort(p, fp, indx); 2027 if (vp != NULL) 2028 vput(vp); 2029 return (error); 2030 } 2031 2032 int 2033 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2034 { 2035 /* { 2036 syscallarg(const void *) fhp; 2037 syscallarg(size_t) fh_size; 2038 syscallarg(int) flags; 2039 } */ 2040 2041 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2042 SCARG(uap, flags), retval); 2043 } 2044 2045 int 2046 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2047 { 2048 int error; 2049 fhandle_t *fh; 2050 struct vnode *vp; 2051 2052 /* 2053 * Must be super user 2054 */ 2055 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2056 0, NULL, NULL, NULL))) 2057 return (error); 2058 2059 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2060 if (error != 0) 2061 return error; 2062 2063 error = vfs_fhtovp(fh, &vp); 2064 vfs_copyinfh_free(fh); 2065 if (error != 0) 2066 return error; 2067 2068 error = vn_stat(vp, sb); 2069 vput(vp); 2070 return error; 2071 } 2072 2073 2074 /* ARGSUSED */ 2075 int 2076 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2077 { 2078 /* { 2079 syscallarg(const void *) fhp; 2080 syscallarg(size_t) fh_size; 2081 syscallarg(struct stat *) sb; 2082 } */ 2083 struct stat sb; 2084 int error; 2085 2086 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2087 if (error) 2088 return error; 2089 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2090 } 2091 2092 int 2093 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2094 int flags) 2095 { 2096 fhandle_t *fh; 2097 struct mount *mp; 2098 struct vnode *vp; 2099 int error; 2100 2101 /* 2102 * Must be super user 2103 */ 2104 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2105 0, NULL, NULL, NULL))) 2106 return error; 2107 2108 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2109 if (error != 0) 2110 return error; 2111 2112 error = vfs_fhtovp(fh, &vp); 2113 vfs_copyinfh_free(fh); 2114 if (error != 0) 2115 return error; 2116 2117 mp = vp->v_mount; 2118 error = dostatvfs(mp, sb, l, flags, 1); 2119 vput(vp); 2120 return error; 2121 } 2122 2123 /* ARGSUSED */ 2124 int 2125 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2126 { 2127 /* { 2128 syscallarg(const void *) fhp; 2129 syscallarg(size_t) fh_size; 2130 syscallarg(struct statvfs *) buf; 2131 syscallarg(int) flags; 2132 } */ 2133 struct statvfs *sb = STATVFSBUF_GET(); 2134 int error; 2135 2136 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2137 SCARG(uap, flags)); 2138 if (error == 0) 2139 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2140 STATVFSBUF_PUT(sb); 2141 return error; 2142 } 2143 2144 /* 2145 * Create a special file. 2146 */ 2147 /* ARGSUSED */ 2148 int 2149 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2150 register_t *retval) 2151 { 2152 /* { 2153 syscallarg(const char *) path; 2154 syscallarg(mode_t) mode; 2155 syscallarg(dev_t) dev; 2156 } */ 2157 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2158 SCARG(uap, dev), retval, UIO_USERSPACE); 2159 } 2160 2161 int 2162 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2163 register_t *retval) 2164 { 2165 /* { 2166 syscallarg(int) fd; 2167 syscallarg(const char *) path; 2168 syscallarg(mode_t) mode; 2169 syscallarg(int) pad; 2170 syscallarg(dev_t) dev; 2171 } */ 2172 2173 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2174 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2175 } 2176 2177 int 2178 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2179 register_t *retval, enum uio_seg seg) 2180 { 2181 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2182 } 2183 2184 int 2185 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2186 dev_t dev, register_t *retval, enum uio_seg seg) 2187 { 2188 struct proc *p = l->l_proc; 2189 struct vnode *vp; 2190 struct vattr vattr; 2191 int error, optype; 2192 struct pathbuf *pb; 2193 struct nameidata nd; 2194 const char *pathstring; 2195 2196 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2197 0, NULL, NULL, NULL)) != 0) 2198 return (error); 2199 2200 optype = VOP_MKNOD_DESCOFFSET; 2201 2202 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2203 if (error) { 2204 return error; 2205 } 2206 pathstring = pathbuf_stringcopy_get(pb); 2207 if (pathstring == NULL) { 2208 pathbuf_destroy(pb); 2209 return ENOMEM; 2210 } 2211 2212 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2213 2214 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2215 goto out; 2216 vp = nd.ni_vp; 2217 2218 if (vp != NULL) 2219 error = EEXIST; 2220 else { 2221 vattr_null(&vattr); 2222 /* We will read cwdi->cwdi_cmask unlocked. */ 2223 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2224 vattr.va_rdev = dev; 2225 2226 switch (mode & S_IFMT) { 2227 case S_IFMT: /* used by badsect to flag bad sectors */ 2228 vattr.va_type = VBAD; 2229 break; 2230 case S_IFCHR: 2231 vattr.va_type = VCHR; 2232 break; 2233 case S_IFBLK: 2234 vattr.va_type = VBLK; 2235 break; 2236 case S_IFWHT: 2237 optype = VOP_WHITEOUT_DESCOFFSET; 2238 break; 2239 case S_IFREG: 2240 #if NVERIEXEC > 0 2241 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2242 O_CREAT); 2243 #endif /* NVERIEXEC > 0 */ 2244 vattr.va_type = VREG; 2245 vattr.va_rdev = VNOVAL; 2246 optype = VOP_CREATE_DESCOFFSET; 2247 break; 2248 default: 2249 error = EINVAL; 2250 break; 2251 } 2252 } 2253 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2254 && vattr.va_rdev == VNOVAL) 2255 error = EINVAL; 2256 if (!error) { 2257 switch (optype) { 2258 case VOP_WHITEOUT_DESCOFFSET: 2259 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2260 if (error) 2261 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2262 vput(nd.ni_dvp); 2263 break; 2264 2265 case VOP_MKNOD_DESCOFFSET: 2266 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2267 &nd.ni_cnd, &vattr); 2268 if (error == 0) 2269 vrele(nd.ni_vp); 2270 vput(nd.ni_dvp); 2271 break; 2272 2273 case VOP_CREATE_DESCOFFSET: 2274 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2275 &nd.ni_cnd, &vattr); 2276 if (error == 0) 2277 vrele(nd.ni_vp); 2278 vput(nd.ni_dvp); 2279 break; 2280 } 2281 } else { 2282 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2283 if (nd.ni_dvp == vp) 2284 vrele(nd.ni_dvp); 2285 else 2286 vput(nd.ni_dvp); 2287 if (vp) 2288 vrele(vp); 2289 } 2290 out: 2291 pathbuf_stringcopy_put(pb, pathstring); 2292 pathbuf_destroy(pb); 2293 return (error); 2294 } 2295 2296 /* 2297 * Create a named pipe. 2298 */ 2299 /* ARGSUSED */ 2300 int 2301 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2302 { 2303 /* { 2304 syscallarg(const char *) path; 2305 syscallarg(int) mode; 2306 } */ 2307 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2308 } 2309 2310 int 2311 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2312 register_t *retval) 2313 { 2314 /* { 2315 syscallarg(int) fd; 2316 syscallarg(const char *) path; 2317 syscallarg(int) mode; 2318 } */ 2319 2320 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2321 SCARG(uap, mode)); 2322 } 2323 2324 static int 2325 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2326 { 2327 struct proc *p = l->l_proc; 2328 struct vattr vattr; 2329 int error; 2330 struct pathbuf *pb; 2331 struct nameidata nd; 2332 2333 error = pathbuf_copyin(path, &pb); 2334 if (error) { 2335 return error; 2336 } 2337 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2338 2339 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2340 pathbuf_destroy(pb); 2341 return error; 2342 } 2343 if (nd.ni_vp != NULL) { 2344 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2345 if (nd.ni_dvp == nd.ni_vp) 2346 vrele(nd.ni_dvp); 2347 else 2348 vput(nd.ni_dvp); 2349 vrele(nd.ni_vp); 2350 pathbuf_destroy(pb); 2351 return (EEXIST); 2352 } 2353 vattr_null(&vattr); 2354 vattr.va_type = VFIFO; 2355 /* We will read cwdi->cwdi_cmask unlocked. */ 2356 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2357 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2358 if (error == 0) 2359 vrele(nd.ni_vp); 2360 vput(nd.ni_dvp); 2361 pathbuf_destroy(pb); 2362 return (error); 2363 } 2364 2365 /* 2366 * Make a hard file link. 2367 */ 2368 /* ARGSUSED */ 2369 int 2370 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2371 const char *link, int follow, register_t *retval) 2372 { 2373 struct vnode *vp; 2374 struct pathbuf *linkpb; 2375 struct nameidata nd; 2376 namei_simple_flags_t ns_flags; 2377 int error; 2378 2379 if (follow & AT_SYMLINK_FOLLOW) 2380 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2381 else 2382 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2383 2384 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2385 if (error != 0) 2386 return (error); 2387 error = pathbuf_copyin(link, &linkpb); 2388 if (error) { 2389 goto out1; 2390 } 2391 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2392 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2393 goto out2; 2394 if (nd.ni_vp) { 2395 error = EEXIST; 2396 goto abortop; 2397 } 2398 /* Prevent hard links on directories. */ 2399 if (vp->v_type == VDIR) { 2400 error = EPERM; 2401 goto abortop; 2402 } 2403 /* Prevent cross-mount operation. */ 2404 if (nd.ni_dvp->v_mount != vp->v_mount) { 2405 error = EXDEV; 2406 goto abortop; 2407 } 2408 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2409 VOP_UNLOCK(nd.ni_dvp); 2410 vrele(nd.ni_dvp); 2411 out2: 2412 pathbuf_destroy(linkpb); 2413 out1: 2414 vrele(vp); 2415 return (error); 2416 abortop: 2417 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2418 if (nd.ni_dvp == nd.ni_vp) 2419 vrele(nd.ni_dvp); 2420 else 2421 vput(nd.ni_dvp); 2422 if (nd.ni_vp != NULL) 2423 vrele(nd.ni_vp); 2424 goto out2; 2425 } 2426 2427 int 2428 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2429 { 2430 /* { 2431 syscallarg(const char *) path; 2432 syscallarg(const char *) link; 2433 } */ 2434 const char *path = SCARG(uap, path); 2435 const char *link = SCARG(uap, link); 2436 2437 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2438 AT_SYMLINK_FOLLOW, retval); 2439 } 2440 2441 int 2442 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2443 register_t *retval) 2444 { 2445 /* { 2446 syscallarg(int) fd1; 2447 syscallarg(const char *) name1; 2448 syscallarg(int) fd2; 2449 syscallarg(const char *) name2; 2450 syscallarg(int) flags; 2451 } */ 2452 int fd1 = SCARG(uap, fd1); 2453 const char *name1 = SCARG(uap, name1); 2454 int fd2 = SCARG(uap, fd2); 2455 const char *name2 = SCARG(uap, name2); 2456 int follow; 2457 2458 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2459 2460 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2461 } 2462 2463 2464 int 2465 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2466 { 2467 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2468 } 2469 2470 static int 2471 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2472 const char *link, enum uio_seg seg) 2473 { 2474 struct proc *p = curproc; 2475 struct vattr vattr; 2476 char *path; 2477 int error; 2478 struct pathbuf *linkpb; 2479 struct nameidata nd; 2480 2481 KASSERT(l != NULL || fdat == AT_FDCWD); 2482 2483 path = PNBUF_GET(); 2484 if (seg == UIO_USERSPACE) { 2485 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2486 goto out1; 2487 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2488 goto out1; 2489 } else { 2490 KASSERT(strlen(patharg) < MAXPATHLEN); 2491 strcpy(path, patharg); 2492 linkpb = pathbuf_create(link); 2493 if (linkpb == NULL) { 2494 error = ENOMEM; 2495 goto out1; 2496 } 2497 } 2498 ktrkuser("symlink-target", path, strlen(path)); 2499 2500 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2501 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2502 goto out2; 2503 if (nd.ni_vp) { 2504 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2505 if (nd.ni_dvp == nd.ni_vp) 2506 vrele(nd.ni_dvp); 2507 else 2508 vput(nd.ni_dvp); 2509 vrele(nd.ni_vp); 2510 error = EEXIST; 2511 goto out2; 2512 } 2513 vattr_null(&vattr); 2514 vattr.va_type = VLNK; 2515 /* We will read cwdi->cwdi_cmask unlocked. */ 2516 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2517 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2518 if (error == 0) 2519 vrele(nd.ni_vp); 2520 vput(nd.ni_dvp); 2521 out2: 2522 pathbuf_destroy(linkpb); 2523 out1: 2524 PNBUF_PUT(path); 2525 return (error); 2526 } 2527 2528 /* 2529 * Make a symbolic link. 2530 */ 2531 /* ARGSUSED */ 2532 int 2533 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2534 { 2535 /* { 2536 syscallarg(const char *) path; 2537 syscallarg(const char *) link; 2538 } */ 2539 2540 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2541 UIO_USERSPACE); 2542 } 2543 2544 int 2545 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2546 register_t *retval) 2547 { 2548 /* { 2549 syscallarg(const char *) path1; 2550 syscallarg(int) fd; 2551 syscallarg(const char *) path2; 2552 } */ 2553 2554 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2555 SCARG(uap, path2), UIO_USERSPACE); 2556 } 2557 2558 /* 2559 * Delete a whiteout from the filesystem. 2560 */ 2561 /* ARGSUSED */ 2562 int 2563 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2564 { 2565 /* { 2566 syscallarg(const char *) path; 2567 } */ 2568 int error; 2569 struct pathbuf *pb; 2570 struct nameidata nd; 2571 2572 error = pathbuf_copyin(SCARG(uap, path), &pb); 2573 if (error) { 2574 return error; 2575 } 2576 2577 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2578 error = namei(&nd); 2579 if (error) { 2580 pathbuf_destroy(pb); 2581 return (error); 2582 } 2583 2584 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2585 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2586 if (nd.ni_dvp == nd.ni_vp) 2587 vrele(nd.ni_dvp); 2588 else 2589 vput(nd.ni_dvp); 2590 if (nd.ni_vp) 2591 vrele(nd.ni_vp); 2592 pathbuf_destroy(pb); 2593 return (EEXIST); 2594 } 2595 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2596 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2597 vput(nd.ni_dvp); 2598 pathbuf_destroy(pb); 2599 return (error); 2600 } 2601 2602 /* 2603 * Delete a name from the filesystem. 2604 */ 2605 /* ARGSUSED */ 2606 int 2607 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2608 { 2609 /* { 2610 syscallarg(const char *) path; 2611 } */ 2612 2613 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2614 } 2615 2616 int 2617 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2618 register_t *retval) 2619 { 2620 /* { 2621 syscallarg(int) fd; 2622 syscallarg(const char *) path; 2623 syscallarg(int) flag; 2624 } */ 2625 2626 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2627 SCARG(uap, flag), UIO_USERSPACE); 2628 } 2629 2630 int 2631 do_sys_unlink(const char *arg, enum uio_seg seg) 2632 { 2633 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2634 } 2635 2636 static int 2637 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2638 enum uio_seg seg) 2639 { 2640 struct vnode *vp; 2641 int error; 2642 struct pathbuf *pb; 2643 struct nameidata nd; 2644 const char *pathstring; 2645 2646 KASSERT(l != NULL || fdat == AT_FDCWD); 2647 2648 error = pathbuf_maybe_copyin(arg, seg, &pb); 2649 if (error) { 2650 return error; 2651 } 2652 pathstring = pathbuf_stringcopy_get(pb); 2653 if (pathstring == NULL) { 2654 pathbuf_destroy(pb); 2655 return ENOMEM; 2656 } 2657 2658 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2659 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2660 goto out; 2661 vp = nd.ni_vp; 2662 2663 /* 2664 * The root of a mounted filesystem cannot be deleted. 2665 */ 2666 if ((vp->v_vflag & VV_ROOT) != 0) { 2667 error = EBUSY; 2668 goto abort; 2669 } 2670 2671 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2672 error = EBUSY; 2673 goto abort; 2674 } 2675 2676 /* 2677 * No rmdir "." please. 2678 */ 2679 if (nd.ni_dvp == vp) { 2680 error = EINVAL; 2681 goto abort; 2682 } 2683 2684 /* 2685 * AT_REMOVEDIR is required to remove a directory 2686 */ 2687 if (vp->v_type == VDIR) { 2688 if (!(flags & AT_REMOVEDIR)) { 2689 error = EPERM; 2690 goto abort; 2691 } else { 2692 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2693 goto out; 2694 } 2695 } 2696 2697 /* 2698 * Starting here we only deal with non directories. 2699 */ 2700 if (flags & AT_REMOVEDIR) { 2701 error = ENOTDIR; 2702 goto abort; 2703 } 2704 2705 #if NVERIEXEC > 0 2706 /* Handle remove requests for veriexec entries. */ 2707 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2708 goto abort; 2709 } 2710 #endif /* NVERIEXEC > 0 */ 2711 2712 #ifdef FILEASSOC 2713 (void)fileassoc_file_delete(vp); 2714 #endif /* FILEASSOC */ 2715 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2716 goto out; 2717 2718 abort: 2719 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2720 if (nd.ni_dvp == vp) 2721 vrele(nd.ni_dvp); 2722 else 2723 vput(nd.ni_dvp); 2724 vput(vp); 2725 2726 out: 2727 pathbuf_stringcopy_put(pb, pathstring); 2728 pathbuf_destroy(pb); 2729 return (error); 2730 } 2731 2732 /* 2733 * Reposition read/write file offset. 2734 */ 2735 int 2736 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2737 { 2738 /* { 2739 syscallarg(int) fd; 2740 syscallarg(int) pad; 2741 syscallarg(off_t) offset; 2742 syscallarg(int) whence; 2743 } */ 2744 kauth_cred_t cred = l->l_cred; 2745 file_t *fp; 2746 struct vnode *vp; 2747 struct vattr vattr; 2748 off_t newoff; 2749 int error, fd; 2750 2751 fd = SCARG(uap, fd); 2752 2753 if ((fp = fd_getfile(fd)) == NULL) 2754 return (EBADF); 2755 2756 vp = fp->f_vnode; 2757 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2758 error = ESPIPE; 2759 goto out; 2760 } 2761 2762 switch (SCARG(uap, whence)) { 2763 case SEEK_CUR: 2764 newoff = fp->f_offset + SCARG(uap, offset); 2765 break; 2766 case SEEK_END: 2767 vn_lock(vp, LK_SHARED | LK_RETRY); 2768 error = VOP_GETATTR(vp, &vattr, cred); 2769 VOP_UNLOCK(vp); 2770 if (error) { 2771 goto out; 2772 } 2773 newoff = SCARG(uap, offset) + vattr.va_size; 2774 break; 2775 case SEEK_SET: 2776 newoff = SCARG(uap, offset); 2777 break; 2778 default: 2779 error = EINVAL; 2780 goto out; 2781 } 2782 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2783 *(off_t *)retval = fp->f_offset = newoff; 2784 } 2785 out: 2786 fd_putfile(fd); 2787 return (error); 2788 } 2789 2790 /* 2791 * Positional read system call. 2792 */ 2793 int 2794 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2795 { 2796 /* { 2797 syscallarg(int) fd; 2798 syscallarg(void *) buf; 2799 syscallarg(size_t) nbyte; 2800 syscallarg(off_t) offset; 2801 } */ 2802 file_t *fp; 2803 struct vnode *vp; 2804 off_t offset; 2805 int error, fd = SCARG(uap, fd); 2806 2807 if ((fp = fd_getfile(fd)) == NULL) 2808 return (EBADF); 2809 2810 if ((fp->f_flag & FREAD) == 0) { 2811 fd_putfile(fd); 2812 return (EBADF); 2813 } 2814 2815 vp = fp->f_vnode; 2816 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2817 error = ESPIPE; 2818 goto out; 2819 } 2820 2821 offset = SCARG(uap, offset); 2822 2823 /* 2824 * XXX This works because no file systems actually 2825 * XXX take any action on the seek operation. 2826 */ 2827 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2828 goto out; 2829 2830 /* dofileread() will unuse the descriptor for us */ 2831 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2832 &offset, 0, retval)); 2833 2834 out: 2835 fd_putfile(fd); 2836 return (error); 2837 } 2838 2839 /* 2840 * Positional scatter read system call. 2841 */ 2842 int 2843 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2844 { 2845 /* { 2846 syscallarg(int) fd; 2847 syscallarg(const struct iovec *) iovp; 2848 syscallarg(int) iovcnt; 2849 syscallarg(off_t) offset; 2850 } */ 2851 off_t offset = SCARG(uap, offset); 2852 2853 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2854 SCARG(uap, iovcnt), &offset, 0, retval); 2855 } 2856 2857 /* 2858 * Positional write system call. 2859 */ 2860 int 2861 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2862 { 2863 /* { 2864 syscallarg(int) fd; 2865 syscallarg(const void *) buf; 2866 syscallarg(size_t) nbyte; 2867 syscallarg(off_t) offset; 2868 } */ 2869 file_t *fp; 2870 struct vnode *vp; 2871 off_t offset; 2872 int error, fd = SCARG(uap, fd); 2873 2874 if ((fp = fd_getfile(fd)) == NULL) 2875 return (EBADF); 2876 2877 if ((fp->f_flag & FWRITE) == 0) { 2878 fd_putfile(fd); 2879 return (EBADF); 2880 } 2881 2882 vp = fp->f_vnode; 2883 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2884 error = ESPIPE; 2885 goto out; 2886 } 2887 2888 offset = SCARG(uap, offset); 2889 2890 /* 2891 * XXX This works because no file systems actually 2892 * XXX take any action on the seek operation. 2893 */ 2894 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2895 goto out; 2896 2897 /* dofilewrite() will unuse the descriptor for us */ 2898 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2899 &offset, 0, retval)); 2900 2901 out: 2902 fd_putfile(fd); 2903 return (error); 2904 } 2905 2906 /* 2907 * Positional gather write system call. 2908 */ 2909 int 2910 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2911 { 2912 /* { 2913 syscallarg(int) fd; 2914 syscallarg(const struct iovec *) iovp; 2915 syscallarg(int) iovcnt; 2916 syscallarg(off_t) offset; 2917 } */ 2918 off_t offset = SCARG(uap, offset); 2919 2920 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2921 SCARG(uap, iovcnt), &offset, 0, retval); 2922 } 2923 2924 /* 2925 * Check access permissions. 2926 */ 2927 int 2928 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2929 { 2930 /* { 2931 syscallarg(const char *) path; 2932 syscallarg(int) flags; 2933 } */ 2934 2935 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2936 SCARG(uap, flags), 0); 2937 } 2938 2939 int 2940 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2941 int mode, int flags) 2942 { 2943 kauth_cred_t cred; 2944 struct vnode *vp; 2945 int error, nd_flag, vmode; 2946 struct pathbuf *pb; 2947 struct nameidata nd; 2948 2949 CTASSERT(F_OK == 0); 2950 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2951 /* nonsense mode */ 2952 return EINVAL; 2953 } 2954 2955 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2956 if (flags & AT_SYMLINK_NOFOLLOW) 2957 nd_flag &= ~FOLLOW; 2958 2959 error = pathbuf_copyin(path, &pb); 2960 if (error) 2961 return error; 2962 2963 NDINIT(&nd, LOOKUP, nd_flag, pb); 2964 2965 /* Override default credentials */ 2966 cred = kauth_cred_dup(l->l_cred); 2967 if (!(flags & AT_EACCESS)) { 2968 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2969 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2970 } 2971 nd.ni_cnd.cn_cred = cred; 2972 2973 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2974 pathbuf_destroy(pb); 2975 goto out; 2976 } 2977 vp = nd.ni_vp; 2978 pathbuf_destroy(pb); 2979 2980 /* Flags == 0 means only check for existence. */ 2981 if (mode) { 2982 vmode = 0; 2983 if (mode & R_OK) 2984 vmode |= VREAD; 2985 if (mode & W_OK) 2986 vmode |= VWRITE; 2987 if (mode & X_OK) 2988 vmode |= VEXEC; 2989 2990 error = VOP_ACCESS(vp, vmode, cred); 2991 if (!error && (vmode & VWRITE)) 2992 error = vn_writechk(vp); 2993 } 2994 vput(vp); 2995 out: 2996 kauth_cred_free(cred); 2997 return (error); 2998 } 2999 3000 int 3001 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3002 register_t *retval) 3003 { 3004 /* { 3005 syscallarg(int) fd; 3006 syscallarg(const char *) path; 3007 syscallarg(int) amode; 3008 syscallarg(int) flag; 3009 } */ 3010 3011 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3012 SCARG(uap, amode), SCARG(uap, flag)); 3013 } 3014 3015 /* 3016 * Common code for all sys_stat functions, including compat versions. 3017 */ 3018 int 3019 do_sys_stat(const char *userpath, unsigned int nd_flag, 3020 struct stat *sb) 3021 { 3022 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3023 } 3024 3025 int 3026 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3027 unsigned int nd_flag, struct stat *sb) 3028 { 3029 int error; 3030 struct pathbuf *pb; 3031 struct nameidata nd; 3032 3033 KASSERT(l != NULL || fdat == AT_FDCWD); 3034 3035 error = pathbuf_copyin(userpath, &pb); 3036 if (error) { 3037 return error; 3038 } 3039 3040 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3041 3042 error = fd_nameiat(l, fdat, &nd); 3043 if (error != 0) { 3044 pathbuf_destroy(pb); 3045 return error; 3046 } 3047 error = vn_stat(nd.ni_vp, sb); 3048 vput(nd.ni_vp); 3049 pathbuf_destroy(pb); 3050 return error; 3051 } 3052 3053 /* 3054 * Get file status; this version follows links. 3055 */ 3056 /* ARGSUSED */ 3057 int 3058 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3059 { 3060 /* { 3061 syscallarg(const char *) path; 3062 syscallarg(struct stat *) ub; 3063 } */ 3064 struct stat sb; 3065 int error; 3066 3067 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3068 if (error) 3069 return error; 3070 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3071 } 3072 3073 /* 3074 * Get file status; this version does not follow links. 3075 */ 3076 /* ARGSUSED */ 3077 int 3078 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3079 { 3080 /* { 3081 syscallarg(const char *) path; 3082 syscallarg(struct stat *) ub; 3083 } */ 3084 struct stat sb; 3085 int error; 3086 3087 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3088 if (error) 3089 return error; 3090 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3091 } 3092 3093 int 3094 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3095 register_t *retval) 3096 { 3097 /* { 3098 syscallarg(int) fd; 3099 syscallarg(const char *) path; 3100 syscallarg(struct stat *) buf; 3101 syscallarg(int) flag; 3102 } */ 3103 unsigned int nd_flag; 3104 struct stat sb; 3105 int error; 3106 3107 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3108 nd_flag = NOFOLLOW; 3109 else 3110 nd_flag = FOLLOW; 3111 3112 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3113 &sb); 3114 if (error) 3115 return error; 3116 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3117 } 3118 3119 /* 3120 * Get configurable pathname variables. 3121 */ 3122 /* ARGSUSED */ 3123 int 3124 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3125 { 3126 /* { 3127 syscallarg(const char *) path; 3128 syscallarg(int) name; 3129 } */ 3130 int error; 3131 struct pathbuf *pb; 3132 struct nameidata nd; 3133 3134 error = pathbuf_copyin(SCARG(uap, path), &pb); 3135 if (error) { 3136 return error; 3137 } 3138 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3139 if ((error = namei(&nd)) != 0) { 3140 pathbuf_destroy(pb); 3141 return (error); 3142 } 3143 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3144 vput(nd.ni_vp); 3145 pathbuf_destroy(pb); 3146 return (error); 3147 } 3148 3149 /* 3150 * Return target name of a symbolic link. 3151 */ 3152 /* ARGSUSED */ 3153 int 3154 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3155 register_t *retval) 3156 { 3157 /* { 3158 syscallarg(const char *) path; 3159 syscallarg(char *) buf; 3160 syscallarg(size_t) count; 3161 } */ 3162 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3163 SCARG(uap, buf), SCARG(uap, count), retval); 3164 } 3165 3166 static int 3167 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3168 size_t count, register_t *retval) 3169 { 3170 struct vnode *vp; 3171 struct iovec aiov; 3172 struct uio auio; 3173 int error; 3174 struct pathbuf *pb; 3175 struct nameidata nd; 3176 3177 error = pathbuf_copyin(path, &pb); 3178 if (error) { 3179 return error; 3180 } 3181 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3182 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3183 pathbuf_destroy(pb); 3184 return error; 3185 } 3186 vp = nd.ni_vp; 3187 pathbuf_destroy(pb); 3188 if (vp->v_type != VLNK) 3189 error = EINVAL; 3190 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3191 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3192 aiov.iov_base = buf; 3193 aiov.iov_len = count; 3194 auio.uio_iov = &aiov; 3195 auio.uio_iovcnt = 1; 3196 auio.uio_offset = 0; 3197 auio.uio_rw = UIO_READ; 3198 KASSERT(l == curlwp); 3199 auio.uio_vmspace = l->l_proc->p_vmspace; 3200 auio.uio_resid = count; 3201 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3202 *retval = count - auio.uio_resid; 3203 } 3204 vput(vp); 3205 return (error); 3206 } 3207 3208 int 3209 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3210 register_t *retval) 3211 { 3212 /* { 3213 syscallarg(int) fd; 3214 syscallarg(const char *) path; 3215 syscallarg(char *) buf; 3216 syscallarg(size_t) bufsize; 3217 } */ 3218 3219 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3220 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3221 } 3222 3223 /* 3224 * Change flags of a file given a path name. 3225 */ 3226 /* ARGSUSED */ 3227 int 3228 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3229 { 3230 /* { 3231 syscallarg(const char *) path; 3232 syscallarg(u_long) flags; 3233 } */ 3234 struct vnode *vp; 3235 int error; 3236 3237 error = namei_simple_user(SCARG(uap, path), 3238 NSM_FOLLOW_TRYEMULROOT, &vp); 3239 if (error != 0) 3240 return (error); 3241 error = change_flags(vp, SCARG(uap, flags), l); 3242 vput(vp); 3243 return (error); 3244 } 3245 3246 /* 3247 * Change flags of a file given a file descriptor. 3248 */ 3249 /* ARGSUSED */ 3250 int 3251 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3252 { 3253 /* { 3254 syscallarg(int) fd; 3255 syscallarg(u_long) flags; 3256 } */ 3257 struct vnode *vp; 3258 file_t *fp; 3259 int error; 3260 3261 /* fd_getvnode() will use the descriptor for us */ 3262 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3263 return (error); 3264 vp = fp->f_vnode; 3265 error = change_flags(vp, SCARG(uap, flags), l); 3266 VOP_UNLOCK(vp); 3267 fd_putfile(SCARG(uap, fd)); 3268 return (error); 3269 } 3270 3271 /* 3272 * Change flags of a file given a path name; this version does 3273 * not follow links. 3274 */ 3275 int 3276 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3277 { 3278 /* { 3279 syscallarg(const char *) path; 3280 syscallarg(u_long) flags; 3281 } */ 3282 struct vnode *vp; 3283 int error; 3284 3285 error = namei_simple_user(SCARG(uap, path), 3286 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3287 if (error != 0) 3288 return (error); 3289 error = change_flags(vp, SCARG(uap, flags), l); 3290 vput(vp); 3291 return (error); 3292 } 3293 3294 /* 3295 * Common routine to change flags of a file. 3296 */ 3297 int 3298 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3299 { 3300 struct vattr vattr; 3301 int error; 3302 3303 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3304 3305 vattr_null(&vattr); 3306 vattr.va_flags = flags; 3307 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3308 3309 return (error); 3310 } 3311 3312 /* 3313 * Change mode of a file given path name; this version follows links. 3314 */ 3315 /* ARGSUSED */ 3316 int 3317 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3318 { 3319 /* { 3320 syscallarg(const char *) path; 3321 syscallarg(int) mode; 3322 } */ 3323 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3324 SCARG(uap, mode), 0); 3325 } 3326 3327 int 3328 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3329 { 3330 int error; 3331 struct vnode *vp; 3332 namei_simple_flags_t ns_flag; 3333 3334 if (flags & AT_SYMLINK_NOFOLLOW) 3335 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3336 else 3337 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3338 3339 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3340 if (error != 0) 3341 return error; 3342 3343 error = change_mode(vp, mode, l); 3344 3345 vrele(vp); 3346 3347 return (error); 3348 } 3349 3350 /* 3351 * Change mode of a file given a file descriptor. 3352 */ 3353 /* ARGSUSED */ 3354 int 3355 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3356 { 3357 /* { 3358 syscallarg(int) fd; 3359 syscallarg(int) mode; 3360 } */ 3361 file_t *fp; 3362 int error; 3363 3364 /* fd_getvnode() will use the descriptor for us */ 3365 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3366 return (error); 3367 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3368 fd_putfile(SCARG(uap, fd)); 3369 return (error); 3370 } 3371 3372 int 3373 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3374 register_t *retval) 3375 { 3376 /* { 3377 syscallarg(int) fd; 3378 syscallarg(const char *) path; 3379 syscallarg(int) mode; 3380 syscallarg(int) flag; 3381 } */ 3382 3383 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3384 SCARG(uap, mode), SCARG(uap, flag)); 3385 } 3386 3387 /* 3388 * Change mode of a file given path name; this version does not follow links. 3389 */ 3390 /* ARGSUSED */ 3391 int 3392 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3393 { 3394 /* { 3395 syscallarg(const char *) path; 3396 syscallarg(int) mode; 3397 } */ 3398 int error; 3399 struct vnode *vp; 3400 3401 error = namei_simple_user(SCARG(uap, path), 3402 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3403 if (error != 0) 3404 return (error); 3405 3406 error = change_mode(vp, SCARG(uap, mode), l); 3407 3408 vrele(vp); 3409 return (error); 3410 } 3411 3412 /* 3413 * Common routine to set mode given a vnode. 3414 */ 3415 static int 3416 change_mode(struct vnode *vp, int mode, struct lwp *l) 3417 { 3418 struct vattr vattr; 3419 int error; 3420 3421 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3422 vattr_null(&vattr); 3423 vattr.va_mode = mode & ALLPERMS; 3424 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3425 VOP_UNLOCK(vp); 3426 return (error); 3427 } 3428 3429 /* 3430 * Set ownership given a path name; this version follows links. 3431 */ 3432 /* ARGSUSED */ 3433 int 3434 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3435 { 3436 /* { 3437 syscallarg(const char *) path; 3438 syscallarg(uid_t) uid; 3439 syscallarg(gid_t) gid; 3440 } */ 3441 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3442 SCARG(uap, gid), 0); 3443 } 3444 3445 int 3446 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3447 gid_t gid, int flags) 3448 { 3449 int error; 3450 struct vnode *vp; 3451 namei_simple_flags_t ns_flag; 3452 3453 if (flags & AT_SYMLINK_NOFOLLOW) 3454 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3455 else 3456 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3457 3458 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3459 if (error != 0) 3460 return error; 3461 3462 error = change_owner(vp, uid, gid, l, 0); 3463 3464 vrele(vp); 3465 3466 return (error); 3467 } 3468 3469 /* 3470 * Set ownership given a path name; this version follows links. 3471 * Provides POSIX semantics. 3472 */ 3473 /* ARGSUSED */ 3474 int 3475 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3476 { 3477 /* { 3478 syscallarg(const char *) path; 3479 syscallarg(uid_t) uid; 3480 syscallarg(gid_t) gid; 3481 } */ 3482 int error; 3483 struct vnode *vp; 3484 3485 error = namei_simple_user(SCARG(uap, path), 3486 NSM_FOLLOW_TRYEMULROOT, &vp); 3487 if (error != 0) 3488 return (error); 3489 3490 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3491 3492 vrele(vp); 3493 return (error); 3494 } 3495 3496 /* 3497 * Set ownership given a file descriptor. 3498 */ 3499 /* ARGSUSED */ 3500 int 3501 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3502 { 3503 /* { 3504 syscallarg(int) fd; 3505 syscallarg(uid_t) uid; 3506 syscallarg(gid_t) gid; 3507 } */ 3508 int error; 3509 file_t *fp; 3510 3511 /* fd_getvnode() will use the descriptor for us */ 3512 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3513 return (error); 3514 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3515 l, 0); 3516 fd_putfile(SCARG(uap, fd)); 3517 return (error); 3518 } 3519 3520 int 3521 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3522 register_t *retval) 3523 { 3524 /* { 3525 syscallarg(int) fd; 3526 syscallarg(const char *) path; 3527 syscallarg(uid_t) owner; 3528 syscallarg(gid_t) group; 3529 syscallarg(int) flag; 3530 } */ 3531 3532 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3533 SCARG(uap, owner), SCARG(uap, group), 3534 SCARG(uap, flag)); 3535 } 3536 3537 /* 3538 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3539 */ 3540 /* ARGSUSED */ 3541 int 3542 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3543 { 3544 /* { 3545 syscallarg(int) fd; 3546 syscallarg(uid_t) uid; 3547 syscallarg(gid_t) gid; 3548 } */ 3549 int error; 3550 file_t *fp; 3551 3552 /* fd_getvnode() will use the descriptor for us */ 3553 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3554 return (error); 3555 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3556 l, 1); 3557 fd_putfile(SCARG(uap, fd)); 3558 return (error); 3559 } 3560 3561 /* 3562 * Set ownership given a path name; this version does not follow links. 3563 */ 3564 /* ARGSUSED */ 3565 int 3566 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3567 { 3568 /* { 3569 syscallarg(const char *) path; 3570 syscallarg(uid_t) uid; 3571 syscallarg(gid_t) gid; 3572 } */ 3573 int error; 3574 struct vnode *vp; 3575 3576 error = namei_simple_user(SCARG(uap, path), 3577 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3578 if (error != 0) 3579 return (error); 3580 3581 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3582 3583 vrele(vp); 3584 return (error); 3585 } 3586 3587 /* 3588 * Set ownership given a path name; this version does not follow links. 3589 * Provides POSIX/XPG semantics. 3590 */ 3591 /* ARGSUSED */ 3592 int 3593 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3594 { 3595 /* { 3596 syscallarg(const char *) path; 3597 syscallarg(uid_t) uid; 3598 syscallarg(gid_t) gid; 3599 } */ 3600 int error; 3601 struct vnode *vp; 3602 3603 error = namei_simple_user(SCARG(uap, path), 3604 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3605 if (error != 0) 3606 return (error); 3607 3608 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3609 3610 vrele(vp); 3611 return (error); 3612 } 3613 3614 /* 3615 * Common routine to set ownership given a vnode. 3616 */ 3617 static int 3618 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3619 int posix_semantics) 3620 { 3621 struct vattr vattr; 3622 mode_t newmode; 3623 int error; 3624 3625 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3626 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3627 goto out; 3628 3629 #define CHANGED(x) ((int)(x) != -1) 3630 newmode = vattr.va_mode; 3631 if (posix_semantics) { 3632 /* 3633 * POSIX/XPG semantics: if the caller is not the super-user, 3634 * clear set-user-id and set-group-id bits. Both POSIX and 3635 * the XPG consider the behaviour for calls by the super-user 3636 * implementation-defined; we leave the set-user-id and set- 3637 * group-id settings intact in that case. 3638 */ 3639 if (vattr.va_mode & S_ISUID) { 3640 if (kauth_authorize_vnode(l->l_cred, 3641 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3642 newmode &= ~S_ISUID; 3643 } 3644 if (vattr.va_mode & S_ISGID) { 3645 if (kauth_authorize_vnode(l->l_cred, 3646 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3647 newmode &= ~S_ISGID; 3648 } 3649 } else { 3650 /* 3651 * NetBSD semantics: when changing owner and/or group, 3652 * clear the respective bit(s). 3653 */ 3654 if (CHANGED(uid)) 3655 newmode &= ~S_ISUID; 3656 if (CHANGED(gid)) 3657 newmode &= ~S_ISGID; 3658 } 3659 /* Update va_mode iff altered. */ 3660 if (vattr.va_mode == newmode) 3661 newmode = VNOVAL; 3662 3663 vattr_null(&vattr); 3664 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3665 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3666 vattr.va_mode = newmode; 3667 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3668 #undef CHANGED 3669 3670 out: 3671 VOP_UNLOCK(vp); 3672 return (error); 3673 } 3674 3675 /* 3676 * Set the access and modification times given a path name; this 3677 * version follows links. 3678 */ 3679 /* ARGSUSED */ 3680 int 3681 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3682 register_t *retval) 3683 { 3684 /* { 3685 syscallarg(const char *) path; 3686 syscallarg(const struct timeval *) tptr; 3687 } */ 3688 3689 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3690 SCARG(uap, tptr), UIO_USERSPACE); 3691 } 3692 3693 /* 3694 * Set the access and modification times given a file descriptor. 3695 */ 3696 /* ARGSUSED */ 3697 int 3698 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3699 register_t *retval) 3700 { 3701 /* { 3702 syscallarg(int) fd; 3703 syscallarg(const struct timeval *) tptr; 3704 } */ 3705 int error; 3706 file_t *fp; 3707 3708 /* fd_getvnode() will use the descriptor for us */ 3709 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3710 return (error); 3711 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3712 UIO_USERSPACE); 3713 fd_putfile(SCARG(uap, fd)); 3714 return (error); 3715 } 3716 3717 int 3718 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3719 register_t *retval) 3720 { 3721 /* { 3722 syscallarg(int) fd; 3723 syscallarg(const struct timespec *) tptr; 3724 } */ 3725 int error; 3726 file_t *fp; 3727 3728 /* fd_getvnode() will use the descriptor for us */ 3729 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3730 return (error); 3731 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3732 SCARG(uap, tptr), UIO_USERSPACE); 3733 fd_putfile(SCARG(uap, fd)); 3734 return (error); 3735 } 3736 3737 /* 3738 * Set the access and modification times given a path name; this 3739 * version does not follow links. 3740 */ 3741 int 3742 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3743 register_t *retval) 3744 { 3745 /* { 3746 syscallarg(const char *) path; 3747 syscallarg(const struct timeval *) tptr; 3748 } */ 3749 3750 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3751 SCARG(uap, tptr), UIO_USERSPACE); 3752 } 3753 3754 int 3755 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3756 register_t *retval) 3757 { 3758 /* { 3759 syscallarg(int) fd; 3760 syscallarg(const char *) path; 3761 syscallarg(const struct timespec *) tptr; 3762 syscallarg(int) flag; 3763 } */ 3764 int follow; 3765 const struct timespec *tptr; 3766 int error; 3767 3768 tptr = SCARG(uap, tptr); 3769 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3770 3771 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3772 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3773 3774 return error; 3775 } 3776 3777 /* 3778 * Common routine to set access and modification times given a vnode. 3779 */ 3780 int 3781 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3782 const struct timespec *tptr, enum uio_seg seg) 3783 { 3784 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3785 } 3786 3787 int 3788 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3789 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3790 { 3791 struct vattr vattr; 3792 int error, dorele = 0; 3793 namei_simple_flags_t sflags; 3794 bool vanull, setbirthtime; 3795 struct timespec ts[2]; 3796 3797 KASSERT(l != NULL || fdat == AT_FDCWD); 3798 3799 /* 3800 * I have checked all callers and they pass either FOLLOW, 3801 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3802 * is 0. More to the point, they don't pass anything else. 3803 * Let's keep it that way at least until the namei interfaces 3804 * are fully sanitized. 3805 */ 3806 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3807 sflags = (flag == FOLLOW) ? 3808 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3809 3810 if (tptr == NULL) { 3811 vanull = true; 3812 nanotime(&ts[0]); 3813 ts[1] = ts[0]; 3814 } else { 3815 vanull = false; 3816 if (seg != UIO_SYSSPACE) { 3817 error = copyin(tptr, ts, sizeof (ts)); 3818 if (error != 0) 3819 return error; 3820 } else { 3821 ts[0] = tptr[0]; 3822 ts[1] = tptr[1]; 3823 } 3824 } 3825 3826 if (ts[0].tv_nsec == UTIME_NOW) { 3827 nanotime(&ts[0]); 3828 if (ts[1].tv_nsec == UTIME_NOW) { 3829 vanull = true; 3830 ts[1] = ts[0]; 3831 } 3832 } else if (ts[1].tv_nsec == UTIME_NOW) 3833 nanotime(&ts[1]); 3834 3835 if (vp == NULL) { 3836 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3837 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3838 if (error != 0) 3839 return error; 3840 dorele = 1; 3841 } 3842 3843 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3844 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3845 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3846 vattr_null(&vattr); 3847 3848 if (ts[0].tv_nsec != UTIME_OMIT) 3849 vattr.va_atime = ts[0]; 3850 3851 if (ts[1].tv_nsec != UTIME_OMIT) { 3852 vattr.va_mtime = ts[1]; 3853 if (setbirthtime) 3854 vattr.va_birthtime = ts[1]; 3855 } 3856 3857 if (vanull) 3858 vattr.va_vaflags |= VA_UTIMES_NULL; 3859 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3860 VOP_UNLOCK(vp); 3861 3862 if (dorele != 0) 3863 vrele(vp); 3864 3865 return error; 3866 } 3867 3868 int 3869 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3870 const struct timeval *tptr, enum uio_seg seg) 3871 { 3872 struct timespec ts[2]; 3873 struct timespec *tsptr = NULL; 3874 int error; 3875 3876 if (tptr != NULL) { 3877 struct timeval tv[2]; 3878 3879 if (seg != UIO_SYSSPACE) { 3880 error = copyin(tptr, tv, sizeof (tv)); 3881 if (error != 0) 3882 return error; 3883 tptr = tv; 3884 } 3885 3886 if ((tv[0].tv_usec == UTIME_NOW) || 3887 (tv[0].tv_usec == UTIME_OMIT)) 3888 ts[0].tv_nsec = tv[0].tv_usec; 3889 else 3890 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3891 3892 if ((tv[1].tv_usec == UTIME_NOW) || 3893 (tv[1].tv_usec == UTIME_OMIT)) 3894 ts[1].tv_nsec = tv[1].tv_usec; 3895 else 3896 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3897 3898 tsptr = &ts[0]; 3899 } 3900 3901 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3902 } 3903 3904 /* 3905 * Truncate a file given its path name. 3906 */ 3907 /* ARGSUSED */ 3908 int 3909 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3910 { 3911 /* { 3912 syscallarg(const char *) path; 3913 syscallarg(int) pad; 3914 syscallarg(off_t) length; 3915 } */ 3916 struct vnode *vp; 3917 struct vattr vattr; 3918 int error; 3919 3920 if (SCARG(uap, length) < 0) 3921 return EINVAL; 3922 3923 error = namei_simple_user(SCARG(uap, path), 3924 NSM_FOLLOW_TRYEMULROOT, &vp); 3925 if (error != 0) 3926 return (error); 3927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3928 if (vp->v_type == VDIR) 3929 error = EISDIR; 3930 else if ((error = vn_writechk(vp)) == 0 && 3931 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3932 vattr_null(&vattr); 3933 vattr.va_size = SCARG(uap, length); 3934 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3935 } 3936 vput(vp); 3937 return (error); 3938 } 3939 3940 /* 3941 * Truncate a file given a file descriptor. 3942 */ 3943 /* ARGSUSED */ 3944 int 3945 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3946 { 3947 /* { 3948 syscallarg(int) fd; 3949 syscallarg(int) pad; 3950 syscallarg(off_t) length; 3951 } */ 3952 struct vattr vattr; 3953 struct vnode *vp; 3954 file_t *fp; 3955 int error; 3956 3957 if (SCARG(uap, length) < 0) 3958 return EINVAL; 3959 3960 /* fd_getvnode() will use the descriptor for us */ 3961 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3962 return (error); 3963 if ((fp->f_flag & FWRITE) == 0) { 3964 error = EINVAL; 3965 goto out; 3966 } 3967 vp = fp->f_vnode; 3968 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3969 if (vp->v_type == VDIR) 3970 error = EISDIR; 3971 else if ((error = vn_writechk(vp)) == 0) { 3972 vattr_null(&vattr); 3973 vattr.va_size = SCARG(uap, length); 3974 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3975 } 3976 VOP_UNLOCK(vp); 3977 out: 3978 fd_putfile(SCARG(uap, fd)); 3979 return (error); 3980 } 3981 3982 /* 3983 * Sync an open file. 3984 */ 3985 /* ARGSUSED */ 3986 int 3987 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3988 { 3989 /* { 3990 syscallarg(int) fd; 3991 } */ 3992 struct vnode *vp; 3993 file_t *fp; 3994 int error; 3995 3996 /* fd_getvnode() will use the descriptor for us */ 3997 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3998 return (error); 3999 vp = fp->f_vnode; 4000 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4001 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4002 VOP_UNLOCK(vp); 4003 fd_putfile(SCARG(uap, fd)); 4004 return (error); 4005 } 4006 4007 /* 4008 * Sync a range of file data. API modeled after that found in AIX. 4009 * 4010 * FDATASYNC indicates that we need only save enough metadata to be able 4011 * to re-read the written data. Note we duplicate AIX's requirement that 4012 * the file be open for writing. 4013 */ 4014 /* ARGSUSED */ 4015 int 4016 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4017 { 4018 /* { 4019 syscallarg(int) fd; 4020 syscallarg(int) flags; 4021 syscallarg(off_t) start; 4022 syscallarg(off_t) length; 4023 } */ 4024 struct vnode *vp; 4025 file_t *fp; 4026 int flags, nflags; 4027 off_t s, e, len; 4028 int error; 4029 4030 /* fd_getvnode() will use the descriptor for us */ 4031 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4032 return (error); 4033 4034 if ((fp->f_flag & FWRITE) == 0) { 4035 error = EBADF; 4036 goto out; 4037 } 4038 4039 flags = SCARG(uap, flags); 4040 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4041 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4042 error = EINVAL; 4043 goto out; 4044 } 4045 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4046 if (flags & FDATASYNC) 4047 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4048 else 4049 nflags = FSYNC_WAIT; 4050 if (flags & FDISKSYNC) 4051 nflags |= FSYNC_CACHE; 4052 4053 len = SCARG(uap, length); 4054 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4055 if (len) { 4056 s = SCARG(uap, start); 4057 e = s + len; 4058 if (e < s) { 4059 error = EINVAL; 4060 goto out; 4061 } 4062 } else { 4063 e = 0; 4064 s = 0; 4065 } 4066 4067 vp = fp->f_vnode; 4068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4069 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4070 VOP_UNLOCK(vp); 4071 out: 4072 fd_putfile(SCARG(uap, fd)); 4073 return (error); 4074 } 4075 4076 /* 4077 * Sync the data of an open file. 4078 */ 4079 /* ARGSUSED */ 4080 int 4081 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4082 { 4083 /* { 4084 syscallarg(int) fd; 4085 } */ 4086 struct vnode *vp; 4087 file_t *fp; 4088 int error; 4089 4090 /* fd_getvnode() will use the descriptor for us */ 4091 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4092 return (error); 4093 if ((fp->f_flag & FWRITE) == 0) { 4094 fd_putfile(SCARG(uap, fd)); 4095 return (EBADF); 4096 } 4097 vp = fp->f_vnode; 4098 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4099 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4100 VOP_UNLOCK(vp); 4101 fd_putfile(SCARG(uap, fd)); 4102 return (error); 4103 } 4104 4105 /* 4106 * Rename files, (standard) BSD semantics frontend. 4107 */ 4108 /* ARGSUSED */ 4109 int 4110 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4111 { 4112 /* { 4113 syscallarg(const char *) from; 4114 syscallarg(const char *) to; 4115 } */ 4116 4117 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4118 SCARG(uap, to), UIO_USERSPACE, 0)); 4119 } 4120 4121 int 4122 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4123 register_t *retval) 4124 { 4125 /* { 4126 syscallarg(int) fromfd; 4127 syscallarg(const char *) from; 4128 syscallarg(int) tofd; 4129 syscallarg(const char *) to; 4130 } */ 4131 4132 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4133 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4134 } 4135 4136 /* 4137 * Rename files, POSIX semantics frontend. 4138 */ 4139 /* ARGSUSED */ 4140 int 4141 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4142 { 4143 /* { 4144 syscallarg(const char *) from; 4145 syscallarg(const char *) to; 4146 } */ 4147 4148 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4149 SCARG(uap, to), UIO_USERSPACE, 1)); 4150 } 4151 4152 /* 4153 * Rename files. Source and destination must either both be directories, 4154 * or both not be directories. If target is a directory, it must be empty. 4155 * If `from' and `to' refer to the same object, the value of the `retain' 4156 * argument is used to determine whether `from' will be 4157 * 4158 * (retain == 0) deleted unless `from' and `to' refer to the same 4159 * object in the file system's name space (BSD). 4160 * (retain == 1) always retained (POSIX). 4161 * 4162 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4163 */ 4164 int 4165 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4166 { 4167 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4168 } 4169 4170 static int 4171 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4172 const char *to, enum uio_seg seg, int retain) 4173 { 4174 struct pathbuf *fpb, *tpb; 4175 struct nameidata fnd, tnd; 4176 struct vnode *fdvp, *fvp; 4177 struct vnode *tdvp, *tvp; 4178 struct mount *mp, *tmp; 4179 int error; 4180 4181 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4182 4183 error = pathbuf_maybe_copyin(from, seg, &fpb); 4184 if (error) 4185 goto out0; 4186 KASSERT(fpb != NULL); 4187 4188 error = pathbuf_maybe_copyin(to, seg, &tpb); 4189 if (error) 4190 goto out1; 4191 KASSERT(tpb != NULL); 4192 4193 /* 4194 * Lookup from. 4195 * 4196 * XXX LOCKPARENT is wrong because we don't actually want it 4197 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4198 * insane, so for the time being we need to leave it like this. 4199 */ 4200 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4201 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4202 goto out2; 4203 4204 /* 4205 * Pull out the important results of the lookup, fdvp and fvp. 4206 * Of course, fvp is bogus because we're about to unlock fdvp. 4207 */ 4208 fdvp = fnd.ni_dvp; 4209 fvp = fnd.ni_vp; 4210 KASSERT(fdvp != NULL); 4211 KASSERT(fvp != NULL); 4212 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4213 4214 /* 4215 * Make sure neither fdvp nor fvp is locked. 4216 */ 4217 if (fdvp != fvp) 4218 VOP_UNLOCK(fdvp); 4219 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4220 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4221 4222 /* 4223 * Reject renaming `.' and `..'. Can't do this until after 4224 * namei because we need namei's parsing to find the final 4225 * component name. (namei should just leave us with the final 4226 * component name and not look it up itself, but anyway...) 4227 * 4228 * This was here before because we used to relookup from 4229 * instead of to and relookup requires the caller to check 4230 * this, but now file systems may depend on this check, so we 4231 * must retain it until the file systems are all rototilled. 4232 */ 4233 if (((fnd.ni_cnd.cn_namelen == 1) && 4234 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4235 ((fnd.ni_cnd.cn_namelen == 2) && 4236 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4237 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4238 error = EINVAL; /* XXX EISDIR? */ 4239 goto abort0; 4240 } 4241 4242 /* 4243 * Lookup to. 4244 * 4245 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4246 * fvp here to decide whether to add CREATEDIR is a load of 4247 * bollocks because fvp might be the wrong node by now, since 4248 * fdvp is unlocked. 4249 * 4250 * XXX Why not pass CREATEDIR always? 4251 */ 4252 NDINIT(&tnd, RENAME, 4253 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4254 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4255 tpb); 4256 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4257 goto abort0; 4258 4259 /* 4260 * Pull out the important results of the lookup, tdvp and tvp. 4261 * Of course, tvp is bogus because we're about to unlock tdvp. 4262 */ 4263 tdvp = tnd.ni_dvp; 4264 tvp = tnd.ni_vp; 4265 KASSERT(tdvp != NULL); 4266 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4267 4268 /* 4269 * Make sure neither tdvp nor tvp is locked. 4270 */ 4271 if (tdvp != tvp) 4272 VOP_UNLOCK(tdvp); 4273 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4274 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4275 4276 /* 4277 * Reject renaming onto `.' or `..'. relookup is unhappy with 4278 * these, which is why we must do this here. Once upon a time 4279 * we relooked up from instead of to, and consequently didn't 4280 * need this check, but now that we relookup to instead of 4281 * from, we need this; and we shall need it forever forward 4282 * until the VOP_RENAME protocol changes, because file systems 4283 * will no doubt begin to depend on this check. 4284 */ 4285 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4286 error = EISDIR; 4287 goto abort1; 4288 } 4289 if ((tnd.ni_cnd.cn_namelen == 2) && 4290 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4291 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4292 error = EINVAL; 4293 goto abort1; 4294 } 4295 4296 /* 4297 * Get the mount point. If the file system has been unmounted, 4298 * which it may be because we're not holding any vnode locks, 4299 * then v_mount will be NULL. We're not really supposed to 4300 * read v_mount without holding the vnode lock, but since we 4301 * have fdvp referenced, if fdvp->v_mount changes then at worst 4302 * it will be set to NULL, not changed to another mount point. 4303 * And, of course, since it is up to the file system to 4304 * determine the real lock order, we can't lock both fdvp and 4305 * tdvp at the same time. 4306 */ 4307 mp = fdvp->v_mount; 4308 if (mp == NULL) { 4309 error = ENOENT; 4310 goto abort1; 4311 } 4312 4313 /* 4314 * Make sure the mount points match. Again, although we don't 4315 * hold any vnode locks, the v_mount fields may change -- but 4316 * at worst they will change to NULL, so this will never become 4317 * a cross-device rename, because we hold vnode references. 4318 * 4319 * XXX Because nothing is locked and the compiler may reorder 4320 * things here, unmounting the file system at an inopportune 4321 * moment may cause rename to fail with ENXDEV when it really 4322 * should fail with ENOENT. 4323 */ 4324 tmp = tdvp->v_mount; 4325 if (tmp == NULL) { 4326 error = ENOENT; 4327 goto abort1; 4328 } 4329 4330 if (mp != tmp) { 4331 error = EXDEV; 4332 goto abort1; 4333 } 4334 4335 /* 4336 * Take the vfs rename lock to avoid cross-directory screw cases. 4337 * Nothing is locked currently, so taking this lock is safe. 4338 */ 4339 error = VFS_RENAMELOCK_ENTER(mp); 4340 if (error) 4341 goto abort1; 4342 4343 /* 4344 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4345 * and nothing is locked except for the vfs rename lock. 4346 * 4347 * The next step is a little rain dance to conform to the 4348 * insane lock protocol, even though it does nothing to ward 4349 * off race conditions. 4350 * 4351 * We need tdvp and tvp to be locked. However, because we have 4352 * unlocked tdvp in order to hold no locks while we take the 4353 * vfs rename lock, tvp may be wrong here, and we can't safely 4354 * lock it even if the sensible file systems will just unlock 4355 * it straight away. Consequently, we must lock tdvp and then 4356 * relookup tvp to get it locked. 4357 * 4358 * Finally, because the VOP_RENAME protocol is brain-damaged 4359 * and various file systems insanely depend on the semantics of 4360 * this brain damage, the lookup of to must be the last lookup 4361 * before VOP_RENAME. 4362 */ 4363 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4364 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4365 if (error) 4366 goto abort2; 4367 4368 /* 4369 * Drop the old tvp and pick up the new one -- which might be 4370 * the same, but that doesn't matter to us. After this, tdvp 4371 * and tvp should both be locked. 4372 */ 4373 if (tvp != NULL) 4374 vrele(tvp); 4375 tvp = tnd.ni_vp; 4376 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4377 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4378 4379 /* 4380 * The old do_sys_rename had various consistency checks here 4381 * involving fvp and tvp. fvp is bogus already here, and tvp 4382 * will become bogus soon in any sensible file system, so the 4383 * only purpose in putting these checks here is to give lip 4384 * service to these screw cases and to acknowledge that they 4385 * exist, not actually to handle them, but here you go 4386 * anyway... 4387 */ 4388 4389 /* 4390 * Acknowledge that directories and non-directories aren't 4391 * suposed to mix. 4392 */ 4393 if (tvp != NULL) { 4394 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4395 error = ENOTDIR; 4396 goto abort3; 4397 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4398 error = EISDIR; 4399 goto abort3; 4400 } 4401 } 4402 4403 /* 4404 * Acknowledge some random screw case, among the dozens that 4405 * might arise. 4406 */ 4407 if (fvp == tdvp) { 4408 error = EINVAL; 4409 goto abort3; 4410 } 4411 4412 /* 4413 * Acknowledge that POSIX has a wacky screw case. 4414 * 4415 * XXX Eventually the retain flag needs to be passed on to 4416 * VOP_RENAME. 4417 */ 4418 if (fvp == tvp) { 4419 if (retain) { 4420 error = 0; 4421 goto abort3; 4422 } else if ((fdvp == tdvp) && 4423 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4424 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4425 fnd.ni_cnd.cn_namelen))) { 4426 error = 0; 4427 goto abort3; 4428 } 4429 } 4430 4431 /* 4432 * Make sure veriexec can screw us up. (But a race can screw 4433 * up veriexec, of course -- remember, fvp and (soon) tvp are 4434 * bogus.) 4435 */ 4436 #if NVERIEXEC > 0 4437 { 4438 char *f1, *f2; 4439 size_t f1_len; 4440 size_t f2_len; 4441 4442 f1_len = fnd.ni_cnd.cn_namelen + 1; 4443 f1 = kmem_alloc(f1_len, KM_SLEEP); 4444 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4445 4446 f2_len = tnd.ni_cnd.cn_namelen + 1; 4447 f2 = kmem_alloc(f2_len, KM_SLEEP); 4448 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4449 4450 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4451 4452 kmem_free(f1, f1_len); 4453 kmem_free(f2, f2_len); 4454 4455 if (error) 4456 goto abort3; 4457 } 4458 #endif /* NVERIEXEC > 0 */ 4459 4460 /* 4461 * All ready. Incant the rename vop. 4462 */ 4463 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4464 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4465 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4466 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4467 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4468 4469 /* 4470 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4471 * tdvp and tvp. But we can't assert any of that. 4472 */ 4473 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4474 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4475 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4476 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4477 4478 /* 4479 * So all we have left to do is to drop the rename lock and 4480 * destroy the pathbufs. 4481 */ 4482 VFS_RENAMELOCK_EXIT(mp); 4483 goto out2; 4484 4485 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4486 VOP_UNLOCK(tvp); 4487 abort2: VOP_UNLOCK(tdvp); 4488 VFS_RENAMELOCK_EXIT(mp); 4489 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4490 vrele(tdvp); 4491 if (tvp != NULL) 4492 vrele(tvp); 4493 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4494 vrele(fdvp); 4495 vrele(fvp); 4496 out2: pathbuf_destroy(tpb); 4497 out1: pathbuf_destroy(fpb); 4498 out0: return error; 4499 } 4500 4501 /* 4502 * Make a directory file. 4503 */ 4504 /* ARGSUSED */ 4505 int 4506 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4507 { 4508 /* { 4509 syscallarg(const char *) path; 4510 syscallarg(int) mode; 4511 } */ 4512 4513 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4514 SCARG(uap, mode), UIO_USERSPACE); 4515 } 4516 4517 int 4518 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4519 register_t *retval) 4520 { 4521 /* { 4522 syscallarg(int) fd; 4523 syscallarg(const char *) path; 4524 syscallarg(int) mode; 4525 } */ 4526 4527 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4528 SCARG(uap, mode), UIO_USERSPACE); 4529 } 4530 4531 4532 int 4533 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4534 { 4535 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4536 } 4537 4538 static int 4539 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4540 enum uio_seg seg) 4541 { 4542 struct proc *p = curlwp->l_proc; 4543 struct vnode *vp; 4544 struct vattr vattr; 4545 int error; 4546 struct pathbuf *pb; 4547 struct nameidata nd; 4548 4549 KASSERT(l != NULL || fdat == AT_FDCWD); 4550 4551 /* XXX bollocks, should pass in a pathbuf */ 4552 error = pathbuf_maybe_copyin(path, seg, &pb); 4553 if (error) { 4554 return error; 4555 } 4556 4557 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4558 4559 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4560 pathbuf_destroy(pb); 4561 return (error); 4562 } 4563 vp = nd.ni_vp; 4564 if (vp != NULL) { 4565 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4566 if (nd.ni_dvp == vp) 4567 vrele(nd.ni_dvp); 4568 else 4569 vput(nd.ni_dvp); 4570 vrele(vp); 4571 pathbuf_destroy(pb); 4572 return (EEXIST); 4573 } 4574 vattr_null(&vattr); 4575 vattr.va_type = VDIR; 4576 /* We will read cwdi->cwdi_cmask unlocked. */ 4577 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4578 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4579 if (!error) 4580 vrele(nd.ni_vp); 4581 vput(nd.ni_dvp); 4582 pathbuf_destroy(pb); 4583 return (error); 4584 } 4585 4586 /* 4587 * Remove a directory file. 4588 */ 4589 /* ARGSUSED */ 4590 int 4591 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4592 { 4593 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4594 AT_REMOVEDIR, UIO_USERSPACE); 4595 } 4596 4597 /* 4598 * Read a block of directory entries in a file system independent format. 4599 */ 4600 int 4601 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4602 { 4603 /* { 4604 syscallarg(int) fd; 4605 syscallarg(char *) buf; 4606 syscallarg(size_t) count; 4607 } */ 4608 file_t *fp; 4609 int error, done; 4610 4611 /* fd_getvnode() will use the descriptor for us */ 4612 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4613 return (error); 4614 if ((fp->f_flag & FREAD) == 0) { 4615 error = EBADF; 4616 goto out; 4617 } 4618 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4619 SCARG(uap, count), &done, l, 0, 0); 4620 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4621 *retval = done; 4622 out: 4623 fd_putfile(SCARG(uap, fd)); 4624 return (error); 4625 } 4626 4627 /* 4628 * Set the mode mask for creation of filesystem nodes. 4629 */ 4630 int 4631 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4632 { 4633 /* { 4634 syscallarg(mode_t) newmask; 4635 } */ 4636 struct proc *p = l->l_proc; 4637 struct cwdinfo *cwdi; 4638 4639 /* 4640 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4641 * important is that we serialize changes to the mask. The 4642 * rw_exit() will issue a write memory barrier on our behalf, 4643 * and force the changes out to other CPUs (as it must use an 4644 * atomic operation, draining the local CPU's store buffers). 4645 */ 4646 cwdi = p->p_cwdi; 4647 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4648 *retval = cwdi->cwdi_cmask; 4649 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4650 rw_exit(&cwdi->cwdi_lock); 4651 4652 return (0); 4653 } 4654 4655 int 4656 dorevoke(struct vnode *vp, kauth_cred_t cred) 4657 { 4658 struct vattr vattr; 4659 int error, fs_decision; 4660 4661 vn_lock(vp, LK_SHARED | LK_RETRY); 4662 error = VOP_GETATTR(vp, &vattr, cred); 4663 VOP_UNLOCK(vp); 4664 if (error != 0) 4665 return error; 4666 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4667 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4668 fs_decision); 4669 if (!error) 4670 VOP_REVOKE(vp, REVOKEALL); 4671 return (error); 4672 } 4673 4674 /* 4675 * Void all references to file by ripping underlying filesystem 4676 * away from vnode. 4677 */ 4678 /* ARGSUSED */ 4679 int 4680 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4681 { 4682 /* { 4683 syscallarg(const char *) path; 4684 } */ 4685 struct vnode *vp; 4686 int error; 4687 4688 error = namei_simple_user(SCARG(uap, path), 4689 NSM_FOLLOW_TRYEMULROOT, &vp); 4690 if (error != 0) 4691 return (error); 4692 error = dorevoke(vp, l->l_cred); 4693 vrele(vp); 4694 return (error); 4695 } 4696 4697 /* 4698 * Allocate backing store for a file, filling a hole without having to 4699 * explicitly write anything out. 4700 */ 4701 /* ARGSUSED */ 4702 int 4703 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4704 register_t *retval) 4705 { 4706 /* { 4707 syscallarg(int) fd; 4708 syscallarg(off_t) pos; 4709 syscallarg(off_t) len; 4710 } */ 4711 int fd; 4712 off_t pos, len; 4713 struct file *fp; 4714 struct vnode *vp; 4715 int error; 4716 4717 fd = SCARG(uap, fd); 4718 pos = SCARG(uap, pos); 4719 len = SCARG(uap, len); 4720 4721 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4722 *retval = EINVAL; 4723 return 0; 4724 } 4725 4726 error = fd_getvnode(fd, &fp); 4727 if (error) { 4728 *retval = error; 4729 return 0; 4730 } 4731 if ((fp->f_flag & FWRITE) == 0) { 4732 error = EBADF; 4733 goto fail; 4734 } 4735 vp = fp->f_vnode; 4736 4737 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4738 if (vp->v_type == VDIR) { 4739 error = EISDIR; 4740 } else { 4741 error = VOP_FALLOCATE(vp, pos, len); 4742 } 4743 VOP_UNLOCK(vp); 4744 4745 fail: 4746 fd_putfile(fd); 4747 *retval = error; 4748 return 0; 4749 } 4750 4751 /* 4752 * Deallocate backing store for a file, creating a hole. Also used for 4753 * invoking TRIM on disks. 4754 */ 4755 /* ARGSUSED */ 4756 int 4757 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4758 register_t *retval) 4759 { 4760 /* { 4761 syscallarg(int) fd; 4762 syscallarg(off_t) pos; 4763 syscallarg(off_t) len; 4764 } */ 4765 int fd; 4766 off_t pos, len; 4767 struct file *fp; 4768 struct vnode *vp; 4769 int error; 4770 4771 fd = SCARG(uap, fd); 4772 pos = SCARG(uap, pos); 4773 len = SCARG(uap, len); 4774 4775 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4776 return EINVAL; 4777 } 4778 4779 error = fd_getvnode(fd, &fp); 4780 if (error) { 4781 return error; 4782 } 4783 if ((fp->f_flag & FWRITE) == 0) { 4784 error = EBADF; 4785 goto fail; 4786 } 4787 vp = fp->f_vnode; 4788 4789 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4790 if (vp->v_type == VDIR) { 4791 error = EISDIR; 4792 } else { 4793 error = VOP_FDISCARD(vp, pos, len); 4794 } 4795 VOP_UNLOCK(vp); 4796 4797 fail: 4798 fd_putfile(fd); 4799 return error; 4800 } 4801