1 /* $NetBSD: vfs_syscalls.c,v 1.542 2020/02/23 22:14:04 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.542 2020/02/23 22:14:04 ad Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 static int 169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 170 { 171 file_t *dfp; 172 int error; 173 174 if (fdat != AT_FDCWD) { 175 if ((error = fd_getvnode(fdat, &dfp)) != 0) 176 goto out; 177 178 NDAT(ndp, dfp->f_vnode); 179 } 180 181 error = namei(ndp); 182 183 if (fdat != AT_FDCWD) 184 fd_putfile(fdat); 185 out: 186 return error; 187 } 188 189 static int 190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 191 namei_simple_flags_t sflags, struct vnode **vp_ret) 192 { 193 file_t *dfp; 194 struct vnode *dvp; 195 int error; 196 197 if (fdat != AT_FDCWD) { 198 if ((error = fd_getvnode(fdat, &dfp)) != 0) 199 goto out; 200 201 dvp = dfp->f_vnode; 202 } else { 203 dvp = NULL; 204 } 205 206 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 207 208 if (fdat != AT_FDCWD) 209 fd_putfile(fdat); 210 out: 211 return error; 212 } 213 214 static int 215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 216 { 217 int error; 218 219 fp->f_flag = flags & FMASK; 220 fp->f_type = DTYPE_VNODE; 221 fp->f_ops = &vnops; 222 fp->f_vnode = vp; 223 224 if (flags & (O_EXLOCK | O_SHLOCK)) { 225 struct flock lf; 226 int type; 227 228 lf.l_whence = SEEK_SET; 229 lf.l_start = 0; 230 lf.l_len = 0; 231 if (flags & O_EXLOCK) 232 lf.l_type = F_WRLCK; 233 else 234 lf.l_type = F_RDLCK; 235 type = F_FLOCK; 236 if ((flags & FNONBLOCK) == 0) 237 type |= F_WAIT; 238 VOP_UNLOCK(vp); 239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 240 if (error) { 241 (void) vn_close(vp, fp->f_flag, fp->f_cred); 242 fd_abort(l->l_proc, fp, indx); 243 return error; 244 } 245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 246 atomic_or_uint(&fp->f_flag, FHASLOCK); 247 } 248 if (flags & O_CLOEXEC) 249 fd_set_exclose(l, indx, true); 250 return 0; 251 } 252 253 static int 254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 255 void *data, size_t *data_len) 256 { 257 struct mount *mp; 258 int error = 0, saved_flags; 259 260 mp = vp->v_mount; 261 saved_flags = mp->mnt_flag; 262 263 /* We can operate only on VV_ROOT nodes. */ 264 if ((vp->v_vflag & VV_ROOT) == 0) { 265 error = EINVAL; 266 goto out; 267 } 268 269 /* 270 * We only allow the filesystem to be reloaded if it 271 * is currently mounted read-only. Additionally, we 272 * prevent read-write to read-only downgrades. 273 */ 274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 275 (mp->mnt_flag & MNT_RDONLY) == 0 && 276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 277 error = EOPNOTSUPP; /* Needs translation */ 278 goto out; 279 } 280 281 /* 282 * Enabling MNT_UNION requires a covered mountpoint and 283 * must not happen on the root mount. 284 */ 285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 286 error = EOPNOTSUPP; 287 goto out; 288 } 289 290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 292 if (error) 293 goto out; 294 295 error = vfs_suspend(mp, 0); 296 if (error) 297 goto out; 298 299 mutex_enter(mp->mnt_updating); 300 301 mp->mnt_flag &= ~MNT_OP_FLAGS; 302 mp->mnt_flag |= flags & MNT_OP_FLAGS; 303 304 /* 305 * Set the mount level flags. 306 */ 307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 308 if ((flags & MNT_RDONLY)) 309 mp->mnt_iflag |= IMNT_WANTRDONLY; 310 else 311 mp->mnt_iflag |= IMNT_WANTRDWR; 312 } 313 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 315 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 316 mp->mnt_flag &= ~MNT_RDONLY; 317 318 error = VFS_MOUNT(mp, path, data, data_len); 319 320 if (error && data != NULL) { 321 int error2; 322 323 /* 324 * Update failed; let's try and see if it was an 325 * export request. For compat with 3.0 and earlier. 326 */ 327 error2 = vfs_hooks_reexport(mp, path, data); 328 329 /* 330 * Only update error code if the export request was 331 * understood but some problem occurred while 332 * processing it. 333 */ 334 if (error2 != EJUSTRETURN) 335 error = error2; 336 } 337 338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 339 mp->mnt_flag |= MNT_RDONLY; 340 if (error) 341 mp->mnt_flag = saved_flags; 342 mp->mnt_flag &= ~MNT_OP_FLAGS; 343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 346 vfs_syncer_add_to_worklist(mp); 347 } else { 348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 349 vfs_syncer_remove_from_worklist(mp); 350 } 351 mutex_exit(mp->mnt_updating); 352 vfs_resume(mp); 353 354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 355 (flags & MNT_EXTATTR)) { 356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 357 NULL, 0, NULL) != 0) { 358 printf("%s: failed to start extattr, error = %d", 359 mp->mnt_stat.f_mntonname, error); 360 mp->mnt_flag &= ~MNT_EXTATTR; 361 } 362 } 363 364 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 365 !(flags & MNT_EXTATTR)) { 366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 367 NULL, 0, NULL) != 0) { 368 printf("%s: failed to stop extattr, error = %d", 369 mp->mnt_stat.f_mntonname, error); 370 mp->mnt_flag |= MNT_RDONLY; 371 } 372 } 373 out: 374 return (error); 375 } 376 377 static int 378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 379 struct vfsops **vfsops) 380 { 381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 382 int error; 383 384 if (type_seg == UIO_USERSPACE) { 385 /* Copy file-system type from userspace. */ 386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 387 } else { 388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 389 KASSERT(error == 0); 390 } 391 392 if (error) { 393 /* 394 * Historically, filesystem types were identified by numbers. 395 * If we get an integer for the filesystem type instead of a 396 * string, we check to see if it matches one of the historic 397 * filesystem types. 398 */ 399 u_long fsindex = (u_long)fstype; 400 if (fsindex >= nmountcompatnames || 401 mountcompatnames[fsindex] == NULL) 402 return ENODEV; 403 strlcpy(fstypename, mountcompatnames[fsindex], 404 sizeof(fstypename)); 405 } 406 407 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 408 if (strcmp(fstypename, "ufs") == 0) 409 fstypename[0] = 'f'; 410 411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 412 return 0; 413 414 /* If we can autoload a vfs module, try again */ 415 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 416 417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 418 return 0; 419 420 return ENODEV; 421 } 422 423 static int 424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 425 void *data, size_t *data_len) 426 { 427 struct mount *mp; 428 int error; 429 430 /* If MNT_GETARGS is specified, it should be the only flag. */ 431 if (flags & ~MNT_GETARGS) 432 return EINVAL; 433 434 mp = vp->v_mount; 435 436 /* XXX: probably some notion of "can see" here if we want isolation. */ 437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 439 if (error) 440 return error; 441 442 if ((vp->v_vflag & VV_ROOT) == 0) 443 return EINVAL; 444 445 if (vfs_busy(mp)) 446 return EPERM; 447 448 mutex_enter(mp->mnt_updating); 449 mp->mnt_flag &= ~MNT_OP_FLAGS; 450 mp->mnt_flag |= MNT_GETARGS; 451 error = VFS_MOUNT(mp, path, data, data_len); 452 mp->mnt_flag &= ~MNT_OP_FLAGS; 453 mutex_exit(mp->mnt_updating); 454 455 vfs_unbusy(mp); 456 return (error); 457 } 458 459 int 460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 461 { 462 /* { 463 syscallarg(const char *) type; 464 syscallarg(const char *) path; 465 syscallarg(int) flags; 466 syscallarg(void *) data; 467 syscallarg(size_t) data_len; 468 } */ 469 470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 472 SCARG(uap, data_len), retval); 473 } 474 475 int 476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 477 const char *path, int flags, void *data, enum uio_seg data_seg, 478 size_t data_len, register_t *retval) 479 { 480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 481 struct vnode *vp; 482 void *data_buf = data; 483 bool vfsopsrele = false; 484 size_t alloc_sz = 0; 485 int error; 486 487 /* 488 * Get vnode to be covered 489 */ 490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 491 if (error != 0) { 492 vp = NULL; 493 goto done; 494 } 495 496 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 497 vfsops = vp->v_mount->mnt_op; 498 } else { 499 /* 'type' is userspace */ 500 error = mount_get_vfsops(type, type_seg, &vfsops); 501 if (error != 0) 502 goto done; 503 vfsopsrele = true; 504 } 505 506 /* 507 * We allow data to be NULL, even for userspace. Some fs's don't need 508 * it. The others will handle NULL. 509 */ 510 if (data != NULL && data_seg == UIO_USERSPACE) { 511 if (data_len == 0) { 512 /* No length supplied, use default for filesystem */ 513 data_len = vfsops->vfs_min_mount_data; 514 515 /* 516 * Hopefully a longer buffer won't make copyin() fail. 517 * For compatibility with 3.0 and earlier. 518 */ 519 if (flags & MNT_UPDATE 520 && data_len < sizeof (struct mnt_export_args30)) 521 data_len = sizeof (struct mnt_export_args30); 522 } 523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 524 error = EINVAL; 525 goto done; 526 } 527 alloc_sz = data_len; 528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 529 530 /* NFS needs the buffer even for mnt_getargs .... */ 531 error = copyin(data, data_buf, data_len); 532 if (error != 0) 533 goto done; 534 } 535 536 if (flags & MNT_GETARGS) { 537 if (data_len == 0) { 538 error = EINVAL; 539 goto done; 540 } 541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 542 if (error != 0) 543 goto done; 544 if (data_seg == UIO_USERSPACE) 545 error = copyout(data_buf, data, data_len); 546 *retval = data_len; 547 } else if (flags & MNT_UPDATE) { 548 error = mount_update(l, vp, path, flags, data_buf, &data_len); 549 } else { 550 /* Locking is handled internally in mount_domount(). */ 551 KASSERT(vfsopsrele == true); 552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 553 &data_len); 554 vfsopsrele = false; 555 } 556 if (!error) 557 KNOTE(&fs_klist, VQ_MOUNT); 558 559 done: 560 if (vfsopsrele) 561 vfs_delref(vfsops); 562 if (vp != NULL) { 563 vrele(vp); 564 } 565 if (data_buf != data) 566 kmem_free(data_buf, alloc_sz); 567 return (error); 568 } 569 570 /* 571 * Unmount a file system. 572 * 573 * Note: unmount takes a path to the vnode mounted on as argument, 574 * not special file (as before). 575 */ 576 /* ARGSUSED */ 577 int 578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 579 { 580 /* { 581 syscallarg(const char *) path; 582 syscallarg(int) flags; 583 } */ 584 struct vnode *vp; 585 struct mount *mp; 586 int error; 587 struct pathbuf *pb; 588 struct nameidata nd; 589 590 error = pathbuf_copyin(SCARG(uap, path), &pb); 591 if (error) { 592 return error; 593 } 594 595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 596 if ((error = namei(&nd)) != 0) { 597 pathbuf_destroy(pb); 598 return error; 599 } 600 vp = nd.ni_vp; 601 pathbuf_destroy(pb); 602 603 mp = vp->v_mount; 604 vfs_ref(mp); 605 VOP_UNLOCK(vp); 606 607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 609 if (error) { 610 vrele(vp); 611 vfs_rele(mp); 612 return (error); 613 } 614 615 /* 616 * Don't allow unmounting the root file system. 617 */ 618 if (mp->mnt_flag & MNT_ROOTFS) { 619 vrele(vp); 620 vfs_rele(mp); 621 return (EINVAL); 622 } 623 624 /* 625 * Must be the root of the filesystem 626 */ 627 if ((vp->v_vflag & VV_ROOT) == 0) { 628 vrele(vp); 629 vfs_rele(mp); 630 return (EINVAL); 631 } 632 633 vrele(vp); 634 error = dounmount(mp, SCARG(uap, flags), l); 635 vfs_rele(mp); 636 if (!error) 637 KNOTE(&fs_klist, VQ_UNMOUNT); 638 return error; 639 } 640 641 /* 642 * Sync each mounted filesystem. 643 */ 644 #ifdef DEBUG 645 int syncprt = 0; 646 struct ctldebug debug0 = { "syncprt", &syncprt }; 647 #endif 648 649 void 650 do_sys_sync(struct lwp *l) 651 { 652 mount_iterator_t *iter; 653 struct mount *mp; 654 int asyncflag; 655 656 mountlist_iterator_init(&iter); 657 while ((mp = mountlist_iterator_next(iter)) != NULL) { 658 mutex_enter(mp->mnt_updating); 659 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 660 asyncflag = mp->mnt_flag & MNT_ASYNC; 661 mp->mnt_flag &= ~MNT_ASYNC; 662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 663 if (asyncflag) 664 mp->mnt_flag |= MNT_ASYNC; 665 } 666 mutex_exit(mp->mnt_updating); 667 } 668 mountlist_iterator_destroy(iter); 669 #ifdef DEBUG 670 if (syncprt) 671 vfs_bufstats(); 672 #endif /* DEBUG */ 673 } 674 675 /* ARGSUSED */ 676 int 677 sys_sync(struct lwp *l, const void *v, register_t *retval) 678 { 679 do_sys_sync(l); 680 return (0); 681 } 682 683 684 /* 685 * Access or change filesystem quotas. 686 * 687 * (this is really 14 different calls bundled into one) 688 */ 689 690 static int 691 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 692 { 693 struct quotastat info_k; 694 int error; 695 696 /* ensure any padding bytes are cleared */ 697 memset(&info_k, 0, sizeof(info_k)); 698 699 error = vfs_quotactl_stat(mp, &info_k); 700 if (error) { 701 return error; 702 } 703 704 return copyout(&info_k, info_u, sizeof(info_k)); 705 } 706 707 static int 708 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 709 struct quotaidtypestat *info_u) 710 { 711 struct quotaidtypestat info_k; 712 int error; 713 714 /* ensure any padding bytes are cleared */ 715 memset(&info_k, 0, sizeof(info_k)); 716 717 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 718 if (error) { 719 return error; 720 } 721 722 return copyout(&info_k, info_u, sizeof(info_k)); 723 } 724 725 static int 726 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 727 struct quotaobjtypestat *info_u) 728 { 729 struct quotaobjtypestat info_k; 730 int error; 731 732 /* ensure any padding bytes are cleared */ 733 memset(&info_k, 0, sizeof(info_k)); 734 735 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 736 if (error) { 737 return error; 738 } 739 740 return copyout(&info_k, info_u, sizeof(info_k)); 741 } 742 743 static int 744 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 745 struct quotaval *val_u) 746 { 747 struct quotakey key_k; 748 struct quotaval val_k; 749 int error; 750 751 /* ensure any padding bytes are cleared */ 752 memset(&val_k, 0, sizeof(val_k)); 753 754 error = copyin(key_u, &key_k, sizeof(key_k)); 755 if (error) { 756 return error; 757 } 758 759 error = vfs_quotactl_get(mp, &key_k, &val_k); 760 if (error) { 761 return error; 762 } 763 764 return copyout(&val_k, val_u, sizeof(val_k)); 765 } 766 767 static int 768 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 769 const struct quotaval *val_u) 770 { 771 struct quotakey key_k; 772 struct quotaval val_k; 773 int error; 774 775 error = copyin(key_u, &key_k, sizeof(key_k)); 776 if (error) { 777 return error; 778 } 779 780 error = copyin(val_u, &val_k, sizeof(val_k)); 781 if (error) { 782 return error; 783 } 784 785 return vfs_quotactl_put(mp, &key_k, &val_k); 786 } 787 788 static int 789 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 790 { 791 struct quotakey key_k; 792 int error; 793 794 error = copyin(key_u, &key_k, sizeof(key_k)); 795 if (error) { 796 return error; 797 } 798 799 return vfs_quotactl_del(mp, &key_k); 800 } 801 802 static int 803 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 804 { 805 struct quotakcursor cursor_k; 806 int error; 807 808 /* ensure any padding bytes are cleared */ 809 memset(&cursor_k, 0, sizeof(cursor_k)); 810 811 error = vfs_quotactl_cursoropen(mp, &cursor_k); 812 if (error) { 813 return error; 814 } 815 816 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 817 } 818 819 static int 820 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 821 { 822 struct quotakcursor cursor_k; 823 int error; 824 825 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 826 if (error) { 827 return error; 828 } 829 830 return vfs_quotactl_cursorclose(mp, &cursor_k); 831 } 832 833 static int 834 do_sys_quotactl_cursorskipidtype(struct mount *mp, 835 struct quotakcursor *cursor_u, int idtype) 836 { 837 struct quotakcursor cursor_k; 838 int error; 839 840 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 841 if (error) { 842 return error; 843 } 844 845 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 846 if (error) { 847 return error; 848 } 849 850 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 851 } 852 853 static int 854 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 855 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 856 unsigned *ret_u) 857 { 858 #define CGET_STACK_MAX 8 859 struct quotakcursor cursor_k; 860 struct quotakey stackkeys[CGET_STACK_MAX]; 861 struct quotaval stackvals[CGET_STACK_MAX]; 862 struct quotakey *keys_k; 863 struct quotaval *vals_k; 864 unsigned ret_k; 865 int error; 866 867 if (maxnum > 128) { 868 maxnum = 128; 869 } 870 871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 872 if (error) { 873 return error; 874 } 875 876 if (maxnum <= CGET_STACK_MAX) { 877 keys_k = stackkeys; 878 vals_k = stackvals; 879 /* ensure any padding bytes are cleared */ 880 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 881 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 882 } else { 883 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 884 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 885 } 886 887 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 888 &ret_k); 889 if (error) { 890 goto fail; 891 } 892 893 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 894 if (error) { 895 goto fail; 896 } 897 898 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 899 if (error) { 900 goto fail; 901 } 902 903 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 904 if (error) { 905 goto fail; 906 } 907 908 /* do last to maximize the chance of being able to recover a failure */ 909 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 910 911 fail: 912 if (keys_k != stackkeys) { 913 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 914 } 915 if (vals_k != stackvals) { 916 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 917 } 918 return error; 919 } 920 921 static int 922 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 923 int *ret_u) 924 { 925 struct quotakcursor cursor_k; 926 int ret_k; 927 int error; 928 929 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 930 if (error) { 931 return error; 932 } 933 934 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 935 if (error) { 936 return error; 937 } 938 939 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 940 if (error) { 941 return error; 942 } 943 944 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 945 } 946 947 static int 948 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 949 { 950 struct quotakcursor cursor_k; 951 int error; 952 953 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 954 if (error) { 955 return error; 956 } 957 958 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 959 if (error) { 960 return error; 961 } 962 963 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 964 } 965 966 static int 967 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 968 { 969 char *path_k; 970 int error; 971 972 /* XXX this should probably be a struct pathbuf */ 973 path_k = PNBUF_GET(); 974 error = copyin(path_u, path_k, PATH_MAX); 975 if (error) { 976 PNBUF_PUT(path_k); 977 return error; 978 } 979 980 error = vfs_quotactl_quotaon(mp, idtype, path_k); 981 982 PNBUF_PUT(path_k); 983 return error; 984 } 985 986 static int 987 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 988 { 989 return vfs_quotactl_quotaoff(mp, idtype); 990 } 991 992 int 993 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 994 { 995 struct mount *mp; 996 struct vnode *vp; 997 int error; 998 999 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1000 if (error != 0) 1001 return (error); 1002 mp = vp->v_mount; 1003 1004 switch (args->qc_op) { 1005 case QUOTACTL_STAT: 1006 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1007 break; 1008 case QUOTACTL_IDTYPESTAT: 1009 error = do_sys_quotactl_idtypestat(mp, 1010 args->u.idtypestat.qc_idtype, 1011 args->u.idtypestat.qc_info); 1012 break; 1013 case QUOTACTL_OBJTYPESTAT: 1014 error = do_sys_quotactl_objtypestat(mp, 1015 args->u.objtypestat.qc_objtype, 1016 args->u.objtypestat.qc_info); 1017 break; 1018 case QUOTACTL_GET: 1019 error = do_sys_quotactl_get(mp, 1020 args->u.get.qc_key, 1021 args->u.get.qc_val); 1022 break; 1023 case QUOTACTL_PUT: 1024 error = do_sys_quotactl_put(mp, 1025 args->u.put.qc_key, 1026 args->u.put.qc_val); 1027 break; 1028 case QUOTACTL_DEL: 1029 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1030 break; 1031 case QUOTACTL_CURSOROPEN: 1032 error = do_sys_quotactl_cursoropen(mp, 1033 args->u.cursoropen.qc_cursor); 1034 break; 1035 case QUOTACTL_CURSORCLOSE: 1036 error = do_sys_quotactl_cursorclose(mp, 1037 args->u.cursorclose.qc_cursor); 1038 break; 1039 case QUOTACTL_CURSORSKIPIDTYPE: 1040 error = do_sys_quotactl_cursorskipidtype(mp, 1041 args->u.cursorskipidtype.qc_cursor, 1042 args->u.cursorskipidtype.qc_idtype); 1043 break; 1044 case QUOTACTL_CURSORGET: 1045 error = do_sys_quotactl_cursorget(mp, 1046 args->u.cursorget.qc_cursor, 1047 args->u.cursorget.qc_keys, 1048 args->u.cursorget.qc_vals, 1049 args->u.cursorget.qc_maxnum, 1050 args->u.cursorget.qc_ret); 1051 break; 1052 case QUOTACTL_CURSORATEND: 1053 error = do_sys_quotactl_cursoratend(mp, 1054 args->u.cursoratend.qc_cursor, 1055 args->u.cursoratend.qc_ret); 1056 break; 1057 case QUOTACTL_CURSORREWIND: 1058 error = do_sys_quotactl_cursorrewind(mp, 1059 args->u.cursorrewind.qc_cursor); 1060 break; 1061 case QUOTACTL_QUOTAON: 1062 error = do_sys_quotactl_quotaon(mp, 1063 args->u.quotaon.qc_idtype, 1064 args->u.quotaon.qc_quotafile); 1065 break; 1066 case QUOTACTL_QUOTAOFF: 1067 error = do_sys_quotactl_quotaoff(mp, 1068 args->u.quotaoff.qc_idtype); 1069 break; 1070 default: 1071 error = EINVAL; 1072 break; 1073 } 1074 1075 vrele(vp); 1076 return error; 1077 } 1078 1079 /* ARGSUSED */ 1080 int 1081 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1082 register_t *retval) 1083 { 1084 /* { 1085 syscallarg(const char *) path; 1086 syscallarg(struct quotactl_args *) args; 1087 } */ 1088 struct quotactl_args args; 1089 int error; 1090 1091 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1092 if (error) { 1093 return error; 1094 } 1095 1096 return do_sys_quotactl(SCARG(uap, path), &args); 1097 } 1098 1099 int 1100 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1101 int root) 1102 { 1103 struct vnode *rvp; 1104 int error = 0; 1105 1106 /* 1107 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1108 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1109 * overrides MNT_NOWAIT. 1110 */ 1111 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1112 (flags != MNT_WAIT && flags != 0)) { 1113 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1114 rvp = NULL; 1115 } else { 1116 /* Get the filesystem stats now */ 1117 memset(sp, 0, sizeof(*sp)); 1118 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1119 return error; 1120 } 1121 KASSERT(l == curlwp); 1122 rvp = cwdrdir(); 1123 if (rvp == NULL) 1124 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1125 } 1126 1127 if (rvp != NULL) { 1128 size_t len; 1129 char *bp; 1130 char c; 1131 char *path = PNBUF_GET(); 1132 1133 bp = path + MAXPATHLEN; 1134 *--bp = '\0'; 1135 error = getcwd_common(rvp, rootvnode, &bp, path, 1136 MAXPATHLEN / 2, 0, l); 1137 if (error) { 1138 PNBUF_PUT(path); 1139 vrele(rvp); 1140 return error; 1141 } 1142 len = strlen(bp); 1143 if (len != 1) { 1144 /* 1145 * for mount points that are below our root, we can see 1146 * them, so we fix up the pathname and return them. The 1147 * rest we cannot see, so we don't allow viewing the 1148 * data. 1149 */ 1150 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1151 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1152 (void)strlcpy(sp->f_mntonname, 1153 c == '\0' ? "/" : &sp->f_mntonname[len], 1154 sizeof(sp->f_mntonname)); 1155 } else { 1156 if (root) 1157 (void)strlcpy(sp->f_mntonname, "/", 1158 sizeof(sp->f_mntonname)); 1159 else 1160 error = EPERM; 1161 } 1162 } 1163 PNBUF_PUT(path); 1164 vrele(rvp); 1165 } 1166 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1167 return error; 1168 } 1169 1170 /* 1171 * Get filesystem statistics by path. 1172 */ 1173 int 1174 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1175 { 1176 struct mount *mp; 1177 int error; 1178 struct vnode *vp; 1179 1180 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1181 if (error != 0) 1182 return error; 1183 mp = vp->v_mount; 1184 error = dostatvfs(mp, sb, l, flags, 1); 1185 vrele(vp); 1186 return error; 1187 } 1188 1189 /* ARGSUSED */ 1190 int 1191 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1192 { 1193 /* { 1194 syscallarg(const char *) path; 1195 syscallarg(struct statvfs *) buf; 1196 syscallarg(int) flags; 1197 } */ 1198 struct statvfs *sb; 1199 int error; 1200 1201 sb = STATVFSBUF_GET(); 1202 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1203 if (error == 0) 1204 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1205 STATVFSBUF_PUT(sb); 1206 return error; 1207 } 1208 1209 /* 1210 * Get filesystem statistics by fd. 1211 */ 1212 int 1213 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1214 { 1215 file_t *fp; 1216 struct mount *mp; 1217 int error; 1218 1219 /* fd_getvnode() will use the descriptor for us */ 1220 if ((error = fd_getvnode(fd, &fp)) != 0) 1221 return (error); 1222 mp = fp->f_vnode->v_mount; 1223 error = dostatvfs(mp, sb, curlwp, flags, 1); 1224 fd_putfile(fd); 1225 return error; 1226 } 1227 1228 /* ARGSUSED */ 1229 int 1230 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1231 { 1232 /* { 1233 syscallarg(int) fd; 1234 syscallarg(struct statvfs *) buf; 1235 syscallarg(int) flags; 1236 } */ 1237 struct statvfs *sb; 1238 int error; 1239 1240 sb = STATVFSBUF_GET(); 1241 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1242 if (error == 0) 1243 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1244 STATVFSBUF_PUT(sb); 1245 return error; 1246 } 1247 1248 1249 /* 1250 * Get statistics on all filesystems. 1251 */ 1252 int 1253 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1254 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1255 register_t *retval) 1256 { 1257 int root = 0; 1258 mount_iterator_t *iter; 1259 struct proc *p = l->l_proc; 1260 struct mount *mp; 1261 struct statvfs *sb; 1262 size_t count, maxcount; 1263 int error = 0; 1264 1265 sb = STATVFSBUF_GET(); 1266 maxcount = bufsize / entry_sz; 1267 count = 0; 1268 mountlist_iterator_init(&iter); 1269 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1270 if (sfsp && count < maxcount) { 1271 error = dostatvfs(mp, sb, l, flags, 0); 1272 if (error) { 1273 error = 0; 1274 continue; 1275 } 1276 error = copyfn(sb, sfsp, entry_sz); 1277 if (error) 1278 goto out; 1279 sfsp = (char *)sfsp + entry_sz; 1280 root |= strcmp(sb->f_mntonname, "/") == 0; 1281 } 1282 count++; 1283 } 1284 1285 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1286 /* 1287 * fake a root entry 1288 */ 1289 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1290 sb, l, flags, 1); 1291 if (error != 0) 1292 goto out; 1293 if (sfsp) { 1294 error = copyfn(sb, sfsp, entry_sz); 1295 if (error != 0) 1296 goto out; 1297 } 1298 count++; 1299 } 1300 if (sfsp && count > maxcount) 1301 *retval = maxcount; 1302 else 1303 *retval = count; 1304 out: 1305 mountlist_iterator_destroy(iter); 1306 STATVFSBUF_PUT(sb); 1307 return error; 1308 } 1309 1310 int 1311 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1312 register_t *retval) 1313 { 1314 /* { 1315 syscallarg(struct statvfs *) buf; 1316 syscallarg(size_t) bufsize; 1317 syscallarg(int) flags; 1318 } */ 1319 1320 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1321 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1322 } 1323 1324 /* 1325 * Change current working directory to a given file descriptor. 1326 */ 1327 /* ARGSUSED */ 1328 int 1329 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1330 { 1331 /* { 1332 syscallarg(int) fd; 1333 } */ 1334 struct cwdinfo *cwdi; 1335 struct vnode *vp, *tdp; 1336 struct mount *mp; 1337 file_t *fp; 1338 int error, fd; 1339 1340 /* fd_getvnode() will use the descriptor for us */ 1341 fd = SCARG(uap, fd); 1342 if ((error = fd_getvnode(fd, &fp)) != 0) 1343 return (error); 1344 vp = fp->f_vnode; 1345 1346 vref(vp); 1347 vn_lock(vp, LK_SHARED | LK_RETRY); 1348 if (vp->v_type != VDIR) 1349 error = ENOTDIR; 1350 else 1351 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1352 if (error) { 1353 vput(vp); 1354 goto out; 1355 } 1356 while ((mp = vp->v_mountedhere) != NULL) { 1357 error = vfs_busy(mp); 1358 vput(vp); 1359 if (error != 0) 1360 goto out; 1361 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1362 vfs_unbusy(mp); 1363 if (error) 1364 goto out; 1365 vp = tdp; 1366 } 1367 VOP_UNLOCK(vp); 1368 1369 /* 1370 * Disallow changing to a directory not under the process's 1371 * current root directory (if there is one). 1372 */ 1373 cwdi = cwdenter(RW_WRITER); 1374 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1375 vrele(vp); 1376 error = EPERM; /* operation not permitted */ 1377 } else { 1378 vrele(cwdi->cwdi_cdir); 1379 cwdi->cwdi_cdir = vp; 1380 } 1381 cwdexit(cwdi); 1382 1383 out: 1384 fd_putfile(fd); 1385 return (error); 1386 } 1387 1388 /* 1389 * Change this process's notion of the root directory to a given file 1390 * descriptor. 1391 */ 1392 int 1393 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1394 { 1395 struct vnode *vp; 1396 file_t *fp; 1397 int error, fd = SCARG(uap, fd); 1398 1399 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1400 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1401 return error; 1402 /* fd_getvnode() will use the descriptor for us */ 1403 if ((error = fd_getvnode(fd, &fp)) != 0) 1404 return error; 1405 vp = fp->f_vnode; 1406 vn_lock(vp, LK_SHARED | LK_RETRY); 1407 if (vp->v_type != VDIR) 1408 error = ENOTDIR; 1409 else 1410 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1411 VOP_UNLOCK(vp); 1412 if (error) 1413 goto out; 1414 vref(vp); 1415 change_root(vp); 1416 1417 out: 1418 fd_putfile(fd); 1419 return (error); 1420 } 1421 1422 /* 1423 * Change current working directory (``.''). 1424 */ 1425 /* ARGSUSED */ 1426 int 1427 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1428 { 1429 /* { 1430 syscallarg(const char *) path; 1431 } */ 1432 struct cwdinfo *cwdi; 1433 int error; 1434 struct vnode *vp, *ovp; 1435 1436 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1437 if (error != 0) 1438 return (error); 1439 1440 cwdi = cwdenter(RW_WRITER); 1441 ovp = cwdi->cwdi_cdir; 1442 cwdi->cwdi_cdir = vp; 1443 cwdexit(cwdi); 1444 vrele(ovp); 1445 return (0); 1446 } 1447 1448 /* 1449 * Change notion of root (``/'') directory. 1450 */ 1451 /* ARGSUSED */ 1452 int 1453 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1454 { 1455 /* { 1456 syscallarg(const char *) path; 1457 } */ 1458 int error; 1459 struct vnode *vp; 1460 1461 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1462 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1463 return (error); 1464 1465 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1466 if (error == 0) 1467 change_root(vp); 1468 return error; 1469 } 1470 1471 /* 1472 * Common routine for chroot and fchroot. 1473 * NB: callers need to properly authorize the change root operation. 1474 */ 1475 void 1476 change_root(struct vnode *vp) 1477 { 1478 struct cwdinfo *cwdi; 1479 kauth_cred_t ncred; 1480 struct lwp *l = curlwp; 1481 struct proc *p = l->l_proc; 1482 1483 ncred = kauth_cred_alloc(); 1484 1485 cwdi = cwdenter(RW_WRITER); 1486 if (cwdi->cwdi_rdir != NULL) 1487 vrele(cwdi->cwdi_rdir); 1488 cwdi->cwdi_rdir = vp; 1489 1490 /* 1491 * Prevent escaping from chroot by putting the root under 1492 * the working directory. Silently chdir to / if we aren't 1493 * already there. 1494 */ 1495 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1496 /* 1497 * XXX would be more failsafe to change directory to a 1498 * deadfs node here instead 1499 */ 1500 vrele(cwdi->cwdi_cdir); 1501 vref(vp); 1502 cwdi->cwdi_cdir = vp; 1503 } 1504 cwdexit(cwdi); 1505 1506 /* Get a write lock on the process credential. */ 1507 proc_crmod_enter(); 1508 1509 kauth_cred_clone(p->p_cred, ncred); 1510 kauth_proc_chroot(ncred, p->p_cwdi); 1511 1512 /* Broadcast our credentials to the process and other LWPs. */ 1513 proc_crmod_leave(ncred, p->p_cred, true); 1514 } 1515 1516 /* 1517 * Common routine for chroot and chdir. 1518 * XXX "where" should be enum uio_seg 1519 */ 1520 int 1521 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1522 { 1523 struct pathbuf *pb; 1524 struct nameidata nd; 1525 int error; 1526 1527 error = pathbuf_maybe_copyin(path, where, &pb); 1528 if (error) { 1529 return error; 1530 } 1531 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1532 if ((error = namei(&nd)) != 0) { 1533 pathbuf_destroy(pb); 1534 return error; 1535 } 1536 *vpp = nd.ni_vp; 1537 pathbuf_destroy(pb); 1538 1539 if ((*vpp)->v_type != VDIR) 1540 error = ENOTDIR; 1541 else 1542 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1543 1544 if (error) 1545 vput(*vpp); 1546 else 1547 VOP_UNLOCK(*vpp); 1548 return (error); 1549 } 1550 1551 /* 1552 * Internals of sys_open - path has already been converted into a pathbuf 1553 * (so we can easily reuse this function from other parts of the kernel, 1554 * like posix_spawn post-processing). 1555 */ 1556 int 1557 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1558 int open_mode, int *fd) 1559 { 1560 struct proc *p = l->l_proc; 1561 struct cwdinfo *cwdi = p->p_cwdi; 1562 file_t *fp; 1563 struct vnode *vp; 1564 int flags, cmode; 1565 int indx, error; 1566 struct nameidata nd; 1567 1568 if (open_flags & O_SEARCH) { 1569 open_flags &= ~(int)O_SEARCH; 1570 } 1571 1572 /* 1573 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1574 * may be specified. 1575 */ 1576 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1577 return EINVAL; 1578 1579 flags = FFLAGS(open_flags); 1580 if ((flags & (FREAD | FWRITE)) == 0) 1581 return EINVAL; 1582 1583 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1584 return error; 1585 } 1586 1587 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1588 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1589 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1590 if (dvp != NULL) 1591 NDAT(&nd, dvp); 1592 1593 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1594 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1595 fd_abort(p, fp, indx); 1596 if ((error == EDUPFD || error == EMOVEFD) && 1597 l->l_dupfd >= 0 && /* XXX from fdopen */ 1598 (error = 1599 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1600 *fd = indx; 1601 return 0; 1602 } 1603 if (error == ERESTART) 1604 error = EINTR; 1605 return error; 1606 } 1607 1608 l->l_dupfd = 0; 1609 vp = nd.ni_vp; 1610 1611 if ((error = open_setfp(l, fp, vp, indx, flags))) 1612 return error; 1613 1614 VOP_UNLOCK(vp); 1615 *fd = indx; 1616 fd_affix(p, fp, indx); 1617 return 0; 1618 } 1619 1620 int 1621 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1622 { 1623 struct pathbuf *pb; 1624 int error, oflags; 1625 1626 oflags = FFLAGS(open_flags); 1627 if ((oflags & (FREAD | FWRITE)) == 0) 1628 return EINVAL; 1629 1630 pb = pathbuf_create(path); 1631 if (pb == NULL) 1632 return ENOMEM; 1633 1634 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1635 pathbuf_destroy(pb); 1636 1637 return error; 1638 } 1639 1640 static int 1641 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1642 int mode, int *fd) 1643 { 1644 file_t *dfp = NULL; 1645 struct vnode *dvp = NULL; 1646 struct pathbuf *pb; 1647 const char *pathstring = NULL; 1648 int error; 1649 1650 if (path == NULL) { 1651 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1652 if (error == ENOSYS) 1653 goto no_compat; 1654 if (error) 1655 return error; 1656 } else { 1657 no_compat: 1658 error = pathbuf_copyin(path, &pb); 1659 if (error) 1660 return error; 1661 } 1662 1663 pathstring = pathbuf_stringcopy_get(pb); 1664 1665 /* 1666 * fdat is ignored if: 1667 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1668 * 2) if path is absolute, then fdat is useless. 1669 */ 1670 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1671 /* fd_getvnode() will use the descriptor for us */ 1672 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1673 goto out; 1674 1675 dvp = dfp->f_vnode; 1676 } 1677 1678 error = do_open(l, dvp, pb, flags, mode, fd); 1679 1680 if (dfp != NULL) 1681 fd_putfile(fdat); 1682 out: 1683 pathbuf_stringcopy_put(pb, pathstring); 1684 pathbuf_destroy(pb); 1685 return error; 1686 } 1687 1688 int 1689 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1690 { 1691 /* { 1692 syscallarg(const char *) path; 1693 syscallarg(int) flags; 1694 syscallarg(int) mode; 1695 } */ 1696 int error; 1697 int fd; 1698 1699 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1700 SCARG(uap, flags), SCARG(uap, mode), &fd); 1701 1702 if (error == 0) 1703 *retval = fd; 1704 1705 return error; 1706 } 1707 1708 int 1709 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1710 { 1711 /* { 1712 syscallarg(int) fd; 1713 syscallarg(const char *) path; 1714 syscallarg(int) oflags; 1715 syscallarg(int) mode; 1716 } */ 1717 int error; 1718 int fd; 1719 1720 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1721 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1722 1723 if (error == 0) 1724 *retval = fd; 1725 1726 return error; 1727 } 1728 1729 static void 1730 vfs__fhfree(fhandle_t *fhp) 1731 { 1732 size_t fhsize; 1733 1734 fhsize = FHANDLE_SIZE(fhp); 1735 kmem_free(fhp, fhsize); 1736 } 1737 1738 /* 1739 * vfs_composefh: compose a filehandle. 1740 */ 1741 1742 int 1743 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1744 { 1745 struct mount *mp; 1746 struct fid *fidp; 1747 int error; 1748 size_t needfhsize; 1749 size_t fidsize; 1750 1751 mp = vp->v_mount; 1752 fidp = NULL; 1753 if (*fh_size < FHANDLE_SIZE_MIN) { 1754 fidsize = 0; 1755 } else { 1756 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1757 if (fhp != NULL) { 1758 memset(fhp, 0, *fh_size); 1759 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1760 fidp = &fhp->fh_fid; 1761 } 1762 } 1763 error = VFS_VPTOFH(vp, fidp, &fidsize); 1764 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1765 if (error == 0 && *fh_size < needfhsize) { 1766 error = E2BIG; 1767 } 1768 *fh_size = needfhsize; 1769 return error; 1770 } 1771 1772 int 1773 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1774 { 1775 struct mount *mp; 1776 fhandle_t *fhp; 1777 size_t fhsize; 1778 size_t fidsize; 1779 int error; 1780 1781 mp = vp->v_mount; 1782 fidsize = 0; 1783 error = VFS_VPTOFH(vp, NULL, &fidsize); 1784 KASSERT(error != 0); 1785 if (error != E2BIG) { 1786 goto out; 1787 } 1788 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1789 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1790 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1791 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1792 if (error == 0) { 1793 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1794 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1795 *fhpp = fhp; 1796 } else { 1797 kmem_free(fhp, fhsize); 1798 } 1799 out: 1800 return error; 1801 } 1802 1803 void 1804 vfs_composefh_free(fhandle_t *fhp) 1805 { 1806 1807 vfs__fhfree(fhp); 1808 } 1809 1810 /* 1811 * vfs_fhtovp: lookup a vnode by a filehandle. 1812 */ 1813 1814 int 1815 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1816 { 1817 struct mount *mp; 1818 int error; 1819 1820 *vpp = NULL; 1821 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1822 if (mp == NULL) { 1823 error = ESTALE; 1824 goto out; 1825 } 1826 if (mp->mnt_op->vfs_fhtovp == NULL) { 1827 error = EOPNOTSUPP; 1828 goto out; 1829 } 1830 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1831 out: 1832 return error; 1833 } 1834 1835 /* 1836 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1837 * the needed size. 1838 */ 1839 1840 int 1841 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1842 { 1843 fhandle_t *fhp; 1844 int error; 1845 1846 if (fhsize > FHANDLE_SIZE_MAX) { 1847 return EINVAL; 1848 } 1849 if (fhsize < FHANDLE_SIZE_MIN) { 1850 return EINVAL; 1851 } 1852 again: 1853 fhp = kmem_alloc(fhsize, KM_SLEEP); 1854 error = copyin(ufhp, fhp, fhsize); 1855 if (error == 0) { 1856 /* XXX this check shouldn't be here */ 1857 if (FHANDLE_SIZE(fhp) == fhsize) { 1858 *fhpp = fhp; 1859 return 0; 1860 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1861 /* 1862 * a kludge for nfsv2 padded handles. 1863 */ 1864 size_t sz; 1865 1866 sz = FHANDLE_SIZE(fhp); 1867 kmem_free(fhp, fhsize); 1868 fhsize = sz; 1869 goto again; 1870 } else { 1871 /* 1872 * userland told us wrong size. 1873 */ 1874 error = EINVAL; 1875 } 1876 } 1877 kmem_free(fhp, fhsize); 1878 return error; 1879 } 1880 1881 void 1882 vfs_copyinfh_free(fhandle_t *fhp) 1883 { 1884 1885 vfs__fhfree(fhp); 1886 } 1887 1888 /* 1889 * Get file handle system call 1890 */ 1891 int 1892 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1893 { 1894 /* { 1895 syscallarg(char *) fname; 1896 syscallarg(fhandle_t *) fhp; 1897 syscallarg(size_t *) fh_size; 1898 } */ 1899 struct vnode *vp; 1900 fhandle_t *fh; 1901 int error; 1902 struct pathbuf *pb; 1903 struct nameidata nd; 1904 size_t sz; 1905 size_t usz; 1906 1907 /* 1908 * Must be super user 1909 */ 1910 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1911 0, NULL, NULL, NULL); 1912 if (error) 1913 return (error); 1914 1915 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1916 if (error) { 1917 return error; 1918 } 1919 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1920 error = namei(&nd); 1921 if (error) { 1922 pathbuf_destroy(pb); 1923 return error; 1924 } 1925 vp = nd.ni_vp; 1926 pathbuf_destroy(pb); 1927 1928 error = vfs_composefh_alloc(vp, &fh); 1929 vput(vp); 1930 if (error != 0) { 1931 return error; 1932 } 1933 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1934 if (error != 0) { 1935 goto out; 1936 } 1937 sz = FHANDLE_SIZE(fh); 1938 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1939 if (error != 0) { 1940 goto out; 1941 } 1942 if (usz >= sz) { 1943 error = copyout(fh, SCARG(uap, fhp), sz); 1944 } else { 1945 error = E2BIG; 1946 } 1947 out: 1948 vfs_composefh_free(fh); 1949 return (error); 1950 } 1951 1952 /* 1953 * Open a file given a file handle. 1954 * 1955 * Check permissions, allocate an open file structure, 1956 * and call the device open routine if any. 1957 */ 1958 1959 int 1960 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1961 register_t *retval) 1962 { 1963 file_t *fp; 1964 struct vnode *vp = NULL; 1965 kauth_cred_t cred = l->l_cred; 1966 file_t *nfp; 1967 int indx, error; 1968 struct vattr va; 1969 fhandle_t *fh; 1970 int flags; 1971 proc_t *p; 1972 1973 p = curproc; 1974 1975 /* 1976 * Must be super user 1977 */ 1978 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1979 0, NULL, NULL, NULL))) 1980 return (error); 1981 1982 if (oflags & O_SEARCH) { 1983 oflags &= ~(int)O_SEARCH; 1984 } 1985 1986 flags = FFLAGS(oflags); 1987 if ((flags & (FREAD | FWRITE)) == 0) 1988 return (EINVAL); 1989 if ((flags & O_CREAT)) 1990 return (EINVAL); 1991 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1992 return (error); 1993 fp = nfp; 1994 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1995 if (error != 0) { 1996 goto bad; 1997 } 1998 error = vfs_fhtovp(fh, &vp); 1999 vfs_copyinfh_free(fh); 2000 if (error != 0) { 2001 goto bad; 2002 } 2003 2004 /* Now do an effective vn_open */ 2005 2006 if (vp->v_type == VSOCK) { 2007 error = EOPNOTSUPP; 2008 goto bad; 2009 } 2010 error = vn_openchk(vp, cred, flags); 2011 if (error != 0) 2012 goto bad; 2013 if (flags & O_TRUNC) { 2014 VOP_UNLOCK(vp); /* XXX */ 2015 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2016 vattr_null(&va); 2017 va.va_size = 0; 2018 error = VOP_SETATTR(vp, &va, cred); 2019 if (error) 2020 goto bad; 2021 } 2022 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2023 goto bad; 2024 if (flags & FWRITE) { 2025 mutex_enter(vp->v_interlock); 2026 vp->v_writecount++; 2027 mutex_exit(vp->v_interlock); 2028 } 2029 2030 /* done with modified vn_open, now finish what sys_open does. */ 2031 if ((error = open_setfp(l, fp, vp, indx, flags))) 2032 return error; 2033 2034 VOP_UNLOCK(vp); 2035 *retval = indx; 2036 fd_affix(p, fp, indx); 2037 return (0); 2038 2039 bad: 2040 fd_abort(p, fp, indx); 2041 if (vp != NULL) 2042 vput(vp); 2043 return (error); 2044 } 2045 2046 int 2047 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2048 { 2049 /* { 2050 syscallarg(const void *) fhp; 2051 syscallarg(size_t) fh_size; 2052 syscallarg(int) flags; 2053 } */ 2054 2055 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2056 SCARG(uap, flags), retval); 2057 } 2058 2059 int 2060 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2061 { 2062 int error; 2063 fhandle_t *fh; 2064 struct vnode *vp; 2065 2066 /* 2067 * Must be super user 2068 */ 2069 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2070 0, NULL, NULL, NULL))) 2071 return (error); 2072 2073 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2074 if (error != 0) 2075 return error; 2076 2077 error = vfs_fhtovp(fh, &vp); 2078 vfs_copyinfh_free(fh); 2079 if (error != 0) 2080 return error; 2081 2082 error = vn_stat(vp, sb); 2083 vput(vp); 2084 return error; 2085 } 2086 2087 2088 /* ARGSUSED */ 2089 int 2090 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2091 { 2092 /* { 2093 syscallarg(const void *) fhp; 2094 syscallarg(size_t) fh_size; 2095 syscallarg(struct stat *) sb; 2096 } */ 2097 struct stat sb; 2098 int error; 2099 2100 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2101 if (error) 2102 return error; 2103 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2104 } 2105 2106 int 2107 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2108 int flags) 2109 { 2110 fhandle_t *fh; 2111 struct mount *mp; 2112 struct vnode *vp; 2113 int error; 2114 2115 /* 2116 * Must be super user 2117 */ 2118 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2119 0, NULL, NULL, NULL))) 2120 return error; 2121 2122 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2123 if (error != 0) 2124 return error; 2125 2126 error = vfs_fhtovp(fh, &vp); 2127 vfs_copyinfh_free(fh); 2128 if (error != 0) 2129 return error; 2130 2131 mp = vp->v_mount; 2132 error = dostatvfs(mp, sb, l, flags, 1); 2133 vput(vp); 2134 return error; 2135 } 2136 2137 /* ARGSUSED */ 2138 int 2139 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2140 { 2141 /* { 2142 syscallarg(const void *) fhp; 2143 syscallarg(size_t) fh_size; 2144 syscallarg(struct statvfs *) buf; 2145 syscallarg(int) flags; 2146 } */ 2147 struct statvfs *sb = STATVFSBUF_GET(); 2148 int error; 2149 2150 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2151 SCARG(uap, flags)); 2152 if (error == 0) 2153 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2154 STATVFSBUF_PUT(sb); 2155 return error; 2156 } 2157 2158 int 2159 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2160 dev_t dev) 2161 { 2162 2163 /* 2164 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2165 * in mode and dev=0. 2166 * 2167 * In all the other cases it's implementation defined behavior. 2168 */ 2169 2170 if ((mode & S_IFIFO) && dev == 0) 2171 return do_sys_mkfifoat(l, fdat, pathname, mode); 2172 else 2173 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2174 UIO_USERSPACE); 2175 } 2176 2177 /* 2178 * Create a special file. 2179 */ 2180 /* ARGSUSED */ 2181 int 2182 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2183 register_t *retval) 2184 { 2185 /* { 2186 syscallarg(const char *) path; 2187 syscallarg(mode_t) mode; 2188 syscallarg(dev_t) dev; 2189 } */ 2190 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2191 SCARG(uap, mode), SCARG(uap, dev)); 2192 } 2193 2194 int 2195 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2196 register_t *retval) 2197 { 2198 /* { 2199 syscallarg(int) fd; 2200 syscallarg(const char *) path; 2201 syscallarg(mode_t) mode; 2202 syscallarg(int) pad; 2203 syscallarg(dev_t) dev; 2204 } */ 2205 2206 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2207 SCARG(uap, mode), SCARG(uap, dev)); 2208 } 2209 2210 int 2211 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2212 enum uio_seg seg) 2213 { 2214 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2215 } 2216 2217 int 2218 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2219 dev_t dev, enum uio_seg seg) 2220 { 2221 struct proc *p = l->l_proc; 2222 struct vnode *vp; 2223 struct vattr vattr; 2224 int error, optype; 2225 struct pathbuf *pb; 2226 struct nameidata nd; 2227 const char *pathstring; 2228 2229 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2230 0, NULL, NULL, NULL)) != 0) 2231 return (error); 2232 2233 optype = VOP_MKNOD_DESCOFFSET; 2234 2235 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2236 if (error) { 2237 return error; 2238 } 2239 pathstring = pathbuf_stringcopy_get(pb); 2240 if (pathstring == NULL) { 2241 pathbuf_destroy(pb); 2242 return ENOMEM; 2243 } 2244 2245 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2246 2247 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2248 goto out; 2249 vp = nd.ni_vp; 2250 2251 if (vp != NULL) 2252 error = EEXIST; 2253 else { 2254 vattr_null(&vattr); 2255 /* We will read cwdi->cwdi_cmask unlocked. */ 2256 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2257 vattr.va_rdev = dev; 2258 2259 switch (mode & S_IFMT) { 2260 case S_IFMT: /* used by badsect to flag bad sectors */ 2261 vattr.va_type = VBAD; 2262 break; 2263 case S_IFCHR: 2264 vattr.va_type = VCHR; 2265 break; 2266 case S_IFBLK: 2267 vattr.va_type = VBLK; 2268 break; 2269 case S_IFWHT: 2270 optype = VOP_WHITEOUT_DESCOFFSET; 2271 break; 2272 case S_IFREG: 2273 #if NVERIEXEC > 0 2274 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2275 O_CREAT); 2276 #endif /* NVERIEXEC > 0 */ 2277 vattr.va_type = VREG; 2278 vattr.va_rdev = VNOVAL; 2279 optype = VOP_CREATE_DESCOFFSET; 2280 break; 2281 default: 2282 error = EINVAL; 2283 break; 2284 } 2285 2286 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2287 vattr.va_rdev == VNOVAL) 2288 error = EINVAL; 2289 } 2290 2291 if (!error) { 2292 switch (optype) { 2293 case VOP_WHITEOUT_DESCOFFSET: 2294 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2295 if (error) 2296 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2297 vput(nd.ni_dvp); 2298 break; 2299 2300 case VOP_MKNOD_DESCOFFSET: 2301 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2302 &nd.ni_cnd, &vattr); 2303 if (error == 0) 2304 vrele(nd.ni_vp); 2305 vput(nd.ni_dvp); 2306 break; 2307 2308 case VOP_CREATE_DESCOFFSET: 2309 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2310 &nd.ni_cnd, &vattr); 2311 if (error == 0) 2312 vrele(nd.ni_vp); 2313 vput(nd.ni_dvp); 2314 break; 2315 } 2316 } else { 2317 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2318 if (nd.ni_dvp == vp) 2319 vrele(nd.ni_dvp); 2320 else 2321 vput(nd.ni_dvp); 2322 if (vp) 2323 vrele(vp); 2324 } 2325 out: 2326 pathbuf_stringcopy_put(pb, pathstring); 2327 pathbuf_destroy(pb); 2328 return (error); 2329 } 2330 2331 /* 2332 * Create a named pipe. 2333 */ 2334 /* ARGSUSED */ 2335 int 2336 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2337 { 2338 /* { 2339 syscallarg(const char *) path; 2340 syscallarg(int) mode; 2341 } */ 2342 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2343 } 2344 2345 int 2346 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2347 register_t *retval) 2348 { 2349 /* { 2350 syscallarg(int) fd; 2351 syscallarg(const char *) path; 2352 syscallarg(int) mode; 2353 } */ 2354 2355 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2356 SCARG(uap, mode)); 2357 } 2358 2359 static int 2360 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2361 { 2362 struct proc *p = l->l_proc; 2363 struct vattr vattr; 2364 int error; 2365 struct pathbuf *pb; 2366 struct nameidata nd; 2367 2368 error = pathbuf_copyin(path, &pb); 2369 if (error) { 2370 return error; 2371 } 2372 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2373 2374 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2375 pathbuf_destroy(pb); 2376 return error; 2377 } 2378 if (nd.ni_vp != NULL) { 2379 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2380 if (nd.ni_dvp == nd.ni_vp) 2381 vrele(nd.ni_dvp); 2382 else 2383 vput(nd.ni_dvp); 2384 vrele(nd.ni_vp); 2385 pathbuf_destroy(pb); 2386 return (EEXIST); 2387 } 2388 vattr_null(&vattr); 2389 vattr.va_type = VFIFO; 2390 /* We will read cwdi->cwdi_cmask unlocked. */ 2391 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2392 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2393 if (error == 0) 2394 vrele(nd.ni_vp); 2395 vput(nd.ni_dvp); 2396 pathbuf_destroy(pb); 2397 return (error); 2398 } 2399 2400 /* 2401 * Make a hard file link. 2402 */ 2403 /* ARGSUSED */ 2404 int 2405 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2406 const char *link, int follow, register_t *retval) 2407 { 2408 struct vnode *vp; 2409 struct pathbuf *linkpb; 2410 struct nameidata nd; 2411 namei_simple_flags_t ns_flags; 2412 int error; 2413 2414 if (follow & AT_SYMLINK_FOLLOW) 2415 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2416 else 2417 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2418 2419 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2420 if (error != 0) 2421 return (error); 2422 error = pathbuf_copyin(link, &linkpb); 2423 if (error) { 2424 goto out1; 2425 } 2426 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2427 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2428 goto out2; 2429 if (nd.ni_vp) { 2430 error = EEXIST; 2431 goto abortop; 2432 } 2433 /* Prevent hard links on directories. */ 2434 if (vp->v_type == VDIR) { 2435 error = EPERM; 2436 goto abortop; 2437 } 2438 /* Prevent cross-mount operation. */ 2439 if (nd.ni_dvp->v_mount != vp->v_mount) { 2440 error = EXDEV; 2441 goto abortop; 2442 } 2443 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2444 VOP_UNLOCK(nd.ni_dvp); 2445 vrele(nd.ni_dvp); 2446 out2: 2447 pathbuf_destroy(linkpb); 2448 out1: 2449 vrele(vp); 2450 return (error); 2451 abortop: 2452 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2453 if (nd.ni_dvp == nd.ni_vp) 2454 vrele(nd.ni_dvp); 2455 else 2456 vput(nd.ni_dvp); 2457 if (nd.ni_vp != NULL) 2458 vrele(nd.ni_vp); 2459 goto out2; 2460 } 2461 2462 int 2463 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2464 { 2465 /* { 2466 syscallarg(const char *) path; 2467 syscallarg(const char *) link; 2468 } */ 2469 const char *path = SCARG(uap, path); 2470 const char *link = SCARG(uap, link); 2471 2472 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2473 AT_SYMLINK_FOLLOW, retval); 2474 } 2475 2476 int 2477 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2478 register_t *retval) 2479 { 2480 /* { 2481 syscallarg(int) fd1; 2482 syscallarg(const char *) name1; 2483 syscallarg(int) fd2; 2484 syscallarg(const char *) name2; 2485 syscallarg(int) flags; 2486 } */ 2487 int fd1 = SCARG(uap, fd1); 2488 const char *name1 = SCARG(uap, name1); 2489 int fd2 = SCARG(uap, fd2); 2490 const char *name2 = SCARG(uap, name2); 2491 int follow; 2492 2493 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2494 2495 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2496 } 2497 2498 2499 int 2500 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2501 { 2502 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2503 } 2504 2505 static int 2506 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2507 const char *link, enum uio_seg seg) 2508 { 2509 struct proc *p = curproc; 2510 struct vattr vattr; 2511 char *path; 2512 int error; 2513 size_t len; 2514 struct pathbuf *linkpb; 2515 struct nameidata nd; 2516 2517 KASSERT(l != NULL || fdat == AT_FDCWD); 2518 2519 path = PNBUF_GET(); 2520 if (seg == UIO_USERSPACE) { 2521 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2522 goto out1; 2523 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2524 goto out1; 2525 } else { 2526 len = strlen(patharg) + 1; 2527 KASSERT(len <= MAXPATHLEN); 2528 memcpy(path, patharg, len); 2529 linkpb = pathbuf_create(link); 2530 if (linkpb == NULL) { 2531 error = ENOMEM; 2532 goto out1; 2533 } 2534 } 2535 ktrkuser("symlink-target", path, len - 1); 2536 2537 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2538 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2539 goto out2; 2540 if (nd.ni_vp) { 2541 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2542 if (nd.ni_dvp == nd.ni_vp) 2543 vrele(nd.ni_dvp); 2544 else 2545 vput(nd.ni_dvp); 2546 vrele(nd.ni_vp); 2547 error = EEXIST; 2548 goto out2; 2549 } 2550 vattr_null(&vattr); 2551 vattr.va_type = VLNK; 2552 /* We will read cwdi->cwdi_cmask unlocked. */ 2553 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2554 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2555 if (error == 0) 2556 vrele(nd.ni_vp); 2557 vput(nd.ni_dvp); 2558 out2: 2559 pathbuf_destroy(linkpb); 2560 out1: 2561 PNBUF_PUT(path); 2562 return (error); 2563 } 2564 2565 /* 2566 * Make a symbolic link. 2567 */ 2568 /* ARGSUSED */ 2569 int 2570 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2571 { 2572 /* { 2573 syscallarg(const char *) path; 2574 syscallarg(const char *) link; 2575 } */ 2576 2577 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2578 UIO_USERSPACE); 2579 } 2580 2581 int 2582 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2583 register_t *retval) 2584 { 2585 /* { 2586 syscallarg(const char *) path1; 2587 syscallarg(int) fd; 2588 syscallarg(const char *) path2; 2589 } */ 2590 2591 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2592 SCARG(uap, path2), UIO_USERSPACE); 2593 } 2594 2595 /* 2596 * Delete a whiteout from the filesystem. 2597 */ 2598 /* ARGSUSED */ 2599 int 2600 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2601 { 2602 /* { 2603 syscallarg(const char *) path; 2604 } */ 2605 int error; 2606 struct pathbuf *pb; 2607 struct nameidata nd; 2608 2609 error = pathbuf_copyin(SCARG(uap, path), &pb); 2610 if (error) { 2611 return error; 2612 } 2613 2614 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2615 error = namei(&nd); 2616 if (error) { 2617 pathbuf_destroy(pb); 2618 return (error); 2619 } 2620 2621 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2622 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2623 if (nd.ni_dvp == nd.ni_vp) 2624 vrele(nd.ni_dvp); 2625 else 2626 vput(nd.ni_dvp); 2627 if (nd.ni_vp) 2628 vrele(nd.ni_vp); 2629 pathbuf_destroy(pb); 2630 return (EEXIST); 2631 } 2632 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2633 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2634 vput(nd.ni_dvp); 2635 pathbuf_destroy(pb); 2636 return (error); 2637 } 2638 2639 /* 2640 * Delete a name from the filesystem. 2641 */ 2642 /* ARGSUSED */ 2643 int 2644 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2645 { 2646 /* { 2647 syscallarg(const char *) path; 2648 } */ 2649 2650 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2651 } 2652 2653 int 2654 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2655 register_t *retval) 2656 { 2657 /* { 2658 syscallarg(int) fd; 2659 syscallarg(const char *) path; 2660 syscallarg(int) flag; 2661 } */ 2662 2663 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2664 SCARG(uap, flag), UIO_USERSPACE); 2665 } 2666 2667 int 2668 do_sys_unlink(const char *arg, enum uio_seg seg) 2669 { 2670 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2671 } 2672 2673 static int 2674 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2675 enum uio_seg seg) 2676 { 2677 struct vnode *vp; 2678 int error; 2679 struct pathbuf *pb; 2680 struct nameidata nd; 2681 const char *pathstring; 2682 2683 KASSERT(l != NULL || fdat == AT_FDCWD); 2684 2685 error = pathbuf_maybe_copyin(arg, seg, &pb); 2686 if (error) { 2687 return error; 2688 } 2689 pathstring = pathbuf_stringcopy_get(pb); 2690 if (pathstring == NULL) { 2691 pathbuf_destroy(pb); 2692 return ENOMEM; 2693 } 2694 2695 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2696 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2697 goto out; 2698 vp = nd.ni_vp; 2699 2700 /* 2701 * The root of a mounted filesystem cannot be deleted. 2702 */ 2703 if ((vp->v_vflag & VV_ROOT) != 0) { 2704 error = EBUSY; 2705 goto abort; 2706 } 2707 2708 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2709 error = EBUSY; 2710 goto abort; 2711 } 2712 2713 /* 2714 * No rmdir "." please. 2715 */ 2716 if (nd.ni_dvp == vp) { 2717 error = EINVAL; 2718 goto abort; 2719 } 2720 2721 /* 2722 * AT_REMOVEDIR is required to remove a directory 2723 */ 2724 if (vp->v_type == VDIR) { 2725 if (!(flags & AT_REMOVEDIR)) { 2726 error = EPERM; 2727 goto abort; 2728 } else { 2729 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2730 vput(nd.ni_dvp); 2731 goto out; 2732 } 2733 } 2734 2735 /* 2736 * Starting here we only deal with non directories. 2737 */ 2738 if (flags & AT_REMOVEDIR) { 2739 error = ENOTDIR; 2740 goto abort; 2741 } 2742 2743 #if NVERIEXEC > 0 2744 /* Handle remove requests for veriexec entries. */ 2745 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2746 goto abort; 2747 } 2748 #endif /* NVERIEXEC > 0 */ 2749 2750 #ifdef FILEASSOC 2751 (void)fileassoc_file_delete(vp); 2752 #endif /* FILEASSOC */ 2753 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2754 vput(nd.ni_dvp); 2755 goto out; 2756 2757 abort: 2758 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2759 if (nd.ni_dvp == vp) 2760 vrele(nd.ni_dvp); 2761 else 2762 vput(nd.ni_dvp); 2763 vput(vp); 2764 2765 out: 2766 pathbuf_stringcopy_put(pb, pathstring); 2767 pathbuf_destroy(pb); 2768 return (error); 2769 } 2770 2771 /* 2772 * Reposition read/write file offset. 2773 */ 2774 int 2775 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2776 { 2777 /* { 2778 syscallarg(int) fd; 2779 syscallarg(int) pad; 2780 syscallarg(off_t) offset; 2781 syscallarg(int) whence; 2782 } */ 2783 kauth_cred_t cred = l->l_cred; 2784 file_t *fp; 2785 struct vnode *vp; 2786 struct vattr vattr; 2787 off_t newoff; 2788 int error, fd; 2789 2790 fd = SCARG(uap, fd); 2791 2792 if ((fp = fd_getfile(fd)) == NULL) 2793 return (EBADF); 2794 2795 vp = fp->f_vnode; 2796 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2797 error = ESPIPE; 2798 goto out; 2799 } 2800 2801 vn_lock(vp, LK_SHARED | LK_RETRY); 2802 2803 switch (SCARG(uap, whence)) { 2804 case SEEK_CUR: 2805 newoff = fp->f_offset + SCARG(uap, offset); 2806 break; 2807 case SEEK_END: 2808 error = VOP_GETATTR(vp, &vattr, cred); 2809 if (error) { 2810 VOP_UNLOCK(vp); 2811 goto out; 2812 } 2813 newoff = SCARG(uap, offset) + vattr.va_size; 2814 break; 2815 case SEEK_SET: 2816 newoff = SCARG(uap, offset); 2817 break; 2818 default: 2819 error = EINVAL; 2820 VOP_UNLOCK(vp); 2821 goto out; 2822 } 2823 VOP_UNLOCK(vp); 2824 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2825 *(off_t *)retval = fp->f_offset = newoff; 2826 } 2827 out: 2828 fd_putfile(fd); 2829 return (error); 2830 } 2831 2832 /* 2833 * Positional read system call. 2834 */ 2835 int 2836 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2837 { 2838 /* { 2839 syscallarg(int) fd; 2840 syscallarg(void *) buf; 2841 syscallarg(size_t) nbyte; 2842 syscallarg(off_t) offset; 2843 } */ 2844 file_t *fp; 2845 struct vnode *vp; 2846 off_t offset; 2847 int error, fd = SCARG(uap, fd); 2848 2849 if ((fp = fd_getfile(fd)) == NULL) 2850 return (EBADF); 2851 2852 if ((fp->f_flag & FREAD) == 0) { 2853 fd_putfile(fd); 2854 return (EBADF); 2855 } 2856 2857 vp = fp->f_vnode; 2858 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2859 error = ESPIPE; 2860 goto out; 2861 } 2862 2863 offset = SCARG(uap, offset); 2864 2865 /* 2866 * XXX This works because no file systems actually 2867 * XXX take any action on the seek operation. 2868 */ 2869 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2870 goto out; 2871 2872 /* dofileread() will unuse the descriptor for us */ 2873 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2874 &offset, 0, retval)); 2875 2876 out: 2877 fd_putfile(fd); 2878 return (error); 2879 } 2880 2881 /* 2882 * Positional scatter read system call. 2883 */ 2884 int 2885 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2886 { 2887 /* { 2888 syscallarg(int) fd; 2889 syscallarg(const struct iovec *) iovp; 2890 syscallarg(int) iovcnt; 2891 syscallarg(off_t) offset; 2892 } */ 2893 off_t offset = SCARG(uap, offset); 2894 2895 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2896 SCARG(uap, iovcnt), &offset, 0, retval); 2897 } 2898 2899 /* 2900 * Positional write system call. 2901 */ 2902 int 2903 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2904 { 2905 /* { 2906 syscallarg(int) fd; 2907 syscallarg(const void *) buf; 2908 syscallarg(size_t) nbyte; 2909 syscallarg(off_t) offset; 2910 } */ 2911 file_t *fp; 2912 struct vnode *vp; 2913 off_t offset; 2914 int error, fd = SCARG(uap, fd); 2915 2916 if ((fp = fd_getfile(fd)) == NULL) 2917 return (EBADF); 2918 2919 if ((fp->f_flag & FWRITE) == 0) { 2920 fd_putfile(fd); 2921 return (EBADF); 2922 } 2923 2924 vp = fp->f_vnode; 2925 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2926 error = ESPIPE; 2927 goto out; 2928 } 2929 2930 offset = SCARG(uap, offset); 2931 2932 /* 2933 * XXX This works because no file systems actually 2934 * XXX take any action on the seek operation. 2935 */ 2936 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2937 goto out; 2938 2939 /* dofilewrite() will unuse the descriptor for us */ 2940 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2941 &offset, 0, retval)); 2942 2943 out: 2944 fd_putfile(fd); 2945 return (error); 2946 } 2947 2948 /* 2949 * Positional gather write system call. 2950 */ 2951 int 2952 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2953 { 2954 /* { 2955 syscallarg(int) fd; 2956 syscallarg(const struct iovec *) iovp; 2957 syscallarg(int) iovcnt; 2958 syscallarg(off_t) offset; 2959 } */ 2960 off_t offset = SCARG(uap, offset); 2961 2962 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2963 SCARG(uap, iovcnt), &offset, 0, retval); 2964 } 2965 2966 /* 2967 * Check access permissions. 2968 */ 2969 int 2970 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2971 { 2972 /* { 2973 syscallarg(const char *) path; 2974 syscallarg(int) flags; 2975 } */ 2976 2977 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2978 SCARG(uap, flags), 0); 2979 } 2980 2981 int 2982 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2983 int mode, int flags) 2984 { 2985 kauth_cred_t cred; 2986 struct vnode *vp; 2987 int error, nd_flag, vmode; 2988 struct pathbuf *pb; 2989 struct nameidata nd; 2990 2991 CTASSERT(F_OK == 0); 2992 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2993 /* nonsense mode */ 2994 return EINVAL; 2995 } 2996 2997 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2998 if (flags & AT_SYMLINK_NOFOLLOW) 2999 nd_flag &= ~FOLLOW; 3000 3001 error = pathbuf_copyin(path, &pb); 3002 if (error) 3003 return error; 3004 3005 NDINIT(&nd, LOOKUP, nd_flag, pb); 3006 3007 /* Override default credentials */ 3008 cred = kauth_cred_dup(l->l_cred); 3009 if (!(flags & AT_EACCESS)) { 3010 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3011 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3012 } 3013 nd.ni_cnd.cn_cred = cred; 3014 3015 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3016 pathbuf_destroy(pb); 3017 goto out; 3018 } 3019 vp = nd.ni_vp; 3020 pathbuf_destroy(pb); 3021 3022 /* Flags == 0 means only check for existence. */ 3023 if (mode) { 3024 vmode = 0; 3025 if (mode & R_OK) 3026 vmode |= VREAD; 3027 if (mode & W_OK) 3028 vmode |= VWRITE; 3029 if (mode & X_OK) 3030 vmode |= VEXEC; 3031 3032 error = VOP_ACCESS(vp, vmode, cred); 3033 if (!error && (vmode & VWRITE)) 3034 error = vn_writechk(vp); 3035 } 3036 vput(vp); 3037 out: 3038 kauth_cred_free(cred); 3039 return (error); 3040 } 3041 3042 int 3043 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3044 register_t *retval) 3045 { 3046 /* { 3047 syscallarg(int) fd; 3048 syscallarg(const char *) path; 3049 syscallarg(int) amode; 3050 syscallarg(int) flag; 3051 } */ 3052 3053 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3054 SCARG(uap, amode), SCARG(uap, flag)); 3055 } 3056 3057 /* 3058 * Common code for all sys_stat functions, including compat versions. 3059 */ 3060 int 3061 do_sys_stat(const char *userpath, unsigned int nd_flag, 3062 struct stat *sb) 3063 { 3064 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3065 } 3066 3067 int 3068 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3069 unsigned int nd_flag, struct stat *sb) 3070 { 3071 int error; 3072 struct pathbuf *pb; 3073 struct nameidata nd; 3074 3075 KASSERT(l != NULL || fdat == AT_FDCWD); 3076 3077 error = pathbuf_copyin(userpath, &pb); 3078 if (error) { 3079 return error; 3080 } 3081 3082 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3083 3084 error = fd_nameiat(l, fdat, &nd); 3085 if (error != 0) { 3086 pathbuf_destroy(pb); 3087 return error; 3088 } 3089 error = vn_stat(nd.ni_vp, sb); 3090 vput(nd.ni_vp); 3091 pathbuf_destroy(pb); 3092 return error; 3093 } 3094 3095 /* 3096 * Get file status; this version follows links. 3097 */ 3098 /* ARGSUSED */ 3099 int 3100 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3101 { 3102 /* { 3103 syscallarg(const char *) path; 3104 syscallarg(struct stat *) ub; 3105 } */ 3106 struct stat sb; 3107 int error; 3108 3109 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3110 if (error) 3111 return error; 3112 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3113 } 3114 3115 /* 3116 * Get file status; this version does not follow links. 3117 */ 3118 /* ARGSUSED */ 3119 int 3120 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3121 { 3122 /* { 3123 syscallarg(const char *) path; 3124 syscallarg(struct stat *) ub; 3125 } */ 3126 struct stat sb; 3127 int error; 3128 3129 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3130 if (error) 3131 return error; 3132 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3133 } 3134 3135 int 3136 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3137 register_t *retval) 3138 { 3139 /* { 3140 syscallarg(int) fd; 3141 syscallarg(const char *) path; 3142 syscallarg(struct stat *) buf; 3143 syscallarg(int) flag; 3144 } */ 3145 unsigned int nd_flag; 3146 struct stat sb; 3147 int error; 3148 3149 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3150 nd_flag = NOFOLLOW; 3151 else 3152 nd_flag = FOLLOW; 3153 3154 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3155 &sb); 3156 if (error) 3157 return error; 3158 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3159 } 3160 3161 /* 3162 * Get configurable pathname variables. 3163 */ 3164 /* ARGSUSED */ 3165 int 3166 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3167 { 3168 /* { 3169 syscallarg(const char *) path; 3170 syscallarg(int) name; 3171 } */ 3172 int error; 3173 struct pathbuf *pb; 3174 struct nameidata nd; 3175 3176 error = pathbuf_copyin(SCARG(uap, path), &pb); 3177 if (error) { 3178 return error; 3179 } 3180 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3181 if ((error = namei(&nd)) != 0) { 3182 pathbuf_destroy(pb); 3183 return (error); 3184 } 3185 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3186 vput(nd.ni_vp); 3187 pathbuf_destroy(pb); 3188 return (error); 3189 } 3190 3191 /* 3192 * Return target name of a symbolic link. 3193 */ 3194 /* ARGSUSED */ 3195 int 3196 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3197 register_t *retval) 3198 { 3199 /* { 3200 syscallarg(const char *) path; 3201 syscallarg(char *) buf; 3202 syscallarg(size_t) count; 3203 } */ 3204 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3205 SCARG(uap, buf), SCARG(uap, count), retval); 3206 } 3207 3208 static int 3209 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3210 size_t count, register_t *retval) 3211 { 3212 struct vnode *vp; 3213 struct iovec aiov; 3214 struct uio auio; 3215 int error; 3216 struct pathbuf *pb; 3217 struct nameidata nd; 3218 3219 error = pathbuf_copyin(path, &pb); 3220 if (error) { 3221 return error; 3222 } 3223 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3224 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3225 pathbuf_destroy(pb); 3226 return error; 3227 } 3228 vp = nd.ni_vp; 3229 pathbuf_destroy(pb); 3230 if (vp->v_type != VLNK) 3231 error = EINVAL; 3232 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3233 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3234 aiov.iov_base = buf; 3235 aiov.iov_len = count; 3236 auio.uio_iov = &aiov; 3237 auio.uio_iovcnt = 1; 3238 auio.uio_offset = 0; 3239 auio.uio_rw = UIO_READ; 3240 KASSERT(l == curlwp); 3241 auio.uio_vmspace = l->l_proc->p_vmspace; 3242 auio.uio_resid = count; 3243 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3244 *retval = count - auio.uio_resid; 3245 } 3246 vput(vp); 3247 return (error); 3248 } 3249 3250 int 3251 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3252 register_t *retval) 3253 { 3254 /* { 3255 syscallarg(int) fd; 3256 syscallarg(const char *) path; 3257 syscallarg(char *) buf; 3258 syscallarg(size_t) bufsize; 3259 } */ 3260 3261 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3262 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3263 } 3264 3265 /* 3266 * Change flags of a file given a path name. 3267 */ 3268 /* ARGSUSED */ 3269 int 3270 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3271 { 3272 /* { 3273 syscallarg(const char *) path; 3274 syscallarg(u_long) flags; 3275 } */ 3276 struct vnode *vp; 3277 int error; 3278 3279 error = namei_simple_user(SCARG(uap, path), 3280 NSM_FOLLOW_TRYEMULROOT, &vp); 3281 if (error != 0) 3282 return (error); 3283 error = change_flags(vp, SCARG(uap, flags), l); 3284 vput(vp); 3285 return (error); 3286 } 3287 3288 /* 3289 * Change flags of a file given a file descriptor. 3290 */ 3291 /* ARGSUSED */ 3292 int 3293 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3294 { 3295 /* { 3296 syscallarg(int) fd; 3297 syscallarg(u_long) flags; 3298 } */ 3299 struct vnode *vp; 3300 file_t *fp; 3301 int error; 3302 3303 /* fd_getvnode() will use the descriptor for us */ 3304 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3305 return (error); 3306 vp = fp->f_vnode; 3307 error = change_flags(vp, SCARG(uap, flags), l); 3308 VOP_UNLOCK(vp); 3309 fd_putfile(SCARG(uap, fd)); 3310 return (error); 3311 } 3312 3313 /* 3314 * Change flags of a file given a path name; this version does 3315 * not follow links. 3316 */ 3317 int 3318 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3319 { 3320 /* { 3321 syscallarg(const char *) path; 3322 syscallarg(u_long) flags; 3323 } */ 3324 struct vnode *vp; 3325 int error; 3326 3327 error = namei_simple_user(SCARG(uap, path), 3328 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3329 if (error != 0) 3330 return (error); 3331 error = change_flags(vp, SCARG(uap, flags), l); 3332 vput(vp); 3333 return (error); 3334 } 3335 3336 /* 3337 * Common routine to change flags of a file. 3338 */ 3339 int 3340 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3341 { 3342 struct vattr vattr; 3343 int error; 3344 3345 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3346 3347 vattr_null(&vattr); 3348 vattr.va_flags = flags; 3349 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3350 3351 return (error); 3352 } 3353 3354 /* 3355 * Change mode of a file given path name; this version follows links. 3356 */ 3357 /* ARGSUSED */ 3358 int 3359 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3360 { 3361 /* { 3362 syscallarg(const char *) path; 3363 syscallarg(int) mode; 3364 } */ 3365 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3366 SCARG(uap, mode), 0); 3367 } 3368 3369 int 3370 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3371 { 3372 int error; 3373 struct vnode *vp; 3374 namei_simple_flags_t ns_flag; 3375 3376 if (flags & AT_SYMLINK_NOFOLLOW) 3377 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3378 else 3379 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3380 3381 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3382 if (error != 0) 3383 return error; 3384 3385 error = change_mode(vp, mode, l); 3386 3387 vrele(vp); 3388 3389 return (error); 3390 } 3391 3392 /* 3393 * Change mode of a file given a file descriptor. 3394 */ 3395 /* ARGSUSED */ 3396 int 3397 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3398 { 3399 /* { 3400 syscallarg(int) fd; 3401 syscallarg(int) mode; 3402 } */ 3403 file_t *fp; 3404 int error; 3405 3406 /* fd_getvnode() will use the descriptor for us */ 3407 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3408 return (error); 3409 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3410 fd_putfile(SCARG(uap, fd)); 3411 return (error); 3412 } 3413 3414 int 3415 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3416 register_t *retval) 3417 { 3418 /* { 3419 syscallarg(int) fd; 3420 syscallarg(const char *) path; 3421 syscallarg(int) mode; 3422 syscallarg(int) flag; 3423 } */ 3424 3425 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3426 SCARG(uap, mode), SCARG(uap, flag)); 3427 } 3428 3429 /* 3430 * Change mode of a file given path name; this version does not follow links. 3431 */ 3432 /* ARGSUSED */ 3433 int 3434 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3435 { 3436 /* { 3437 syscallarg(const char *) path; 3438 syscallarg(int) mode; 3439 } */ 3440 int error; 3441 struct vnode *vp; 3442 3443 error = namei_simple_user(SCARG(uap, path), 3444 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3445 if (error != 0) 3446 return (error); 3447 3448 error = change_mode(vp, SCARG(uap, mode), l); 3449 3450 vrele(vp); 3451 return (error); 3452 } 3453 3454 /* 3455 * Common routine to set mode given a vnode. 3456 */ 3457 static int 3458 change_mode(struct vnode *vp, int mode, struct lwp *l) 3459 { 3460 struct vattr vattr; 3461 int error; 3462 3463 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3464 vattr_null(&vattr); 3465 vattr.va_mode = mode & ALLPERMS; 3466 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3467 VOP_UNLOCK(vp); 3468 return (error); 3469 } 3470 3471 /* 3472 * Set ownership given a path name; this version follows links. 3473 */ 3474 /* ARGSUSED */ 3475 int 3476 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3477 { 3478 /* { 3479 syscallarg(const char *) path; 3480 syscallarg(uid_t) uid; 3481 syscallarg(gid_t) gid; 3482 } */ 3483 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3484 SCARG(uap, gid), 0); 3485 } 3486 3487 int 3488 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3489 gid_t gid, int flags) 3490 { 3491 int error; 3492 struct vnode *vp; 3493 namei_simple_flags_t ns_flag; 3494 3495 if (flags & AT_SYMLINK_NOFOLLOW) 3496 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3497 else 3498 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3499 3500 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3501 if (error != 0) 3502 return error; 3503 3504 error = change_owner(vp, uid, gid, l, 0); 3505 3506 vrele(vp); 3507 3508 return (error); 3509 } 3510 3511 /* 3512 * Set ownership given a path name; this version follows links. 3513 * Provides POSIX semantics. 3514 */ 3515 /* ARGSUSED */ 3516 int 3517 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3518 { 3519 /* { 3520 syscallarg(const char *) path; 3521 syscallarg(uid_t) uid; 3522 syscallarg(gid_t) gid; 3523 } */ 3524 int error; 3525 struct vnode *vp; 3526 3527 error = namei_simple_user(SCARG(uap, path), 3528 NSM_FOLLOW_TRYEMULROOT, &vp); 3529 if (error != 0) 3530 return (error); 3531 3532 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3533 3534 vrele(vp); 3535 return (error); 3536 } 3537 3538 /* 3539 * Set ownership given a file descriptor. 3540 */ 3541 /* ARGSUSED */ 3542 int 3543 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3544 { 3545 /* { 3546 syscallarg(int) fd; 3547 syscallarg(uid_t) uid; 3548 syscallarg(gid_t) gid; 3549 } */ 3550 int error; 3551 file_t *fp; 3552 3553 /* fd_getvnode() will use the descriptor for us */ 3554 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3555 return (error); 3556 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3557 l, 0); 3558 fd_putfile(SCARG(uap, fd)); 3559 return (error); 3560 } 3561 3562 int 3563 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3564 register_t *retval) 3565 { 3566 /* { 3567 syscallarg(int) fd; 3568 syscallarg(const char *) path; 3569 syscallarg(uid_t) owner; 3570 syscallarg(gid_t) group; 3571 syscallarg(int) flag; 3572 } */ 3573 3574 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3575 SCARG(uap, owner), SCARG(uap, group), 3576 SCARG(uap, flag)); 3577 } 3578 3579 /* 3580 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3581 */ 3582 /* ARGSUSED */ 3583 int 3584 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3585 { 3586 /* { 3587 syscallarg(int) fd; 3588 syscallarg(uid_t) uid; 3589 syscallarg(gid_t) gid; 3590 } */ 3591 int error; 3592 file_t *fp; 3593 3594 /* fd_getvnode() will use the descriptor for us */ 3595 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3596 return (error); 3597 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3598 l, 1); 3599 fd_putfile(SCARG(uap, fd)); 3600 return (error); 3601 } 3602 3603 /* 3604 * Set ownership given a path name; this version does not follow links. 3605 */ 3606 /* ARGSUSED */ 3607 int 3608 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3609 { 3610 /* { 3611 syscallarg(const char *) path; 3612 syscallarg(uid_t) uid; 3613 syscallarg(gid_t) gid; 3614 } */ 3615 int error; 3616 struct vnode *vp; 3617 3618 error = namei_simple_user(SCARG(uap, path), 3619 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3620 if (error != 0) 3621 return (error); 3622 3623 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3624 3625 vrele(vp); 3626 return (error); 3627 } 3628 3629 /* 3630 * Set ownership given a path name; this version does not follow links. 3631 * Provides POSIX/XPG semantics. 3632 */ 3633 /* ARGSUSED */ 3634 int 3635 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3636 { 3637 /* { 3638 syscallarg(const char *) path; 3639 syscallarg(uid_t) uid; 3640 syscallarg(gid_t) gid; 3641 } */ 3642 int error; 3643 struct vnode *vp; 3644 3645 error = namei_simple_user(SCARG(uap, path), 3646 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3647 if (error != 0) 3648 return (error); 3649 3650 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3651 3652 vrele(vp); 3653 return (error); 3654 } 3655 3656 /* 3657 * Common routine to set ownership given a vnode. 3658 */ 3659 static int 3660 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3661 int posix_semantics) 3662 { 3663 struct vattr vattr; 3664 mode_t newmode; 3665 int error; 3666 3667 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3668 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3669 goto out; 3670 3671 #define CHANGED(x) ((int)(x) != -1) 3672 newmode = vattr.va_mode; 3673 if (posix_semantics) { 3674 /* 3675 * POSIX/XPG semantics: if the caller is not the super-user, 3676 * clear set-user-id and set-group-id bits. Both POSIX and 3677 * the XPG consider the behaviour for calls by the super-user 3678 * implementation-defined; we leave the set-user-id and set- 3679 * group-id settings intact in that case. 3680 */ 3681 if (vattr.va_mode & S_ISUID) { 3682 if (kauth_authorize_vnode(l->l_cred, 3683 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3684 newmode &= ~S_ISUID; 3685 } 3686 if (vattr.va_mode & S_ISGID) { 3687 if (kauth_authorize_vnode(l->l_cred, 3688 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3689 newmode &= ~S_ISGID; 3690 } 3691 } else { 3692 /* 3693 * NetBSD semantics: when changing owner and/or group, 3694 * clear the respective bit(s). 3695 */ 3696 if (CHANGED(uid)) 3697 newmode &= ~S_ISUID; 3698 if (CHANGED(gid)) 3699 newmode &= ~S_ISGID; 3700 } 3701 /* Update va_mode iff altered. */ 3702 if (vattr.va_mode == newmode) 3703 newmode = VNOVAL; 3704 3705 vattr_null(&vattr); 3706 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3707 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3708 vattr.va_mode = newmode; 3709 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3710 #undef CHANGED 3711 3712 out: 3713 VOP_UNLOCK(vp); 3714 return (error); 3715 } 3716 3717 /* 3718 * Set the access and modification times given a path name; this 3719 * version follows links. 3720 */ 3721 /* ARGSUSED */ 3722 int 3723 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3724 register_t *retval) 3725 { 3726 /* { 3727 syscallarg(const char *) path; 3728 syscallarg(const struct timeval *) tptr; 3729 } */ 3730 3731 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3732 SCARG(uap, tptr), UIO_USERSPACE); 3733 } 3734 3735 /* 3736 * Set the access and modification times given a file descriptor. 3737 */ 3738 /* ARGSUSED */ 3739 int 3740 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3741 register_t *retval) 3742 { 3743 /* { 3744 syscallarg(int) fd; 3745 syscallarg(const struct timeval *) tptr; 3746 } */ 3747 int error; 3748 file_t *fp; 3749 3750 /* fd_getvnode() will use the descriptor for us */ 3751 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3752 return (error); 3753 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3754 UIO_USERSPACE); 3755 fd_putfile(SCARG(uap, fd)); 3756 return (error); 3757 } 3758 3759 int 3760 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3761 register_t *retval) 3762 { 3763 /* { 3764 syscallarg(int) fd; 3765 syscallarg(const struct timespec *) tptr; 3766 } */ 3767 int error; 3768 file_t *fp; 3769 3770 /* fd_getvnode() will use the descriptor for us */ 3771 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3772 return (error); 3773 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3774 SCARG(uap, tptr), UIO_USERSPACE); 3775 fd_putfile(SCARG(uap, fd)); 3776 return (error); 3777 } 3778 3779 /* 3780 * Set the access and modification times given a path name; this 3781 * version does not follow links. 3782 */ 3783 int 3784 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3785 register_t *retval) 3786 { 3787 /* { 3788 syscallarg(const char *) path; 3789 syscallarg(const struct timeval *) tptr; 3790 } */ 3791 3792 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3793 SCARG(uap, tptr), UIO_USERSPACE); 3794 } 3795 3796 int 3797 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3798 register_t *retval) 3799 { 3800 /* { 3801 syscallarg(int) fd; 3802 syscallarg(const char *) path; 3803 syscallarg(const struct timespec *) tptr; 3804 syscallarg(int) flag; 3805 } */ 3806 int follow; 3807 const struct timespec *tptr; 3808 int error; 3809 3810 tptr = SCARG(uap, tptr); 3811 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3812 3813 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3814 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3815 3816 return error; 3817 } 3818 3819 /* 3820 * Common routine to set access and modification times given a vnode. 3821 */ 3822 int 3823 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3824 const struct timespec *tptr, enum uio_seg seg) 3825 { 3826 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3827 } 3828 3829 int 3830 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3831 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3832 { 3833 struct vattr vattr; 3834 int error, dorele = 0; 3835 namei_simple_flags_t sflags; 3836 bool vanull, setbirthtime; 3837 struct timespec ts[2]; 3838 3839 KASSERT(l != NULL || fdat == AT_FDCWD); 3840 3841 /* 3842 * I have checked all callers and they pass either FOLLOW, 3843 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3844 * is 0. More to the point, they don't pass anything else. 3845 * Let's keep it that way at least until the namei interfaces 3846 * are fully sanitized. 3847 */ 3848 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3849 sflags = (flag == FOLLOW) ? 3850 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3851 3852 if (tptr == NULL) { 3853 vanull = true; 3854 nanotime(&ts[0]); 3855 ts[1] = ts[0]; 3856 } else { 3857 vanull = false; 3858 if (seg != UIO_SYSSPACE) { 3859 error = copyin(tptr, ts, sizeof (ts)); 3860 if (error != 0) 3861 return error; 3862 } else { 3863 ts[0] = tptr[0]; 3864 ts[1] = tptr[1]; 3865 } 3866 } 3867 3868 if (ts[0].tv_nsec == UTIME_NOW) { 3869 nanotime(&ts[0]); 3870 if (ts[1].tv_nsec == UTIME_NOW) { 3871 vanull = true; 3872 ts[1] = ts[0]; 3873 } 3874 } else if (ts[1].tv_nsec == UTIME_NOW) 3875 nanotime(&ts[1]); 3876 3877 if (vp == NULL) { 3878 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3879 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3880 if (error != 0) 3881 return error; 3882 dorele = 1; 3883 } 3884 3885 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3886 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3887 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3888 vattr_null(&vattr); 3889 3890 if (ts[0].tv_nsec != UTIME_OMIT) 3891 vattr.va_atime = ts[0]; 3892 3893 if (ts[1].tv_nsec != UTIME_OMIT) { 3894 vattr.va_mtime = ts[1]; 3895 if (setbirthtime) 3896 vattr.va_birthtime = ts[1]; 3897 } 3898 3899 if (vanull) 3900 vattr.va_vaflags |= VA_UTIMES_NULL; 3901 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3902 VOP_UNLOCK(vp); 3903 3904 if (dorele != 0) 3905 vrele(vp); 3906 3907 return error; 3908 } 3909 3910 int 3911 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3912 const struct timeval *tptr, enum uio_seg seg) 3913 { 3914 struct timespec ts[2]; 3915 struct timespec *tsptr = NULL; 3916 int error; 3917 3918 if (tptr != NULL) { 3919 struct timeval tv[2]; 3920 3921 if (seg != UIO_SYSSPACE) { 3922 error = copyin(tptr, tv, sizeof(tv)); 3923 if (error != 0) 3924 return error; 3925 tptr = tv; 3926 } 3927 3928 if ((tptr[0].tv_usec == UTIME_NOW) || 3929 (tptr[0].tv_usec == UTIME_OMIT)) 3930 ts[0].tv_nsec = tptr[0].tv_usec; 3931 else { 3932 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 3933 return EINVAL; 3934 3935 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3936 } 3937 3938 if ((tptr[1].tv_usec == UTIME_NOW) || 3939 (tptr[1].tv_usec == UTIME_OMIT)) 3940 ts[1].tv_nsec = tptr[1].tv_usec; 3941 else { 3942 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 3943 return EINVAL; 3944 3945 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3946 } 3947 3948 tsptr = &ts[0]; 3949 } 3950 3951 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3952 } 3953 3954 /* 3955 * Truncate a file given its path name. 3956 */ 3957 /* ARGSUSED */ 3958 int 3959 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3960 { 3961 /* { 3962 syscallarg(const char *) path; 3963 syscallarg(int) pad; 3964 syscallarg(off_t) length; 3965 } */ 3966 struct vnode *vp; 3967 struct vattr vattr; 3968 int error; 3969 3970 if (SCARG(uap, length) < 0) 3971 return EINVAL; 3972 3973 error = namei_simple_user(SCARG(uap, path), 3974 NSM_FOLLOW_TRYEMULROOT, &vp); 3975 if (error != 0) 3976 return (error); 3977 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3978 if (vp->v_type == VDIR) 3979 error = EISDIR; 3980 else if ((error = vn_writechk(vp)) == 0 && 3981 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3982 vattr_null(&vattr); 3983 vattr.va_size = SCARG(uap, length); 3984 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3985 } 3986 vput(vp); 3987 return (error); 3988 } 3989 3990 /* 3991 * Truncate a file given a file descriptor. 3992 */ 3993 /* ARGSUSED */ 3994 int 3995 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3996 { 3997 /* { 3998 syscallarg(int) fd; 3999 syscallarg(int) pad; 4000 syscallarg(off_t) length; 4001 } */ 4002 struct vattr vattr; 4003 struct vnode *vp; 4004 file_t *fp; 4005 int error; 4006 4007 if (SCARG(uap, length) < 0) 4008 return EINVAL; 4009 4010 /* fd_getvnode() will use the descriptor for us */ 4011 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4012 return (error); 4013 if ((fp->f_flag & FWRITE) == 0) { 4014 error = EINVAL; 4015 goto out; 4016 } 4017 vp = fp->f_vnode; 4018 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4019 if (vp->v_type == VDIR) 4020 error = EISDIR; 4021 else if ((error = vn_writechk(vp)) == 0) { 4022 vattr_null(&vattr); 4023 vattr.va_size = SCARG(uap, length); 4024 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 4025 } 4026 VOP_UNLOCK(vp); 4027 out: 4028 fd_putfile(SCARG(uap, fd)); 4029 return (error); 4030 } 4031 4032 /* 4033 * Sync an open file. 4034 */ 4035 /* ARGSUSED */ 4036 int 4037 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4038 { 4039 /* { 4040 syscallarg(int) fd; 4041 } */ 4042 struct vnode *vp; 4043 file_t *fp; 4044 int error; 4045 4046 /* fd_getvnode() will use the descriptor for us */ 4047 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4048 return (error); 4049 vp = fp->f_vnode; 4050 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4051 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4052 VOP_UNLOCK(vp); 4053 fd_putfile(SCARG(uap, fd)); 4054 return (error); 4055 } 4056 4057 /* 4058 * Sync a range of file data. API modeled after that found in AIX. 4059 * 4060 * FDATASYNC indicates that we need only save enough metadata to be able 4061 * to re-read the written data. Note we duplicate AIX's requirement that 4062 * the file be open for writing. 4063 */ 4064 /* ARGSUSED */ 4065 int 4066 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4067 { 4068 /* { 4069 syscallarg(int) fd; 4070 syscallarg(int) flags; 4071 syscallarg(off_t) start; 4072 syscallarg(off_t) length; 4073 } */ 4074 struct vnode *vp; 4075 file_t *fp; 4076 int flags, nflags; 4077 off_t s, e, len; 4078 int error; 4079 4080 /* fd_getvnode() will use the descriptor for us */ 4081 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4082 return (error); 4083 4084 if ((fp->f_flag & FWRITE) == 0) { 4085 error = EBADF; 4086 goto out; 4087 } 4088 4089 flags = SCARG(uap, flags); 4090 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4091 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4092 error = EINVAL; 4093 goto out; 4094 } 4095 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4096 if (flags & FDATASYNC) 4097 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4098 else 4099 nflags = FSYNC_WAIT; 4100 if (flags & FDISKSYNC) 4101 nflags |= FSYNC_CACHE; 4102 4103 len = SCARG(uap, length); 4104 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4105 if (len) { 4106 s = SCARG(uap, start); 4107 e = s + len; 4108 if (e < s) { 4109 error = EINVAL; 4110 goto out; 4111 } 4112 } else { 4113 e = 0; 4114 s = 0; 4115 } 4116 4117 vp = fp->f_vnode; 4118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4119 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4120 VOP_UNLOCK(vp); 4121 out: 4122 fd_putfile(SCARG(uap, fd)); 4123 return (error); 4124 } 4125 4126 /* 4127 * Sync the data of an open file. 4128 */ 4129 /* ARGSUSED */ 4130 int 4131 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4132 { 4133 /* { 4134 syscallarg(int) fd; 4135 } */ 4136 struct vnode *vp; 4137 file_t *fp; 4138 int error; 4139 4140 /* fd_getvnode() will use the descriptor for us */ 4141 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4142 return (error); 4143 if ((fp->f_flag & FWRITE) == 0) { 4144 fd_putfile(SCARG(uap, fd)); 4145 return (EBADF); 4146 } 4147 vp = fp->f_vnode; 4148 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4149 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4150 VOP_UNLOCK(vp); 4151 fd_putfile(SCARG(uap, fd)); 4152 return (error); 4153 } 4154 4155 /* 4156 * Rename files, (standard) BSD semantics frontend. 4157 */ 4158 /* ARGSUSED */ 4159 int 4160 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4161 { 4162 /* { 4163 syscallarg(const char *) from; 4164 syscallarg(const char *) to; 4165 } */ 4166 4167 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4168 SCARG(uap, to), UIO_USERSPACE, 0)); 4169 } 4170 4171 int 4172 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4173 register_t *retval) 4174 { 4175 /* { 4176 syscallarg(int) fromfd; 4177 syscallarg(const char *) from; 4178 syscallarg(int) tofd; 4179 syscallarg(const char *) to; 4180 } */ 4181 4182 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4183 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4184 } 4185 4186 /* 4187 * Rename files, POSIX semantics frontend. 4188 */ 4189 /* ARGSUSED */ 4190 int 4191 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4192 { 4193 /* { 4194 syscallarg(const char *) from; 4195 syscallarg(const char *) to; 4196 } */ 4197 4198 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4199 SCARG(uap, to), UIO_USERSPACE, 1)); 4200 } 4201 4202 /* 4203 * Rename files. Source and destination must either both be directories, 4204 * or both not be directories. If target is a directory, it must be empty. 4205 * If `from' and `to' refer to the same object, the value of the `retain' 4206 * argument is used to determine whether `from' will be 4207 * 4208 * (retain == 0) deleted unless `from' and `to' refer to the same 4209 * object in the file system's name space (BSD). 4210 * (retain == 1) always retained (POSIX). 4211 * 4212 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4213 */ 4214 int 4215 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4216 { 4217 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4218 } 4219 4220 static int 4221 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4222 const char *to, enum uio_seg seg, int retain) 4223 { 4224 struct pathbuf *fpb, *tpb; 4225 struct nameidata fnd, tnd; 4226 struct vnode *fdvp, *fvp; 4227 struct vnode *tdvp, *tvp; 4228 struct mount *mp, *tmp; 4229 int error; 4230 4231 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4232 4233 error = pathbuf_maybe_copyin(from, seg, &fpb); 4234 if (error) 4235 goto out0; 4236 KASSERT(fpb != NULL); 4237 4238 error = pathbuf_maybe_copyin(to, seg, &tpb); 4239 if (error) 4240 goto out1; 4241 KASSERT(tpb != NULL); 4242 4243 /* 4244 * Lookup from. 4245 * 4246 * XXX LOCKPARENT is wrong because we don't actually want it 4247 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4248 * insane, so for the time being we need to leave it like this. 4249 */ 4250 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4251 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4252 goto out2; 4253 4254 /* 4255 * Pull out the important results of the lookup, fdvp and fvp. 4256 * Of course, fvp is bogus because we're about to unlock fdvp. 4257 */ 4258 fdvp = fnd.ni_dvp; 4259 fvp = fnd.ni_vp; 4260 mp = fdvp->v_mount; 4261 KASSERT(fdvp != NULL); 4262 KASSERT(fvp != NULL); 4263 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4264 /* 4265 * Bracket the operation with fstrans_start()/fstrans_done(). 4266 * 4267 * Inside the bracket this file system cannot be unmounted so 4268 * a vnode on this file system cannot change its v_mount. 4269 * A vnode on another file system may still change to dead mount. 4270 */ 4271 fstrans_start(mp); 4272 4273 /* 4274 * Make sure neither fdvp nor fvp is locked. 4275 */ 4276 if (fdvp != fvp) 4277 VOP_UNLOCK(fdvp); 4278 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4279 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4280 4281 /* 4282 * Reject renaming `.' and `..'. Can't do this until after 4283 * namei because we need namei's parsing to find the final 4284 * component name. (namei should just leave us with the final 4285 * component name and not look it up itself, but anyway...) 4286 * 4287 * This was here before because we used to relookup from 4288 * instead of to and relookup requires the caller to check 4289 * this, but now file systems may depend on this check, so we 4290 * must retain it until the file systems are all rototilled. 4291 */ 4292 if (((fnd.ni_cnd.cn_namelen == 1) && 4293 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4294 ((fnd.ni_cnd.cn_namelen == 2) && 4295 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4296 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4297 error = EINVAL; /* XXX EISDIR? */ 4298 goto abort0; 4299 } 4300 4301 /* 4302 * Lookup to. 4303 * 4304 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4305 * fvp here to decide whether to add CREATEDIR is a load of 4306 * bollocks because fvp might be the wrong node by now, since 4307 * fdvp is unlocked. 4308 * 4309 * XXX Why not pass CREATEDIR always? 4310 */ 4311 NDINIT(&tnd, RENAME, 4312 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4313 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4314 tpb); 4315 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4316 goto abort0; 4317 4318 /* 4319 * Pull out the important results of the lookup, tdvp and tvp. 4320 * Of course, tvp is bogus because we're about to unlock tdvp. 4321 */ 4322 tdvp = tnd.ni_dvp; 4323 tvp = tnd.ni_vp; 4324 KASSERT(tdvp != NULL); 4325 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4326 4327 /* 4328 * Make sure neither tdvp nor tvp is locked. 4329 */ 4330 if (tdvp != tvp) 4331 VOP_UNLOCK(tdvp); 4332 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4333 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4334 4335 /* 4336 * Reject renaming onto `.' or `..'. relookup is unhappy with 4337 * these, which is why we must do this here. Once upon a time 4338 * we relooked up from instead of to, and consequently didn't 4339 * need this check, but now that we relookup to instead of 4340 * from, we need this; and we shall need it forever forward 4341 * until the VOP_RENAME protocol changes, because file systems 4342 * will no doubt begin to depend on this check. 4343 */ 4344 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4345 error = EISDIR; 4346 goto abort1; 4347 } 4348 if ((tnd.ni_cnd.cn_namelen == 2) && 4349 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4350 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4351 error = EINVAL; 4352 goto abort1; 4353 } 4354 4355 /* 4356 * Make sure the mount points match. Although we don't hold 4357 * any vnode locks, the v_mount on fdvp file system are stable. 4358 * 4359 * Unmounting another file system at an inopportune moment may 4360 * cause tdvp to disappear and change its v_mount to dead. 4361 * 4362 * So in either case different v_mount means cross-device rename. 4363 */ 4364 KASSERT(mp != NULL); 4365 tmp = tdvp->v_mount; 4366 4367 if (mp != tmp) { 4368 error = EXDEV; 4369 goto abort1; 4370 } 4371 4372 /* 4373 * Take the vfs rename lock to avoid cross-directory screw cases. 4374 * Nothing is locked currently, so taking this lock is safe. 4375 */ 4376 error = VFS_RENAMELOCK_ENTER(mp); 4377 if (error) 4378 goto abort1; 4379 4380 /* 4381 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4382 * and nothing is locked except for the vfs rename lock. 4383 * 4384 * The next step is a little rain dance to conform to the 4385 * insane lock protocol, even though it does nothing to ward 4386 * off race conditions. 4387 * 4388 * We need tdvp and tvp to be locked. However, because we have 4389 * unlocked tdvp in order to hold no locks while we take the 4390 * vfs rename lock, tvp may be wrong here, and we can't safely 4391 * lock it even if the sensible file systems will just unlock 4392 * it straight away. Consequently, we must lock tdvp and then 4393 * relookup tvp to get it locked. 4394 * 4395 * Finally, because the VOP_RENAME protocol is brain-damaged 4396 * and various file systems insanely depend on the semantics of 4397 * this brain damage, the lookup of to must be the last lookup 4398 * before VOP_RENAME. 4399 */ 4400 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4401 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4402 if (error) 4403 goto abort2; 4404 4405 /* 4406 * Drop the old tvp and pick up the new one -- which might be 4407 * the same, but that doesn't matter to us. After this, tdvp 4408 * and tvp should both be locked. 4409 */ 4410 if (tvp != NULL) 4411 vrele(tvp); 4412 tvp = tnd.ni_vp; 4413 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4414 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4415 4416 /* 4417 * The old do_sys_rename had various consistency checks here 4418 * involving fvp and tvp. fvp is bogus already here, and tvp 4419 * will become bogus soon in any sensible file system, so the 4420 * only purpose in putting these checks here is to give lip 4421 * service to these screw cases and to acknowledge that they 4422 * exist, not actually to handle them, but here you go 4423 * anyway... 4424 */ 4425 4426 /* 4427 * Acknowledge that directories and non-directories aren't 4428 * suposed to mix. 4429 */ 4430 if (tvp != NULL) { 4431 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4432 error = ENOTDIR; 4433 goto abort3; 4434 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4435 error = EISDIR; 4436 goto abort3; 4437 } 4438 } 4439 4440 /* 4441 * Acknowledge some random screw case, among the dozens that 4442 * might arise. 4443 */ 4444 if (fvp == tdvp) { 4445 error = EINVAL; 4446 goto abort3; 4447 } 4448 4449 /* 4450 * Acknowledge that POSIX has a wacky screw case. 4451 * 4452 * XXX Eventually the retain flag needs to be passed on to 4453 * VOP_RENAME. 4454 */ 4455 if (fvp == tvp) { 4456 if (retain) { 4457 error = 0; 4458 goto abort3; 4459 } else if ((fdvp == tdvp) && 4460 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4461 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4462 fnd.ni_cnd.cn_namelen))) { 4463 error = 0; 4464 goto abort3; 4465 } 4466 } 4467 4468 /* 4469 * Make sure veriexec can screw us up. (But a race can screw 4470 * up veriexec, of course -- remember, fvp and (soon) tvp are 4471 * bogus.) 4472 */ 4473 #if NVERIEXEC > 0 4474 { 4475 char *f1, *f2; 4476 size_t f1_len; 4477 size_t f2_len; 4478 4479 f1_len = fnd.ni_cnd.cn_namelen + 1; 4480 f1 = kmem_alloc(f1_len, KM_SLEEP); 4481 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4482 4483 f2_len = tnd.ni_cnd.cn_namelen + 1; 4484 f2 = kmem_alloc(f2_len, KM_SLEEP); 4485 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4486 4487 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4488 4489 kmem_free(f1, f1_len); 4490 kmem_free(f2, f2_len); 4491 4492 if (error) 4493 goto abort3; 4494 } 4495 #endif /* NVERIEXEC > 0 */ 4496 4497 /* 4498 * All ready. Incant the rename vop. 4499 */ 4500 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4501 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4502 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4503 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4504 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4505 4506 /* 4507 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4508 * tdvp and tvp. But we can't assert any of that. 4509 */ 4510 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4511 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4512 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4513 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4514 4515 /* 4516 * So all we have left to do is to drop the rename lock and 4517 * destroy the pathbufs. 4518 */ 4519 VFS_RENAMELOCK_EXIT(mp); 4520 fstrans_done(mp); 4521 goto out2; 4522 4523 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4524 VOP_UNLOCK(tvp); 4525 abort2: VOP_UNLOCK(tdvp); 4526 VFS_RENAMELOCK_EXIT(mp); 4527 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4528 vrele(tdvp); 4529 if (tvp != NULL) 4530 vrele(tvp); 4531 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4532 vrele(fdvp); 4533 vrele(fvp); 4534 fstrans_done(mp); 4535 out2: pathbuf_destroy(tpb); 4536 out1: pathbuf_destroy(fpb); 4537 out0: return error; 4538 } 4539 4540 /* 4541 * Make a directory file. 4542 */ 4543 /* ARGSUSED */ 4544 int 4545 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4546 { 4547 /* { 4548 syscallarg(const char *) path; 4549 syscallarg(int) mode; 4550 } */ 4551 4552 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4553 SCARG(uap, mode), UIO_USERSPACE); 4554 } 4555 4556 int 4557 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4558 register_t *retval) 4559 { 4560 /* { 4561 syscallarg(int) fd; 4562 syscallarg(const char *) path; 4563 syscallarg(int) mode; 4564 } */ 4565 4566 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4567 SCARG(uap, mode), UIO_USERSPACE); 4568 } 4569 4570 4571 int 4572 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4573 { 4574 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4575 } 4576 4577 static int 4578 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4579 enum uio_seg seg) 4580 { 4581 struct proc *p = curlwp->l_proc; 4582 struct vnode *vp; 4583 struct vattr vattr; 4584 int error; 4585 struct pathbuf *pb; 4586 struct nameidata nd; 4587 4588 KASSERT(l != NULL || fdat == AT_FDCWD); 4589 4590 /* XXX bollocks, should pass in a pathbuf */ 4591 error = pathbuf_maybe_copyin(path, seg, &pb); 4592 if (error) { 4593 return error; 4594 } 4595 4596 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4597 4598 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4599 pathbuf_destroy(pb); 4600 return (error); 4601 } 4602 vp = nd.ni_vp; 4603 if (vp != NULL) { 4604 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4605 if (nd.ni_dvp == vp) 4606 vrele(nd.ni_dvp); 4607 else 4608 vput(nd.ni_dvp); 4609 vrele(vp); 4610 pathbuf_destroy(pb); 4611 return (EEXIST); 4612 } 4613 vattr_null(&vattr); 4614 vattr.va_type = VDIR; 4615 /* We will read cwdi->cwdi_cmask unlocked. */ 4616 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4617 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4618 if (!error) 4619 vrele(nd.ni_vp); 4620 vput(nd.ni_dvp); 4621 pathbuf_destroy(pb); 4622 return (error); 4623 } 4624 4625 /* 4626 * Remove a directory file. 4627 */ 4628 /* ARGSUSED */ 4629 int 4630 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4631 { 4632 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4633 AT_REMOVEDIR, UIO_USERSPACE); 4634 } 4635 4636 /* 4637 * Read a block of directory entries in a file system independent format. 4638 */ 4639 int 4640 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4641 { 4642 /* { 4643 syscallarg(int) fd; 4644 syscallarg(char *) buf; 4645 syscallarg(size_t) count; 4646 } */ 4647 file_t *fp; 4648 int error, done; 4649 4650 /* fd_getvnode() will use the descriptor for us */ 4651 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4652 return (error); 4653 if ((fp->f_flag & FREAD) == 0) { 4654 error = EBADF; 4655 goto out; 4656 } 4657 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4658 SCARG(uap, count), &done, l, 0, 0); 4659 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4660 *retval = done; 4661 out: 4662 fd_putfile(SCARG(uap, fd)); 4663 return (error); 4664 } 4665 4666 /* 4667 * Set the mode mask for creation of filesystem nodes. 4668 */ 4669 int 4670 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4671 { 4672 /* { 4673 syscallarg(mode_t) newmask; 4674 } */ 4675 4676 /* 4677 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4678 * serialization with those reads is required. It's important to 4679 * return a coherent answer for the caller of umask() though, and 4680 * the atomic operation accomplishes that. 4681 */ 4682 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4683 SCARG(uap, newmask) & ALLPERMS); 4684 4685 return (0); 4686 } 4687 4688 int 4689 dorevoke(struct vnode *vp, kauth_cred_t cred) 4690 { 4691 struct vattr vattr; 4692 int error, fs_decision; 4693 4694 vn_lock(vp, LK_SHARED | LK_RETRY); 4695 error = VOP_GETATTR(vp, &vattr, cred); 4696 VOP_UNLOCK(vp); 4697 if (error != 0) 4698 return error; 4699 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4700 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4701 fs_decision); 4702 if (!error) 4703 VOP_REVOKE(vp, REVOKEALL); 4704 return (error); 4705 } 4706 4707 /* 4708 * Void all references to file by ripping underlying filesystem 4709 * away from vnode. 4710 */ 4711 /* ARGSUSED */ 4712 int 4713 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4714 { 4715 /* { 4716 syscallarg(const char *) path; 4717 } */ 4718 struct vnode *vp; 4719 int error; 4720 4721 error = namei_simple_user(SCARG(uap, path), 4722 NSM_FOLLOW_TRYEMULROOT, &vp); 4723 if (error != 0) 4724 return (error); 4725 error = dorevoke(vp, l->l_cred); 4726 vrele(vp); 4727 return (error); 4728 } 4729 4730 /* 4731 * Allocate backing store for a file, filling a hole without having to 4732 * explicitly write anything out. 4733 */ 4734 /* ARGSUSED */ 4735 int 4736 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4737 register_t *retval) 4738 { 4739 /* { 4740 syscallarg(int) fd; 4741 syscallarg(off_t) pos; 4742 syscallarg(off_t) len; 4743 } */ 4744 int fd; 4745 off_t pos, len; 4746 struct file *fp; 4747 struct vnode *vp; 4748 int error; 4749 4750 fd = SCARG(uap, fd); 4751 pos = SCARG(uap, pos); 4752 len = SCARG(uap, len); 4753 4754 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4755 *retval = EINVAL; 4756 return 0; 4757 } 4758 4759 error = fd_getvnode(fd, &fp); 4760 if (error) { 4761 *retval = error; 4762 return 0; 4763 } 4764 if ((fp->f_flag & FWRITE) == 0) { 4765 error = EBADF; 4766 goto fail; 4767 } 4768 vp = fp->f_vnode; 4769 4770 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4771 if (vp->v_type == VDIR) { 4772 error = EISDIR; 4773 } else { 4774 error = VOP_FALLOCATE(vp, pos, len); 4775 } 4776 VOP_UNLOCK(vp); 4777 4778 fail: 4779 fd_putfile(fd); 4780 *retval = error; 4781 return 0; 4782 } 4783 4784 /* 4785 * Deallocate backing store for a file, creating a hole. Also used for 4786 * invoking TRIM on disks. 4787 */ 4788 /* ARGSUSED */ 4789 int 4790 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4791 register_t *retval) 4792 { 4793 /* { 4794 syscallarg(int) fd; 4795 syscallarg(off_t) pos; 4796 syscallarg(off_t) len; 4797 } */ 4798 int fd; 4799 off_t pos, len; 4800 struct file *fp; 4801 struct vnode *vp; 4802 int error; 4803 4804 fd = SCARG(uap, fd); 4805 pos = SCARG(uap, pos); 4806 len = SCARG(uap, len); 4807 4808 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4809 return EINVAL; 4810 } 4811 4812 error = fd_getvnode(fd, &fp); 4813 if (error) { 4814 return error; 4815 } 4816 if ((fp->f_flag & FWRITE) == 0) { 4817 error = EBADF; 4818 goto fail; 4819 } 4820 vp = fp->f_vnode; 4821 4822 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4823 if (vp->v_type == VDIR) { 4824 error = EISDIR; 4825 } else { 4826 error = VOP_FDISCARD(vp, pos, len); 4827 } 4828 VOP_UNLOCK(vp); 4829 4830 fail: 4831 fd_putfile(fd); 4832 return error; 4833 } 4834