1 /* $NetBSD: vfs_syscalls.c,v 1.544 2020/03/25 18:08:34 gdt Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.544 2020/03/25 18:08:34 gdt Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp 
*, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 static int 169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 170 { 171 file_t *dfp; 172 int error; 173 174 if (fdat != AT_FDCWD) { 175 if ((error = fd_getvnode(fdat, &dfp)) != 0) 176 goto out; 177 178 NDAT(ndp, dfp->f_vnode); 179 } 180 181 error = namei(ndp); 182 183 if (fdat != AT_FDCWD) 184 fd_putfile(fdat); 185 out: 186 return error; 187 } 188 189 static int 190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 191 namei_simple_flags_t sflags, struct vnode **vp_ret) 192 { 193 file_t *dfp; 194 struct vnode *dvp; 195 int error; 196 197 if (fdat != AT_FDCWD) { 198 if ((error = fd_getvnode(fdat, &dfp)) != 0) 199 goto out; 200 201 dvp = dfp->f_vnode; 202 } else { 203 dvp = NULL; 204 } 205 206 error = 
nameiat_simple_user(dvp, path, sflags, vp_ret); 207 208 if (fdat != AT_FDCWD) 209 fd_putfile(fdat); 210 out: 211 return error; 212 } 213 214 static int 215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 216 { 217 int error; 218 219 fp->f_flag = flags & FMASK; 220 fp->f_type = DTYPE_VNODE; 221 fp->f_ops = &vnops; 222 fp->f_vnode = vp; 223 224 if (flags & (O_EXLOCK | O_SHLOCK)) { 225 struct flock lf; 226 int type; 227 228 lf.l_whence = SEEK_SET; 229 lf.l_start = 0; 230 lf.l_len = 0; 231 if (flags & O_EXLOCK) 232 lf.l_type = F_WRLCK; 233 else 234 lf.l_type = F_RDLCK; 235 type = F_FLOCK; 236 if ((flags & FNONBLOCK) == 0) 237 type |= F_WAIT; 238 VOP_UNLOCK(vp); 239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 240 if (error) { 241 (void) vn_close(vp, fp->f_flag, fp->f_cred); 242 fd_abort(l->l_proc, fp, indx); 243 return error; 244 } 245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 246 atomic_or_uint(&fp->f_flag, FHASLOCK); 247 } 248 if (flags & O_CLOEXEC) 249 fd_set_exclose(l, indx, true); 250 return 0; 251 } 252 253 static int 254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 255 void *data, size_t *data_len) 256 { 257 struct mount *mp; 258 int error = 0, saved_flags; 259 260 mp = vp->v_mount; 261 saved_flags = mp->mnt_flag; 262 263 /* We can operate only on VV_ROOT nodes. */ 264 if ((vp->v_vflag & VV_ROOT) == 0) { 265 error = EINVAL; 266 goto out; 267 } 268 269 /* 270 * We only allow the filesystem to be reloaded if it 271 * is currently mounted read-only. Additionally, we 272 * prevent read-write to read-only downgrades. 273 */ 274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 275 (mp->mnt_flag & MNT_RDONLY) == 0 && 276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 277 error = EOPNOTSUPP; /* Needs translation */ 278 goto out; 279 } 280 281 /* 282 * Enabling MNT_UNION requires a covered mountpoint and 283 * must not happen on the root mount. 
284 */ 285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 286 error = EOPNOTSUPP; 287 goto out; 288 } 289 290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 292 if (error) 293 goto out; 294 295 error = vfs_suspend(mp, 0); 296 if (error) 297 goto out; 298 299 mutex_enter(mp->mnt_updating); 300 301 mp->mnt_flag &= ~MNT_OP_FLAGS; 302 mp->mnt_flag |= flags & MNT_OP_FLAGS; 303 304 /* 305 * Set the mount level flags. 306 */ 307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 308 if ((flags & MNT_RDONLY)) 309 mp->mnt_iflag |= IMNT_WANTRDONLY; 310 else 311 mp->mnt_iflag |= IMNT_WANTRDWR; 312 } 313 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 315 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 316 mp->mnt_flag &= ~MNT_RDONLY; 317 318 error = VFS_MOUNT(mp, path, data, data_len); 319 320 if (error && data != NULL) { 321 int error2; 322 323 /* 324 * Update failed; let's try and see if it was an 325 * export request. For compat with 3.0 and earlier. 326 */ 327 error2 = vfs_hooks_reexport(mp, path, data); 328 329 /* 330 * Only update error code if the export request was 331 * understood but some problem occurred while 332 * processing it. 
333 */ 334 if (error2 != EJUSTRETURN) 335 error = error2; 336 } 337 338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 339 mp->mnt_flag |= MNT_RDONLY; 340 if (error) 341 mp->mnt_flag = saved_flags; 342 mp->mnt_flag &= ~MNT_OP_FLAGS; 343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 346 vfs_syncer_add_to_worklist(mp); 347 } else { 348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 349 vfs_syncer_remove_from_worklist(mp); 350 } 351 mutex_exit(mp->mnt_updating); 352 vfs_resume(mp); 353 354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 355 (flags & MNT_EXTATTR)) { 356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 357 NULL, 0, NULL) != 0) { 358 printf("%s: failed to start extattr, error = %d", 359 mp->mnt_stat.f_mntonname, error); 360 mp->mnt_flag &= ~MNT_EXTATTR; 361 } 362 } 363 364 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 365 !(flags & MNT_EXTATTR)) { 366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 367 NULL, 0, NULL) != 0) { 368 printf("%s: failed to stop extattr, error = %d", 369 mp->mnt_stat.f_mntonname, error); 370 mp->mnt_flag |= MNT_RDONLY; 371 } 372 } 373 out: 374 return (error); 375 } 376 377 static int 378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 379 struct vfsops **vfsops) 380 { 381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 382 int error; 383 384 if (type_seg == UIO_USERSPACE) { 385 /* Copy file-system type from userspace. */ 386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 387 } else { 388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 389 KASSERT(error == 0); 390 } 391 392 if (error) { 393 /* 394 * Historically, filesystem types were identified by numbers. 395 * If we get an integer for the filesystem type instead of a 396 * string, we check to see if it matches one of the historic 397 * filesystem types. 
398 */ 399 u_long fsindex = (u_long)fstype; 400 if (fsindex >= nmountcompatnames || 401 mountcompatnames[fsindex] == NULL) 402 return ENODEV; 403 strlcpy(fstypename, mountcompatnames[fsindex], 404 sizeof(fstypename)); 405 } 406 407 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 408 if (strcmp(fstypename, "ufs") == 0) 409 fstypename[0] = 'f'; 410 411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 412 return 0; 413 414 /* If we can autoload a vfs module, try again */ 415 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 416 417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 418 return 0; 419 420 return ENODEV; 421 } 422 423 static int 424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 425 void *data, size_t *data_len) 426 { 427 struct mount *mp; 428 int error; 429 430 /* If MNT_GETARGS is specified, it should be the only flag. */ 431 if (flags & ~MNT_GETARGS) 432 return EINVAL; 433 434 mp = vp->v_mount; 435 436 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 439 if (error) 440 return error; 441 442 if ((vp->v_vflag & VV_ROOT) == 0) 443 return EINVAL; 444 445 if (vfs_busy(mp)) 446 return EPERM; 447 448 mutex_enter(mp->mnt_updating); 449 mp->mnt_flag &= ~MNT_OP_FLAGS; 450 mp->mnt_flag |= MNT_GETARGS; 451 error = VFS_MOUNT(mp, path, data, data_len); 452 mp->mnt_flag &= ~MNT_OP_FLAGS; 453 mutex_exit(mp->mnt_updating); 454 455 vfs_unbusy(mp); 456 return (error); 457 } 458 459 int 460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 461 { 462 /* { 463 syscallarg(const char *) type; 464 syscallarg(const char *) path; 465 syscallarg(int) flags; 466 syscallarg(void *) data; 467 syscallarg(size_t) data_len; 468 } */ 469 470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 472 SCARG(uap, data_len), retval); 473 } 474 475 int 476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 477 const char *path, int flags, void *data, enum uio_seg data_seg, 478 size_t data_len, register_t *retval) 479 { 480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 481 struct vnode *vp; 482 void *data_buf = data; 483 bool vfsopsrele = false; 484 size_t alloc_sz = 0; 485 int error; 486 487 /* 488 * Get vnode to be covered 489 */ 490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 491 if (error != 0) { 492 vp = NULL; 493 goto done; 494 } 495 496 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 497 vfsops = vp->v_mount->mnt_op; 498 } else { 499 /* 'type' is userspace */ 500 error = mount_get_vfsops(type, type_seg, &vfsops); 501 if (error != 0) 502 goto done; 503 vfsopsrele = true; 504 } 505 506 /* 507 * We allow data to be NULL, even for userspace. Some fs's don't need 508 * it. The others will handle NULL. 
509 */ 510 if (data != NULL && data_seg == UIO_USERSPACE) { 511 if (data_len == 0) { 512 /* No length supplied, use default for filesystem */ 513 data_len = vfsops->vfs_min_mount_data; 514 515 /* 516 * Hopefully a longer buffer won't make copyin() fail. 517 * For compatibility with 3.0 and earlier. 518 */ 519 if (flags & MNT_UPDATE 520 && data_len < sizeof (struct mnt_export_args30)) 521 data_len = sizeof (struct mnt_export_args30); 522 } 523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 524 error = EINVAL; 525 goto done; 526 } 527 alloc_sz = data_len; 528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 529 530 /* NFS needs the buffer even for mnt_getargs .... */ 531 error = copyin(data, data_buf, data_len); 532 if (error != 0) 533 goto done; 534 } 535 536 if (flags & MNT_GETARGS) { 537 if (data_len == 0) { 538 error = EINVAL; 539 goto done; 540 } 541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 542 if (error != 0) 543 goto done; 544 if (data_seg == UIO_USERSPACE) 545 error = copyout(data_buf, data, data_len); 546 *retval = data_len; 547 } else if (flags & MNT_UPDATE) { 548 error = mount_update(l, vp, path, flags, data_buf, &data_len); 549 } else { 550 /* Locking is handled internally in mount_domount(). */ 551 KASSERT(vfsopsrele == true); 552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 553 &data_len); 554 vfsopsrele = false; 555 } 556 if (!error) 557 KNOTE(&fs_klist, VQ_MOUNT); 558 559 done: 560 if (vfsopsrele) 561 vfs_delref(vfsops); 562 if (vp != NULL) { 563 vrele(vp); 564 } 565 if (data_buf != data) 566 kmem_free(data_buf, alloc_sz); 567 return (error); 568 } 569 570 /* 571 * Unmount a file system. 572 * 573 * Note: unmount takes a path to the vnode mounted on as argument, 574 * not special file (as before). 
575 */ 576 /* ARGSUSED */ 577 int 578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 579 { 580 /* { 581 syscallarg(const char *) path; 582 syscallarg(int) flags; 583 } */ 584 struct vnode *vp; 585 struct mount *mp; 586 int error; 587 struct pathbuf *pb; 588 struct nameidata nd; 589 590 error = pathbuf_copyin(SCARG(uap, path), &pb); 591 if (error) { 592 return error; 593 } 594 595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 596 if ((error = namei(&nd)) != 0) { 597 pathbuf_destroy(pb); 598 return error; 599 } 600 vp = nd.ni_vp; 601 pathbuf_destroy(pb); 602 603 mp = vp->v_mount; 604 vfs_ref(mp); 605 VOP_UNLOCK(vp); 606 607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 609 if (error) { 610 vrele(vp); 611 vfs_rele(mp); 612 return (error); 613 } 614 615 /* 616 * Don't allow unmounting the root file system. 617 */ 618 if (mp->mnt_flag & MNT_ROOTFS) { 619 vrele(vp); 620 vfs_rele(mp); 621 return (EINVAL); 622 } 623 624 /* 625 * Must be the root of the filesystem 626 */ 627 if ((vp->v_vflag & VV_ROOT) == 0) { 628 vrele(vp); 629 vfs_rele(mp); 630 return (EINVAL); 631 } 632 633 vrele(vp); 634 error = dounmount(mp, SCARG(uap, flags), l); 635 vfs_rele(mp); 636 if (!error) 637 KNOTE(&fs_klist, VQ_UNMOUNT); 638 return error; 639 } 640 641 /* 642 * Sync each mounted filesystem. 
643 */ 644 #ifdef DEBUG 645 int syncprt = 0; 646 struct ctldebug debug0 = { "syncprt", &syncprt }; 647 #endif 648 649 void 650 do_sys_sync(struct lwp *l) 651 { 652 mount_iterator_t *iter; 653 struct mount *mp; 654 int asyncflag; 655 656 mountlist_iterator_init(&iter); 657 while ((mp = mountlist_iterator_next(iter)) != NULL) { 658 mutex_enter(mp->mnt_updating); 659 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 660 asyncflag = mp->mnt_flag & MNT_ASYNC; 661 mp->mnt_flag &= ~MNT_ASYNC; 662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 663 if (asyncflag) 664 mp->mnt_flag |= MNT_ASYNC; 665 } 666 mutex_exit(mp->mnt_updating); 667 } 668 mountlist_iterator_destroy(iter); 669 #ifdef DEBUG 670 if (syncprt) 671 vfs_bufstats(); 672 #endif /* DEBUG */ 673 } 674 675 /* ARGSUSED */ 676 int 677 sys_sync(struct lwp *l, const void *v, register_t *retval) 678 { 679 do_sys_sync(l); 680 return (0); 681 } 682 683 684 /* 685 * Access or change filesystem quotas. 686 * 687 * (this is really 14 different calls bundled into one) 688 */ 689 690 static int 691 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 692 { 693 struct quotastat info_k; 694 int error; 695 696 /* ensure any padding bytes are cleared */ 697 memset(&info_k, 0, sizeof(info_k)); 698 699 error = vfs_quotactl_stat(mp, &info_k); 700 if (error) { 701 return error; 702 } 703 704 return copyout(&info_k, info_u, sizeof(info_k)); 705 } 706 707 static int 708 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 709 struct quotaidtypestat *info_u) 710 { 711 struct quotaidtypestat info_k; 712 int error; 713 714 /* ensure any padding bytes are cleared */ 715 memset(&info_k, 0, sizeof(info_k)); 716 717 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 718 if (error) { 719 return error; 720 } 721 722 return copyout(&info_k, info_u, sizeof(info_k)); 723 } 724 725 static int 726 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 727 struct quotaobjtypestat *info_u) 728 { 729 struct quotaobjtypestat info_k; 730 int error; 731 
732 /* ensure any padding bytes are cleared */ 733 memset(&info_k, 0, sizeof(info_k)); 734 735 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 736 if (error) { 737 return error; 738 } 739 740 return copyout(&info_k, info_u, sizeof(info_k)); 741 } 742 743 static int 744 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 745 struct quotaval *val_u) 746 { 747 struct quotakey key_k; 748 struct quotaval val_k; 749 int error; 750 751 /* ensure any padding bytes are cleared */ 752 memset(&val_k, 0, sizeof(val_k)); 753 754 error = copyin(key_u, &key_k, sizeof(key_k)); 755 if (error) { 756 return error; 757 } 758 759 error = vfs_quotactl_get(mp, &key_k, &val_k); 760 if (error) { 761 return error; 762 } 763 764 return copyout(&val_k, val_u, sizeof(val_k)); 765 } 766 767 static int 768 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 769 const struct quotaval *val_u) 770 { 771 struct quotakey key_k; 772 struct quotaval val_k; 773 int error; 774 775 error = copyin(key_u, &key_k, sizeof(key_k)); 776 if (error) { 777 return error; 778 } 779 780 error = copyin(val_u, &val_k, sizeof(val_k)); 781 if (error) { 782 return error; 783 } 784 785 return vfs_quotactl_put(mp, &key_k, &val_k); 786 } 787 788 static int 789 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 790 { 791 struct quotakey key_k; 792 int error; 793 794 error = copyin(key_u, &key_k, sizeof(key_k)); 795 if (error) { 796 return error; 797 } 798 799 return vfs_quotactl_del(mp, &key_k); 800 } 801 802 static int 803 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 804 { 805 struct quotakcursor cursor_k; 806 int error; 807 808 /* ensure any padding bytes are cleared */ 809 memset(&cursor_k, 0, sizeof(cursor_k)); 810 811 error = vfs_quotactl_cursoropen(mp, &cursor_k); 812 if (error) { 813 return error; 814 } 815 816 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 817 } 818 819 static int 820 do_sys_quotactl_cursorclose(struct 
mount *mp, struct quotakcursor *cursor_u) 821 { 822 struct quotakcursor cursor_k; 823 int error; 824 825 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 826 if (error) { 827 return error; 828 } 829 830 return vfs_quotactl_cursorclose(mp, &cursor_k); 831 } 832 833 static int 834 do_sys_quotactl_cursorskipidtype(struct mount *mp, 835 struct quotakcursor *cursor_u, int idtype) 836 { 837 struct quotakcursor cursor_k; 838 int error; 839 840 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 841 if (error) { 842 return error; 843 } 844 845 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 846 if (error) { 847 return error; 848 } 849 850 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 851 } 852 853 static int 854 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 855 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 856 unsigned *ret_u) 857 { 858 #define CGET_STACK_MAX 8 859 struct quotakcursor cursor_k; 860 struct quotakey stackkeys[CGET_STACK_MAX]; 861 struct quotaval stackvals[CGET_STACK_MAX]; 862 struct quotakey *keys_k; 863 struct quotaval *vals_k; 864 unsigned ret_k; 865 int error; 866 867 if (maxnum > 128) { 868 maxnum = 128; 869 } 870 871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 872 if (error) { 873 return error; 874 } 875 876 if (maxnum <= CGET_STACK_MAX) { 877 keys_k = stackkeys; 878 vals_k = stackvals; 879 /* ensure any padding bytes are cleared */ 880 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 881 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 882 } else { 883 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 884 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 885 } 886 887 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 888 &ret_k); 889 if (error) { 890 goto fail; 891 } 892 893 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 894 if (error) { 895 goto fail; 896 } 897 898 error = copyout(vals_k, vals_u, ret_k * 
sizeof(vals_k[0])); 899 if (error) { 900 goto fail; 901 } 902 903 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 904 if (error) { 905 goto fail; 906 } 907 908 /* do last to maximize the chance of being able to recover a failure */ 909 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 910 911 fail: 912 if (keys_k != stackkeys) { 913 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 914 } 915 if (vals_k != stackvals) { 916 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 917 } 918 return error; 919 } 920 921 static int 922 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 923 int *ret_u) 924 { 925 struct quotakcursor cursor_k; 926 int ret_k; 927 int error; 928 929 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 930 if (error) { 931 return error; 932 } 933 934 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 935 if (error) { 936 return error; 937 } 938 939 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 940 if (error) { 941 return error; 942 } 943 944 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 945 } 946 947 static int 948 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 949 { 950 struct quotakcursor cursor_k; 951 int error; 952 953 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 954 if (error) { 955 return error; 956 } 957 958 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 959 if (error) { 960 return error; 961 } 962 963 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 964 } 965 966 static int 967 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 968 { 969 char *path_k; 970 int error; 971 972 /* XXX this should probably be a struct pathbuf */ 973 path_k = PNBUF_GET(); 974 error = copyin(path_u, path_k, PATH_MAX); 975 if (error) { 976 PNBUF_PUT(path_k); 977 return error; 978 } 979 980 error = vfs_quotactl_quotaon(mp, idtype, path_k); 981 982 PNBUF_PUT(path_k); 983 return error; 984 } 985 986 static int 987 do_sys_quotactl_quotaoff(struct mount *mp, 
int idtype) 988 { 989 return vfs_quotactl_quotaoff(mp, idtype); 990 } 991 992 int 993 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 994 { 995 struct mount *mp; 996 struct vnode *vp; 997 int error; 998 999 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1000 if (error != 0) 1001 return (error); 1002 mp = vp->v_mount; 1003 1004 switch (args->qc_op) { 1005 case QUOTACTL_STAT: 1006 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1007 break; 1008 case QUOTACTL_IDTYPESTAT: 1009 error = do_sys_quotactl_idtypestat(mp, 1010 args->u.idtypestat.qc_idtype, 1011 args->u.idtypestat.qc_info); 1012 break; 1013 case QUOTACTL_OBJTYPESTAT: 1014 error = do_sys_quotactl_objtypestat(mp, 1015 args->u.objtypestat.qc_objtype, 1016 args->u.objtypestat.qc_info); 1017 break; 1018 case QUOTACTL_GET: 1019 error = do_sys_quotactl_get(mp, 1020 args->u.get.qc_key, 1021 args->u.get.qc_val); 1022 break; 1023 case QUOTACTL_PUT: 1024 error = do_sys_quotactl_put(mp, 1025 args->u.put.qc_key, 1026 args->u.put.qc_val); 1027 break; 1028 case QUOTACTL_DEL: 1029 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1030 break; 1031 case QUOTACTL_CURSOROPEN: 1032 error = do_sys_quotactl_cursoropen(mp, 1033 args->u.cursoropen.qc_cursor); 1034 break; 1035 case QUOTACTL_CURSORCLOSE: 1036 error = do_sys_quotactl_cursorclose(mp, 1037 args->u.cursorclose.qc_cursor); 1038 break; 1039 case QUOTACTL_CURSORSKIPIDTYPE: 1040 error = do_sys_quotactl_cursorskipidtype(mp, 1041 args->u.cursorskipidtype.qc_cursor, 1042 args->u.cursorskipidtype.qc_idtype); 1043 break; 1044 case QUOTACTL_CURSORGET: 1045 error = do_sys_quotactl_cursorget(mp, 1046 args->u.cursorget.qc_cursor, 1047 args->u.cursorget.qc_keys, 1048 args->u.cursorget.qc_vals, 1049 args->u.cursorget.qc_maxnum, 1050 args->u.cursorget.qc_ret); 1051 break; 1052 case QUOTACTL_CURSORATEND: 1053 error = do_sys_quotactl_cursoratend(mp, 1054 args->u.cursoratend.qc_cursor, 1055 args->u.cursoratend.qc_ret); 1056 break; 1057 case 
QUOTACTL_CURSORREWIND: 1058 error = do_sys_quotactl_cursorrewind(mp, 1059 args->u.cursorrewind.qc_cursor); 1060 break; 1061 case QUOTACTL_QUOTAON: 1062 error = do_sys_quotactl_quotaon(mp, 1063 args->u.quotaon.qc_idtype, 1064 args->u.quotaon.qc_quotafile); 1065 break; 1066 case QUOTACTL_QUOTAOFF: 1067 error = do_sys_quotactl_quotaoff(mp, 1068 args->u.quotaoff.qc_idtype); 1069 break; 1070 default: 1071 error = EINVAL; 1072 break; 1073 } 1074 1075 vrele(vp); 1076 return error; 1077 } 1078 1079 /* ARGSUSED */ 1080 int 1081 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1082 register_t *retval) 1083 { 1084 /* { 1085 syscallarg(const char *) path; 1086 syscallarg(struct quotactl_args *) args; 1087 } */ 1088 struct quotactl_args args; 1089 int error; 1090 1091 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1092 if (error) { 1093 return error; 1094 } 1095 1096 return do_sys_quotactl(SCARG(uap, path), &args); 1097 } 1098 1099 int 1100 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1101 int root) 1102 { 1103 struct vnode *rvp; 1104 int error = 0; 1105 1106 /* 1107 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1108 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1109 * overrides MNT_NOWAIT. 
1110 */ 1111 KASSERT(l == curlwp); 1112 rvp = cwdrdir(); 1113 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1114 (flags != MNT_WAIT && flags != 0)) { 1115 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1116 } else { 1117 /* Get the filesystem stats now */ 1118 memset(sp, 0, sizeof(*sp)); 1119 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1120 if (rvp) 1121 vrele(rvp); 1122 return error; 1123 } 1124 if (rvp == NULL) 1125 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1126 } 1127 1128 if (rvp != NULL) { 1129 size_t len; 1130 char *bp; 1131 char c; 1132 char *path = PNBUF_GET(); 1133 1134 bp = path + MAXPATHLEN; 1135 *--bp = '\0'; 1136 error = getcwd_common(rvp, rootvnode, &bp, path, 1137 MAXPATHLEN / 2, 0, l); 1138 if (error) { 1139 PNBUF_PUT(path); 1140 vrele(rvp); 1141 return error; 1142 } 1143 len = strlen(bp); 1144 if (len != 1) { 1145 /* 1146 * for mount points that are below our root, we can see 1147 * them, so we fix up the pathname and return them. The 1148 * rest we cannot see, so we don't allow viewing the 1149 * data. 1150 */ 1151 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1152 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1153 (void)strlcpy(sp->f_mntonname, 1154 c == '\0' ? "/" : &sp->f_mntonname[len], 1155 sizeof(sp->f_mntonname)); 1156 } else { 1157 if (root) 1158 (void)strlcpy(sp->f_mntonname, "/", 1159 sizeof(sp->f_mntonname)); 1160 else 1161 error = EPERM; 1162 } 1163 } 1164 PNBUF_PUT(path); 1165 vrele(rvp); 1166 } 1167 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1168 return error; 1169 } 1170 1171 /* 1172 * Get filesystem statistics by path. 
1173 */ 1174 int 1175 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1176 { 1177 struct mount *mp; 1178 int error; 1179 struct vnode *vp; 1180 1181 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1182 if (error != 0) 1183 return error; 1184 mp = vp->v_mount; 1185 error = dostatvfs(mp, sb, l, flags, 1); 1186 vrele(vp); 1187 return error; 1188 } 1189 1190 /* ARGSUSED */ 1191 int 1192 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1193 { 1194 /* { 1195 syscallarg(const char *) path; 1196 syscallarg(struct statvfs *) buf; 1197 syscallarg(int) flags; 1198 } */ 1199 struct statvfs *sb; 1200 int error; 1201 1202 sb = STATVFSBUF_GET(); 1203 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1204 if (error == 0) 1205 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1206 STATVFSBUF_PUT(sb); 1207 return error; 1208 } 1209 1210 /* 1211 * Get filesystem statistics by fd. 1212 */ 1213 int 1214 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1215 { 1216 file_t *fp; 1217 struct mount *mp; 1218 int error; 1219 1220 /* fd_getvnode() will use the descriptor for us */ 1221 if ((error = fd_getvnode(fd, &fp)) != 0) 1222 return (error); 1223 mp = fp->f_vnode->v_mount; 1224 error = dostatvfs(mp, sb, curlwp, flags, 1); 1225 fd_putfile(fd); 1226 return error; 1227 } 1228 1229 /* ARGSUSED */ 1230 int 1231 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1232 { 1233 /* { 1234 syscallarg(int) fd; 1235 syscallarg(struct statvfs *) buf; 1236 syscallarg(int) flags; 1237 } */ 1238 struct statvfs *sb; 1239 int error; 1240 1241 sb = STATVFSBUF_GET(); 1242 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1243 if (error == 0) 1244 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1245 STATVFSBUF_PUT(sb); 1246 return error; 1247 } 1248 1249 1250 /* 1251 * Get statistics on all filesystems. 
1252 */ 1253 int 1254 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1255 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1256 register_t *retval) 1257 { 1258 int root = 0; 1259 mount_iterator_t *iter; 1260 struct proc *p = l->l_proc; 1261 struct mount *mp; 1262 struct statvfs *sb; 1263 size_t count, maxcount; 1264 int error = 0; 1265 1266 sb = STATVFSBUF_GET(); 1267 maxcount = bufsize / entry_sz; 1268 count = 0; 1269 mountlist_iterator_init(&iter); 1270 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1271 if (sfsp && count < maxcount) { 1272 error = dostatvfs(mp, sb, l, flags, 0); 1273 if (error) { 1274 error = 0; 1275 continue; 1276 } 1277 error = copyfn(sb, sfsp, entry_sz); 1278 if (error) 1279 goto out; 1280 sfsp = (char *)sfsp + entry_sz; 1281 root |= strcmp(sb->f_mntonname, "/") == 0; 1282 } 1283 count++; 1284 } 1285 1286 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1287 /* 1288 * fake a root entry 1289 */ 1290 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1291 sb, l, flags, 1); 1292 if (error != 0) 1293 goto out; 1294 if (sfsp) { 1295 error = copyfn(sb, sfsp, entry_sz); 1296 if (error != 0) 1297 goto out; 1298 } 1299 count++; 1300 } 1301 if (sfsp && count > maxcount) 1302 *retval = maxcount; 1303 else 1304 *retval = count; 1305 out: 1306 mountlist_iterator_destroy(iter); 1307 STATVFSBUF_PUT(sb); 1308 return error; 1309 } 1310 1311 int 1312 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1313 register_t *retval) 1314 { 1315 /* { 1316 syscallarg(struct statvfs *) buf; 1317 syscallarg(size_t) bufsize; 1318 syscallarg(int) flags; 1319 } */ 1320 1321 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1322 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1323 } 1324 1325 /* 1326 * Change current working directory to a given file descriptor. 
1327 */ 1328 /* ARGSUSED */ 1329 int 1330 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1331 { 1332 /* { 1333 syscallarg(int) fd; 1334 } */ 1335 struct cwdinfo *cwdi; 1336 struct vnode *vp, *tdp; 1337 struct mount *mp; 1338 file_t *fp; 1339 int error, fd; 1340 1341 /* fd_getvnode() will use the descriptor for us */ 1342 fd = SCARG(uap, fd); 1343 if ((error = fd_getvnode(fd, &fp)) != 0) 1344 return (error); 1345 vp = fp->f_vnode; 1346 1347 vref(vp); 1348 vn_lock(vp, LK_SHARED | LK_RETRY); 1349 if (vp->v_type != VDIR) 1350 error = ENOTDIR; 1351 else 1352 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1353 if (error) { 1354 vput(vp); 1355 goto out; 1356 } 1357 while ((mp = vp->v_mountedhere) != NULL) { 1358 error = vfs_busy(mp); 1359 vput(vp); 1360 if (error != 0) 1361 goto out; 1362 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1363 vfs_unbusy(mp); 1364 if (error) 1365 goto out; 1366 vp = tdp; 1367 } 1368 VOP_UNLOCK(vp); 1369 1370 /* 1371 * Disallow changing to a directory not under the process's 1372 * current root directory (if there is one). 1373 */ 1374 cwdi = cwdenter(RW_WRITER); 1375 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1376 vrele(vp); 1377 error = EPERM; /* operation not permitted */ 1378 } else { 1379 vrele(cwdi->cwdi_cdir); 1380 cwdi->cwdi_cdir = vp; 1381 } 1382 cwdexit(cwdi); 1383 1384 out: 1385 fd_putfile(fd); 1386 return (error); 1387 } 1388 1389 /* 1390 * Change this process's notion of the root directory to a given file 1391 * descriptor. 
1392 */ 1393 int 1394 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1395 { 1396 struct vnode *vp; 1397 file_t *fp; 1398 int error, fd = SCARG(uap, fd); 1399 1400 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1401 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1402 return error; 1403 /* fd_getvnode() will use the descriptor for us */ 1404 if ((error = fd_getvnode(fd, &fp)) != 0) 1405 return error; 1406 vp = fp->f_vnode; 1407 vn_lock(vp, LK_SHARED | LK_RETRY); 1408 if (vp->v_type != VDIR) 1409 error = ENOTDIR; 1410 else 1411 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1412 VOP_UNLOCK(vp); 1413 if (error) 1414 goto out; 1415 vref(vp); 1416 change_root(vp); 1417 1418 out: 1419 fd_putfile(fd); 1420 return (error); 1421 } 1422 1423 /* 1424 * Change current working directory (``.''). 1425 */ 1426 /* ARGSUSED */ 1427 int 1428 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1429 { 1430 /* { 1431 syscallarg(const char *) path; 1432 } */ 1433 struct cwdinfo *cwdi; 1434 int error; 1435 struct vnode *vp, *ovp; 1436 1437 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1438 if (error != 0) 1439 return (error); 1440 1441 cwdi = cwdenter(RW_WRITER); 1442 ovp = cwdi->cwdi_cdir; 1443 cwdi->cwdi_cdir = vp; 1444 cwdexit(cwdi); 1445 vrele(ovp); 1446 return (0); 1447 } 1448 1449 /* 1450 * Change notion of root (``/'') directory. 
1451 */ 1452 /* ARGSUSED */ 1453 int 1454 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1455 { 1456 /* { 1457 syscallarg(const char *) path; 1458 } */ 1459 int error; 1460 struct vnode *vp; 1461 1462 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1463 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1464 return (error); 1465 1466 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1467 if (error == 0) 1468 change_root(vp); 1469 return error; 1470 } 1471 1472 /* 1473 * Common routine for chroot and fchroot. 1474 * NB: callers need to properly authorize the change root operation. 1475 */ 1476 void 1477 change_root(struct vnode *vp) 1478 { 1479 struct cwdinfo *cwdi; 1480 kauth_cred_t ncred; 1481 struct lwp *l = curlwp; 1482 struct proc *p = l->l_proc; 1483 1484 ncred = kauth_cred_alloc(); 1485 1486 cwdi = cwdenter(RW_WRITER); 1487 if (cwdi->cwdi_rdir != NULL) 1488 vrele(cwdi->cwdi_rdir); 1489 cwdi->cwdi_rdir = vp; 1490 1491 /* 1492 * Prevent escaping from chroot by putting the root under 1493 * the working directory. Silently chdir to / if we aren't 1494 * already there. 1495 */ 1496 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1497 /* 1498 * XXX would be more failsafe to change directory to a 1499 * deadfs node here instead 1500 */ 1501 vrele(cwdi->cwdi_cdir); 1502 vref(vp); 1503 cwdi->cwdi_cdir = vp; 1504 } 1505 cwdexit(cwdi); 1506 1507 /* Get a write lock on the process credential. */ 1508 proc_crmod_enter(); 1509 1510 kauth_cred_clone(p->p_cred, ncred); 1511 kauth_proc_chroot(ncred, p->p_cwdi); 1512 1513 /* Broadcast our credentials to the process and other LWPs. */ 1514 proc_crmod_leave(ncred, p->p_cred, true); 1515 } 1516 1517 /* 1518 * Common routine for chroot and chdir. 
1519 * XXX "where" should be enum uio_seg 1520 */ 1521 int 1522 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1523 { 1524 struct pathbuf *pb; 1525 struct nameidata nd; 1526 int error; 1527 1528 error = pathbuf_maybe_copyin(path, where, &pb); 1529 if (error) { 1530 return error; 1531 } 1532 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1533 if ((error = namei(&nd)) != 0) { 1534 pathbuf_destroy(pb); 1535 return error; 1536 } 1537 *vpp = nd.ni_vp; 1538 pathbuf_destroy(pb); 1539 1540 if ((*vpp)->v_type != VDIR) 1541 error = ENOTDIR; 1542 else 1543 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1544 1545 if (error) 1546 vput(*vpp); 1547 else 1548 VOP_UNLOCK(*vpp); 1549 return (error); 1550 } 1551 1552 /* 1553 * Internals of sys_open - path has already been converted into a pathbuf 1554 * (so we can easily reuse this function from other parts of the kernel, 1555 * like posix_spawn post-processing). 1556 */ 1557 int 1558 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1559 int open_mode, int *fd) 1560 { 1561 struct proc *p = l->l_proc; 1562 struct cwdinfo *cwdi = p->p_cwdi; 1563 file_t *fp; 1564 struct vnode *vp; 1565 int flags, cmode; 1566 int indx, error; 1567 struct nameidata nd; 1568 1569 if (open_flags & O_SEARCH) { 1570 open_flags &= ~(int)O_SEARCH; 1571 } 1572 1573 /* 1574 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1575 * may be specified. 1576 */ 1577 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1578 return EINVAL; 1579 1580 flags = FFLAGS(open_flags); 1581 if ((flags & (FREAD | FWRITE)) == 0) 1582 return EINVAL; 1583 1584 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1585 return error; 1586 } 1587 1588 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1589 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1590 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1591 if (dvp != NULL) 1592 NDAT(&nd, dvp); 1593 1594 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1595 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1596 fd_abort(p, fp, indx); 1597 if ((error == EDUPFD || error == EMOVEFD) && 1598 l->l_dupfd >= 0 && /* XXX from fdopen */ 1599 (error = 1600 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1601 *fd = indx; 1602 return 0; 1603 } 1604 if (error == ERESTART) 1605 error = EINTR; 1606 return error; 1607 } 1608 1609 l->l_dupfd = 0; 1610 vp = nd.ni_vp; 1611 1612 if ((error = open_setfp(l, fp, vp, indx, flags))) 1613 return error; 1614 1615 VOP_UNLOCK(vp); 1616 *fd = indx; 1617 fd_affix(p, fp, indx); 1618 return 0; 1619 } 1620 1621 int 1622 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1623 { 1624 struct pathbuf *pb; 1625 int error, oflags; 1626 1627 oflags = FFLAGS(open_flags); 1628 if ((oflags & (FREAD | FWRITE)) == 0) 1629 return EINVAL; 1630 1631 pb = pathbuf_create(path); 1632 if (pb == NULL) 1633 return ENOMEM; 1634 1635 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1636 pathbuf_destroy(pb); 1637 1638 return error; 1639 } 1640 1641 static int 1642 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1643 int mode, int *fd) 1644 { 1645 file_t *dfp = NULL; 1646 struct vnode *dvp = NULL; 1647 struct pathbuf *pb; 1648 const char *pathstring = NULL; 1649 int error; 1650 1651 if (path == NULL) { 1652 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1653 if (error == ENOSYS) 1654 goto no_compat; 1655 if (error) 1656 return error; 1657 } else { 1658 no_compat: 1659 error = pathbuf_copyin(path, &pb); 1660 if (error) 1661 return error; 1662 } 1663 1664 pathstring = pathbuf_stringcopy_get(pb); 1665 1666 /* 1667 * fdat is ignored if: 1668 * 1) if fdat is AT_FDCWD, which means use current directory as base. 
1669 * 2) if path is absolute, then fdat is useless. 1670 */ 1671 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1672 /* fd_getvnode() will use the descriptor for us */ 1673 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1674 goto out; 1675 1676 dvp = dfp->f_vnode; 1677 } 1678 1679 error = do_open(l, dvp, pb, flags, mode, fd); 1680 1681 if (dfp != NULL) 1682 fd_putfile(fdat); 1683 out: 1684 pathbuf_stringcopy_put(pb, pathstring); 1685 pathbuf_destroy(pb); 1686 return error; 1687 } 1688 1689 int 1690 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1691 { 1692 /* { 1693 syscallarg(const char *) path; 1694 syscallarg(int) flags; 1695 syscallarg(int) mode; 1696 } */ 1697 int error; 1698 int fd; 1699 1700 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1701 SCARG(uap, flags), SCARG(uap, mode), &fd); 1702 1703 if (error == 0) 1704 *retval = fd; 1705 1706 return error; 1707 } 1708 1709 int 1710 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1711 { 1712 /* { 1713 syscallarg(int) fd; 1714 syscallarg(const char *) path; 1715 syscallarg(int) oflags; 1716 syscallarg(int) mode; 1717 } */ 1718 int error; 1719 int fd; 1720 1721 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1722 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1723 1724 if (error == 0) 1725 *retval = fd; 1726 1727 return error; 1728 } 1729 1730 static void 1731 vfs__fhfree(fhandle_t *fhp) 1732 { 1733 size_t fhsize; 1734 1735 fhsize = FHANDLE_SIZE(fhp); 1736 kmem_free(fhp, fhsize); 1737 } 1738 1739 /* 1740 * vfs_composefh: compose a filehandle. 
1741 */ 1742 1743 int 1744 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1745 { 1746 struct mount *mp; 1747 struct fid *fidp; 1748 int error; 1749 size_t needfhsize; 1750 size_t fidsize; 1751 1752 mp = vp->v_mount; 1753 fidp = NULL; 1754 if (*fh_size < FHANDLE_SIZE_MIN) { 1755 fidsize = 0; 1756 } else { 1757 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1758 if (fhp != NULL) { 1759 memset(fhp, 0, *fh_size); 1760 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1761 fidp = &fhp->fh_fid; 1762 } 1763 } 1764 error = VFS_VPTOFH(vp, fidp, &fidsize); 1765 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1766 if (error == 0 && *fh_size < needfhsize) { 1767 error = E2BIG; 1768 } 1769 *fh_size = needfhsize; 1770 return error; 1771 } 1772 1773 int 1774 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1775 { 1776 struct mount *mp; 1777 fhandle_t *fhp; 1778 size_t fhsize; 1779 size_t fidsize; 1780 int error; 1781 1782 mp = vp->v_mount; 1783 fidsize = 0; 1784 error = VFS_VPTOFH(vp, NULL, &fidsize); 1785 KASSERT(error != 0); 1786 if (error != E2BIG) { 1787 goto out; 1788 } 1789 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1790 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1791 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1792 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1793 if (error == 0) { 1794 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1795 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1796 *fhpp = fhp; 1797 } else { 1798 kmem_free(fhp, fhsize); 1799 } 1800 out: 1801 return error; 1802 } 1803 1804 void 1805 vfs_composefh_free(fhandle_t *fhp) 1806 { 1807 1808 vfs__fhfree(fhp); 1809 } 1810 1811 /* 1812 * vfs_fhtovp: lookup a vnode by a filehandle. 
1813 */ 1814 1815 int 1816 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1817 { 1818 struct mount *mp; 1819 int error; 1820 1821 *vpp = NULL; 1822 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1823 if (mp == NULL) { 1824 error = ESTALE; 1825 goto out; 1826 } 1827 if (mp->mnt_op->vfs_fhtovp == NULL) { 1828 error = EOPNOTSUPP; 1829 goto out; 1830 } 1831 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1832 out: 1833 return error; 1834 } 1835 1836 /* 1837 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1838 * the needed size. 1839 */ 1840 1841 int 1842 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1843 { 1844 fhandle_t *fhp; 1845 int error; 1846 1847 if (fhsize > FHANDLE_SIZE_MAX) { 1848 return EINVAL; 1849 } 1850 if (fhsize < FHANDLE_SIZE_MIN) { 1851 return EINVAL; 1852 } 1853 again: 1854 fhp = kmem_alloc(fhsize, KM_SLEEP); 1855 error = copyin(ufhp, fhp, fhsize); 1856 if (error == 0) { 1857 /* XXX this check shouldn't be here */ 1858 if (FHANDLE_SIZE(fhp) == fhsize) { 1859 *fhpp = fhp; 1860 return 0; 1861 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1862 /* 1863 * a kludge for nfsv2 padded handles. 1864 */ 1865 size_t sz; 1866 1867 sz = FHANDLE_SIZE(fhp); 1868 kmem_free(fhp, fhsize); 1869 fhsize = sz; 1870 goto again; 1871 } else { 1872 /* 1873 * userland told us wrong size. 
1874 */ 1875 error = EINVAL; 1876 } 1877 } 1878 kmem_free(fhp, fhsize); 1879 return error; 1880 } 1881 1882 void 1883 vfs_copyinfh_free(fhandle_t *fhp) 1884 { 1885 1886 vfs__fhfree(fhp); 1887 } 1888 1889 /* 1890 * Get file handle system call 1891 */ 1892 int 1893 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1894 { 1895 /* { 1896 syscallarg(char *) fname; 1897 syscallarg(fhandle_t *) fhp; 1898 syscallarg(size_t *) fh_size; 1899 } */ 1900 struct vnode *vp; 1901 fhandle_t *fh; 1902 int error; 1903 struct pathbuf *pb; 1904 struct nameidata nd; 1905 size_t sz; 1906 size_t usz; 1907 1908 /* 1909 * Must be super user 1910 */ 1911 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1912 0, NULL, NULL, NULL); 1913 if (error) 1914 return (error); 1915 1916 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1917 if (error) { 1918 return error; 1919 } 1920 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1921 error = namei(&nd); 1922 if (error) { 1923 pathbuf_destroy(pb); 1924 return error; 1925 } 1926 vp = nd.ni_vp; 1927 pathbuf_destroy(pb); 1928 1929 error = vfs_composefh_alloc(vp, &fh); 1930 vput(vp); 1931 if (error != 0) { 1932 return error; 1933 } 1934 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1935 if (error != 0) { 1936 goto out; 1937 } 1938 sz = FHANDLE_SIZE(fh); 1939 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1940 if (error != 0) { 1941 goto out; 1942 } 1943 if (usz >= sz) { 1944 error = copyout(fh, SCARG(uap, fhp), sz); 1945 } else { 1946 error = E2BIG; 1947 } 1948 out: 1949 vfs_composefh_free(fh); 1950 return (error); 1951 } 1952 1953 /* 1954 * Open a file given a file handle. 1955 * 1956 * Check permissions, allocate an open file structure, 1957 * and call the device open routine if any. 
1958 */ 1959 1960 int 1961 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1962 register_t *retval) 1963 { 1964 file_t *fp; 1965 struct vnode *vp = NULL; 1966 kauth_cred_t cred = l->l_cred; 1967 file_t *nfp; 1968 int indx, error; 1969 struct vattr va; 1970 fhandle_t *fh; 1971 int flags; 1972 proc_t *p; 1973 1974 p = curproc; 1975 1976 /* 1977 * Must be super user 1978 */ 1979 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1980 0, NULL, NULL, NULL))) 1981 return (error); 1982 1983 if (oflags & O_SEARCH) { 1984 oflags &= ~(int)O_SEARCH; 1985 } 1986 1987 flags = FFLAGS(oflags); 1988 if ((flags & (FREAD | FWRITE)) == 0) 1989 return (EINVAL); 1990 if ((flags & O_CREAT)) 1991 return (EINVAL); 1992 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1993 return (error); 1994 fp = nfp; 1995 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1996 if (error != 0) { 1997 goto bad; 1998 } 1999 error = vfs_fhtovp(fh, &vp); 2000 vfs_copyinfh_free(fh); 2001 if (error != 0) { 2002 goto bad; 2003 } 2004 2005 /* Now do an effective vn_open */ 2006 2007 if (vp->v_type == VSOCK) { 2008 error = EOPNOTSUPP; 2009 goto bad; 2010 } 2011 error = vn_openchk(vp, cred, flags); 2012 if (error != 0) 2013 goto bad; 2014 if (flags & O_TRUNC) { 2015 VOP_UNLOCK(vp); /* XXX */ 2016 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2017 vattr_null(&va); 2018 va.va_size = 0; 2019 error = VOP_SETATTR(vp, &va, cred); 2020 if (error) 2021 goto bad; 2022 } 2023 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2024 goto bad; 2025 if (flags & FWRITE) { 2026 mutex_enter(vp->v_interlock); 2027 vp->v_writecount++; 2028 mutex_exit(vp->v_interlock); 2029 } 2030 2031 /* done with modified vn_open, now finish what sys_open does. 
*/ 2032 if ((error = open_setfp(l, fp, vp, indx, flags))) 2033 return error; 2034 2035 VOP_UNLOCK(vp); 2036 *retval = indx; 2037 fd_affix(p, fp, indx); 2038 return (0); 2039 2040 bad: 2041 fd_abort(p, fp, indx); 2042 if (vp != NULL) 2043 vput(vp); 2044 return (error); 2045 } 2046 2047 int 2048 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2049 { 2050 /* { 2051 syscallarg(const void *) fhp; 2052 syscallarg(size_t) fh_size; 2053 syscallarg(int) flags; 2054 } */ 2055 2056 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2057 SCARG(uap, flags), retval); 2058 } 2059 2060 int 2061 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2062 { 2063 int error; 2064 fhandle_t *fh; 2065 struct vnode *vp; 2066 2067 /* 2068 * Must be super user 2069 */ 2070 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2071 0, NULL, NULL, NULL))) 2072 return (error); 2073 2074 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2075 if (error != 0) 2076 return error; 2077 2078 error = vfs_fhtovp(fh, &vp); 2079 vfs_copyinfh_free(fh); 2080 if (error != 0) 2081 return error; 2082 2083 error = vn_stat(vp, sb); 2084 vput(vp); 2085 return error; 2086 } 2087 2088 2089 /* ARGSUSED */ 2090 int 2091 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2092 { 2093 /* { 2094 syscallarg(const void *) fhp; 2095 syscallarg(size_t) fh_size; 2096 syscallarg(struct stat *) sb; 2097 } */ 2098 struct stat sb; 2099 int error; 2100 2101 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2102 if (error) 2103 return error; 2104 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2105 } 2106 2107 int 2108 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2109 int flags) 2110 { 2111 fhandle_t *fh; 2112 struct mount *mp; 2113 struct vnode *vp; 2114 int error; 2115 2116 /* 2117 * Must be super user 2118 */ 2119 if ((error = 
kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2120 0, NULL, NULL, NULL))) 2121 return error; 2122 2123 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2124 if (error != 0) 2125 return error; 2126 2127 error = vfs_fhtovp(fh, &vp); 2128 vfs_copyinfh_free(fh); 2129 if (error != 0) 2130 return error; 2131 2132 mp = vp->v_mount; 2133 error = dostatvfs(mp, sb, l, flags, 1); 2134 vput(vp); 2135 return error; 2136 } 2137 2138 /* ARGSUSED */ 2139 int 2140 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2141 { 2142 /* { 2143 syscallarg(const void *) fhp; 2144 syscallarg(size_t) fh_size; 2145 syscallarg(struct statvfs *) buf; 2146 syscallarg(int) flags; 2147 } */ 2148 struct statvfs *sb = STATVFSBUF_GET(); 2149 int error; 2150 2151 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2152 SCARG(uap, flags)); 2153 if (error == 0) 2154 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2155 STATVFSBUF_PUT(sb); 2156 return error; 2157 } 2158 2159 int 2160 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2161 dev_t dev) 2162 { 2163 2164 /* 2165 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2166 * in mode and dev=0. 2167 * 2168 * In all the other cases it's implementation defined behavior. 2169 */ 2170 2171 if ((mode & S_IFIFO) && dev == 0) 2172 return do_sys_mkfifoat(l, fdat, pathname, mode); 2173 else 2174 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2175 UIO_USERSPACE); 2176 } 2177 2178 /* 2179 * Create a special file. 
2180 */ 2181 /* ARGSUSED */ 2182 int 2183 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2184 register_t *retval) 2185 { 2186 /* { 2187 syscallarg(const char *) path; 2188 syscallarg(mode_t) mode; 2189 syscallarg(dev_t) dev; 2190 } */ 2191 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2192 SCARG(uap, mode), SCARG(uap, dev)); 2193 } 2194 2195 int 2196 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2197 register_t *retval) 2198 { 2199 /* { 2200 syscallarg(int) fd; 2201 syscallarg(const char *) path; 2202 syscallarg(mode_t) mode; 2203 syscallarg(int) pad; 2204 syscallarg(dev_t) dev; 2205 } */ 2206 2207 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2208 SCARG(uap, mode), SCARG(uap, dev)); 2209 } 2210 2211 int 2212 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2213 enum uio_seg seg) 2214 { 2215 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2216 } 2217 2218 int 2219 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2220 dev_t dev, enum uio_seg seg) 2221 { 2222 struct proc *p = l->l_proc; 2223 struct vnode *vp; 2224 struct vattr vattr; 2225 int error, optype; 2226 struct pathbuf *pb; 2227 struct nameidata nd; 2228 const char *pathstring; 2229 2230 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2231 0, NULL, NULL, NULL)) != 0) 2232 return (error); 2233 2234 optype = VOP_MKNOD_DESCOFFSET; 2235 2236 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2237 if (error) { 2238 return error; 2239 } 2240 pathstring = pathbuf_stringcopy_get(pb); 2241 if (pathstring == NULL) { 2242 pathbuf_destroy(pb); 2243 return ENOMEM; 2244 } 2245 2246 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2247 2248 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2249 goto out; 2250 vp = nd.ni_vp; 2251 2252 if (vp != NULL) 2253 error = EEXIST; 2254 else { 2255 vattr_null(&vattr); 2256 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2257 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2258 vattr.va_rdev = dev; 2259 2260 switch (mode & S_IFMT) { 2261 case S_IFMT: /* used by badsect to flag bad sectors */ 2262 vattr.va_type = VBAD; 2263 break; 2264 case S_IFCHR: 2265 vattr.va_type = VCHR; 2266 break; 2267 case S_IFBLK: 2268 vattr.va_type = VBLK; 2269 break; 2270 case S_IFWHT: 2271 optype = VOP_WHITEOUT_DESCOFFSET; 2272 break; 2273 case S_IFREG: 2274 #if NVERIEXEC > 0 2275 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2276 O_CREAT); 2277 #endif /* NVERIEXEC > 0 */ 2278 vattr.va_type = VREG; 2279 vattr.va_rdev = VNOVAL; 2280 optype = VOP_CREATE_DESCOFFSET; 2281 break; 2282 default: 2283 error = EINVAL; 2284 break; 2285 } 2286 2287 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2288 vattr.va_rdev == VNOVAL) 2289 error = EINVAL; 2290 } 2291 2292 if (!error) { 2293 switch (optype) { 2294 case VOP_WHITEOUT_DESCOFFSET: 2295 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2296 if (error) 2297 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2298 vput(nd.ni_dvp); 2299 break; 2300 2301 case VOP_MKNOD_DESCOFFSET: 2302 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2303 &nd.ni_cnd, &vattr); 2304 if (error == 0) 2305 vrele(nd.ni_vp); 2306 vput(nd.ni_dvp); 2307 break; 2308 2309 case VOP_CREATE_DESCOFFSET: 2310 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2311 &nd.ni_cnd, &vattr); 2312 if (error == 0) 2313 vrele(nd.ni_vp); 2314 vput(nd.ni_dvp); 2315 break; 2316 } 2317 } else { 2318 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2319 if (nd.ni_dvp == vp) 2320 vrele(nd.ni_dvp); 2321 else 2322 vput(nd.ni_dvp); 2323 if (vp) 2324 vrele(vp); 2325 } 2326 out: 2327 pathbuf_stringcopy_put(pb, pathstring); 2328 pathbuf_destroy(pb); 2329 return (error); 2330 } 2331 2332 /* 2333 * Create a named pipe. 
2334 */ 2335 /* ARGSUSED */ 2336 int 2337 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2338 { 2339 /* { 2340 syscallarg(const char *) path; 2341 syscallarg(int) mode; 2342 } */ 2343 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2344 } 2345 2346 int 2347 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2348 register_t *retval) 2349 { 2350 /* { 2351 syscallarg(int) fd; 2352 syscallarg(const char *) path; 2353 syscallarg(int) mode; 2354 } */ 2355 2356 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2357 SCARG(uap, mode)); 2358 } 2359 2360 static int 2361 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2362 { 2363 struct proc *p = l->l_proc; 2364 struct vattr vattr; 2365 int error; 2366 struct pathbuf *pb; 2367 struct nameidata nd; 2368 2369 error = pathbuf_copyin(path, &pb); 2370 if (error) { 2371 return error; 2372 } 2373 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2374 2375 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2376 pathbuf_destroy(pb); 2377 return error; 2378 } 2379 if (nd.ni_vp != NULL) { 2380 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2381 if (nd.ni_dvp == nd.ni_vp) 2382 vrele(nd.ni_dvp); 2383 else 2384 vput(nd.ni_dvp); 2385 vrele(nd.ni_vp); 2386 pathbuf_destroy(pb); 2387 return (EEXIST); 2388 } 2389 vattr_null(&vattr); 2390 vattr.va_type = VFIFO; 2391 /* We will read cwdi->cwdi_cmask unlocked. */ 2392 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2393 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2394 if (error == 0) 2395 vrele(nd.ni_vp); 2396 vput(nd.ni_dvp); 2397 pathbuf_destroy(pb); 2398 return (error); 2399 } 2400 2401 /* 2402 * Make a hard file link. 
2403 */ 2404 /* ARGSUSED */ 2405 int 2406 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2407 const char *link, int follow, register_t *retval) 2408 { 2409 struct vnode *vp; 2410 struct pathbuf *linkpb; 2411 struct nameidata nd; 2412 namei_simple_flags_t ns_flags; 2413 int error; 2414 2415 if (follow & AT_SYMLINK_FOLLOW) 2416 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2417 else 2418 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2419 2420 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2421 if (error != 0) 2422 return (error); 2423 error = pathbuf_copyin(link, &linkpb); 2424 if (error) { 2425 goto out1; 2426 } 2427 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2428 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2429 goto out2; 2430 if (nd.ni_vp) { 2431 error = EEXIST; 2432 goto abortop; 2433 } 2434 /* Prevent hard links on directories. */ 2435 if (vp->v_type == VDIR) { 2436 error = EPERM; 2437 goto abortop; 2438 } 2439 /* Prevent cross-mount operation. */ 2440 if (nd.ni_dvp->v_mount != vp->v_mount) { 2441 error = EXDEV; 2442 goto abortop; 2443 } 2444 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2445 VOP_UNLOCK(nd.ni_dvp); 2446 vrele(nd.ni_dvp); 2447 out2: 2448 pathbuf_destroy(linkpb); 2449 out1: 2450 vrele(vp); 2451 return (error); 2452 abortop: 2453 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2454 if (nd.ni_dvp == nd.ni_vp) 2455 vrele(nd.ni_dvp); 2456 else 2457 vput(nd.ni_dvp); 2458 if (nd.ni_vp != NULL) 2459 vrele(nd.ni_vp); 2460 goto out2; 2461 } 2462 2463 int 2464 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2465 { 2466 /* { 2467 syscallarg(const char *) path; 2468 syscallarg(const char *) link; 2469 } */ 2470 const char *path = SCARG(uap, path); 2471 const char *link = SCARG(uap, link); 2472 2473 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2474 AT_SYMLINK_FOLLOW, retval); 2475 } 2476 2477 int 2478 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2479 register_t *retval) 2480 { 2481 
/* { 2482 syscallarg(int) fd1; 2483 syscallarg(const char *) name1; 2484 syscallarg(int) fd2; 2485 syscallarg(const char *) name2; 2486 syscallarg(int) flags; 2487 } */ 2488 int fd1 = SCARG(uap, fd1); 2489 const char *name1 = SCARG(uap, name1); 2490 int fd2 = SCARG(uap, fd2); 2491 const char *name2 = SCARG(uap, name2); 2492 int follow; 2493 2494 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2495 2496 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2497 } 2498 2499 2500 int 2501 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2502 { 2503 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2504 } 2505 2506 static int 2507 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2508 const char *link, enum uio_seg seg) 2509 { 2510 struct proc *p = curproc; 2511 struct vattr vattr; 2512 char *path; 2513 int error; 2514 size_t len; 2515 struct pathbuf *linkpb; 2516 struct nameidata nd; 2517 2518 KASSERT(l != NULL || fdat == AT_FDCWD); 2519 2520 path = PNBUF_GET(); 2521 if (seg == UIO_USERSPACE) { 2522 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2523 goto out1; 2524 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2525 goto out1; 2526 } else { 2527 len = strlen(patharg) + 1; 2528 KASSERT(len <= MAXPATHLEN); 2529 memcpy(path, patharg, len); 2530 linkpb = pathbuf_create(link); 2531 if (linkpb == NULL) { 2532 error = ENOMEM; 2533 goto out1; 2534 } 2535 } 2536 ktrkuser("symlink-target", path, len - 1); 2537 2538 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2539 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2540 goto out2; 2541 if (nd.ni_vp) { 2542 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2543 if (nd.ni_dvp == nd.ni_vp) 2544 vrele(nd.ni_dvp); 2545 else 2546 vput(nd.ni_dvp); 2547 vrele(nd.ni_vp); 2548 error = EEXIST; 2549 goto out2; 2550 } 2551 vattr_null(&vattr); 2552 vattr.va_type = VLNK; 2553 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2554 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2555 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2556 if (error == 0) 2557 vrele(nd.ni_vp); 2558 vput(nd.ni_dvp); 2559 out2: 2560 pathbuf_destroy(linkpb); 2561 out1: 2562 PNBUF_PUT(path); 2563 return (error); 2564 } 2565 2566 /* 2567 * Make a symbolic link. 2568 */ 2569 /* ARGSUSED */ 2570 int 2571 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2572 { 2573 /* { 2574 syscallarg(const char *) path; 2575 syscallarg(const char *) link; 2576 } */ 2577 2578 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2579 UIO_USERSPACE); 2580 } 2581 2582 int 2583 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2584 register_t *retval) 2585 { 2586 /* { 2587 syscallarg(const char *) path1; 2588 syscallarg(int) fd; 2589 syscallarg(const char *) path2; 2590 } */ 2591 2592 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2593 SCARG(uap, path2), UIO_USERSPACE); 2594 } 2595 2596 /* 2597 * Delete a whiteout from the filesystem. 
2598 */ 2599 /* ARGSUSED */ 2600 int 2601 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2602 { 2603 /* { 2604 syscallarg(const char *) path; 2605 } */ 2606 int error; 2607 struct pathbuf *pb; 2608 struct nameidata nd; 2609 2610 error = pathbuf_copyin(SCARG(uap, path), &pb); 2611 if (error) { 2612 return error; 2613 } 2614 2615 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2616 error = namei(&nd); 2617 if (error) { 2618 pathbuf_destroy(pb); 2619 return (error); 2620 } 2621 2622 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2623 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2624 if (nd.ni_dvp == nd.ni_vp) 2625 vrele(nd.ni_dvp); 2626 else 2627 vput(nd.ni_dvp); 2628 if (nd.ni_vp) 2629 vrele(nd.ni_vp); 2630 pathbuf_destroy(pb); 2631 return (EEXIST); 2632 } 2633 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2634 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2635 vput(nd.ni_dvp); 2636 pathbuf_destroy(pb); 2637 return (error); 2638 } 2639 2640 /* 2641 * Delete a name from the filesystem. 
2642 */ 2643 /* ARGSUSED */ 2644 int 2645 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2646 { 2647 /* { 2648 syscallarg(const char *) path; 2649 } */ 2650 2651 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2652 } 2653 2654 int 2655 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2656 register_t *retval) 2657 { 2658 /* { 2659 syscallarg(int) fd; 2660 syscallarg(const char *) path; 2661 syscallarg(int) flag; 2662 } */ 2663 2664 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2665 SCARG(uap, flag), UIO_USERSPACE); 2666 } 2667 2668 int 2669 do_sys_unlink(const char *arg, enum uio_seg seg) 2670 { 2671 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2672 } 2673 2674 static int 2675 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2676 enum uio_seg seg) 2677 { 2678 struct vnode *vp; 2679 int error; 2680 struct pathbuf *pb; 2681 struct nameidata nd; 2682 const char *pathstring; 2683 2684 KASSERT(l != NULL || fdat == AT_FDCWD); 2685 2686 error = pathbuf_maybe_copyin(arg, seg, &pb); 2687 if (error) { 2688 return error; 2689 } 2690 pathstring = pathbuf_stringcopy_get(pb); 2691 if (pathstring == NULL) { 2692 pathbuf_destroy(pb); 2693 return ENOMEM; 2694 } 2695 2696 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2697 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2698 goto out; 2699 vp = nd.ni_vp; 2700 2701 /* 2702 * The root of a mounted filesystem cannot be deleted. 2703 */ 2704 if ((vp->v_vflag & VV_ROOT) != 0) { 2705 error = EBUSY; 2706 goto abort; 2707 } 2708 2709 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2710 error = EBUSY; 2711 goto abort; 2712 } 2713 2714 /* 2715 * No rmdir "." please. 
2716 */ 2717 if (nd.ni_dvp == vp) { 2718 error = EINVAL; 2719 goto abort; 2720 } 2721 2722 /* 2723 * AT_REMOVEDIR is required to remove a directory 2724 */ 2725 if (vp->v_type == VDIR) { 2726 if (!(flags & AT_REMOVEDIR)) { 2727 error = EPERM; 2728 goto abort; 2729 } else { 2730 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2731 vput(nd.ni_dvp); 2732 goto out; 2733 } 2734 } 2735 2736 /* 2737 * Starting here we only deal with non directories. 2738 */ 2739 if (flags & AT_REMOVEDIR) { 2740 error = ENOTDIR; 2741 goto abort; 2742 } 2743 2744 #if NVERIEXEC > 0 2745 /* Handle remove requests for veriexec entries. */ 2746 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2747 goto abort; 2748 } 2749 #endif /* NVERIEXEC > 0 */ 2750 2751 #ifdef FILEASSOC 2752 (void)fileassoc_file_delete(vp); 2753 #endif /* FILEASSOC */ 2754 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2755 vput(nd.ni_dvp); 2756 goto out; 2757 2758 abort: 2759 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2760 if (nd.ni_dvp == vp) 2761 vrele(nd.ni_dvp); 2762 else 2763 vput(nd.ni_dvp); 2764 vput(vp); 2765 2766 out: 2767 pathbuf_stringcopy_put(pb, pathstring); 2768 pathbuf_destroy(pb); 2769 return (error); 2770 } 2771 2772 /* 2773 * Reposition read/write file offset. 
2774 */ 2775 int 2776 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2777 { 2778 /* { 2779 syscallarg(int) fd; 2780 syscallarg(int) pad; 2781 syscallarg(off_t) offset; 2782 syscallarg(int) whence; 2783 } */ 2784 kauth_cred_t cred = l->l_cred; 2785 file_t *fp; 2786 struct vnode *vp; 2787 struct vattr vattr; 2788 off_t newoff; 2789 int error, fd; 2790 2791 fd = SCARG(uap, fd); 2792 2793 if ((fp = fd_getfile(fd)) == NULL) 2794 return (EBADF); 2795 2796 vp = fp->f_vnode; 2797 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2798 error = ESPIPE; 2799 goto out; 2800 } 2801 2802 vn_lock(vp, LK_SHARED | LK_RETRY); 2803 2804 switch (SCARG(uap, whence)) { 2805 case SEEK_CUR: 2806 newoff = fp->f_offset + SCARG(uap, offset); 2807 break; 2808 case SEEK_END: 2809 error = VOP_GETATTR(vp, &vattr, cred); 2810 if (error) { 2811 VOP_UNLOCK(vp); 2812 goto out; 2813 } 2814 newoff = SCARG(uap, offset) + vattr.va_size; 2815 break; 2816 case SEEK_SET: 2817 newoff = SCARG(uap, offset); 2818 break; 2819 default: 2820 error = EINVAL; 2821 VOP_UNLOCK(vp); 2822 goto out; 2823 } 2824 VOP_UNLOCK(vp); 2825 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2826 *(off_t *)retval = fp->f_offset = newoff; 2827 } 2828 out: 2829 fd_putfile(fd); 2830 return (error); 2831 } 2832 2833 /* 2834 * Positional read system call. 
2835 */ 2836 int 2837 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2838 { 2839 /* { 2840 syscallarg(int) fd; 2841 syscallarg(void *) buf; 2842 syscallarg(size_t) nbyte; 2843 syscallarg(off_t) offset; 2844 } */ 2845 file_t *fp; 2846 struct vnode *vp; 2847 off_t offset; 2848 int error, fd = SCARG(uap, fd); 2849 2850 if ((fp = fd_getfile(fd)) == NULL) 2851 return (EBADF); 2852 2853 if ((fp->f_flag & FREAD) == 0) { 2854 fd_putfile(fd); 2855 return (EBADF); 2856 } 2857 2858 vp = fp->f_vnode; 2859 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2860 error = ESPIPE; 2861 goto out; 2862 } 2863 2864 offset = SCARG(uap, offset); 2865 2866 /* 2867 * XXX This works because no file systems actually 2868 * XXX take any action on the seek operation. 2869 */ 2870 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2871 goto out; 2872 2873 /* dofileread() will unuse the descriptor for us */ 2874 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2875 &offset, 0, retval)); 2876 2877 out: 2878 fd_putfile(fd); 2879 return (error); 2880 } 2881 2882 /* 2883 * Positional scatter read system call. 2884 */ 2885 int 2886 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2887 { 2888 /* { 2889 syscallarg(int) fd; 2890 syscallarg(const struct iovec *) iovp; 2891 syscallarg(int) iovcnt; 2892 syscallarg(off_t) offset; 2893 } */ 2894 off_t offset = SCARG(uap, offset); 2895 2896 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2897 SCARG(uap, iovcnt), &offset, 0, retval); 2898 } 2899 2900 /* 2901 * Positional write system call. 
2902 */ 2903 int 2904 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2905 { 2906 /* { 2907 syscallarg(int) fd; 2908 syscallarg(const void *) buf; 2909 syscallarg(size_t) nbyte; 2910 syscallarg(off_t) offset; 2911 } */ 2912 file_t *fp; 2913 struct vnode *vp; 2914 off_t offset; 2915 int error, fd = SCARG(uap, fd); 2916 2917 if ((fp = fd_getfile(fd)) == NULL) 2918 return (EBADF); 2919 2920 if ((fp->f_flag & FWRITE) == 0) { 2921 fd_putfile(fd); 2922 return (EBADF); 2923 } 2924 2925 vp = fp->f_vnode; 2926 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2927 error = ESPIPE; 2928 goto out; 2929 } 2930 2931 offset = SCARG(uap, offset); 2932 2933 /* 2934 * XXX This works because no file systems actually 2935 * XXX take any action on the seek operation. 2936 */ 2937 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2938 goto out; 2939 2940 /* dofilewrite() will unuse the descriptor for us */ 2941 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2942 &offset, 0, retval)); 2943 2944 out: 2945 fd_putfile(fd); 2946 return (error); 2947 } 2948 2949 /* 2950 * Positional gather write system call. 2951 */ 2952 int 2953 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2954 { 2955 /* { 2956 syscallarg(int) fd; 2957 syscallarg(const struct iovec *) iovp; 2958 syscallarg(int) iovcnt; 2959 syscallarg(off_t) offset; 2960 } */ 2961 off_t offset = SCARG(uap, offset); 2962 2963 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2964 SCARG(uap, iovcnt), &offset, 0, retval); 2965 } 2966 2967 /* 2968 * Check access permissions. 
2969 */ 2970 int 2971 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2972 { 2973 /* { 2974 syscallarg(const char *) path; 2975 syscallarg(int) flags; 2976 } */ 2977 2978 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2979 SCARG(uap, flags), 0); 2980 } 2981 2982 int 2983 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2984 int mode, int flags) 2985 { 2986 kauth_cred_t cred; 2987 struct vnode *vp; 2988 int error, nd_flag, vmode; 2989 struct pathbuf *pb; 2990 struct nameidata nd; 2991 2992 CTASSERT(F_OK == 0); 2993 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2994 /* nonsense mode */ 2995 return EINVAL; 2996 } 2997 2998 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2999 if (flags & AT_SYMLINK_NOFOLLOW) 3000 nd_flag &= ~FOLLOW; 3001 3002 error = pathbuf_copyin(path, &pb); 3003 if (error) 3004 return error; 3005 3006 NDINIT(&nd, LOOKUP, nd_flag, pb); 3007 3008 /* Override default credentials */ 3009 cred = kauth_cred_dup(l->l_cred); 3010 if (!(flags & AT_EACCESS)) { 3011 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3012 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3013 } 3014 nd.ni_cnd.cn_cred = cred; 3015 3016 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3017 pathbuf_destroy(pb); 3018 goto out; 3019 } 3020 vp = nd.ni_vp; 3021 pathbuf_destroy(pb); 3022 3023 /* Flags == 0 means only check for existence. 
*/ 3024 if (mode) { 3025 vmode = 0; 3026 if (mode & R_OK) 3027 vmode |= VREAD; 3028 if (mode & W_OK) 3029 vmode |= VWRITE; 3030 if (mode & X_OK) 3031 vmode |= VEXEC; 3032 3033 error = VOP_ACCESS(vp, vmode, cred); 3034 if (!error && (vmode & VWRITE)) 3035 error = vn_writechk(vp); 3036 } 3037 vput(vp); 3038 out: 3039 kauth_cred_free(cred); 3040 return (error); 3041 } 3042 3043 int 3044 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3045 register_t *retval) 3046 { 3047 /* { 3048 syscallarg(int) fd; 3049 syscallarg(const char *) path; 3050 syscallarg(int) amode; 3051 syscallarg(int) flag; 3052 } */ 3053 3054 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3055 SCARG(uap, amode), SCARG(uap, flag)); 3056 } 3057 3058 /* 3059 * Common code for all sys_stat functions, including compat versions. 3060 */ 3061 int 3062 do_sys_stat(const char *userpath, unsigned int nd_flag, 3063 struct stat *sb) 3064 { 3065 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3066 } 3067 3068 int 3069 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3070 unsigned int nd_flag, struct stat *sb) 3071 { 3072 int error; 3073 struct pathbuf *pb; 3074 struct nameidata nd; 3075 3076 KASSERT(l != NULL || fdat == AT_FDCWD); 3077 3078 error = pathbuf_copyin(userpath, &pb); 3079 if (error) { 3080 return error; 3081 } 3082 3083 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3084 3085 error = fd_nameiat(l, fdat, &nd); 3086 if (error != 0) { 3087 pathbuf_destroy(pb); 3088 return error; 3089 } 3090 error = vn_stat(nd.ni_vp, sb); 3091 vput(nd.ni_vp); 3092 pathbuf_destroy(pb); 3093 return error; 3094 } 3095 3096 /* 3097 * Get file status; this version follows links. 
3098 */ 3099 /* ARGSUSED */ 3100 int 3101 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3102 { 3103 /* { 3104 syscallarg(const char *) path; 3105 syscallarg(struct stat *) ub; 3106 } */ 3107 struct stat sb; 3108 int error; 3109 3110 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3111 if (error) 3112 return error; 3113 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3114 } 3115 3116 /* 3117 * Get file status; this version does not follow links. 3118 */ 3119 /* ARGSUSED */ 3120 int 3121 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3122 { 3123 /* { 3124 syscallarg(const char *) path; 3125 syscallarg(struct stat *) ub; 3126 } */ 3127 struct stat sb; 3128 int error; 3129 3130 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3131 if (error) 3132 return error; 3133 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3134 } 3135 3136 int 3137 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3138 register_t *retval) 3139 { 3140 /* { 3141 syscallarg(int) fd; 3142 syscallarg(const char *) path; 3143 syscallarg(struct stat *) buf; 3144 syscallarg(int) flag; 3145 } */ 3146 unsigned int nd_flag; 3147 struct stat sb; 3148 int error; 3149 3150 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3151 nd_flag = NOFOLLOW; 3152 else 3153 nd_flag = FOLLOW; 3154 3155 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3156 &sb); 3157 if (error) 3158 return error; 3159 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3160 } 3161 3162 /* 3163 * Get configurable pathname variables. 
3164 */ 3165 /* ARGSUSED */ 3166 int 3167 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3168 { 3169 /* { 3170 syscallarg(const char *) path; 3171 syscallarg(int) name; 3172 } */ 3173 int error; 3174 struct pathbuf *pb; 3175 struct nameidata nd; 3176 3177 error = pathbuf_copyin(SCARG(uap, path), &pb); 3178 if (error) { 3179 return error; 3180 } 3181 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3182 if ((error = namei(&nd)) != 0) { 3183 pathbuf_destroy(pb); 3184 return (error); 3185 } 3186 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3187 vput(nd.ni_vp); 3188 pathbuf_destroy(pb); 3189 return (error); 3190 } 3191 3192 /* 3193 * Return target name of a symbolic link. 3194 */ 3195 /* ARGSUSED */ 3196 int 3197 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3198 register_t *retval) 3199 { 3200 /* { 3201 syscallarg(const char *) path; 3202 syscallarg(char *) buf; 3203 syscallarg(size_t) count; 3204 } */ 3205 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3206 SCARG(uap, buf), SCARG(uap, count), retval); 3207 } 3208 3209 static int 3210 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3211 size_t count, register_t *retval) 3212 { 3213 struct vnode *vp; 3214 struct iovec aiov; 3215 struct uio auio; 3216 int error; 3217 struct pathbuf *pb; 3218 struct nameidata nd; 3219 3220 error = pathbuf_copyin(path, &pb); 3221 if (error) { 3222 return error; 3223 } 3224 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3225 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3226 pathbuf_destroy(pb); 3227 return error; 3228 } 3229 vp = nd.ni_vp; 3230 pathbuf_destroy(pb); 3231 if (vp->v_type != VLNK) 3232 error = EINVAL; 3233 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3234 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3235 aiov.iov_base = buf; 3236 aiov.iov_len = count; 3237 auio.uio_iov = &aiov; 3238 auio.uio_iovcnt = 1; 3239 auio.uio_offset = 0; 3240 
auio.uio_rw = UIO_READ; 3241 KASSERT(l == curlwp); 3242 auio.uio_vmspace = l->l_proc->p_vmspace; 3243 auio.uio_resid = count; 3244 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3245 *retval = count - auio.uio_resid; 3246 } 3247 vput(vp); 3248 return (error); 3249 } 3250 3251 int 3252 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3253 register_t *retval) 3254 { 3255 /* { 3256 syscallarg(int) fd; 3257 syscallarg(const char *) path; 3258 syscallarg(char *) buf; 3259 syscallarg(size_t) bufsize; 3260 } */ 3261 3262 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3263 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3264 } 3265 3266 /* 3267 * Change flags of a file given a path name. 3268 */ 3269 /* ARGSUSED */ 3270 int 3271 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3272 { 3273 /* { 3274 syscallarg(const char *) path; 3275 syscallarg(u_long) flags; 3276 } */ 3277 struct vnode *vp; 3278 int error; 3279 3280 error = namei_simple_user(SCARG(uap, path), 3281 NSM_FOLLOW_TRYEMULROOT, &vp); 3282 if (error != 0) 3283 return (error); 3284 error = change_flags(vp, SCARG(uap, flags), l); 3285 vput(vp); 3286 return (error); 3287 } 3288 3289 /* 3290 * Change flags of a file given a file descriptor. 3291 */ 3292 /* ARGSUSED */ 3293 int 3294 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3295 { 3296 /* { 3297 syscallarg(int) fd; 3298 syscallarg(u_long) flags; 3299 } */ 3300 struct vnode *vp; 3301 file_t *fp; 3302 int error; 3303 3304 /* fd_getvnode() will use the descriptor for us */ 3305 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3306 return (error); 3307 vp = fp->f_vnode; 3308 error = change_flags(vp, SCARG(uap, flags), l); 3309 VOP_UNLOCK(vp); 3310 fd_putfile(SCARG(uap, fd)); 3311 return (error); 3312 } 3313 3314 /* 3315 * Change flags of a file given a path name; this version does 3316 * not follow links. 
3317 */ 3318 int 3319 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3320 { 3321 /* { 3322 syscallarg(const char *) path; 3323 syscallarg(u_long) flags; 3324 } */ 3325 struct vnode *vp; 3326 int error; 3327 3328 error = namei_simple_user(SCARG(uap, path), 3329 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3330 if (error != 0) 3331 return (error); 3332 error = change_flags(vp, SCARG(uap, flags), l); 3333 vput(vp); 3334 return (error); 3335 } 3336 3337 /* 3338 * Common routine to change flags of a file. 3339 */ 3340 int 3341 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3342 { 3343 struct vattr vattr; 3344 int error; 3345 3346 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3347 3348 vattr_null(&vattr); 3349 vattr.va_flags = flags; 3350 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3351 3352 return (error); 3353 } 3354 3355 /* 3356 * Change mode of a file given path name; this version follows links. 3357 */ 3358 /* ARGSUSED */ 3359 int 3360 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3361 { 3362 /* { 3363 syscallarg(const char *) path; 3364 syscallarg(int) mode; 3365 } */ 3366 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3367 SCARG(uap, mode), 0); 3368 } 3369 3370 int 3371 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3372 { 3373 int error; 3374 struct vnode *vp; 3375 namei_simple_flags_t ns_flag; 3376 3377 if (flags & AT_SYMLINK_NOFOLLOW) 3378 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3379 else 3380 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3381 3382 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3383 if (error != 0) 3384 return error; 3385 3386 error = change_mode(vp, mode, l); 3387 3388 vrele(vp); 3389 3390 return (error); 3391 } 3392 3393 /* 3394 * Change mode of a file given a file descriptor. 
3395 */ 3396 /* ARGSUSED */ 3397 int 3398 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3399 { 3400 /* { 3401 syscallarg(int) fd; 3402 syscallarg(int) mode; 3403 } */ 3404 file_t *fp; 3405 int error; 3406 3407 /* fd_getvnode() will use the descriptor for us */ 3408 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3409 return (error); 3410 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3411 fd_putfile(SCARG(uap, fd)); 3412 return (error); 3413 } 3414 3415 int 3416 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3417 register_t *retval) 3418 { 3419 /* { 3420 syscallarg(int) fd; 3421 syscallarg(const char *) path; 3422 syscallarg(int) mode; 3423 syscallarg(int) flag; 3424 } */ 3425 3426 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3427 SCARG(uap, mode), SCARG(uap, flag)); 3428 } 3429 3430 /* 3431 * Change mode of a file given path name; this version does not follow links. 3432 */ 3433 /* ARGSUSED */ 3434 int 3435 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3436 { 3437 /* { 3438 syscallarg(const char *) path; 3439 syscallarg(int) mode; 3440 } */ 3441 int error; 3442 struct vnode *vp; 3443 3444 error = namei_simple_user(SCARG(uap, path), 3445 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3446 if (error != 0) 3447 return (error); 3448 3449 error = change_mode(vp, SCARG(uap, mode), l); 3450 3451 vrele(vp); 3452 return (error); 3453 } 3454 3455 /* 3456 * Common routine to set mode given a vnode. 3457 */ 3458 static int 3459 change_mode(struct vnode *vp, int mode, struct lwp *l) 3460 { 3461 struct vattr vattr; 3462 int error; 3463 3464 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3465 vattr_null(&vattr); 3466 vattr.va_mode = mode & ALLPERMS; 3467 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3468 VOP_UNLOCK(vp); 3469 return (error); 3470 } 3471 3472 /* 3473 * Set ownership given a path name; this version follows links. 
3474 */ 3475 /* ARGSUSED */ 3476 int 3477 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3478 { 3479 /* { 3480 syscallarg(const char *) path; 3481 syscallarg(uid_t) uid; 3482 syscallarg(gid_t) gid; 3483 } */ 3484 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3485 SCARG(uap, gid), 0); 3486 } 3487 3488 int 3489 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3490 gid_t gid, int flags) 3491 { 3492 int error; 3493 struct vnode *vp; 3494 namei_simple_flags_t ns_flag; 3495 3496 if (flags & AT_SYMLINK_NOFOLLOW) 3497 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3498 else 3499 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3500 3501 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3502 if (error != 0) 3503 return error; 3504 3505 error = change_owner(vp, uid, gid, l, 0); 3506 3507 vrele(vp); 3508 3509 return (error); 3510 } 3511 3512 /* 3513 * Set ownership given a path name; this version follows links. 3514 * Provides POSIX semantics. 3515 */ 3516 /* ARGSUSED */ 3517 int 3518 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3519 { 3520 /* { 3521 syscallarg(const char *) path; 3522 syscallarg(uid_t) uid; 3523 syscallarg(gid_t) gid; 3524 } */ 3525 int error; 3526 struct vnode *vp; 3527 3528 error = namei_simple_user(SCARG(uap, path), 3529 NSM_FOLLOW_TRYEMULROOT, &vp); 3530 if (error != 0) 3531 return (error); 3532 3533 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3534 3535 vrele(vp); 3536 return (error); 3537 } 3538 3539 /* 3540 * Set ownership given a file descriptor. 
3541 */ 3542 /* ARGSUSED */ 3543 int 3544 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3545 { 3546 /* { 3547 syscallarg(int) fd; 3548 syscallarg(uid_t) uid; 3549 syscallarg(gid_t) gid; 3550 } */ 3551 int error; 3552 file_t *fp; 3553 3554 /* fd_getvnode() will use the descriptor for us */ 3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3556 return (error); 3557 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3558 l, 0); 3559 fd_putfile(SCARG(uap, fd)); 3560 return (error); 3561 } 3562 3563 int 3564 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3565 register_t *retval) 3566 { 3567 /* { 3568 syscallarg(int) fd; 3569 syscallarg(const char *) path; 3570 syscallarg(uid_t) owner; 3571 syscallarg(gid_t) group; 3572 syscallarg(int) flag; 3573 } */ 3574 3575 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3576 SCARG(uap, owner), SCARG(uap, group), 3577 SCARG(uap, flag)); 3578 } 3579 3580 /* 3581 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3582 */ 3583 /* ARGSUSED */ 3584 int 3585 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3586 { 3587 /* { 3588 syscallarg(int) fd; 3589 syscallarg(uid_t) uid; 3590 syscallarg(gid_t) gid; 3591 } */ 3592 int error; 3593 file_t *fp; 3594 3595 /* fd_getvnode() will use the descriptor for us */ 3596 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3597 return (error); 3598 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3599 l, 1); 3600 fd_putfile(SCARG(uap, fd)); 3601 return (error); 3602 } 3603 3604 /* 3605 * Set ownership given a path name; this version does not follow links. 
3606 */ 3607 /* ARGSUSED */ 3608 int 3609 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3610 { 3611 /* { 3612 syscallarg(const char *) path; 3613 syscallarg(uid_t) uid; 3614 syscallarg(gid_t) gid; 3615 } */ 3616 int error; 3617 struct vnode *vp; 3618 3619 error = namei_simple_user(SCARG(uap, path), 3620 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3621 if (error != 0) 3622 return (error); 3623 3624 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3625 3626 vrele(vp); 3627 return (error); 3628 } 3629 3630 /* 3631 * Set ownership given a path name; this version does not follow links. 3632 * Provides POSIX/XPG semantics. 3633 */ 3634 /* ARGSUSED */ 3635 int 3636 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3637 { 3638 /* { 3639 syscallarg(const char *) path; 3640 syscallarg(uid_t) uid; 3641 syscallarg(gid_t) gid; 3642 } */ 3643 int error; 3644 struct vnode *vp; 3645 3646 error = namei_simple_user(SCARG(uap, path), 3647 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3648 if (error != 0) 3649 return (error); 3650 3651 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3652 3653 vrele(vp); 3654 return (error); 3655 } 3656 3657 /* 3658 * Common routine to set ownership given a vnode. 3659 */ 3660 static int 3661 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3662 int posix_semantics) 3663 { 3664 struct vattr vattr; 3665 mode_t newmode; 3666 int error; 3667 3668 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3669 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3670 goto out; 3671 3672 #define CHANGED(x) ((int)(x) != -1) 3673 newmode = vattr.va_mode; 3674 if (posix_semantics) { 3675 /* 3676 * POSIX/XPG semantics: if the caller is not the super-user, 3677 * clear set-user-id and set-group-id bits. 
Both POSIX and 3678 * the XPG consider the behaviour for calls by the super-user 3679 * implementation-defined; we leave the set-user-id and set- 3680 * group-id settings intact in that case. 3681 */ 3682 if (vattr.va_mode & S_ISUID) { 3683 if (kauth_authorize_vnode(l->l_cred, 3684 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3685 newmode &= ~S_ISUID; 3686 } 3687 if (vattr.va_mode & S_ISGID) { 3688 if (kauth_authorize_vnode(l->l_cred, 3689 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3690 newmode &= ~S_ISGID; 3691 } 3692 } else { 3693 /* 3694 * NetBSD semantics: when changing owner and/or group, 3695 * clear the respective bit(s). 3696 */ 3697 if (CHANGED(uid)) 3698 newmode &= ~S_ISUID; 3699 if (CHANGED(gid)) 3700 newmode &= ~S_ISGID; 3701 } 3702 /* Update va_mode iff altered. */ 3703 if (vattr.va_mode == newmode) 3704 newmode = VNOVAL; 3705 3706 vattr_null(&vattr); 3707 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3708 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3709 vattr.va_mode = newmode; 3710 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3711 #undef CHANGED 3712 3713 out: 3714 VOP_UNLOCK(vp); 3715 return (error); 3716 } 3717 3718 /* 3719 * Set the access and modification times given a path name; this 3720 * version follows links. 3721 */ 3722 /* ARGSUSED */ 3723 int 3724 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3725 register_t *retval) 3726 { 3727 /* { 3728 syscallarg(const char *) path; 3729 syscallarg(const struct timeval *) tptr; 3730 } */ 3731 3732 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3733 SCARG(uap, tptr), UIO_USERSPACE); 3734 } 3735 3736 /* 3737 * Set the access and modification times given a file descriptor. 
3738 */ 3739 /* ARGSUSED */ 3740 int 3741 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3742 register_t *retval) 3743 { 3744 /* { 3745 syscallarg(int) fd; 3746 syscallarg(const struct timeval *) tptr; 3747 } */ 3748 int error; 3749 file_t *fp; 3750 3751 /* fd_getvnode() will use the descriptor for us */ 3752 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3753 return (error); 3754 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3755 UIO_USERSPACE); 3756 fd_putfile(SCARG(uap, fd)); 3757 return (error); 3758 } 3759 3760 int 3761 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3762 register_t *retval) 3763 { 3764 /* { 3765 syscallarg(int) fd; 3766 syscallarg(const struct timespec *) tptr; 3767 } */ 3768 int error; 3769 file_t *fp; 3770 3771 /* fd_getvnode() will use the descriptor for us */ 3772 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3773 return (error); 3774 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3775 SCARG(uap, tptr), UIO_USERSPACE); 3776 fd_putfile(SCARG(uap, fd)); 3777 return (error); 3778 } 3779 3780 /* 3781 * Set the access and modification times given a path name; this 3782 * version does not follow links. 3783 */ 3784 int 3785 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3786 register_t *retval) 3787 { 3788 /* { 3789 syscallarg(const char *) path; 3790 syscallarg(const struct timeval *) tptr; 3791 } */ 3792 3793 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3794 SCARG(uap, tptr), UIO_USERSPACE); 3795 } 3796 3797 int 3798 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3799 register_t *retval) 3800 { 3801 /* { 3802 syscallarg(int) fd; 3803 syscallarg(const char *) path; 3804 syscallarg(const struct timespec *) tptr; 3805 syscallarg(int) flag; 3806 } */ 3807 int follow; 3808 const struct timespec *tptr; 3809 int error; 3810 3811 tptr = SCARG(uap, tptr); 3812 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 3813 3814 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3815 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3816 3817 return error; 3818 } 3819 3820 /* 3821 * Common routine to set access and modification times given a vnode. 3822 */ 3823 int 3824 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3825 const struct timespec *tptr, enum uio_seg seg) 3826 { 3827 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3828 } 3829 3830 int 3831 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3832 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3833 { 3834 struct vattr vattr; 3835 int error, dorele = 0; 3836 namei_simple_flags_t sflags; 3837 bool vanull, setbirthtime; 3838 struct timespec ts[2]; 3839 3840 KASSERT(l != NULL || fdat == AT_FDCWD); 3841 3842 /* 3843 * I have checked all callers and they pass either FOLLOW, 3844 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3845 * is 0. More to the point, they don't pass anything else. 3846 * Let's keep it that way at least until the namei interfaces 3847 * are fully sanitized. 3848 */ 3849 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3850 sflags = (flag == FOLLOW) ? 
3851 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3852 3853 if (tptr == NULL) { 3854 vanull = true; 3855 nanotime(&ts[0]); 3856 ts[1] = ts[0]; 3857 } else { 3858 vanull = false; 3859 if (seg != UIO_SYSSPACE) { 3860 error = copyin(tptr, ts, sizeof (ts)); 3861 if (error != 0) 3862 return error; 3863 } else { 3864 ts[0] = tptr[0]; 3865 ts[1] = tptr[1]; 3866 } 3867 } 3868 3869 if (ts[0].tv_nsec == UTIME_NOW) { 3870 nanotime(&ts[0]); 3871 if (ts[1].tv_nsec == UTIME_NOW) { 3872 vanull = true; 3873 ts[1] = ts[0]; 3874 } 3875 } else if (ts[1].tv_nsec == UTIME_NOW) 3876 nanotime(&ts[1]); 3877 3878 if (vp == NULL) { 3879 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3880 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3881 if (error != 0) 3882 return error; 3883 dorele = 1; 3884 } 3885 3886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3887 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3888 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3889 vattr_null(&vattr); 3890 3891 if (ts[0].tv_nsec != UTIME_OMIT) 3892 vattr.va_atime = ts[0]; 3893 3894 if (ts[1].tv_nsec != UTIME_OMIT) { 3895 vattr.va_mtime = ts[1]; 3896 if (setbirthtime) 3897 vattr.va_birthtime = ts[1]; 3898 } 3899 3900 if (vanull) 3901 vattr.va_vaflags |= VA_UTIMES_NULL; 3902 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3903 VOP_UNLOCK(vp); 3904 3905 if (dorele != 0) 3906 vrele(vp); 3907 3908 return error; 3909 } 3910 3911 int 3912 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3913 const struct timeval *tptr, enum uio_seg seg) 3914 { 3915 struct timespec ts[2]; 3916 struct timespec *tsptr = NULL; 3917 int error; 3918 3919 if (tptr != NULL) { 3920 struct timeval tv[2]; 3921 3922 if (seg != UIO_SYSSPACE) { 3923 error = copyin(tptr, tv, sizeof(tv)); 3924 if (error != 0) 3925 return error; 3926 tptr = tv; 3927 } 3928 3929 if ((tptr[0].tv_usec == UTIME_NOW) || 3930 (tptr[0].tv_usec == UTIME_OMIT)) 3931 ts[0].tv_nsec = tptr[0].tv_usec; 3932 else { 3933 if 
(tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 3934 return EINVAL; 3935 3936 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3937 } 3938 3939 if ((tptr[1].tv_usec == UTIME_NOW) || 3940 (tptr[1].tv_usec == UTIME_OMIT)) 3941 ts[1].tv_nsec = tptr[1].tv_usec; 3942 else { 3943 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 3944 return EINVAL; 3945 3946 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3947 } 3948 3949 tsptr = &ts[0]; 3950 } 3951 3952 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3953 } 3954 3955 /* 3956 * Truncate a file given its path name. 3957 */ 3958 /* ARGSUSED */ 3959 int 3960 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3961 { 3962 /* { 3963 syscallarg(const char *) path; 3964 syscallarg(int) pad; 3965 syscallarg(off_t) length; 3966 } */ 3967 struct vnode *vp; 3968 struct vattr vattr; 3969 int error; 3970 3971 if (SCARG(uap, length) < 0) 3972 return EINVAL; 3973 3974 error = namei_simple_user(SCARG(uap, path), 3975 NSM_FOLLOW_TRYEMULROOT, &vp); 3976 if (error != 0) 3977 return (error); 3978 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3979 if (vp->v_type == VDIR) 3980 error = EISDIR; 3981 else if ((error = vn_writechk(vp)) == 0 && 3982 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3983 vattr_null(&vattr); 3984 vattr.va_size = SCARG(uap, length); 3985 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3986 } 3987 vput(vp); 3988 return (error); 3989 } 3990 3991 /* 3992 * Truncate a file given a file descriptor. 
3993 */ 3994 /* ARGSUSED */ 3995 int 3996 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3997 { 3998 /* { 3999 syscallarg(int) fd; 4000 syscallarg(int) pad; 4001 syscallarg(off_t) length; 4002 } */ 4003 struct vattr vattr; 4004 struct vnode *vp; 4005 file_t *fp; 4006 int error; 4007 4008 if (SCARG(uap, length) < 0) 4009 return EINVAL; 4010 4011 /* fd_getvnode() will use the descriptor for us */ 4012 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4013 return (error); 4014 if ((fp->f_flag & FWRITE) == 0) { 4015 error = EINVAL; 4016 goto out; 4017 } 4018 vp = fp->f_vnode; 4019 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4020 if (vp->v_type == VDIR) 4021 error = EISDIR; 4022 else if ((error = vn_writechk(vp)) == 0) { 4023 vattr_null(&vattr); 4024 vattr.va_size = SCARG(uap, length); 4025 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 4026 } 4027 VOP_UNLOCK(vp); 4028 out: 4029 fd_putfile(SCARG(uap, fd)); 4030 return (error); 4031 } 4032 4033 /* 4034 * Sync an open file. 4035 */ 4036 /* ARGSUSED */ 4037 int 4038 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4039 { 4040 /* { 4041 syscallarg(int) fd; 4042 } */ 4043 struct vnode *vp; 4044 file_t *fp; 4045 int error; 4046 4047 /* fd_getvnode() will use the descriptor for us */ 4048 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4049 return (error); 4050 vp = fp->f_vnode; 4051 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4052 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4053 VOP_UNLOCK(vp); 4054 fd_putfile(SCARG(uap, fd)); 4055 return (error); 4056 } 4057 4058 /* 4059 * Sync a range of file data. API modeled after that found in AIX. 4060 * 4061 * FDATASYNC indicates that we need only save enough metadata to be able 4062 * to re-read the written data. 
4063 */ 4064 /* ARGSUSED */ 4065 int 4066 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4067 { 4068 /* { 4069 syscallarg(int) fd; 4070 syscallarg(int) flags; 4071 syscallarg(off_t) start; 4072 syscallarg(off_t) length; 4073 } */ 4074 struct vnode *vp; 4075 file_t *fp; 4076 int flags, nflags; 4077 off_t s, e, len; 4078 int error; 4079 4080 /* fd_getvnode() will use the descriptor for us */ 4081 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4082 return (error); 4083 4084 if ((fp->f_flag & FWRITE) == 0) { 4085 error = EBADF; 4086 goto out; 4087 } 4088 4089 flags = SCARG(uap, flags); 4090 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4091 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4092 error = EINVAL; 4093 goto out; 4094 } 4095 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4096 if (flags & FDATASYNC) 4097 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4098 else 4099 nflags = FSYNC_WAIT; 4100 if (flags & FDISKSYNC) 4101 nflags |= FSYNC_CACHE; 4102 4103 len = SCARG(uap, length); 4104 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4105 if (len) { 4106 s = SCARG(uap, start); 4107 e = s + len; 4108 if (e < s) { 4109 error = EINVAL; 4110 goto out; 4111 } 4112 } else { 4113 e = 0; 4114 s = 0; 4115 } 4116 4117 vp = fp->f_vnode; 4118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4119 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4120 VOP_UNLOCK(vp); 4121 out: 4122 fd_putfile(SCARG(uap, fd)); 4123 return (error); 4124 } 4125 4126 /* 4127 * Sync the data of an open file. 
4128 */ 4129 /* ARGSUSED */ 4130 int 4131 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4132 { 4133 /* { 4134 syscallarg(int) fd; 4135 } */ 4136 struct vnode *vp; 4137 file_t *fp; 4138 int error; 4139 4140 /* fd_getvnode() will use the descriptor for us */ 4141 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4142 return (error); 4143 vp = fp->f_vnode; 4144 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4145 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4146 VOP_UNLOCK(vp); 4147 fd_putfile(SCARG(uap, fd)); 4148 return (error); 4149 } 4150 4151 /* 4152 * Rename files, (standard) BSD semantics frontend. 4153 */ 4154 /* ARGSUSED */ 4155 int 4156 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4157 { 4158 /* { 4159 syscallarg(const char *) from; 4160 syscallarg(const char *) to; 4161 } */ 4162 4163 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4164 SCARG(uap, to), UIO_USERSPACE, 0)); 4165 } 4166 4167 int 4168 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4169 register_t *retval) 4170 { 4171 /* { 4172 syscallarg(int) fromfd; 4173 syscallarg(const char *) from; 4174 syscallarg(int) tofd; 4175 syscallarg(const char *) to; 4176 } */ 4177 4178 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4179 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4180 } 4181 4182 /* 4183 * Rename files, POSIX semantics frontend. 4184 */ 4185 /* ARGSUSED */ 4186 int 4187 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4188 { 4189 /* { 4190 syscallarg(const char *) from; 4191 syscallarg(const char *) to; 4192 } */ 4193 4194 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4195 SCARG(uap, to), UIO_USERSPACE, 1)); 4196 } 4197 4198 /* 4199 * Rename files. Source and destination must either both be directories, 4200 * or both not be directories. 
If target is a directory, it must be empty.
 * If `from' and `to' refer to the same object, the value of the `retain'
 * argument is used to determine whether `from' will be
 *
 * (retain == 0)	deleted unless `from' and `to' refer to the same
 *		object in the file system's name space (BSD).
 * (retain == 1)	always retained (POSIX).
 *
 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
 */
int
do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
{
	/* No lwp is passed; both names resolve relative to AT_FDCWD. */
	return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
}

/*
 * Worker for all rename entry points.
 *
 * l		calling lwp; may be NULL only if both descriptors are
 *		AT_FDCWD (asserted below).
 * fromfd/tofd	descriptors the respective names are looked up against.
 * from/to	path names, living in the address space given by SEG.
 * retain	see the comment above do_sys_rename().
 *
 * Returns 0 or an errno value.
 *
 * Error unwinding, from the innermost label outwards:
 *	abort3	unlock tvp (if any and distinct from tdvp)
 *	abort2	unlock tdvp, leave the vfs rename lock
 *	abort1	abort the `to' operation, release tdvp and tvp
 *	abort0	abort the `from' operation, release fdvp and fvp,
 *		end the fstrans bracket
 *	out2	destroy the `to' pathbuf
 *	out1	destroy the `from' pathbuf
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 *
	 * NOTE(review): fdvp is dereferenced for v_mount before the
	 * KASSERT(fdvp != NULL) below gets a chance to fire.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	mp = fdvp->v_mount;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));
	/*
	 * Bracket the operation with fstrans_start()/fstrans_done().
	 *
	 * Inside the bracket this file system cannot be unmounted so
	 * a vnode on this file system cannot change its v_mount.
	 * A vnode on another file system may still change to dead mount.
	 */
	fstrans_start(mp);

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR?  */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
		error = EISDIR;
		goto abort1;
	}
	if ((tnd.ni_cnd.cn_namelen == 2) &&
	    (tnd.ni_cnd.cn_nameptr[0] == '.') &&
	    (tnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Although we don't hold
	 * any vnode locks, the v_mount on fdvp file system are stable.
	 *
	 * Unmounting another file system at an inopportune moment may
	 * cause tdvp to disappear and change its v_mount to dead.
	 *
	 * So in either case different v_mount means cross-device rename.
	 */
	KASSERT(mp != NULL);
	tmp = tdvp->v_mount;

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * With retain set, a rename of an object onto itself succeeds
	 * without doing anything; without it, that only holds when the
	 * directory and the final component name are identical too.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/* NUL-terminated copies of the two final component names. */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	fstrans_done(mp);
	goto out2;

abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
	fstrans_done(mp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}

/*
 * Make a directory file.
4538 */ 4539 /* ARGSUSED */ 4540 int 4541 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4542 { 4543 /* { 4544 syscallarg(const char *) path; 4545 syscallarg(int) mode; 4546 } */ 4547 4548 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4549 SCARG(uap, mode), UIO_USERSPACE); 4550 } 4551 4552 int 4553 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4554 register_t *retval) 4555 { 4556 /* { 4557 syscallarg(int) fd; 4558 syscallarg(const char *) path; 4559 syscallarg(int) mode; 4560 } */ 4561 4562 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4563 SCARG(uap, mode), UIO_USERSPACE); 4564 } 4565 4566 4567 int 4568 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4569 { 4570 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4571 } 4572 4573 static int 4574 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4575 enum uio_seg seg) 4576 { 4577 struct proc *p = curlwp->l_proc; 4578 struct vnode *vp; 4579 struct vattr vattr; 4580 int error; 4581 struct pathbuf *pb; 4582 struct nameidata nd; 4583 4584 KASSERT(l != NULL || fdat == AT_FDCWD); 4585 4586 /* XXX bollocks, should pass in a pathbuf */ 4587 error = pathbuf_maybe_copyin(path, seg, &pb); 4588 if (error) { 4589 return error; 4590 } 4591 4592 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4593 4594 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4595 pathbuf_destroy(pb); 4596 return (error); 4597 } 4598 vp = nd.ni_vp; 4599 if (vp != NULL) { 4600 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4601 if (nd.ni_dvp == vp) 4602 vrele(nd.ni_dvp); 4603 else 4604 vput(nd.ni_dvp); 4605 vrele(vp); 4606 pathbuf_destroy(pb); 4607 return (EEXIST); 4608 } 4609 vattr_null(&vattr); 4610 vattr.va_type = VDIR; 4611 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 4612 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4613 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4614 if (!error) 4615 vrele(nd.ni_vp); 4616 vput(nd.ni_dvp); 4617 pathbuf_destroy(pb); 4618 return (error); 4619 } 4620 4621 /* 4622 * Remove a directory file. 4623 */ 4624 /* ARGSUSED */ 4625 int 4626 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4627 { 4628 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4629 AT_REMOVEDIR, UIO_USERSPACE); 4630 } 4631 4632 /* 4633 * Read a block of directory entries in a file system independent format. 4634 */ 4635 int 4636 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4637 { 4638 /* { 4639 syscallarg(int) fd; 4640 syscallarg(char *) buf; 4641 syscallarg(size_t) count; 4642 } */ 4643 file_t *fp; 4644 int error, done; 4645 4646 /* fd_getvnode() will use the descriptor for us */ 4647 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4648 return (error); 4649 if ((fp->f_flag & FREAD) == 0) { 4650 error = EBADF; 4651 goto out; 4652 } 4653 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4654 SCARG(uap, count), &done, l, 0, 0); 4655 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4656 *retval = done; 4657 out: 4658 fd_putfile(SCARG(uap, fd)); 4659 return (error); 4660 } 4661 4662 /* 4663 * Set the mode mask for creation of filesystem nodes. 4664 */ 4665 int 4666 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4667 { 4668 /* { 4669 syscallarg(mode_t) newmask; 4670 } */ 4671 4672 /* 4673 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4674 * serialization with those reads is required. It's important to 4675 * return a coherent answer for the caller of umask() though, and 4676 * the atomic operation accomplishes that. 
4677 */ 4678 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4679 SCARG(uap, newmask) & ALLPERMS); 4680 4681 return (0); 4682 } 4683 4684 int 4685 dorevoke(struct vnode *vp, kauth_cred_t cred) 4686 { 4687 struct vattr vattr; 4688 int error, fs_decision; 4689 4690 vn_lock(vp, LK_SHARED | LK_RETRY); 4691 error = VOP_GETATTR(vp, &vattr, cred); 4692 VOP_UNLOCK(vp); 4693 if (error != 0) 4694 return error; 4695 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4696 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4697 fs_decision); 4698 if (!error) 4699 VOP_REVOKE(vp, REVOKEALL); 4700 return (error); 4701 } 4702 4703 /* 4704 * Void all references to file by ripping underlying filesystem 4705 * away from vnode. 4706 */ 4707 /* ARGSUSED */ 4708 int 4709 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4710 { 4711 /* { 4712 syscallarg(const char *) path; 4713 } */ 4714 struct vnode *vp; 4715 int error; 4716 4717 error = namei_simple_user(SCARG(uap, path), 4718 NSM_FOLLOW_TRYEMULROOT, &vp); 4719 if (error != 0) 4720 return (error); 4721 error = dorevoke(vp, l->l_cred); 4722 vrele(vp); 4723 return (error); 4724 } 4725 4726 /* 4727 * Allocate backing store for a file, filling a hole without having to 4728 * explicitly write anything out. 
4729 */ 4730 /* ARGSUSED */ 4731 int 4732 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4733 register_t *retval) 4734 { 4735 /* { 4736 syscallarg(int) fd; 4737 syscallarg(off_t) pos; 4738 syscallarg(off_t) len; 4739 } */ 4740 int fd; 4741 off_t pos, len; 4742 struct file *fp; 4743 struct vnode *vp; 4744 int error; 4745 4746 fd = SCARG(uap, fd); 4747 pos = SCARG(uap, pos); 4748 len = SCARG(uap, len); 4749 4750 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4751 *retval = EINVAL; 4752 return 0; 4753 } 4754 4755 error = fd_getvnode(fd, &fp); 4756 if (error) { 4757 *retval = error; 4758 return 0; 4759 } 4760 if ((fp->f_flag & FWRITE) == 0) { 4761 error = EBADF; 4762 goto fail; 4763 } 4764 vp = fp->f_vnode; 4765 4766 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4767 if (vp->v_type == VDIR) { 4768 error = EISDIR; 4769 } else { 4770 error = VOP_FALLOCATE(vp, pos, len); 4771 } 4772 VOP_UNLOCK(vp); 4773 4774 fail: 4775 fd_putfile(fd); 4776 *retval = error; 4777 return 0; 4778 } 4779 4780 /* 4781 * Deallocate backing store for a file, creating a hole. Also used for 4782 * invoking TRIM on disks. 
4783 */ 4784 /* ARGSUSED */ 4785 int 4786 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4787 register_t *retval) 4788 { 4789 /* { 4790 syscallarg(int) fd; 4791 syscallarg(off_t) pos; 4792 syscallarg(off_t) len; 4793 } */ 4794 int fd; 4795 off_t pos, len; 4796 struct file *fp; 4797 struct vnode *vp; 4798 int error; 4799 4800 fd = SCARG(uap, fd); 4801 pos = SCARG(uap, pos); 4802 len = SCARG(uap, len); 4803 4804 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4805 return EINVAL; 4806 } 4807 4808 error = fd_getvnode(fd, &fp); 4809 if (error) { 4810 return error; 4811 } 4812 if ((fp->f_flag & FWRITE) == 0) { 4813 error = EBADF; 4814 goto fail; 4815 } 4816 vp = fp->f_vnode; 4817 4818 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4819 if (vp->v_type == VDIR) { 4820 error = EISDIR; 4821 } else { 4822 error = VOP_FDISCARD(vp, pos, len); 4823 } 4824 VOP_UNLOCK(vp); 4825 4826 fail: 4827 fd_putfile(fd); 4828 return error; 4829 } 4830