1 /* $NetBSD: vfs_syscalls.c,v 1.553 2021/09/26 21:29:38 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.553 2021/09/26 21:29:38 thorpej Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct 
lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 /* 169 * Filter event method for EVFILT_FS. 
170 */ 171 static struct klist fs_klist = SLIST_HEAD_INITIALIZER(&fs_klist); 172 kmutex_t fs_klist_lock; 173 174 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 175 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 176 177 static int 178 filt_fsattach(struct knote *kn) 179 { 180 mutex_enter(&fs_klist_lock); 181 kn->kn_flags |= EV_CLEAR; 182 SLIST_INSERT_HEAD(&fs_klist, kn, kn_selnext); 183 mutex_exit(&fs_klist_lock); 184 185 return 0; 186 } 187 188 static void 189 filt_fsdetach(struct knote *kn) 190 { 191 mutex_enter(&fs_klist_lock); 192 SLIST_REMOVE(&fs_klist, kn, knote, kn_selnext); 193 mutex_exit(&fs_klist_lock); 194 } 195 196 static int 197 filt_fs(struct knote *kn, long hint) 198 { 199 int rv; 200 201 if (hint & NOTE_SUBMIT) { 202 KASSERT(mutex_owned(&fs_klist_lock)); 203 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 204 } else { 205 mutex_enter(&fs_klist_lock); 206 } 207 208 rv = (kn->kn_fflags != 0); 209 210 if ((hint & NOTE_SUBMIT) == 0) { 211 mutex_exit(&fs_klist_lock); 212 } 213 214 return rv; 215 } 216 217 /* referenced in kern_event.c */ 218 const struct filterops fs_filtops = { 219 .f_flags = FILTEROP_MPSAFE, 220 .f_attach = filt_fsattach, 221 .f_detach = filt_fsdetach, 222 .f_event = filt_fs, 223 }; 224 225 static int 226 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 227 { 228 file_t *dfp; 229 int error; 230 231 if (fdat != AT_FDCWD) { 232 if ((error = fd_getvnode(fdat, &dfp)) != 0) 233 goto out; 234 235 NDAT(ndp, dfp->f_vnode); 236 } 237 238 error = namei(ndp); 239 240 if (fdat != AT_FDCWD) 241 fd_putfile(fdat); 242 out: 243 return error; 244 } 245 246 static int 247 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 248 namei_simple_flags_t sflags, struct vnode **vp_ret) 249 { 250 file_t *dfp; 251 struct vnode *dvp; 252 int error; 253 254 if (fdat != AT_FDCWD) { 255 if ((error = fd_getvnode(fdat, &dfp)) != 0) 256 goto out; 257 258 dvp = dfp->f_vnode; 259 } else { 260 dvp = NULL; 261 } 262 263 error = nameiat_simple_user(dvp, path, sflags, 
vp_ret); 264 265 if (fdat != AT_FDCWD) 266 fd_putfile(fdat); 267 out: 268 return error; 269 } 270 271 static int 272 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 273 { 274 int error; 275 276 fp->f_flag = flags & FMASK; 277 fp->f_type = DTYPE_VNODE; 278 fp->f_ops = &vnops; 279 fp->f_vnode = vp; 280 281 if (flags & (O_EXLOCK | O_SHLOCK)) { 282 struct flock lf; 283 int type; 284 285 lf.l_whence = SEEK_SET; 286 lf.l_start = 0; 287 lf.l_len = 0; 288 if (flags & O_EXLOCK) 289 lf.l_type = F_WRLCK; 290 else 291 lf.l_type = F_RDLCK; 292 type = F_FLOCK; 293 if ((flags & FNONBLOCK) == 0) 294 type |= F_WAIT; 295 VOP_UNLOCK(vp); 296 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 297 if (error) { 298 (void) vn_close(vp, fp->f_flag, fp->f_cred); 299 fd_abort(l->l_proc, fp, indx); 300 return error; 301 } 302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 303 atomic_or_uint(&fp->f_flag, FHASLOCK); 304 } 305 if (flags & O_CLOEXEC) 306 fd_set_exclose(l, indx, true); 307 return 0; 308 } 309 310 static int 311 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 312 void *data, size_t *data_len) 313 { 314 struct mount *mp; 315 int error = 0, saved_flags; 316 317 mp = vp->v_mount; 318 saved_flags = mp->mnt_flag; 319 320 /* We can operate only on VV_ROOT nodes. */ 321 if ((vp->v_vflag & VV_ROOT) == 0) { 322 error = EINVAL; 323 goto out; 324 } 325 326 /* 327 * We only allow the filesystem to be reloaded if it 328 * is currently mounted read-only. Additionally, we 329 * prevent read-write to read-only downgrades. 330 */ 331 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 332 (mp->mnt_flag & MNT_RDONLY) == 0 && 333 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 334 error = EOPNOTSUPP; /* Needs translation */ 335 goto out; 336 } 337 338 /* 339 * Enabling MNT_UNION requires a covered mountpoint and 340 * must not happen on the root mount. 
341 */ 342 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 343 error = EOPNOTSUPP; 344 goto out; 345 } 346 347 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 348 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 349 if (error) 350 goto out; 351 352 error = vfs_suspend(mp, 0); 353 if (error) 354 goto out; 355 356 mutex_enter(mp->mnt_updating); 357 358 mp->mnt_flag &= ~MNT_OP_FLAGS; 359 mp->mnt_flag |= flags & MNT_OP_FLAGS; 360 361 /* 362 * Set the mount level flags. 363 */ 364 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 365 if ((flags & MNT_RDONLY)) 366 mp->mnt_iflag |= IMNT_WANTRDONLY; 367 else 368 mp->mnt_iflag |= IMNT_WANTRDWR; 369 } 370 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 371 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 372 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 373 mp->mnt_flag &= ~MNT_RDONLY; 374 375 error = VFS_MOUNT(mp, path, data, data_len); 376 377 if (error && data != NULL) { 378 int error2; 379 380 /* 381 * Update failed; let's try and see if it was an 382 * export request. For compat with 3.0 and earlier. 383 */ 384 error2 = vfs_hooks_reexport(mp, path, data); 385 386 /* 387 * Only update error code if the export request was 388 * understood but some problem occurred while 389 * processing it. 
390 */ 391 if (error2 != EJUSTRETURN) 392 error = error2; 393 } 394 395 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 396 mp->mnt_flag |= MNT_RDONLY; 397 if (error) 398 mp->mnt_flag = saved_flags; 399 mp->mnt_flag &= ~MNT_OP_FLAGS; 400 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 401 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 402 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 403 vfs_syncer_add_to_worklist(mp); 404 } else { 405 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 406 vfs_syncer_remove_from_worklist(mp); 407 } 408 mutex_exit(mp->mnt_updating); 409 vfs_resume(mp); 410 411 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 412 (flags & MNT_EXTATTR)) { 413 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 414 NULL, 0, NULL) != 0) { 415 printf("%s: failed to start extattr, error = %d", 416 mp->mnt_stat.f_mntonname, error); 417 mp->mnt_flag &= ~MNT_EXTATTR; 418 } 419 } 420 421 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 422 !(flags & MNT_EXTATTR)) { 423 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 424 NULL, 0, NULL) != 0) { 425 printf("%s: failed to stop extattr, error = %d", 426 mp->mnt_stat.f_mntonname, error); 427 mp->mnt_flag |= MNT_RDONLY; 428 } 429 } 430 out: 431 return (error); 432 } 433 434 static int 435 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 436 struct vfsops **vfsops) 437 { 438 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 439 int error; 440 441 if (type_seg == UIO_USERSPACE) { 442 /* Copy file-system type from userspace. */ 443 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 444 } else { 445 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 446 KASSERT(error == 0); 447 } 448 449 if (error) { 450 /* 451 * Historically, filesystem types were identified by numbers. 452 * If we get an integer for the filesystem type instead of a 453 * string, we check to see if it matches one of the historic 454 * filesystem types. 
455 */ 456 u_long fsindex = (u_long)fstype; 457 if (fsindex >= nmountcompatnames || 458 mountcompatnames[fsindex] == NULL) 459 return ENODEV; 460 strlcpy(fstypename, mountcompatnames[fsindex], 461 sizeof(fstypename)); 462 } 463 464 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 465 if (strcmp(fstypename, "ufs") == 0) 466 fstypename[0] = 'f'; 467 468 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 469 return 0; 470 471 /* If we can autoload a vfs module, try again */ 472 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 473 474 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 475 return 0; 476 477 return ENODEV; 478 } 479 480 static int 481 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 482 void *data, size_t *data_len) 483 { 484 struct mount *mp; 485 int error; 486 487 /* If MNT_GETARGS is specified, it should be the only flag. */ 488 if (flags & ~MNT_GETARGS) 489 return EINVAL; 490 491 mp = vp->v_mount; 492 493 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 494 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 495 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 496 if (error) 497 return error; 498 499 if ((vp->v_vflag & VV_ROOT) == 0) 500 return EINVAL; 501 502 if (vfs_busy(mp)) 503 return EPERM; 504 505 mutex_enter(mp->mnt_updating); 506 mp->mnt_flag &= ~MNT_OP_FLAGS; 507 mp->mnt_flag |= MNT_GETARGS; 508 error = VFS_MOUNT(mp, path, data, data_len); 509 mp->mnt_flag &= ~MNT_OP_FLAGS; 510 mutex_exit(mp->mnt_updating); 511 512 vfs_unbusy(mp); 513 return (error); 514 } 515 516 int 517 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 518 { 519 /* { 520 syscallarg(const char *) type; 521 syscallarg(const char *) path; 522 syscallarg(int) flags; 523 syscallarg(void *) data; 524 syscallarg(size_t) data_len; 525 } */ 526 527 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 528 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 529 SCARG(uap, data_len), retval); 530 } 531 532 int 533 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 534 const char *path, int flags, void *data, enum uio_seg data_seg, 535 size_t data_len, register_t *retval) 536 { 537 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 538 struct vnode *vp; 539 void *data_buf = data; 540 bool vfsopsrele = false; 541 size_t alloc_sz = 0; 542 int error; 543 544 /* 545 * Get vnode to be covered 546 */ 547 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 548 if (error != 0) { 549 vp = NULL; 550 goto done; 551 } 552 553 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 554 vfsops = vp->v_mount->mnt_op; 555 } else { 556 /* 'type' is userspace */ 557 error = mount_get_vfsops(type, type_seg, &vfsops); 558 if (error != 0) 559 goto done; 560 vfsopsrele = true; 561 } 562 563 /* 564 * We allow data to be NULL, even for userspace. Some fs's don't need 565 * it. The others will handle NULL. 
566 */ 567 if (data != NULL && data_seg == UIO_USERSPACE) { 568 if (data_len == 0) { 569 /* No length supplied, use default for filesystem */ 570 data_len = vfsops->vfs_min_mount_data; 571 572 /* 573 * Hopefully a longer buffer won't make copyin() fail. 574 * For compatibility with 3.0 and earlier. 575 */ 576 if (flags & MNT_UPDATE 577 && data_len < sizeof (struct mnt_export_args30)) 578 data_len = sizeof (struct mnt_export_args30); 579 } 580 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 581 error = EINVAL; 582 goto done; 583 } 584 alloc_sz = data_len; 585 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 586 587 /* NFS needs the buffer even for mnt_getargs .... */ 588 error = copyin(data, data_buf, data_len); 589 if (error != 0) 590 goto done; 591 } 592 593 if (flags & MNT_GETARGS) { 594 if (data_len == 0) { 595 error = EINVAL; 596 goto done; 597 } 598 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 599 if (error != 0) 600 goto done; 601 if (data_seg == UIO_USERSPACE) 602 error = copyout(data_buf, data, data_len); 603 *retval = data_len; 604 } else if (flags & MNT_UPDATE) { 605 error = mount_update(l, vp, path, flags, data_buf, &data_len); 606 } else { 607 /* Locking is handled internally in mount_domount(). */ 608 KASSERT(vfsopsrele == true); 609 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 610 &data_len); 611 vfsopsrele = false; 612 } 613 if (!error) { 614 mutex_enter(&fs_klist_lock); 615 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 616 mutex_exit(&fs_klist_lock); 617 } 618 619 done: 620 if (vfsopsrele) 621 vfs_delref(vfsops); 622 if (vp != NULL) { 623 vrele(vp); 624 } 625 if (data_buf != data) 626 kmem_free(data_buf, alloc_sz); 627 return (error); 628 } 629 630 /* 631 * Unmount a file system. 632 * 633 * Note: unmount takes a path to the vnode mounted on as argument, 634 * not special file (as before). 
635 */ 636 /* ARGSUSED */ 637 int 638 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 639 { 640 /* { 641 syscallarg(const char *) path; 642 syscallarg(int) flags; 643 } */ 644 struct vnode *vp; 645 struct mount *mp; 646 int error; 647 struct pathbuf *pb; 648 struct nameidata nd; 649 650 error = pathbuf_copyin(SCARG(uap, path), &pb); 651 if (error) { 652 return error; 653 } 654 655 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 656 if ((error = namei(&nd)) != 0) { 657 pathbuf_destroy(pb); 658 return error; 659 } 660 vp = nd.ni_vp; 661 pathbuf_destroy(pb); 662 663 mp = vp->v_mount; 664 vfs_ref(mp); 665 VOP_UNLOCK(vp); 666 667 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 668 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 669 if (error) { 670 vrele(vp); 671 vfs_rele(mp); 672 return (error); 673 } 674 675 /* 676 * Don't allow unmounting the root file system. 677 */ 678 if (mp->mnt_flag & MNT_ROOTFS) { 679 vrele(vp); 680 vfs_rele(mp); 681 return (EINVAL); 682 } 683 684 /* 685 * Must be the root of the filesystem 686 */ 687 if ((vp->v_vflag & VV_ROOT) == 0) { 688 vrele(vp); 689 vfs_rele(mp); 690 return (EINVAL); 691 } 692 693 vrele(vp); 694 error = dounmount(mp, SCARG(uap, flags), l); 695 vfs_rele(mp); 696 if (!error) { 697 mutex_enter(&fs_klist_lock); 698 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 699 mutex_exit(&fs_klist_lock); 700 } 701 return error; 702 } 703 704 /* 705 * Sync each mounted filesystem. 
706 */ 707 #ifdef DEBUG 708 int syncprt = 0; 709 struct ctldebug debug0 = { "syncprt", &syncprt }; 710 #endif 711 712 void 713 do_sys_sync(struct lwp *l) 714 { 715 mount_iterator_t *iter; 716 struct mount *mp; 717 int asyncflag; 718 719 mountlist_iterator_init(&iter); 720 while ((mp = mountlist_iterator_next(iter)) != NULL) { 721 mutex_enter(mp->mnt_updating); 722 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 723 asyncflag = mp->mnt_flag & MNT_ASYNC; 724 mp->mnt_flag &= ~MNT_ASYNC; 725 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 726 if (asyncflag) 727 mp->mnt_flag |= MNT_ASYNC; 728 } 729 mutex_exit(mp->mnt_updating); 730 } 731 mountlist_iterator_destroy(iter); 732 #ifdef DEBUG 733 if (syncprt) 734 vfs_bufstats(); 735 #endif /* DEBUG */ 736 } 737 738 static bool 739 sync_vnode_filter(void *cookie, vnode_t *vp) 740 { 741 742 if (vp->v_numoutput > 0) { 743 ++*(int *)cookie; 744 } 745 return false; 746 } 747 748 int 749 vfs_syncwait(void) 750 { 751 int nbusy, nbusy_prev, iter; 752 struct vnode_iterator *vniter; 753 mount_iterator_t *mpiter; 754 struct mount *mp; 755 756 for (nbusy_prev = 0, iter = 0; iter < 20;) { 757 nbusy = 0; 758 mountlist_iterator_init(&mpiter); 759 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 760 vnode_t *vp __diagused; 761 vfs_vnode_iterator_init(mp, &vniter); 762 vp = vfs_vnode_iterator_next(vniter, 763 sync_vnode_filter, &nbusy); 764 KASSERT(vp == NULL); 765 vfs_vnode_iterator_destroy(vniter); 766 } 767 mountlist_iterator_destroy(mpiter); 768 769 if (nbusy == 0) 770 break; 771 if (nbusy_prev == 0) 772 nbusy_prev = nbusy; 773 printf("%d ", nbusy); 774 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 775 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 776 iter++; 777 else 778 nbusy_prev = nbusy; 779 } 780 781 if (nbusy) { 782 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 783 printf("giving up\nPrinting vnodes for busy buffers\n"); 784 mountlist_iterator_init(&mpiter); 785 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 
786 vnode_t *vp; 787 vfs_vnode_iterator_init(mp, &vniter); 788 vp = vfs_vnode_iterator_next(vniter, 789 NULL, NULL); 790 mutex_enter(vp->v_interlock); 791 if (vp->v_numoutput > 0) 792 vprint(NULL, vp); 793 mutex_exit(vp->v_interlock); 794 vrele(vp); 795 vfs_vnode_iterator_destroy(vniter); 796 } 797 mountlist_iterator_destroy(mpiter); 798 #endif 799 } 800 801 return nbusy; 802 } 803 804 /* ARGSUSED */ 805 int 806 sys_sync(struct lwp *l, const void *v, register_t *retval) 807 { 808 do_sys_sync(l); 809 return (0); 810 } 811 812 813 /* 814 * Access or change filesystem quotas. 815 * 816 * (this is really 14 different calls bundled into one) 817 */ 818 819 static int 820 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 821 { 822 struct quotastat info_k; 823 int error; 824 825 /* ensure any padding bytes are cleared */ 826 memset(&info_k, 0, sizeof(info_k)); 827 828 error = vfs_quotactl_stat(mp, &info_k); 829 if (error) { 830 return error; 831 } 832 833 return copyout(&info_k, info_u, sizeof(info_k)); 834 } 835 836 static int 837 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 838 struct quotaidtypestat *info_u) 839 { 840 struct quotaidtypestat info_k; 841 int error; 842 843 /* ensure any padding bytes are cleared */ 844 memset(&info_k, 0, sizeof(info_k)); 845 846 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 847 if (error) { 848 return error; 849 } 850 851 return copyout(&info_k, info_u, sizeof(info_k)); 852 } 853 854 static int 855 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 856 struct quotaobjtypestat *info_u) 857 { 858 struct quotaobjtypestat info_k; 859 int error; 860 861 /* ensure any padding bytes are cleared */ 862 memset(&info_k, 0, sizeof(info_k)); 863 864 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 865 if (error) { 866 return error; 867 } 868 869 return copyout(&info_k, info_u, sizeof(info_k)); 870 } 871 872 static int 873 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 874 struct 
quotaval *val_u) 875 { 876 struct quotakey key_k; 877 struct quotaval val_k; 878 int error; 879 880 /* ensure any padding bytes are cleared */ 881 memset(&val_k, 0, sizeof(val_k)); 882 883 error = copyin(key_u, &key_k, sizeof(key_k)); 884 if (error) { 885 return error; 886 } 887 888 error = vfs_quotactl_get(mp, &key_k, &val_k); 889 if (error) { 890 return error; 891 } 892 893 return copyout(&val_k, val_u, sizeof(val_k)); 894 } 895 896 static int 897 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 898 const struct quotaval *val_u) 899 { 900 struct quotakey key_k; 901 struct quotaval val_k; 902 int error; 903 904 error = copyin(key_u, &key_k, sizeof(key_k)); 905 if (error) { 906 return error; 907 } 908 909 error = copyin(val_u, &val_k, sizeof(val_k)); 910 if (error) { 911 return error; 912 } 913 914 return vfs_quotactl_put(mp, &key_k, &val_k); 915 } 916 917 static int 918 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 919 { 920 struct quotakey key_k; 921 int error; 922 923 error = copyin(key_u, &key_k, sizeof(key_k)); 924 if (error) { 925 return error; 926 } 927 928 return vfs_quotactl_del(mp, &key_k); 929 } 930 931 static int 932 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 933 { 934 struct quotakcursor cursor_k; 935 int error; 936 937 /* ensure any padding bytes are cleared */ 938 memset(&cursor_k, 0, sizeof(cursor_k)); 939 940 error = vfs_quotactl_cursoropen(mp, &cursor_k); 941 if (error) { 942 return error; 943 } 944 945 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 946 } 947 948 static int 949 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 950 { 951 struct quotakcursor cursor_k; 952 int error; 953 954 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 955 if (error) { 956 return error; 957 } 958 959 return vfs_quotactl_cursorclose(mp, &cursor_k); 960 } 961 962 static int 963 do_sys_quotactl_cursorskipidtype(struct mount *mp, 964 struct quotakcursor 
*cursor_u, int idtype) 965 { 966 struct quotakcursor cursor_k; 967 int error; 968 969 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 970 if (error) { 971 return error; 972 } 973 974 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 975 if (error) { 976 return error; 977 } 978 979 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 980 } 981 982 static int 983 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 984 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 985 unsigned *ret_u) 986 { 987 #define CGET_STACK_MAX 8 988 struct quotakcursor cursor_k; 989 struct quotakey stackkeys[CGET_STACK_MAX]; 990 struct quotaval stackvals[CGET_STACK_MAX]; 991 struct quotakey *keys_k; 992 struct quotaval *vals_k; 993 unsigned ret_k; 994 int error; 995 996 if (maxnum > 128) { 997 maxnum = 128; 998 } 999 1000 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1001 if (error) { 1002 return error; 1003 } 1004 1005 if (maxnum <= CGET_STACK_MAX) { 1006 keys_k = stackkeys; 1007 vals_k = stackvals; 1008 /* ensure any padding bytes are cleared */ 1009 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1010 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1011 } else { 1012 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1013 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1014 } 1015 1016 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1017 &ret_k); 1018 if (error) { 1019 goto fail; 1020 } 1021 1022 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1023 if (error) { 1024 goto fail; 1025 } 1026 1027 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1028 if (error) { 1029 goto fail; 1030 } 1031 1032 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1033 if (error) { 1034 goto fail; 1035 } 1036 1037 /* do last to maximize the chance of being able to recover a failure */ 1038 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1039 1040 fail: 1041 if (keys_k != 
stackkeys) { 1042 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1043 } 1044 if (vals_k != stackvals) { 1045 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1046 } 1047 return error; 1048 } 1049 1050 static int 1051 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1052 int *ret_u) 1053 { 1054 struct quotakcursor cursor_k; 1055 int ret_k; 1056 int error; 1057 1058 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1059 if (error) { 1060 return error; 1061 } 1062 1063 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1064 if (error) { 1065 return error; 1066 } 1067 1068 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1069 if (error) { 1070 return error; 1071 } 1072 1073 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1074 } 1075 1076 static int 1077 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1078 { 1079 struct quotakcursor cursor_k; 1080 int error; 1081 1082 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1083 if (error) { 1084 return error; 1085 } 1086 1087 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1088 if (error) { 1089 return error; 1090 } 1091 1092 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1093 } 1094 1095 static int 1096 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1097 { 1098 char *path_k; 1099 int error; 1100 1101 /* XXX this should probably be a struct pathbuf */ 1102 path_k = PNBUF_GET(); 1103 error = copyin(path_u, path_k, PATH_MAX); 1104 if (error) { 1105 PNBUF_PUT(path_k); 1106 return error; 1107 } 1108 1109 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1110 1111 PNBUF_PUT(path_k); 1112 return error; 1113 } 1114 1115 static int 1116 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1117 { 1118 return vfs_quotactl_quotaoff(mp, idtype); 1119 } 1120 1121 int 1122 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1123 { 1124 struct mount *mp; 1125 struct vnode *vp; 1126 int error; 1127 1128 error 
= namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1129 if (error != 0) 1130 return (error); 1131 mp = vp->v_mount; 1132 1133 switch (args->qc_op) { 1134 case QUOTACTL_STAT: 1135 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1136 break; 1137 case QUOTACTL_IDTYPESTAT: 1138 error = do_sys_quotactl_idtypestat(mp, 1139 args->u.idtypestat.qc_idtype, 1140 args->u.idtypestat.qc_info); 1141 break; 1142 case QUOTACTL_OBJTYPESTAT: 1143 error = do_sys_quotactl_objtypestat(mp, 1144 args->u.objtypestat.qc_objtype, 1145 args->u.objtypestat.qc_info); 1146 break; 1147 case QUOTACTL_GET: 1148 error = do_sys_quotactl_get(mp, 1149 args->u.get.qc_key, 1150 args->u.get.qc_val); 1151 break; 1152 case QUOTACTL_PUT: 1153 error = do_sys_quotactl_put(mp, 1154 args->u.put.qc_key, 1155 args->u.put.qc_val); 1156 break; 1157 case QUOTACTL_DEL: 1158 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1159 break; 1160 case QUOTACTL_CURSOROPEN: 1161 error = do_sys_quotactl_cursoropen(mp, 1162 args->u.cursoropen.qc_cursor); 1163 break; 1164 case QUOTACTL_CURSORCLOSE: 1165 error = do_sys_quotactl_cursorclose(mp, 1166 args->u.cursorclose.qc_cursor); 1167 break; 1168 case QUOTACTL_CURSORSKIPIDTYPE: 1169 error = do_sys_quotactl_cursorskipidtype(mp, 1170 args->u.cursorskipidtype.qc_cursor, 1171 args->u.cursorskipidtype.qc_idtype); 1172 break; 1173 case QUOTACTL_CURSORGET: 1174 error = do_sys_quotactl_cursorget(mp, 1175 args->u.cursorget.qc_cursor, 1176 args->u.cursorget.qc_keys, 1177 args->u.cursorget.qc_vals, 1178 args->u.cursorget.qc_maxnum, 1179 args->u.cursorget.qc_ret); 1180 break; 1181 case QUOTACTL_CURSORATEND: 1182 error = do_sys_quotactl_cursoratend(mp, 1183 args->u.cursoratend.qc_cursor, 1184 args->u.cursoratend.qc_ret); 1185 break; 1186 case QUOTACTL_CURSORREWIND: 1187 error = do_sys_quotactl_cursorrewind(mp, 1188 args->u.cursorrewind.qc_cursor); 1189 break; 1190 case QUOTACTL_QUOTAON: 1191 error = do_sys_quotactl_quotaon(mp, 1192 args->u.quotaon.qc_idtype, 1193 
args->u.quotaon.qc_quotafile); 1194 break; 1195 case QUOTACTL_QUOTAOFF: 1196 error = do_sys_quotactl_quotaoff(mp, 1197 args->u.quotaoff.qc_idtype); 1198 break; 1199 default: 1200 error = EINVAL; 1201 break; 1202 } 1203 1204 vrele(vp); 1205 return error; 1206 } 1207 1208 /* ARGSUSED */ 1209 int 1210 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1211 register_t *retval) 1212 { 1213 /* { 1214 syscallarg(const char *) path; 1215 syscallarg(struct quotactl_args *) args; 1216 } */ 1217 struct quotactl_args args; 1218 int error; 1219 1220 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1221 if (error) { 1222 return error; 1223 } 1224 1225 return do_sys_quotactl(SCARG(uap, path), &args); 1226 } 1227 1228 int 1229 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1230 int root) 1231 { 1232 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1233 bool chrooted; 1234 int error = 0; 1235 1236 KASSERT(l == curlwp); 1237 1238 /* 1239 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1240 * since it would imply chroots can be escaped. Just make sure this 1241 * routine is self-consistent. 1242 */ 1243 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1244 1245 /* 1246 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1247 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1248 * overrides MNT_NOWAIT. 
1249 */ 1250 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1251 (flags != MNT_WAIT && flags != 0)) { 1252 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1253 } else { 1254 /* Get the filesystem stats now */ 1255 memset(sp, 0, sizeof(*sp)); 1256 if ((error = VFS_STATVFS(mp, sp)) != 0) 1257 return error; 1258 if (!chrooted) 1259 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1260 } 1261 1262 if (chrooted) { 1263 size_t len; 1264 char *bp; 1265 char c; 1266 char *path = PNBUF_GET(); 1267 1268 bp = path + MAXPATHLEN; 1269 *--bp = '\0'; 1270 rw_enter(&cwdi->cwdi_lock, RW_READER); 1271 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1272 MAXPATHLEN / 2, 0, l); 1273 rw_exit(&cwdi->cwdi_lock); 1274 if (error) { 1275 PNBUF_PUT(path); 1276 return error; 1277 } 1278 len = strlen(bp); 1279 if (len != 1) { 1280 /* 1281 * for mount points that are below our root, we can see 1282 * them, so we fix up the pathname and return them. The 1283 * rest we cannot see, so we don't allow viewing the 1284 * data. 1285 */ 1286 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1287 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1288 (void)strlcpy(sp->f_mntonname, 1289 c == '\0' ? "/" : &sp->f_mntonname[len], 1290 sizeof(sp->f_mntonname)); 1291 } else { 1292 if (root) 1293 (void)strlcpy(sp->f_mntonname, "/", 1294 sizeof(sp->f_mntonname)); 1295 else 1296 error = EPERM; 1297 } 1298 } 1299 PNBUF_PUT(path); 1300 } 1301 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1302 return error; 1303 } 1304 1305 /* 1306 * Get filesystem statistics by path. 
1307 */ 1308 int 1309 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1310 { 1311 struct mount *mp; 1312 int error; 1313 struct vnode *vp; 1314 1315 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1316 if (error != 0) 1317 return error; 1318 mp = vp->v_mount; 1319 error = dostatvfs(mp, sb, l, flags, 1); 1320 vrele(vp); 1321 return error; 1322 } 1323 1324 /* ARGSUSED */ 1325 int 1326 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1327 { 1328 /* { 1329 syscallarg(const char *) path; 1330 syscallarg(struct statvfs *) buf; 1331 syscallarg(int) flags; 1332 } */ 1333 struct statvfs *sb; 1334 int error; 1335 1336 sb = STATVFSBUF_GET(); 1337 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1338 if (error == 0) 1339 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1340 STATVFSBUF_PUT(sb); 1341 return error; 1342 } 1343 1344 /* 1345 * Get filesystem statistics by fd. 1346 */ 1347 int 1348 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1349 { 1350 file_t *fp; 1351 struct mount *mp; 1352 int error; 1353 1354 /* fd_getvnode() will use the descriptor for us */ 1355 if ((error = fd_getvnode(fd, &fp)) != 0) 1356 return (error); 1357 mp = fp->f_vnode->v_mount; 1358 error = dostatvfs(mp, sb, curlwp, flags, 1); 1359 fd_putfile(fd); 1360 return error; 1361 } 1362 1363 /* ARGSUSED */ 1364 int 1365 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1366 { 1367 /* { 1368 syscallarg(int) fd; 1369 syscallarg(struct statvfs *) buf; 1370 syscallarg(int) flags; 1371 } */ 1372 struct statvfs *sb; 1373 int error; 1374 1375 sb = STATVFSBUF_GET(); 1376 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1377 if (error == 0) 1378 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1379 STATVFSBUF_PUT(sb); 1380 return error; 1381 } 1382 1383 1384 /* 1385 * Get statistics on all filesystems. 
1386 */ 1387 int 1388 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1389 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1390 register_t *retval) 1391 { 1392 int root = 0; 1393 mount_iterator_t *iter; 1394 struct proc *p = l->l_proc; 1395 struct mount *mp; 1396 struct statvfs *sb; 1397 size_t count, maxcount; 1398 int error = 0; 1399 1400 sb = STATVFSBUF_GET(); 1401 maxcount = bufsize / entry_sz; 1402 count = 0; 1403 mountlist_iterator_init(&iter); 1404 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1405 if (sfsp && count < maxcount) { 1406 error = dostatvfs(mp, sb, l, flags, 0); 1407 if (error) { 1408 error = 0; 1409 continue; 1410 } 1411 error = copyfn(sb, sfsp, entry_sz); 1412 if (error) 1413 goto out; 1414 sfsp = (char *)sfsp + entry_sz; 1415 root |= strcmp(sb->f_mntonname, "/") == 0; 1416 } 1417 count++; 1418 } 1419 1420 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1421 /* 1422 * fake a root entry 1423 */ 1424 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1425 sb, l, flags, 1); 1426 if (error != 0) 1427 goto out; 1428 if (sfsp) { 1429 error = copyfn(sb, sfsp, entry_sz); 1430 if (error != 0) 1431 goto out; 1432 } 1433 count++; 1434 } 1435 if (sfsp && count > maxcount) 1436 *retval = maxcount; 1437 else 1438 *retval = count; 1439 out: 1440 mountlist_iterator_destroy(iter); 1441 STATVFSBUF_PUT(sb); 1442 return error; 1443 } 1444 1445 int 1446 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1447 register_t *retval) 1448 { 1449 /* { 1450 syscallarg(struct statvfs *) buf; 1451 syscallarg(size_t) bufsize; 1452 syscallarg(int) flags; 1453 } */ 1454 1455 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1456 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1457 } 1458 1459 /* 1460 * Change current working directory to a given file descriptor. 
1461 */ 1462 /* ARGSUSED */ 1463 int 1464 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1465 { 1466 /* { 1467 syscallarg(int) fd; 1468 } */ 1469 struct proc *p = l->l_proc; 1470 struct cwdinfo *cwdi; 1471 struct vnode *vp, *tdp; 1472 struct mount *mp; 1473 file_t *fp; 1474 int error, fd; 1475 1476 /* fd_getvnode() will use the descriptor for us */ 1477 fd = SCARG(uap, fd); 1478 if ((error = fd_getvnode(fd, &fp)) != 0) 1479 return (error); 1480 vp = fp->f_vnode; 1481 1482 vref(vp); 1483 vn_lock(vp, LK_SHARED | LK_RETRY); 1484 if (vp->v_type != VDIR) 1485 error = ENOTDIR; 1486 else 1487 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1488 if (error) { 1489 vput(vp); 1490 goto out; 1491 } 1492 while ((mp = vp->v_mountedhere) != NULL) { 1493 error = vfs_busy(mp); 1494 vput(vp); 1495 if (error != 0) 1496 goto out; 1497 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1498 vfs_unbusy(mp); 1499 if (error) 1500 goto out; 1501 vp = tdp; 1502 } 1503 VOP_UNLOCK(vp); 1504 1505 /* 1506 * Disallow changing to a directory not under the process's 1507 * current root directory (if there is one). 1508 */ 1509 cwdi = p->p_cwdi; 1510 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1511 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1512 vrele(vp); 1513 error = EPERM; /* operation not permitted */ 1514 } else { 1515 vrele(cwdi->cwdi_cdir); 1516 cwdi->cwdi_cdir = vp; 1517 } 1518 rw_exit(&cwdi->cwdi_lock); 1519 1520 out: 1521 fd_putfile(fd); 1522 return (error); 1523 } 1524 1525 /* 1526 * Change this process's notion of the root directory to a given file 1527 * descriptor. 
1528 */ 1529 int 1530 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1531 { 1532 struct vnode *vp; 1533 file_t *fp; 1534 int error, fd = SCARG(uap, fd); 1535 1536 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1537 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1538 return error; 1539 /* fd_getvnode() will use the descriptor for us */ 1540 if ((error = fd_getvnode(fd, &fp)) != 0) 1541 return error; 1542 vp = fp->f_vnode; 1543 vn_lock(vp, LK_SHARED | LK_RETRY); 1544 if (vp->v_type != VDIR) 1545 error = ENOTDIR; 1546 else 1547 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1548 VOP_UNLOCK(vp); 1549 if (error) 1550 goto out; 1551 vref(vp); 1552 change_root(vp); 1553 1554 out: 1555 fd_putfile(fd); 1556 return (error); 1557 } 1558 1559 /* 1560 * Change current working directory (``.''). 1561 */ 1562 /* ARGSUSED */ 1563 int 1564 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1565 { 1566 /* { 1567 syscallarg(const char *) path; 1568 } */ 1569 struct proc *p = l->l_proc; 1570 struct cwdinfo *cwdi; 1571 int error; 1572 struct vnode *vp; 1573 1574 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1575 &vp, l)) != 0) 1576 return (error); 1577 cwdi = p->p_cwdi; 1578 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1579 vrele(cwdi->cwdi_cdir); 1580 cwdi->cwdi_cdir = vp; 1581 rw_exit(&cwdi->cwdi_lock); 1582 return (0); 1583 } 1584 1585 /* 1586 * Change notion of root (``/'') directory. 
1587 */ 1588 /* ARGSUSED */ 1589 int 1590 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1591 { 1592 /* { 1593 syscallarg(const char *) path; 1594 } */ 1595 int error; 1596 struct vnode *vp; 1597 1598 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1599 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1600 return (error); 1601 1602 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1603 if (error == 0) 1604 change_root(vp); 1605 return error; 1606 } 1607 1608 /* 1609 * Common routine for chroot and fchroot. 1610 * NB: callers need to properly authorize the change root operation. 1611 */ 1612 void 1613 change_root(struct vnode *vp) 1614 { 1615 kauth_cred_t ncred; 1616 struct lwp *l = curlwp; 1617 struct proc *p = l->l_proc; 1618 struct cwdinfo *cwdi = p->p_cwdi; 1619 1620 ncred = kauth_cred_alloc(); 1621 1622 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1623 if (cwdi->cwdi_rdir != NULL) 1624 vrele(cwdi->cwdi_rdir); 1625 cwdi->cwdi_rdir = vp; 1626 1627 /* 1628 * Prevent escaping from chroot by putting the root under 1629 * the working directory. Silently chdir to / if we aren't 1630 * already there. 1631 */ 1632 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1633 /* 1634 * XXX would be more failsafe to change directory to a 1635 * deadfs node here instead 1636 */ 1637 vrele(cwdi->cwdi_cdir); 1638 vref(vp); 1639 cwdi->cwdi_cdir = vp; 1640 } 1641 rw_exit(&cwdi->cwdi_lock); 1642 1643 /* Get a write lock on the process credential. */ 1644 proc_crmod_enter(); 1645 1646 kauth_cred_clone(p->p_cred, ncred); 1647 kauth_proc_chroot(ncred, p->p_cwdi); 1648 1649 /* Broadcast our credentials to the process and other LWPs. */ 1650 proc_crmod_leave(ncred, p->p_cred, true); 1651 } 1652 1653 /* 1654 * Common routine for chroot and chdir. 
1655 * XXX "where" should be enum uio_seg 1656 */ 1657 int 1658 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1659 { 1660 struct pathbuf *pb; 1661 struct nameidata nd; 1662 int error; 1663 1664 error = pathbuf_maybe_copyin(path, where, &pb); 1665 if (error) { 1666 return error; 1667 } 1668 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1669 if ((error = namei(&nd)) != 0) { 1670 pathbuf_destroy(pb); 1671 return error; 1672 } 1673 *vpp = nd.ni_vp; 1674 pathbuf_destroy(pb); 1675 1676 if ((*vpp)->v_type != VDIR) 1677 error = ENOTDIR; 1678 else 1679 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1680 1681 if (error) 1682 vput(*vpp); 1683 else 1684 VOP_UNLOCK(*vpp); 1685 return (error); 1686 } 1687 1688 /* 1689 * Internals of sys_open - path has already been converted into a pathbuf 1690 * (so we can easily reuse this function from other parts of the kernel, 1691 * like posix_spawn post-processing). 1692 */ 1693 int 1694 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1695 int open_mode, int *fd) 1696 { 1697 struct proc *p = l->l_proc; 1698 struct cwdinfo *cwdi = p->p_cwdi; 1699 file_t *fp; 1700 struct vnode *vp; 1701 int dupfd; 1702 bool dupfd_move; 1703 int flags, cmode; 1704 int indx, error; 1705 1706 if (open_flags & O_SEARCH) { 1707 open_flags &= ~(int)O_SEARCH; 1708 } 1709 1710 /* 1711 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1712 * may be specified. 1713 */ 1714 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1715 return EINVAL; 1716 1717 flags = FFLAGS(open_flags); 1718 if ((flags & (FREAD | FWRITE)) == 0) 1719 return EINVAL; 1720 1721 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1722 return error; 1723 } 1724 1725 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1726 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1727 1728 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1729 &vp, &dupfd_move, &dupfd); 1730 if (error != 0) { 1731 fd_abort(p, fp, indx); 1732 if (error == ERESTART) 1733 error = EINTR; 1734 return error; 1735 } 1736 1737 if (vp == NULL) { 1738 fd_abort(p, fp, indx); 1739 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1740 if (error) 1741 return error; 1742 *fd = indx; 1743 } else { 1744 error = open_setfp(l, fp, vp, indx, flags); 1745 if (error) 1746 return error; 1747 VOP_UNLOCK(vp); 1748 *fd = indx; 1749 fd_affix(p, fp, indx); 1750 } 1751 1752 return 0; 1753 } 1754 1755 int 1756 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1757 { 1758 struct pathbuf *pb; 1759 int error, oflags; 1760 1761 oflags = FFLAGS(open_flags); 1762 if ((oflags & (FREAD | FWRITE)) == 0) 1763 return EINVAL; 1764 1765 pb = pathbuf_create(path); 1766 if (pb == NULL) 1767 return ENOMEM; 1768 1769 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1770 pathbuf_destroy(pb); 1771 1772 return error; 1773 } 1774 1775 static int 1776 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1777 int mode, int *fd) 1778 { 1779 file_t *dfp = NULL; 1780 struct vnode *dvp = NULL; 1781 struct pathbuf *pb; 1782 const char *pathstring = NULL; 1783 int error; 1784 1785 if (path == NULL) { 1786 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1787 if (error == ENOSYS) 1788 goto no_compat; 1789 if (error) 1790 return error; 1791 } else { 1792 no_compat: 1793 error = pathbuf_copyin(path, &pb); 1794 if (error) 1795 return error; 1796 } 1797 1798 pathstring = pathbuf_stringcopy_get(pb); 1799 1800 /* 1801 * fdat is ignored if: 1802 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1803 * 2) if path is absolute, then fdat is useless. 
1804 */ 1805 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1806 /* fd_getvnode() will use the descriptor for us */ 1807 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1808 goto out; 1809 1810 dvp = dfp->f_vnode; 1811 } 1812 1813 error = do_open(l, dvp, pb, flags, mode, fd); 1814 1815 if (dfp != NULL) 1816 fd_putfile(fdat); 1817 out: 1818 pathbuf_stringcopy_put(pb, pathstring); 1819 pathbuf_destroy(pb); 1820 return error; 1821 } 1822 1823 int 1824 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1825 { 1826 /* { 1827 syscallarg(const char *) path; 1828 syscallarg(int) flags; 1829 syscallarg(int) mode; 1830 } */ 1831 int error; 1832 int fd; 1833 1834 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1835 SCARG(uap, flags), SCARG(uap, mode), &fd); 1836 1837 if (error == 0) 1838 *retval = fd; 1839 1840 return error; 1841 } 1842 1843 int 1844 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1845 { 1846 /* { 1847 syscallarg(int) fd; 1848 syscallarg(const char *) path; 1849 syscallarg(int) oflags; 1850 syscallarg(int) mode; 1851 } */ 1852 int error; 1853 int fd; 1854 1855 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1856 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1857 1858 if (error == 0) 1859 *retval = fd; 1860 1861 return error; 1862 } 1863 1864 static void 1865 vfs__fhfree(fhandle_t *fhp) 1866 { 1867 size_t fhsize; 1868 1869 fhsize = FHANDLE_SIZE(fhp); 1870 kmem_free(fhp, fhsize); 1871 } 1872 1873 /* 1874 * vfs_composefh: compose a filehandle. 
1875 */ 1876 1877 int 1878 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1879 { 1880 struct mount *mp; 1881 struct fid *fidp; 1882 int error; 1883 size_t needfhsize; 1884 size_t fidsize; 1885 1886 mp = vp->v_mount; 1887 fidp = NULL; 1888 if (*fh_size < FHANDLE_SIZE_MIN) { 1889 fidsize = 0; 1890 } else { 1891 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1892 if (fhp != NULL) { 1893 memset(fhp, 0, *fh_size); 1894 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1895 fidp = &fhp->fh_fid; 1896 } 1897 } 1898 error = VFS_VPTOFH(vp, fidp, &fidsize); 1899 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1900 if (error == 0 && *fh_size < needfhsize) { 1901 error = E2BIG; 1902 } 1903 *fh_size = needfhsize; 1904 return error; 1905 } 1906 1907 int 1908 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1909 { 1910 struct mount *mp; 1911 fhandle_t *fhp; 1912 size_t fhsize; 1913 size_t fidsize; 1914 int error; 1915 1916 mp = vp->v_mount; 1917 fidsize = 0; 1918 error = VFS_VPTOFH(vp, NULL, &fidsize); 1919 KASSERT(error != 0); 1920 if (error != E2BIG) { 1921 goto out; 1922 } 1923 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1924 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1925 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1926 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1927 if (error == 0) { 1928 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1929 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1930 *fhpp = fhp; 1931 } else { 1932 kmem_free(fhp, fhsize); 1933 } 1934 out: 1935 return error; 1936 } 1937 1938 void 1939 vfs_composefh_free(fhandle_t *fhp) 1940 { 1941 1942 vfs__fhfree(fhp); 1943 } 1944 1945 /* 1946 * vfs_fhtovp: lookup a vnode by a filehandle. 
1947 */ 1948 1949 int 1950 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1951 { 1952 struct mount *mp; 1953 int error; 1954 1955 *vpp = NULL; 1956 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1957 if (mp == NULL) { 1958 error = ESTALE; 1959 goto out; 1960 } 1961 if (mp->mnt_op->vfs_fhtovp == NULL) { 1962 error = EOPNOTSUPP; 1963 goto out; 1964 } 1965 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1966 out: 1967 return error; 1968 } 1969 1970 /* 1971 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1972 * the needed size. 1973 */ 1974 1975 int 1976 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1977 { 1978 fhandle_t *fhp; 1979 int error; 1980 1981 if (fhsize > FHANDLE_SIZE_MAX) { 1982 return EINVAL; 1983 } 1984 if (fhsize < FHANDLE_SIZE_MIN) { 1985 return EINVAL; 1986 } 1987 again: 1988 fhp = kmem_alloc(fhsize, KM_SLEEP); 1989 error = copyin(ufhp, fhp, fhsize); 1990 if (error == 0) { 1991 /* XXX this check shouldn't be here */ 1992 if (FHANDLE_SIZE(fhp) == fhsize) { 1993 *fhpp = fhp; 1994 return 0; 1995 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1996 /* 1997 * a kludge for nfsv2 padded handles. 1998 */ 1999 size_t sz; 2000 2001 sz = FHANDLE_SIZE(fhp); 2002 kmem_free(fhp, fhsize); 2003 fhsize = sz; 2004 goto again; 2005 } else { 2006 /* 2007 * userland told us wrong size. 
2008 */ 2009 error = EINVAL; 2010 } 2011 } 2012 kmem_free(fhp, fhsize); 2013 return error; 2014 } 2015 2016 void 2017 vfs_copyinfh_free(fhandle_t *fhp) 2018 { 2019 2020 vfs__fhfree(fhp); 2021 } 2022 2023 /* 2024 * Get file handle system call 2025 */ 2026 int 2027 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 2028 { 2029 /* { 2030 syscallarg(char *) fname; 2031 syscallarg(fhandle_t *) fhp; 2032 syscallarg(size_t *) fh_size; 2033 } */ 2034 struct vnode *vp; 2035 fhandle_t *fh; 2036 int error; 2037 struct pathbuf *pb; 2038 struct nameidata nd; 2039 size_t sz; 2040 size_t usz; 2041 2042 /* 2043 * Must be super user 2044 */ 2045 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2046 0, NULL, NULL, NULL); 2047 if (error) 2048 return (error); 2049 2050 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2051 if (error) { 2052 return error; 2053 } 2054 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2055 error = namei(&nd); 2056 if (error) { 2057 pathbuf_destroy(pb); 2058 return error; 2059 } 2060 vp = nd.ni_vp; 2061 pathbuf_destroy(pb); 2062 2063 error = vfs_composefh_alloc(vp, &fh); 2064 vput(vp); 2065 if (error != 0) { 2066 return error; 2067 } 2068 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2069 if (error != 0) { 2070 goto out; 2071 } 2072 sz = FHANDLE_SIZE(fh); 2073 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2074 if (error != 0) { 2075 goto out; 2076 } 2077 if (usz >= sz) { 2078 error = copyout(fh, SCARG(uap, fhp), sz); 2079 } else { 2080 error = E2BIG; 2081 } 2082 out: 2083 vfs_composefh_free(fh); 2084 return (error); 2085 } 2086 2087 /* 2088 * Open a file given a file handle. 2089 * 2090 * Check permissions, allocate an open file structure, 2091 * and call the device open routine if any. 
2092 */ 2093 2094 int 2095 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2096 register_t *retval) 2097 { 2098 file_t *fp; 2099 struct vnode *vp = NULL; 2100 kauth_cred_t cred = l->l_cred; 2101 file_t *nfp; 2102 int indx, error; 2103 struct vattr va; 2104 fhandle_t *fh; 2105 int flags; 2106 proc_t *p; 2107 2108 p = curproc; 2109 2110 /* 2111 * Must be super user 2112 */ 2113 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2114 0, NULL, NULL, NULL))) 2115 return (error); 2116 2117 if (oflags & O_SEARCH) { 2118 oflags &= ~(int)O_SEARCH; 2119 } 2120 2121 flags = FFLAGS(oflags); 2122 if ((flags & (FREAD | FWRITE)) == 0) 2123 return (EINVAL); 2124 if ((flags & O_CREAT)) 2125 return (EINVAL); 2126 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2127 return (error); 2128 fp = nfp; 2129 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2130 if (error != 0) { 2131 goto bad; 2132 } 2133 error = vfs_fhtovp(fh, &vp); 2134 vfs_copyinfh_free(fh); 2135 if (error != 0) { 2136 goto bad; 2137 } 2138 2139 /* Now do an effective vn_open */ 2140 2141 if (vp->v_type == VSOCK) { 2142 error = EOPNOTSUPP; 2143 goto bad; 2144 } 2145 error = vn_openchk(vp, cred, flags); 2146 if (error != 0) 2147 goto bad; 2148 if (flags & O_TRUNC) { 2149 VOP_UNLOCK(vp); /* XXX */ 2150 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2151 vattr_null(&va); 2152 va.va_size = 0; 2153 error = VOP_SETATTR(vp, &va, cred); 2154 if (error) 2155 goto bad; 2156 } 2157 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2158 goto bad; 2159 if (flags & FWRITE) { 2160 mutex_enter(vp->v_interlock); 2161 vp->v_writecount++; 2162 mutex_exit(vp->v_interlock); 2163 } 2164 2165 /* done with modified vn_open, now finish what sys_open does. 
*/ 2166 if ((error = open_setfp(l, fp, vp, indx, flags))) 2167 return error; 2168 2169 VOP_UNLOCK(vp); 2170 *retval = indx; 2171 fd_affix(p, fp, indx); 2172 return (0); 2173 2174 bad: 2175 fd_abort(p, fp, indx); 2176 if (vp != NULL) 2177 vput(vp); 2178 if (error == EDUPFD || error == EMOVEFD) { 2179 /* XXX should probably close curlwp->l_dupfd */ 2180 error = EOPNOTSUPP; 2181 } 2182 return (error); 2183 } 2184 2185 int 2186 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2187 { 2188 /* { 2189 syscallarg(const void *) fhp; 2190 syscallarg(size_t) fh_size; 2191 syscallarg(int) flags; 2192 } */ 2193 2194 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2195 SCARG(uap, flags), retval); 2196 } 2197 2198 int 2199 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2200 { 2201 int error; 2202 fhandle_t *fh; 2203 struct vnode *vp; 2204 2205 /* 2206 * Must be super user 2207 */ 2208 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2209 0, NULL, NULL, NULL))) 2210 return (error); 2211 2212 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2213 if (error != 0) 2214 return error; 2215 2216 error = vfs_fhtovp(fh, &vp); 2217 vfs_copyinfh_free(fh); 2218 if (error != 0) 2219 return error; 2220 2221 error = vn_stat(vp, sb); 2222 vput(vp); 2223 return error; 2224 } 2225 2226 2227 /* ARGSUSED */ 2228 int 2229 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2230 { 2231 /* { 2232 syscallarg(const void *) fhp; 2233 syscallarg(size_t) fh_size; 2234 syscallarg(struct stat *) sb; 2235 } */ 2236 struct stat sb; 2237 int error; 2238 2239 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2240 if (error) 2241 return error; 2242 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2243 } 2244 2245 int 2246 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2247 int flags) 2248 { 2249 fhandle_t *fh; 2250 struct mount *mp; 
2251 struct vnode *vp; 2252 int error; 2253 2254 /* 2255 * Must be super user 2256 */ 2257 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2258 0, NULL, NULL, NULL))) 2259 return error; 2260 2261 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2262 if (error != 0) 2263 return error; 2264 2265 error = vfs_fhtovp(fh, &vp); 2266 vfs_copyinfh_free(fh); 2267 if (error != 0) 2268 return error; 2269 2270 mp = vp->v_mount; 2271 error = dostatvfs(mp, sb, l, flags, 1); 2272 vput(vp); 2273 return error; 2274 } 2275 2276 /* ARGSUSED */ 2277 int 2278 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2279 { 2280 /* { 2281 syscallarg(const void *) fhp; 2282 syscallarg(size_t) fh_size; 2283 syscallarg(struct statvfs *) buf; 2284 syscallarg(int) flags; 2285 } */ 2286 struct statvfs *sb = STATVFSBUF_GET(); 2287 int error; 2288 2289 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2290 SCARG(uap, flags)); 2291 if (error == 0) 2292 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2293 STATVFSBUF_PUT(sb); 2294 return error; 2295 } 2296 2297 int 2298 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2299 dev_t dev) 2300 { 2301 2302 /* 2303 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2304 * in mode and dev=0. 2305 * 2306 * In all the other cases it's implementation defined behavior. 2307 */ 2308 2309 if ((mode & S_IFIFO) && dev == 0) 2310 return do_sys_mkfifoat(l, fdat, pathname, mode); 2311 else 2312 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2313 UIO_USERSPACE); 2314 } 2315 2316 /* 2317 * Create a special file. 
2318 */ 2319 /* ARGSUSED */ 2320 int 2321 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2322 register_t *retval) 2323 { 2324 /* { 2325 syscallarg(const char *) path; 2326 syscallarg(mode_t) mode; 2327 syscallarg(dev_t) dev; 2328 } */ 2329 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2330 SCARG(uap, mode), SCARG(uap, dev)); 2331 } 2332 2333 int 2334 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2335 register_t *retval) 2336 { 2337 /* { 2338 syscallarg(int) fd; 2339 syscallarg(const char *) path; 2340 syscallarg(mode_t) mode; 2341 syscallarg(int) pad; 2342 syscallarg(dev_t) dev; 2343 } */ 2344 2345 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2346 SCARG(uap, mode), SCARG(uap, dev)); 2347 } 2348 2349 int 2350 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2351 enum uio_seg seg) 2352 { 2353 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2354 } 2355 2356 int 2357 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2358 dev_t dev, enum uio_seg seg) 2359 { 2360 struct proc *p = l->l_proc; 2361 struct vnode *vp; 2362 struct vattr vattr; 2363 int error, optype; 2364 struct pathbuf *pb; 2365 struct nameidata nd; 2366 const char *pathstring; 2367 2368 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2369 0, NULL, NULL, NULL)) != 0) 2370 return (error); 2371 2372 optype = VOP_MKNOD_DESCOFFSET; 2373 2374 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2375 if (error) { 2376 return error; 2377 } 2378 pathstring = pathbuf_stringcopy_get(pb); 2379 if (pathstring == NULL) { 2380 pathbuf_destroy(pb); 2381 return ENOMEM; 2382 } 2383 2384 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2385 2386 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2387 goto out; 2388 vp = nd.ni_vp; 2389 2390 if (vp != NULL) 2391 error = EEXIST; 2392 else { 2393 vattr_null(&vattr); 2394 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2395 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2396 vattr.va_rdev = dev; 2397 2398 switch (mode & S_IFMT) { 2399 case S_IFMT: /* used by badsect to flag bad sectors */ 2400 vattr.va_type = VBAD; 2401 break; 2402 case S_IFCHR: 2403 vattr.va_type = VCHR; 2404 break; 2405 case S_IFBLK: 2406 vattr.va_type = VBLK; 2407 break; 2408 case S_IFWHT: 2409 optype = VOP_WHITEOUT_DESCOFFSET; 2410 break; 2411 case S_IFREG: 2412 #if NVERIEXEC > 0 2413 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2414 O_CREAT); 2415 #endif /* NVERIEXEC > 0 */ 2416 vattr.va_type = VREG; 2417 vattr.va_rdev = VNOVAL; 2418 optype = VOP_CREATE_DESCOFFSET; 2419 break; 2420 default: 2421 error = EINVAL; 2422 break; 2423 } 2424 2425 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2426 vattr.va_rdev == VNOVAL) 2427 error = EINVAL; 2428 } 2429 2430 if (!error) { 2431 switch (optype) { 2432 case VOP_WHITEOUT_DESCOFFSET: 2433 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2434 if (error) 2435 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2436 vput(nd.ni_dvp); 2437 break; 2438 2439 case VOP_MKNOD_DESCOFFSET: 2440 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2441 &nd.ni_cnd, &vattr); 2442 if (error == 0) 2443 vrele(nd.ni_vp); 2444 vput(nd.ni_dvp); 2445 break; 2446 2447 case VOP_CREATE_DESCOFFSET: 2448 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2449 &nd.ni_cnd, &vattr); 2450 if (error == 0) 2451 vrele(nd.ni_vp); 2452 vput(nd.ni_dvp); 2453 break; 2454 } 2455 } else { 2456 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2457 if (nd.ni_dvp == vp) 2458 vrele(nd.ni_dvp); 2459 else 2460 vput(nd.ni_dvp); 2461 if (vp) 2462 vrele(vp); 2463 } 2464 out: 2465 pathbuf_stringcopy_put(pb, pathstring); 2466 pathbuf_destroy(pb); 2467 return (error); 2468 } 2469 2470 /* 2471 * Create a named pipe. 
2472 */ 2473 /* ARGSUSED */ 2474 int 2475 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2476 { 2477 /* { 2478 syscallarg(const char *) path; 2479 syscallarg(int) mode; 2480 } */ 2481 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2482 } 2483 2484 int 2485 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2486 register_t *retval) 2487 { 2488 /* { 2489 syscallarg(int) fd; 2490 syscallarg(const char *) path; 2491 syscallarg(int) mode; 2492 } */ 2493 2494 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2495 SCARG(uap, mode)); 2496 } 2497 2498 static int 2499 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2500 { 2501 struct proc *p = l->l_proc; 2502 struct vattr vattr; 2503 int error; 2504 struct pathbuf *pb; 2505 struct nameidata nd; 2506 2507 error = pathbuf_copyin(path, &pb); 2508 if (error) { 2509 return error; 2510 } 2511 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2512 2513 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2514 pathbuf_destroy(pb); 2515 return error; 2516 } 2517 if (nd.ni_vp != NULL) { 2518 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2519 if (nd.ni_dvp == nd.ni_vp) 2520 vrele(nd.ni_dvp); 2521 else 2522 vput(nd.ni_dvp); 2523 vrele(nd.ni_vp); 2524 pathbuf_destroy(pb); 2525 return (EEXIST); 2526 } 2527 vattr_null(&vattr); 2528 vattr.va_type = VFIFO; 2529 /* We will read cwdi->cwdi_cmask unlocked. */ 2530 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2531 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2532 if (error == 0) 2533 vrele(nd.ni_vp); 2534 vput(nd.ni_dvp); 2535 pathbuf_destroy(pb); 2536 return (error); 2537 } 2538 2539 /* 2540 * Make a hard file link. 
*/
/* ARGSUSED */
/*
 * do_sys_linkat: create a hard link.
 *
 *	fdpath/path	existing object (user-space string), looked up with
 *			NSM_FOLLOW_TRYEMULROOT when AT_SYMLINK_FOLLOW is set
 *			in "follow", NSM_NOFOLLOW_TRYEMULROOT otherwise.
 *	fdlink/link	name of the new link (user-space string), looked up
 *			for CREATE with the parent directory locked.
 *	retval		not touched by this function.
 *
 * Returns 0 or an errno: EEXIST if the new name already resolves to a
 * vnode, EPERM if the source is a directory, EXDEV if the source and the
 * target directory are on different mounts, otherwise whatever the
 * lookups or VOP_LINK return.
 */
int
do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
    const char *link, int follow, register_t *retval)
{
	struct vnode *vp;
	struct pathbuf *linkpb;
	struct nameidata nd;
	namei_simple_flags_t ns_flags;
	int error;

	if (follow & AT_SYMLINK_FOLLOW)
		ns_flags = NSM_FOLLOW_TRYEMULROOT;
	else
		ns_flags = NSM_NOFOLLOW_TRYEMULROOT;

	/* Look up the link source; the reference on vp is dropped at out1. */
	error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
	if (error != 0)
		return (error);
	error = pathbuf_copyin(link, &linkpb);
	if (error) {
		goto out1;
	}
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
	if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
		goto out2;
	if (nd.ni_vp) {
		error = EEXIST;
		goto abortop;
	}
	/* Prevent hard links on directories. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto abortop;
	}
	/* Prevent cross-mount operation. */
	if (nd.ni_dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto abortop;
	}
	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
	VOP_UNLOCK(nd.ni_dvp);
	vrele(nd.ni_dvp);
out2:
	pathbuf_destroy(linkpb);
out1:
	vrele(vp);
	return (error);
abortop:
	/*
	 * Failure after the CREATE lookup succeeded: abort the operation
	 * and release the parent (and the existing target, if any), then
	 * rejoin the common cleanup path.
	 */
	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	if (nd.ni_dvp == nd.ni_vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	if (nd.ni_vp != NULL)
		vrele(nd.ni_vp);
	goto out2;
}

/*
 * link(2): both names are resolved relative to AT_FDCWD and the source
 * lookup is done with AT_SYMLINK_FOLLOW.
 */
int
sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(const char *) link;
	} */
	const char *path = SCARG(uap, path);
	const char *link = SCARG(uap, link);

	return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
	    AT_SYMLINK_FOLLOW, retval);
}

/*
 * linkat(2): of the user-supplied flags only AT_SYMLINK_FOLLOW is passed
 * on to do_sys_linkat(); any other bits are masked off here.
 */
int
sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fd1;
		syscallarg(const char *) name1;
		syscallarg(int) fd2;
		syscallarg(const char *) name2;
		syscallarg(int) flags;
	} */
	int fd1 = SCARG(uap, fd1);
	const char *name1 = SCARG(uap, name1);
	int fd2 = SCARG(uap, fd2);
	const char *name2 = SCARG(uap, name2);
	int follow;

	follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;

	return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
}


/*
 * Create a symbolic link named "link" pointing at "patharg", with the
 * new name resolved relative to AT_FDCWD.  "seg" says whether the two
 * strings live in user or kernel space.  No lwp is passed down (NULL).
 */
int
do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
{
	return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
}

/*
 * Common code for symlink creation.
 *
 *	l	calling lwp; may be NULL only when fdat is AT_FDCWD
 *		(asserted below).
 *	patharg	contents of the new symlink (the target string).
 *	fdat	directory descriptor for resolving "link".
 *	link	name of the symlink to create.
 *	seg	UIO_USERSPACE: both strings are copied in from user space;
 *		otherwise they are used as kernel strings.
 *
 * Returns 0 or an errno; EEXIST if "link" already resolves to a vnode.
 */
static int
do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
    const char *link, enum uio_seg seg)
{
	struct proc *p = curproc;
	struct vattr vattr;
	char *path;
	int error;
	size_t len;
	struct pathbuf *linkpb;
	struct nameidata nd;

	KASSERT(l != NULL || fdat == AT_FDCWD);

	/* Buffer for the target string; released by PNBUF_PUT at out1. */
	path = PNBUF_GET();
	if (seg == UIO_USERSPACE) {
		if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0)
			goto out1;
		if ((error = pathbuf_copyin(link, &linkpb)) != 0)
			goto out1;
	} else {
		/* len includes the terminating NUL, as with copyinstr(). */
		len = strlen(patharg) + 1;
		KASSERT(len <= MAXPATHLEN);
		memcpy(path, patharg, len);
		linkpb = pathbuf_create(link);
		if (linkpb == NULL) {
			error = ENOMEM;
			goto out1;
		}
	}
	ktrkuser("symlink-target", path, len - 1);

	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out2;
	if (nd.ni_vp) {
		/* The name already exists: back out and report EEXIST. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		error = EEXIST;
		goto out2;
	}
	vattr_null(&vattr);
	vattr.va_type = VLNK;
	/* We will read cwdi->cwdi_cmask unlocked. */
	vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
	if (error == 0)
		vrele(nd.ni_vp);
	vput(nd.ni_dvp);
out2:
	pathbuf_destroy(linkpb);
out1:
	PNBUF_PUT(path);
	return (error);
}

/*
 * Make a symbolic link.
 */
/* ARGSUSED */
int
sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(const char *) link;
	} */

	return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
	    UIO_USERSPACE);
}

/*
 * symlinkat(2): path1 is the symlink contents, path2 is the new name,
 * resolved relative to fd.
 */
int
sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path1;
		syscallarg(int) fd;
		syscallarg(const char *) path2;
	} */

	return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
	    SCARG(uap, path2), UIO_USERSPACE);
}

/*
 * Delete a whiteout from the filesystem.
2736 */ 2737 /* ARGSUSED */ 2738 int 2739 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2740 { 2741 /* { 2742 syscallarg(const char *) path; 2743 } */ 2744 int error; 2745 struct pathbuf *pb; 2746 struct nameidata nd; 2747 2748 error = pathbuf_copyin(SCARG(uap, path), &pb); 2749 if (error) { 2750 return error; 2751 } 2752 2753 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2754 error = namei(&nd); 2755 if (error) { 2756 pathbuf_destroy(pb); 2757 return (error); 2758 } 2759 2760 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2761 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2762 if (nd.ni_dvp == nd.ni_vp) 2763 vrele(nd.ni_dvp); 2764 else 2765 vput(nd.ni_dvp); 2766 if (nd.ni_vp) 2767 vrele(nd.ni_vp); 2768 pathbuf_destroy(pb); 2769 return (EEXIST); 2770 } 2771 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2772 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2773 vput(nd.ni_dvp); 2774 pathbuf_destroy(pb); 2775 return (error); 2776 } 2777 2778 /* 2779 * Delete a name from the filesystem. 
*/
/* ARGSUSED */
int
sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */

	return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
}

/*
 * unlinkat(2): the user-supplied flag word is handed to do_sys_unlinkat()
 * unmodified; that function only tests AT_REMOVEDIR.
 */
int
sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const char *) path;
		syscallarg(int) flag;
	} */

	return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
	    SCARG(uap, flag), UIO_USERSPACE);
}

/*
 * Remove the name "arg", resolved relative to AT_FDCWD, with no flags.
 * "seg" says whether "arg" is a user or kernel string.  No lwp is passed
 * down (NULL).
 */
int
do_sys_unlink(const char *arg, enum uio_seg seg)
{
	return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
}

/*
 * Common code for unlink/unlinkat (and rmdir-style removal when
 * AT_REMOVEDIR is set in "flags").
 *
 *	l	calling lwp; may be NULL only when fdat is AT_FDCWD
 *		(asserted below).
 *	fdat	directory descriptor for resolving "arg".
 *	arg	path to remove, in the address space named by "seg".
 *	flags	only AT_REMOVEDIR is examined.
 *
 * Returns 0 or an errno.  Errors generated here: ENOMEM (no path string
 * copy), EBUSY (filesystem root or mount point), EINVAL (target is its
 * own parent, i.e. "."), EPERM (directory without AT_REMOVEDIR), ENOTDIR
 * (AT_REMOVEDIR on a non-directory).
 */
static int
do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
    enum uio_seg seg)
{
	struct vnode *vp;
	int error;
	struct pathbuf *pb;
	struct nameidata nd;
	const char *pathstring;

	KASSERT(l != NULL || fdat == AT_FDCWD);

	error = pathbuf_maybe_copyin(arg, seg, &pb);
	if (error) {
		return error;
	}
	/* String copy of the path; handed to veriexec_removechk() below. */
	pathstring = pathbuf_stringcopy_get(pb);
	if (pathstring == NULL) {
		pathbuf_destroy(pb);
		return ENOMEM;
	}

	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if ((vp->v_vflag & VV_ROOT) != 0) {
		error = EBUSY;
		goto abort;
	}

	/* Nor can a directory that has something mounted on it. */
	if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto abort;
	}

	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto abort;
	}

	/*
	 * AT_REMOVEDIR is required to remove a directory
	 */
	if (vp->v_type == VDIR) {
		if (!(flags & AT_REMOVEDIR)) {
			error = EPERM;
			goto abort;
		} else {
			/*
			 * NOTE(review): only the parent is released here;
			 * presumably VOP_RMDIR disposes of vp itself --
			 * confirm against the VOP_RMDIR contract.
			 */
			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			goto out;
		}
	}

	/*
	 * Starting here we only deal with non directories.
	 */
	if (flags & AT_REMOVEDIR) {
		error = ENOTDIR;
		goto abort;
	}

#if NVERIEXEC > 0
	/* Handle remove requests for veriexec entries. */
	if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
		goto abort;
	}
#endif /* NVERIEXEC > 0 */

#ifdef FILEASSOC
	(void)fileassoc_file_delete(vp);
#endif /* FILEASSOC */
	/*
	 * NOTE(review): as with VOP_RMDIR above, only the parent is
	 * released after VOP_REMOVE -- confirm vp is consumed by the VOP.
	 */
	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vput(nd.ni_dvp);
	goto out;

abort:
	/* Back out of the lookup: release the parent and the target. */
	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	vput(vp);

out:
	pathbuf_stringcopy_put(pb, pathstring);
	pathbuf_destroy(pb);
	return (error);
}

/*
 * Reposition read/write file offset.
2912 */ 2913 int 2914 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2915 { 2916 /* { 2917 syscallarg(int) fd; 2918 syscallarg(int) pad; 2919 syscallarg(off_t) offset; 2920 syscallarg(int) whence; 2921 } */ 2922 file_t *fp; 2923 int error, fd; 2924 2925 switch (SCARG(uap, whence)) { 2926 case SEEK_CUR: 2927 case SEEK_END: 2928 case SEEK_SET: 2929 break; 2930 default: 2931 return EINVAL; 2932 } 2933 2934 fd = SCARG(uap, fd); 2935 2936 if ((fp = fd_getfile(fd)) == NULL) 2937 return (EBADF); 2938 2939 if (fp->f_ops->fo_seek == NULL) { 2940 error = ESPIPE; 2941 goto out; 2942 } 2943 2944 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 2945 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 2946 out: 2947 fd_putfile(fd); 2948 return (error); 2949 } 2950 2951 /* 2952 * Positional read system call. 2953 */ 2954 int 2955 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2956 { 2957 /* { 2958 syscallarg(int) fd; 2959 syscallarg(void *) buf; 2960 syscallarg(size_t) nbyte; 2961 syscallarg(off_t) offset; 2962 } */ 2963 file_t *fp; 2964 off_t offset; 2965 int error, fd = SCARG(uap, fd); 2966 2967 if ((fp = fd_getfile(fd)) == NULL) 2968 return (EBADF); 2969 2970 if ((fp->f_flag & FREAD) == 0) { 2971 fd_putfile(fd); 2972 return (EBADF); 2973 } 2974 2975 if (fp->f_ops->fo_seek == NULL) { 2976 error = ESPIPE; 2977 goto out; 2978 } 2979 2980 offset = SCARG(uap, offset); 2981 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 2982 if (error) 2983 goto out; 2984 2985 /* dofileread() will unuse the descriptor for us */ 2986 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2987 &offset, 0, retval)); 2988 2989 out: 2990 fd_putfile(fd); 2991 return (error); 2992 } 2993 2994 /* 2995 * Positional scatter read system call. 
2996 */ 2997 int 2998 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2999 { 3000 /* { 3001 syscallarg(int) fd; 3002 syscallarg(const struct iovec *) iovp; 3003 syscallarg(int) iovcnt; 3004 syscallarg(off_t) offset; 3005 } */ 3006 off_t offset = SCARG(uap, offset); 3007 3008 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3009 SCARG(uap, iovcnt), &offset, 0, retval); 3010 } 3011 3012 /* 3013 * Positional write system call. 3014 */ 3015 int 3016 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 3017 { 3018 /* { 3019 syscallarg(int) fd; 3020 syscallarg(const void *) buf; 3021 syscallarg(size_t) nbyte; 3022 syscallarg(off_t) offset; 3023 } */ 3024 file_t *fp; 3025 off_t offset; 3026 int error, fd = SCARG(uap, fd); 3027 3028 if ((fp = fd_getfile(fd)) == NULL) 3029 return (EBADF); 3030 3031 if ((fp->f_flag & FWRITE) == 0) { 3032 fd_putfile(fd); 3033 return (EBADF); 3034 } 3035 3036 if (fp->f_ops->fo_seek == NULL) { 3037 error = ESPIPE; 3038 goto out; 3039 } 3040 3041 offset = SCARG(uap, offset); 3042 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3043 if (error) 3044 goto out; 3045 3046 /* dofilewrite() will unuse the descriptor for us */ 3047 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3048 &offset, 0, retval)); 3049 3050 out: 3051 fd_putfile(fd); 3052 return (error); 3053 } 3054 3055 /* 3056 * Positional gather write system call. 3057 */ 3058 int 3059 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3060 { 3061 /* { 3062 syscallarg(int) fd; 3063 syscallarg(const struct iovec *) iovp; 3064 syscallarg(int) iovcnt; 3065 syscallarg(off_t) offset; 3066 } */ 3067 off_t offset = SCARG(uap, offset); 3068 3069 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3070 SCARG(uap, iovcnt), &offset, 0, retval); 3071 } 3072 3073 /* 3074 * Check access permissions. 
3075 */ 3076 int 3077 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3078 { 3079 /* { 3080 syscallarg(const char *) path; 3081 syscallarg(int) flags; 3082 } */ 3083 3084 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3085 SCARG(uap, flags), 0); 3086 } 3087 3088 int 3089 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3090 int mode, int flags) 3091 { 3092 kauth_cred_t cred; 3093 struct vnode *vp; 3094 int error, nd_flag, vmode; 3095 struct pathbuf *pb; 3096 struct nameidata nd; 3097 3098 CTASSERT(F_OK == 0); 3099 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3100 /* nonsense mode */ 3101 return EINVAL; 3102 } 3103 3104 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3105 if (flags & AT_SYMLINK_NOFOLLOW) 3106 nd_flag &= ~FOLLOW; 3107 3108 error = pathbuf_copyin(path, &pb); 3109 if (error) 3110 return error; 3111 3112 NDINIT(&nd, LOOKUP, nd_flag, pb); 3113 3114 /* Override default credentials */ 3115 cred = kauth_cred_dup(l->l_cred); 3116 if (!(flags & AT_EACCESS)) { 3117 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3118 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3119 } 3120 nd.ni_cnd.cn_cred = cred; 3121 3122 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3123 pathbuf_destroy(pb); 3124 goto out; 3125 } 3126 vp = nd.ni_vp; 3127 pathbuf_destroy(pb); 3128 3129 /* Flags == 0 means only check for existence. 
*/ 3130 if (mode) { 3131 vmode = 0; 3132 if (mode & R_OK) 3133 vmode |= VREAD; 3134 if (mode & W_OK) 3135 vmode |= VWRITE; 3136 if (mode & X_OK) 3137 vmode |= VEXEC; 3138 3139 error = VOP_ACCESS(vp, vmode, cred); 3140 if (!error && (vmode & VWRITE)) 3141 error = vn_writechk(vp); 3142 } 3143 vput(vp); 3144 out: 3145 kauth_cred_free(cred); 3146 return (error); 3147 } 3148 3149 int 3150 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3151 register_t *retval) 3152 { 3153 /* { 3154 syscallarg(int) fd; 3155 syscallarg(const char *) path; 3156 syscallarg(int) amode; 3157 syscallarg(int) flag; 3158 } */ 3159 3160 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3161 SCARG(uap, amode), SCARG(uap, flag)); 3162 } 3163 3164 /* 3165 * Common code for all sys_stat functions, including compat versions. 3166 */ 3167 int 3168 do_sys_stat(const char *userpath, unsigned int nd_flag, 3169 struct stat *sb) 3170 { 3171 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3172 } 3173 3174 int 3175 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3176 unsigned int nd_flag, struct stat *sb) 3177 { 3178 int error; 3179 struct pathbuf *pb; 3180 struct nameidata nd; 3181 3182 KASSERT(l != NULL || fdat == AT_FDCWD); 3183 3184 error = pathbuf_copyin(userpath, &pb); 3185 if (error) { 3186 return error; 3187 } 3188 3189 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3190 3191 error = fd_nameiat(l, fdat, &nd); 3192 if (error != 0) { 3193 pathbuf_destroy(pb); 3194 return error; 3195 } 3196 error = vn_stat(nd.ni_vp, sb); 3197 vput(nd.ni_vp); 3198 pathbuf_destroy(pb); 3199 return error; 3200 } 3201 3202 /* 3203 * Get file status; this version follows links. 
3204 */ 3205 /* ARGSUSED */ 3206 int 3207 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3208 { 3209 /* { 3210 syscallarg(const char *) path; 3211 syscallarg(struct stat *) ub; 3212 } */ 3213 struct stat sb; 3214 int error; 3215 3216 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3217 if (error) 3218 return error; 3219 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3220 } 3221 3222 /* 3223 * Get file status; this version does not follow links. 3224 */ 3225 /* ARGSUSED */ 3226 int 3227 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3228 { 3229 /* { 3230 syscallarg(const char *) path; 3231 syscallarg(struct stat *) ub; 3232 } */ 3233 struct stat sb; 3234 int error; 3235 3236 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3237 if (error) 3238 return error; 3239 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3240 } 3241 3242 int 3243 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3244 register_t *retval) 3245 { 3246 /* { 3247 syscallarg(int) fd; 3248 syscallarg(const char *) path; 3249 syscallarg(struct stat *) buf; 3250 syscallarg(int) flag; 3251 } */ 3252 unsigned int nd_flag; 3253 struct stat sb; 3254 int error; 3255 3256 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3257 nd_flag = NOFOLLOW; 3258 else 3259 nd_flag = FOLLOW; 3260 3261 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3262 &sb); 3263 if (error) 3264 return error; 3265 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3266 } 3267 3268 static int 3269 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3270 { 3271 int error; 3272 struct pathbuf *pb; 3273 struct nameidata nd; 3274 3275 error = pathbuf_copyin(path, &pb); 3276 if (error) { 3277 return error; 3278 } 3279 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3280 if ((error = namei(&nd)) != 0) { 3281 pathbuf_destroy(pb); 3282 return error; 3283 } 3284 error = 
VOP_PATHCONF(nd.ni_vp, name, retval); 3285 vput(nd.ni_vp); 3286 pathbuf_destroy(pb); 3287 return error; 3288 } 3289 3290 /* 3291 * Get configurable pathname variables. 3292 */ 3293 /* ARGSUSED */ 3294 int 3295 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3296 register_t *retval) 3297 { 3298 /* { 3299 syscallarg(const char *) path; 3300 syscallarg(int) name; 3301 } */ 3302 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3303 FOLLOW); 3304 } 3305 3306 /* ARGSUSED */ 3307 int 3308 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3309 register_t *retval) 3310 { 3311 /* { 3312 syscallarg(const char *) path; 3313 syscallarg(int) name; 3314 } */ 3315 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3316 NOFOLLOW); 3317 } 3318 3319 /* 3320 * Return target name of a symbolic link. 3321 */ 3322 /* ARGSUSED */ 3323 int 3324 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3325 register_t *retval) 3326 { 3327 /* { 3328 syscallarg(const char *) path; 3329 syscallarg(char *) buf; 3330 syscallarg(size_t) count; 3331 } */ 3332 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3333 SCARG(uap, buf), SCARG(uap, count), retval); 3334 } 3335 3336 static int 3337 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3338 size_t count, register_t *retval) 3339 { 3340 struct vnode *vp; 3341 struct iovec aiov; 3342 struct uio auio; 3343 int error; 3344 struct pathbuf *pb; 3345 struct nameidata nd; 3346 3347 error = pathbuf_copyin(path, &pb); 3348 if (error) { 3349 return error; 3350 } 3351 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3352 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3353 pathbuf_destroy(pb); 3354 return error; 3355 } 3356 vp = nd.ni_vp; 3357 pathbuf_destroy(pb); 3358 if (vp->v_type != VLNK) 3359 error = EINVAL; 3360 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3361 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3362 
aiov.iov_base = buf; 3363 aiov.iov_len = count; 3364 auio.uio_iov = &aiov; 3365 auio.uio_iovcnt = 1; 3366 auio.uio_offset = 0; 3367 auio.uio_rw = UIO_READ; 3368 KASSERT(l == curlwp); 3369 auio.uio_vmspace = l->l_proc->p_vmspace; 3370 auio.uio_resid = count; 3371 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3372 *retval = count - auio.uio_resid; 3373 } 3374 vput(vp); 3375 return (error); 3376 } 3377 3378 int 3379 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3380 register_t *retval) 3381 { 3382 /* { 3383 syscallarg(int) fd; 3384 syscallarg(const char *) path; 3385 syscallarg(char *) buf; 3386 syscallarg(size_t) bufsize; 3387 } */ 3388 3389 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3390 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3391 } 3392 3393 /* 3394 * Change flags of a file given a path name. 3395 */ 3396 /* ARGSUSED */ 3397 int 3398 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3399 { 3400 /* { 3401 syscallarg(const char *) path; 3402 syscallarg(u_long) flags; 3403 } */ 3404 struct vnode *vp; 3405 int error; 3406 3407 error = namei_simple_user(SCARG(uap, path), 3408 NSM_FOLLOW_TRYEMULROOT, &vp); 3409 if (error != 0) 3410 return (error); 3411 error = change_flags(vp, SCARG(uap, flags), l); 3412 vput(vp); 3413 return (error); 3414 } 3415 3416 /* 3417 * Change flags of a file given a file descriptor. 
3418 */ 3419 /* ARGSUSED */ 3420 int 3421 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3422 { 3423 /* { 3424 syscallarg(int) fd; 3425 syscallarg(u_long) flags; 3426 } */ 3427 struct vnode *vp; 3428 file_t *fp; 3429 int error; 3430 3431 /* fd_getvnode() will use the descriptor for us */ 3432 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3433 return (error); 3434 vp = fp->f_vnode; 3435 error = change_flags(vp, SCARG(uap, flags), l); 3436 VOP_UNLOCK(vp); 3437 fd_putfile(SCARG(uap, fd)); 3438 return (error); 3439 } 3440 3441 /* 3442 * Change flags of a file given a path name; this version does 3443 * not follow links. 3444 */ 3445 int 3446 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3447 { 3448 /* { 3449 syscallarg(const char *) path; 3450 syscallarg(u_long) flags; 3451 } */ 3452 struct vnode *vp; 3453 int error; 3454 3455 error = namei_simple_user(SCARG(uap, path), 3456 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3457 if (error != 0) 3458 return (error); 3459 error = change_flags(vp, SCARG(uap, flags), l); 3460 vput(vp); 3461 return (error); 3462 } 3463 3464 /* 3465 * Common routine to change flags of a file. 3466 */ 3467 int 3468 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3469 { 3470 struct vattr vattr; 3471 int error; 3472 3473 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3474 3475 vattr_null(&vattr); 3476 vattr.va_flags = flags; 3477 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3478 3479 return (error); 3480 } 3481 3482 /* 3483 * Change mode of a file given path name; this version follows links. 
3484 */ 3485 /* ARGSUSED */ 3486 int 3487 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3488 { 3489 /* { 3490 syscallarg(const char *) path; 3491 syscallarg(int) mode; 3492 } */ 3493 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3494 SCARG(uap, mode), 0); 3495 } 3496 3497 int 3498 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3499 { 3500 int error; 3501 struct vnode *vp; 3502 namei_simple_flags_t ns_flag; 3503 3504 if (flags & AT_SYMLINK_NOFOLLOW) 3505 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3506 else 3507 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3508 3509 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3510 if (error != 0) 3511 return error; 3512 3513 error = change_mode(vp, mode, l); 3514 3515 vrele(vp); 3516 3517 return (error); 3518 } 3519 3520 /* 3521 * Change mode of a file given a file descriptor. 3522 */ 3523 /* ARGSUSED */ 3524 int 3525 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3526 { 3527 /* { 3528 syscallarg(int) fd; 3529 syscallarg(int) mode; 3530 } */ 3531 file_t *fp; 3532 int error; 3533 3534 /* fd_getvnode() will use the descriptor for us */ 3535 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3536 return (error); 3537 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3538 fd_putfile(SCARG(uap, fd)); 3539 return (error); 3540 } 3541 3542 int 3543 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3544 register_t *retval) 3545 { 3546 /* { 3547 syscallarg(int) fd; 3548 syscallarg(const char *) path; 3549 syscallarg(int) mode; 3550 syscallarg(int) flag; 3551 } */ 3552 3553 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3554 SCARG(uap, mode), SCARG(uap, flag)); 3555 } 3556 3557 /* 3558 * Change mode of a file given path name; this version does not follow links. 
3559 */ 3560 /* ARGSUSED */ 3561 int 3562 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3563 { 3564 /* { 3565 syscallarg(const char *) path; 3566 syscallarg(int) mode; 3567 } */ 3568 int error; 3569 struct vnode *vp; 3570 3571 error = namei_simple_user(SCARG(uap, path), 3572 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3573 if (error != 0) 3574 return (error); 3575 3576 error = change_mode(vp, SCARG(uap, mode), l); 3577 3578 vrele(vp); 3579 return (error); 3580 } 3581 3582 /* 3583 * Common routine to set mode given a vnode. 3584 */ 3585 static int 3586 change_mode(struct vnode *vp, int mode, struct lwp *l) 3587 { 3588 struct vattr vattr; 3589 int error; 3590 3591 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3592 vattr_null(&vattr); 3593 vattr.va_mode = mode & ALLPERMS; 3594 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3595 VOP_UNLOCK(vp); 3596 return (error); 3597 } 3598 3599 /* 3600 * Set ownership given a path name; this version follows links. 3601 */ 3602 /* ARGSUSED */ 3603 int 3604 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3605 { 3606 /* { 3607 syscallarg(const char *) path; 3608 syscallarg(uid_t) uid; 3609 syscallarg(gid_t) gid; 3610 } */ 3611 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3612 SCARG(uap, gid), 0); 3613 } 3614 3615 int 3616 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3617 gid_t gid, int flags) 3618 { 3619 int error; 3620 struct vnode *vp; 3621 namei_simple_flags_t ns_flag; 3622 3623 if (flags & AT_SYMLINK_NOFOLLOW) 3624 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3625 else 3626 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3627 3628 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3629 if (error != 0) 3630 return error; 3631 3632 error = change_owner(vp, uid, gid, l, 0); 3633 3634 vrele(vp); 3635 3636 return (error); 3637 } 3638 3639 /* 3640 * Set ownership given a path name; this version follows links. 3641 * Provides POSIX semantics. 
3642 */ 3643 /* ARGSUSED */ 3644 int 3645 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3646 { 3647 /* { 3648 syscallarg(const char *) path; 3649 syscallarg(uid_t) uid; 3650 syscallarg(gid_t) gid; 3651 } */ 3652 int error; 3653 struct vnode *vp; 3654 3655 error = namei_simple_user(SCARG(uap, path), 3656 NSM_FOLLOW_TRYEMULROOT, &vp); 3657 if (error != 0) 3658 return (error); 3659 3660 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3661 3662 vrele(vp); 3663 return (error); 3664 } 3665 3666 /* 3667 * Set ownership given a file descriptor. 3668 */ 3669 /* ARGSUSED */ 3670 int 3671 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3672 { 3673 /* { 3674 syscallarg(int) fd; 3675 syscallarg(uid_t) uid; 3676 syscallarg(gid_t) gid; 3677 } */ 3678 int error; 3679 file_t *fp; 3680 3681 /* fd_getvnode() will use the descriptor for us */ 3682 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3683 return (error); 3684 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3685 l, 0); 3686 fd_putfile(SCARG(uap, fd)); 3687 return (error); 3688 } 3689 3690 int 3691 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3692 register_t *retval) 3693 { 3694 /* { 3695 syscallarg(int) fd; 3696 syscallarg(const char *) path; 3697 syscallarg(uid_t) owner; 3698 syscallarg(gid_t) group; 3699 syscallarg(int) flag; 3700 } */ 3701 3702 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3703 SCARG(uap, owner), SCARG(uap, group), 3704 SCARG(uap, flag)); 3705 } 3706 3707 /* 3708 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 
3709 */ 3710 /* ARGSUSED */ 3711 int 3712 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3713 { 3714 /* { 3715 syscallarg(int) fd; 3716 syscallarg(uid_t) uid; 3717 syscallarg(gid_t) gid; 3718 } */ 3719 int error; 3720 file_t *fp; 3721 3722 /* fd_getvnode() will use the descriptor for us */ 3723 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3724 return (error); 3725 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3726 l, 1); 3727 fd_putfile(SCARG(uap, fd)); 3728 return (error); 3729 } 3730 3731 /* 3732 * Set ownership given a path name; this version does not follow links. 3733 */ 3734 /* ARGSUSED */ 3735 int 3736 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3737 { 3738 /* { 3739 syscallarg(const char *) path; 3740 syscallarg(uid_t) uid; 3741 syscallarg(gid_t) gid; 3742 } */ 3743 int error; 3744 struct vnode *vp; 3745 3746 error = namei_simple_user(SCARG(uap, path), 3747 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3748 if (error != 0) 3749 return (error); 3750 3751 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3752 3753 vrele(vp); 3754 return (error); 3755 } 3756 3757 /* 3758 * Set ownership given a path name; this version does not follow links. 3759 * Provides POSIX/XPG semantics. 3760 */ 3761 /* ARGSUSED */ 3762 int 3763 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3764 { 3765 /* { 3766 syscallarg(const char *) path; 3767 syscallarg(uid_t) uid; 3768 syscallarg(gid_t) gid; 3769 } */ 3770 int error; 3771 struct vnode *vp; 3772 3773 error = namei_simple_user(SCARG(uap, path), 3774 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3775 if (error != 0) 3776 return (error); 3777 3778 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3779 3780 vrele(vp); 3781 return (error); 3782 } 3783 3784 /* 3785 * Common routine to set ownership given a vnode. 
3786 */ 3787 static int 3788 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3789 int posix_semantics) 3790 { 3791 struct vattr vattr; 3792 mode_t newmode; 3793 int error; 3794 3795 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3796 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3797 goto out; 3798 3799 #define CHANGED(x) ((int)(x) != -1) 3800 newmode = vattr.va_mode; 3801 if (posix_semantics) { 3802 /* 3803 * POSIX/XPG semantics: if the caller is not the super-user, 3804 * clear set-user-id and set-group-id bits. Both POSIX and 3805 * the XPG consider the behaviour for calls by the super-user 3806 * implementation-defined; we leave the set-user-id and set- 3807 * group-id settings intact in that case. 3808 */ 3809 if (vattr.va_mode & S_ISUID) { 3810 if (kauth_authorize_vnode(l->l_cred, 3811 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3812 newmode &= ~S_ISUID; 3813 } 3814 if (vattr.va_mode & S_ISGID) { 3815 if (kauth_authorize_vnode(l->l_cred, 3816 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3817 newmode &= ~S_ISGID; 3818 } 3819 } else { 3820 /* 3821 * NetBSD semantics: when changing owner and/or group, 3822 * clear the respective bit(s). 3823 */ 3824 if (CHANGED(uid)) 3825 newmode &= ~S_ISUID; 3826 if (CHANGED(gid)) 3827 newmode &= ~S_ISGID; 3828 } 3829 /* Update va_mode iff altered. */ 3830 if (vattr.va_mode == newmode) 3831 newmode = VNOVAL; 3832 3833 vattr_null(&vattr); 3834 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3835 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3836 vattr.va_mode = newmode; 3837 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3838 #undef CHANGED 3839 3840 out: 3841 VOP_UNLOCK(vp); 3842 return (error); 3843 } 3844 3845 /* 3846 * Set the access and modification times given a path name; this 3847 * version follows links. 
3848 */ 3849 /* ARGSUSED */ 3850 int 3851 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3852 register_t *retval) 3853 { 3854 /* { 3855 syscallarg(const char *) path; 3856 syscallarg(const struct timeval *) tptr; 3857 } */ 3858 3859 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3860 SCARG(uap, tptr), UIO_USERSPACE); 3861 } 3862 3863 /* 3864 * Set the access and modification times given a file descriptor. 3865 */ 3866 /* ARGSUSED */ 3867 int 3868 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3869 register_t *retval) 3870 { 3871 /* { 3872 syscallarg(int) fd; 3873 syscallarg(const struct timeval *) tptr; 3874 } */ 3875 int error; 3876 file_t *fp; 3877 3878 /* fd_getvnode() will use the descriptor for us */ 3879 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3880 return (error); 3881 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3882 UIO_USERSPACE); 3883 fd_putfile(SCARG(uap, fd)); 3884 return (error); 3885 } 3886 3887 int 3888 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3889 register_t *retval) 3890 { 3891 /* { 3892 syscallarg(int) fd; 3893 syscallarg(const struct timespec *) tptr; 3894 } */ 3895 int error; 3896 file_t *fp; 3897 3898 /* fd_getvnode() will use the descriptor for us */ 3899 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3900 return (error); 3901 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3902 SCARG(uap, tptr), UIO_USERSPACE); 3903 fd_putfile(SCARG(uap, fd)); 3904 return (error); 3905 } 3906 3907 /* 3908 * Set the access and modification times given a path name; this 3909 * version does not follow links. 
3910 */ 3911 int 3912 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3913 register_t *retval) 3914 { 3915 /* { 3916 syscallarg(const char *) path; 3917 syscallarg(const struct timeval *) tptr; 3918 } */ 3919 3920 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3921 SCARG(uap, tptr), UIO_USERSPACE); 3922 } 3923 3924 int 3925 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3926 register_t *retval) 3927 { 3928 /* { 3929 syscallarg(int) fd; 3930 syscallarg(const char *) path; 3931 syscallarg(const struct timespec *) tptr; 3932 syscallarg(int) flag; 3933 } */ 3934 int follow; 3935 const struct timespec *tptr; 3936 int error; 3937 3938 tptr = SCARG(uap, tptr); 3939 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3940 3941 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3942 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3943 3944 return error; 3945 } 3946 3947 /* 3948 * Common routine to set access and modification times given a vnode. 3949 */ 3950 int 3951 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3952 const struct timespec *tptr, enum uio_seg seg) 3953 { 3954 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3955 } 3956 3957 int 3958 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3959 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3960 { 3961 struct vattr vattr; 3962 int error, dorele = 0; 3963 namei_simple_flags_t sflags; 3964 bool vanull, setbirthtime; 3965 struct timespec ts[2]; 3966 3967 KASSERT(l != NULL || fdat == AT_FDCWD); 3968 3969 /* 3970 * I have checked all callers and they pass either FOLLOW, 3971 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3972 * is 0. More to the point, they don't pass anything else. 3973 * Let's keep it that way at least until the namei interfaces 3974 * are fully sanitized. 
3975 */ 3976 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3977 sflags = (flag == FOLLOW) ? 3978 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3979 3980 if (tptr == NULL) { 3981 vanull = true; 3982 nanotime(&ts[0]); 3983 ts[1] = ts[0]; 3984 } else { 3985 vanull = false; 3986 if (seg != UIO_SYSSPACE) { 3987 error = copyin(tptr, ts, sizeof (ts)); 3988 if (error != 0) 3989 return error; 3990 } else { 3991 ts[0] = tptr[0]; 3992 ts[1] = tptr[1]; 3993 } 3994 } 3995 3996 if (ts[0].tv_nsec == UTIME_NOW) { 3997 nanotime(&ts[0]); 3998 if (ts[1].tv_nsec == UTIME_NOW) { 3999 vanull = true; 4000 ts[1] = ts[0]; 4001 } 4002 } else if (ts[1].tv_nsec == UTIME_NOW) 4003 nanotime(&ts[1]); 4004 4005 if (vp == NULL) { 4006 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4007 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4008 if (error != 0) 4009 return error; 4010 dorele = 1; 4011 } 4012 4013 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4014 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4015 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4016 vattr_null(&vattr); 4017 4018 if (ts[0].tv_nsec != UTIME_OMIT) 4019 vattr.va_atime = ts[0]; 4020 4021 if (ts[1].tv_nsec != UTIME_OMIT) { 4022 vattr.va_mtime = ts[1]; 4023 if (setbirthtime) 4024 vattr.va_birthtime = ts[1]; 4025 } 4026 4027 if (vanull) 4028 vattr.va_vaflags |= VA_UTIMES_NULL; 4029 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4030 VOP_UNLOCK(vp); 4031 4032 if (dorele != 0) 4033 vrele(vp); 4034 4035 return error; 4036 } 4037 4038 int 4039 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4040 const struct timeval *tptr, enum uio_seg seg) 4041 { 4042 struct timespec ts[2]; 4043 struct timespec *tsptr = NULL; 4044 int error; 4045 4046 if (tptr != NULL) { 4047 struct timeval tv[2]; 4048 4049 if (seg != UIO_SYSSPACE) { 4050 error = copyin(tptr, tv, sizeof(tv)); 4051 if (error != 0) 4052 return error; 4053 tptr = tv; 4054 } 4055 4056 if ((tptr[0].tv_usec == UTIME_NOW) || 4057 
(tptr[0].tv_usec == UTIME_OMIT)) 4058 ts[0].tv_nsec = tptr[0].tv_usec; 4059 else { 4060 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4061 return EINVAL; 4062 4063 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4064 } 4065 4066 if ((tptr[1].tv_usec == UTIME_NOW) || 4067 (tptr[1].tv_usec == UTIME_OMIT)) 4068 ts[1].tv_nsec = tptr[1].tv_usec; 4069 else { 4070 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4071 return EINVAL; 4072 4073 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4074 } 4075 4076 tsptr = &ts[0]; 4077 } 4078 4079 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4080 } 4081 4082 /* 4083 * Truncate a file given its path name. 4084 */ 4085 /* ARGSUSED */ 4086 int 4087 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4088 { 4089 /* { 4090 syscallarg(const char *) path; 4091 syscallarg(int) pad; 4092 syscallarg(off_t) length; 4093 } */ 4094 struct vnode *vp; 4095 struct vattr vattr; 4096 int error; 4097 4098 if (SCARG(uap, length) < 0) 4099 return EINVAL; 4100 4101 error = namei_simple_user(SCARG(uap, path), 4102 NSM_FOLLOW_TRYEMULROOT, &vp); 4103 if (error != 0) 4104 return (error); 4105 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4106 if (vp->v_type == VDIR) 4107 error = EISDIR; 4108 else if ((error = vn_writechk(vp)) == 0 && 4109 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4110 vattr_null(&vattr); 4111 vattr.va_size = SCARG(uap, length); 4112 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4113 } 4114 vput(vp); 4115 return (error); 4116 } 4117 4118 /* 4119 * Truncate a file given a file descriptor. 
4120 */ 4121 /* ARGSUSED */ 4122 int 4123 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4124 { 4125 /* { 4126 syscallarg(int) fd; 4127 syscallarg(int) pad; 4128 syscallarg(off_t) length; 4129 } */ 4130 struct vattr vattr; 4131 struct vnode *vp; 4132 file_t *fp; 4133 int error; 4134 4135 if (SCARG(uap, length) < 0) 4136 return EINVAL; 4137 4138 /* fd_getvnode() will use the descriptor for us */ 4139 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4140 return (error); 4141 if ((fp->f_flag & FWRITE) == 0) { 4142 error = EINVAL; 4143 goto out; 4144 } 4145 vp = fp->f_vnode; 4146 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4147 if (vp->v_type == VDIR) 4148 error = EISDIR; 4149 else if ((error = vn_writechk(vp)) == 0) { 4150 vattr_null(&vattr); 4151 vattr.va_size = SCARG(uap, length); 4152 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 4153 } 4154 VOP_UNLOCK(vp); 4155 out: 4156 fd_putfile(SCARG(uap, fd)); 4157 return (error); 4158 } 4159 4160 /* 4161 * Sync an open file. 4162 */ 4163 /* ARGSUSED */ 4164 int 4165 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4166 { 4167 /* { 4168 syscallarg(int) fd; 4169 } */ 4170 struct vnode *vp; 4171 file_t *fp; 4172 int error; 4173 4174 /* fd_getvnode() will use the descriptor for us */ 4175 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4176 return (error); 4177 vp = fp->f_vnode; 4178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4179 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4180 VOP_UNLOCK(vp); 4181 fd_putfile(SCARG(uap, fd)); 4182 return (error); 4183 } 4184 4185 /* 4186 * Sync a range of file data. API modeled after that found in AIX. 4187 * 4188 * FDATASYNC indicates that we need only save enough metadata to be able 4189 * to re-read the written data. 
4190 */ 4191 /* ARGSUSED */ 4192 int 4193 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4194 { 4195 /* { 4196 syscallarg(int) fd; 4197 syscallarg(int) flags; 4198 syscallarg(off_t) start; 4199 syscallarg(off_t) length; 4200 } */ 4201 struct vnode *vp; 4202 file_t *fp; 4203 int flags, nflags; 4204 off_t s, e, len; 4205 int error; 4206 4207 /* fd_getvnode() will use the descriptor for us */ 4208 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4209 return (error); 4210 4211 if ((fp->f_flag & FWRITE) == 0) { 4212 error = EBADF; 4213 goto out; 4214 } 4215 4216 flags = SCARG(uap, flags); 4217 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4218 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4219 error = EINVAL; 4220 goto out; 4221 } 4222 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4223 if (flags & FDATASYNC) 4224 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4225 else 4226 nflags = FSYNC_WAIT; 4227 if (flags & FDISKSYNC) 4228 nflags |= FSYNC_CACHE; 4229 4230 len = SCARG(uap, length); 4231 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4232 if (len) { 4233 s = SCARG(uap, start); 4234 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4235 error = EINVAL; 4236 goto out; 4237 } 4238 e = s + len; 4239 KASSERT(s <= e); 4240 } else { 4241 e = 0; 4242 s = 0; 4243 } 4244 4245 vp = fp->f_vnode; 4246 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4247 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4248 VOP_UNLOCK(vp); 4249 out: 4250 fd_putfile(SCARG(uap, fd)); 4251 return (error); 4252 } 4253 4254 /* 4255 * Sync the data of an open file. 
4256 */ 4257 /* ARGSUSED */ 4258 int 4259 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4260 { 4261 /* { 4262 syscallarg(int) fd; 4263 } */ 4264 struct vnode *vp; 4265 file_t *fp; 4266 int error; 4267 4268 /* fd_getvnode() will use the descriptor for us */ 4269 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4270 return (error); 4271 vp = fp->f_vnode; 4272 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4273 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4274 VOP_UNLOCK(vp); 4275 fd_putfile(SCARG(uap, fd)); 4276 return (error); 4277 } 4278 4279 /* 4280 * Rename files, (standard) BSD semantics frontend. 4281 */ 4282 /* ARGSUSED */ 4283 int 4284 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4285 { 4286 /* { 4287 syscallarg(const char *) from; 4288 syscallarg(const char *) to; 4289 } */ 4290 4291 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4292 SCARG(uap, to), UIO_USERSPACE, 0)); 4293 } 4294 4295 int 4296 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4297 register_t *retval) 4298 { 4299 /* { 4300 syscallarg(int) fromfd; 4301 syscallarg(const char *) from; 4302 syscallarg(int) tofd; 4303 syscallarg(const char *) to; 4304 } */ 4305 4306 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4307 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4308 } 4309 4310 /* 4311 * Rename files, POSIX semantics frontend. 4312 */ 4313 /* ARGSUSED */ 4314 int 4315 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4316 { 4317 /* { 4318 syscallarg(const char *) from; 4319 syscallarg(const char *) to; 4320 } */ 4321 4322 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4323 SCARG(uap, to), UIO_USERSPACE, 1)); 4324 } 4325 4326 /* 4327 * Rename files. Source and destination must either both be directories, 4328 * or both not be directories. 
If target is a directory, it must be empty. 4329 * If `from' and `to' refer to the same object, the value of the `retain' 4330 * argument is used to determine whether `from' will be 4331 * 4332 * (retain == 0) deleted unless `from' and `to' refer to the same 4333 * object in the file system's name space (BSD). 4334 * (retain == 1) always retained (POSIX). 4335 * 4336 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4337 */ 4338 int 4339 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4340 { 4341 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4342 } 4343 4344 static int 4345 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4346 const char *to, enum uio_seg seg, int retain) 4347 { 4348 struct pathbuf *fpb, *tpb; 4349 struct nameidata fnd, tnd; 4350 struct vnode *fdvp, *fvp; 4351 struct vnode *tdvp, *tvp; 4352 struct mount *mp, *tmp; 4353 int error; 4354 4355 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4356 4357 error = pathbuf_maybe_copyin(from, seg, &fpb); 4358 if (error) 4359 goto out0; 4360 KASSERT(fpb != NULL); 4361 4362 error = pathbuf_maybe_copyin(to, seg, &tpb); 4363 if (error) 4364 goto out1; 4365 KASSERT(tpb != NULL); 4366 4367 /* 4368 * Lookup from. 4369 * 4370 * XXX LOCKPARENT is wrong because we don't actually want it 4371 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4372 * insane, so for the time being we need to leave it like this. 4373 */ 4374 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4375 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4376 goto out2; 4377 4378 /* 4379 * Pull out the important results of the lookup, fdvp and fvp. 4380 * Of course, fvp is bogus because we're about to unlock fdvp. 
4381 */ 4382 fdvp = fnd.ni_dvp; 4383 fvp = fnd.ni_vp; 4384 mp = fdvp->v_mount; 4385 KASSERT(fdvp != NULL); 4386 KASSERT(fvp != NULL); 4387 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4388 /* 4389 * Bracket the operation with fstrans_start()/fstrans_done(). 4390 * 4391 * Inside the bracket this file system cannot be unmounted so 4392 * a vnode on this file system cannot change its v_mount. 4393 * A vnode on another file system may still change to dead mount. 4394 */ 4395 fstrans_start(mp); 4396 4397 /* 4398 * Make sure neither fdvp nor fvp is locked. 4399 */ 4400 if (fdvp != fvp) 4401 VOP_UNLOCK(fdvp); 4402 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4403 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4404 4405 /* 4406 * Reject renaming `.' and `..'. Can't do this until after 4407 * namei because we need namei's parsing to find the final 4408 * component name. (namei should just leave us with the final 4409 * component name and not look it up itself, but anyway...) 4410 * 4411 * This was here before because we used to relookup from 4412 * instead of to and relookup requires the caller to check 4413 * this, but now file systems may depend on this check, so we 4414 * must retain it until the file systems are all rototilled. 4415 */ 4416 if (((fnd.ni_cnd.cn_namelen == 1) && 4417 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4418 ((fnd.ni_cnd.cn_namelen == 2) && 4419 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4420 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4421 error = EINVAL; /* XXX EISDIR? */ 4422 goto abort0; 4423 } 4424 4425 /* 4426 * Lookup to. 4427 * 4428 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4429 * fvp here to decide whether to add CREATEDIR is a load of 4430 * bollocks because fvp might be the wrong node by now, since 4431 * fdvp is unlocked. 4432 * 4433 * XXX Why not pass CREATEDIR always? 4434 */ 4435 NDINIT(&tnd, RENAME, 4436 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4437 ((fvp->v_type == VDIR)? 
CREATEDIR : 0)), 4438 tpb); 4439 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4440 goto abort0; 4441 4442 /* 4443 * Pull out the important results of the lookup, tdvp and tvp. 4444 * Of course, tvp is bogus because we're about to unlock tdvp. 4445 */ 4446 tdvp = tnd.ni_dvp; 4447 tvp = tnd.ni_vp; 4448 KASSERT(tdvp != NULL); 4449 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4450 4451 if (fvp->v_type == VDIR) 4452 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4453 /* 4454 * Make sure neither tdvp nor tvp is locked. 4455 */ 4456 if (tdvp != tvp) 4457 VOP_UNLOCK(tdvp); 4458 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4459 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4460 4461 /* 4462 * Reject renaming onto `.' or `..'. relookup is unhappy with 4463 * these, which is why we must do this here. Once upon a time 4464 * we relooked up from instead of to, and consequently didn't 4465 * need this check, but now that we relookup to instead of 4466 * from, we need this; and we shall need it forever forward 4467 * until the VOP_RENAME protocol changes, because file systems 4468 * will no doubt begin to depend on this check. 4469 */ 4470 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4471 error = EISDIR; 4472 goto abort1; 4473 } 4474 if ((tnd.ni_cnd.cn_namelen == 2) && 4475 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4476 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4477 error = EINVAL; 4478 goto abort1; 4479 } 4480 4481 /* 4482 * Make sure the mount points match. Although we don't hold 4483 * any vnode locks, the v_mount on fdvp file system are stable. 4484 * 4485 * Unmounting another file system at an inopportune moment may 4486 * cause tdvp to disappear and change its v_mount to dead. 4487 * 4488 * So in either case different v_mount means cross-device rename. 
4489 */ 4490 KASSERT(mp != NULL); 4491 tmp = tdvp->v_mount; 4492 4493 if (mp != tmp) { 4494 error = EXDEV; 4495 goto abort1; 4496 } 4497 4498 /* 4499 * Take the vfs rename lock to avoid cross-directory screw cases. 4500 * Nothing is locked currently, so taking this lock is safe. 4501 */ 4502 error = VFS_RENAMELOCK_ENTER(mp); 4503 if (error) 4504 goto abort1; 4505 4506 /* 4507 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4508 * and nothing is locked except for the vfs rename lock. 4509 * 4510 * The next step is a little rain dance to conform to the 4511 * insane lock protocol, even though it does nothing to ward 4512 * off race conditions. 4513 * 4514 * We need tdvp and tvp to be locked. However, because we have 4515 * unlocked tdvp in order to hold no locks while we take the 4516 * vfs rename lock, tvp may be wrong here, and we can't safely 4517 * lock it even if the sensible file systems will just unlock 4518 * it straight away. Consequently, we must lock tdvp and then 4519 * relookup tvp to get it locked. 4520 * 4521 * Finally, because the VOP_RENAME protocol is brain-damaged 4522 * and various file systems insanely depend on the semantics of 4523 * this brain damage, the lookup of to must be the last lookup 4524 * before VOP_RENAME. 4525 */ 4526 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4527 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4528 if (error) 4529 goto abort2; 4530 4531 /* 4532 * Drop the old tvp and pick up the new one -- which might be 4533 * the same, but that doesn't matter to us. After this, tdvp 4534 * and tvp should both be locked. 4535 */ 4536 if (tvp != NULL) 4537 vrele(tvp); 4538 tvp = tnd.ni_vp; 4539 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4540 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4541 4542 /* 4543 * The old do_sys_rename had various consistency checks here 4544 * involving fvp and tvp. 
fvp is bogus already here, and tvp 4545 * will become bogus soon in any sensible file system, so the 4546 * only purpose in putting these checks here is to give lip 4547 * service to these screw cases and to acknowledge that they 4548 * exist, not actually to handle them, but here you go 4549 * anyway... 4550 */ 4551 4552 /* 4553 * Acknowledge that directories and non-directories aren't 4554 * suposed to mix. 4555 */ 4556 if (tvp != NULL) { 4557 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4558 error = ENOTDIR; 4559 goto abort3; 4560 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4561 error = EISDIR; 4562 goto abort3; 4563 } 4564 } 4565 4566 /* 4567 * Acknowledge some random screw case, among the dozens that 4568 * might arise. 4569 */ 4570 if (fvp == tdvp) { 4571 error = EINVAL; 4572 goto abort3; 4573 } 4574 4575 /* 4576 * Acknowledge that POSIX has a wacky screw case. 4577 * 4578 * XXX Eventually the retain flag needs to be passed on to 4579 * VOP_RENAME. 4580 */ 4581 if (fvp == tvp) { 4582 if (retain) { 4583 error = 0; 4584 goto abort3; 4585 } else if ((fdvp == tdvp) && 4586 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4587 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4588 fnd.ni_cnd.cn_namelen))) { 4589 error = 0; 4590 goto abort3; 4591 } 4592 } 4593 4594 /* 4595 * Make sure veriexec can screw us up. (But a race can screw 4596 * up veriexec, of course -- remember, fvp and (soon) tvp are 4597 * bogus.) 
4598 */ 4599 #if NVERIEXEC > 0 4600 { 4601 char *f1, *f2; 4602 size_t f1_len; 4603 size_t f2_len; 4604 4605 f1_len = fnd.ni_cnd.cn_namelen + 1; 4606 f1 = kmem_alloc(f1_len, KM_SLEEP); 4607 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4608 4609 f2_len = tnd.ni_cnd.cn_namelen + 1; 4610 f2 = kmem_alloc(f2_len, KM_SLEEP); 4611 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4612 4613 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4614 4615 kmem_free(f1, f1_len); 4616 kmem_free(f2, f2_len); 4617 4618 if (error) 4619 goto abort3; 4620 } 4621 #endif /* NVERIEXEC > 0 */ 4622 4623 /* 4624 * All ready. Incant the rename vop. 4625 */ 4626 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4627 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4628 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4629 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4630 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4631 4632 /* 4633 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4634 * tdvp and tvp. But we can't assert any of that. 4635 */ 4636 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4637 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4638 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4639 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4640 4641 /* 4642 * So all we have left to do is to drop the rename lock and 4643 * destroy the pathbufs. 
4644 */ 4645 VFS_RENAMELOCK_EXIT(mp); 4646 fstrans_done(mp); 4647 goto out2; 4648 4649 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4650 VOP_UNLOCK(tvp); 4651 abort2: VOP_UNLOCK(tdvp); 4652 VFS_RENAMELOCK_EXIT(mp); 4653 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4654 vrele(tdvp); 4655 if (tvp != NULL) 4656 vrele(tvp); 4657 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4658 vrele(fdvp); 4659 vrele(fvp); 4660 fstrans_done(mp); 4661 out2: pathbuf_destroy(tpb); 4662 out1: pathbuf_destroy(fpb); 4663 out0: return error; 4664 } 4665 4666 /* 4667 * Make a directory file. 4668 */ 4669 /* ARGSUSED */ 4670 int 4671 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4672 { 4673 /* { 4674 syscallarg(const char *) path; 4675 syscallarg(int) mode; 4676 } */ 4677 4678 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4679 SCARG(uap, mode), UIO_USERSPACE); 4680 } 4681 4682 int 4683 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4684 register_t *retval) 4685 { 4686 /* { 4687 syscallarg(int) fd; 4688 syscallarg(const char *) path; 4689 syscallarg(int) mode; 4690 } */ 4691 4692 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4693 SCARG(uap, mode), UIO_USERSPACE); 4694 } 4695 4696 4697 int 4698 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4699 { 4700 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4701 } 4702 4703 static int 4704 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4705 enum uio_seg seg) 4706 { 4707 struct proc *p = curlwp->l_proc; 4708 struct vnode *vp; 4709 struct vattr vattr; 4710 int error; 4711 struct pathbuf *pb; 4712 struct nameidata nd; 4713 4714 KASSERT(l != NULL || fdat == AT_FDCWD); 4715 4716 /* XXX bollocks, should pass in a pathbuf */ 4717 error = pathbuf_maybe_copyin(path, seg, &pb); 4718 if (error) { 4719 return error; 4720 } 4721 4722 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4723 4724 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4725 
pathbuf_destroy(pb); 4726 return (error); 4727 } 4728 vp = nd.ni_vp; 4729 if (vp != NULL) { 4730 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4731 if (nd.ni_dvp == vp) 4732 vrele(nd.ni_dvp); 4733 else 4734 vput(nd.ni_dvp); 4735 vrele(vp); 4736 pathbuf_destroy(pb); 4737 return (EEXIST); 4738 } 4739 vattr_null(&vattr); 4740 vattr.va_type = VDIR; 4741 /* We will read cwdi->cwdi_cmask unlocked. */ 4742 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4743 nd.ni_cnd.cn_flags |= WILLBEDIR; 4744 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4745 if (!error) 4746 vrele(nd.ni_vp); 4747 vput(nd.ni_dvp); 4748 pathbuf_destroy(pb); 4749 return (error); 4750 } 4751 4752 /* 4753 * Remove a directory file. 4754 */ 4755 /* ARGSUSED */ 4756 int 4757 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4758 { 4759 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4760 AT_REMOVEDIR, UIO_USERSPACE); 4761 } 4762 4763 /* 4764 * Read a block of directory entries in a file system independent format. 4765 */ 4766 int 4767 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4768 { 4769 /* { 4770 syscallarg(int) fd; 4771 syscallarg(char *) buf; 4772 syscallarg(size_t) count; 4773 } */ 4774 file_t *fp; 4775 int error, done; 4776 4777 /* fd_getvnode() will use the descriptor for us */ 4778 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4779 return (error); 4780 if ((fp->f_flag & FREAD) == 0) { 4781 error = EBADF; 4782 goto out; 4783 } 4784 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4785 SCARG(uap, count), &done, l, 0, 0); 4786 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4787 *retval = done; 4788 out: 4789 fd_putfile(SCARG(uap, fd)); 4790 return (error); 4791 } 4792 4793 /* 4794 * Set the mode mask for creation of filesystem nodes. 
4795 */ 4796 int 4797 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4798 { 4799 /* { 4800 syscallarg(mode_t) newmask; 4801 } */ 4802 4803 /* 4804 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4805 * serialization with those reads is required. It's important to 4806 * return a coherent answer for the caller of umask() though, and 4807 * the atomic operation accomplishes that. 4808 */ 4809 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4810 SCARG(uap, newmask) & ALLPERMS); 4811 4812 return (0); 4813 } 4814 4815 int 4816 dorevoke(struct vnode *vp, kauth_cred_t cred) 4817 { 4818 struct vattr vattr; 4819 int error, fs_decision; 4820 4821 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4822 error = VOP_GETATTR(vp, &vattr, cred); 4823 VOP_UNLOCK(vp); 4824 if (error != 0) 4825 return error; 4826 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4827 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4828 fs_decision); 4829 if (!error) 4830 VOP_REVOKE(vp, REVOKEALL); 4831 return (error); 4832 } 4833 4834 /* 4835 * Void all references to file by ripping underlying filesystem 4836 * away from vnode. 4837 */ 4838 /* ARGSUSED */ 4839 int 4840 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4841 { 4842 /* { 4843 syscallarg(const char *) path; 4844 } */ 4845 struct vnode *vp; 4846 int error; 4847 4848 error = namei_simple_user(SCARG(uap, path), 4849 NSM_FOLLOW_TRYEMULROOT, &vp); 4850 if (error != 0) 4851 return (error); 4852 error = dorevoke(vp, l->l_cred); 4853 vrele(vp); 4854 return (error); 4855 } 4856 4857 /* 4858 * Allocate backing store for a file, filling a hole without having to 4859 * explicitly write anything out. 
4860 */ 4861 /* ARGSUSED */ 4862 int 4863 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4864 register_t *retval) 4865 { 4866 /* { 4867 syscallarg(int) fd; 4868 syscallarg(off_t) pos; 4869 syscallarg(off_t) len; 4870 } */ 4871 int fd; 4872 off_t pos, len; 4873 struct file *fp; 4874 struct vnode *vp; 4875 int error; 4876 4877 fd = SCARG(uap, fd); 4878 pos = SCARG(uap, pos); 4879 len = SCARG(uap, len); 4880 4881 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4882 *retval = EINVAL; 4883 return 0; 4884 } 4885 4886 error = fd_getvnode(fd, &fp); 4887 if (error) { 4888 *retval = error; 4889 return 0; 4890 } 4891 if ((fp->f_flag & FWRITE) == 0) { 4892 error = EBADF; 4893 goto fail; 4894 } 4895 vp = fp->f_vnode; 4896 4897 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4898 if (vp->v_type == VDIR) { 4899 error = EISDIR; 4900 } else { 4901 error = VOP_FALLOCATE(vp, pos, len); 4902 } 4903 VOP_UNLOCK(vp); 4904 4905 fail: 4906 fd_putfile(fd); 4907 *retval = error; 4908 return 0; 4909 } 4910 4911 /* 4912 * Deallocate backing store for a file, creating a hole. Also used for 4913 * invoking TRIM on disks. 
4914 */ 4915 /* ARGSUSED */ 4916 int 4917 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4918 register_t *retval) 4919 { 4920 /* { 4921 syscallarg(int) fd; 4922 syscallarg(off_t) pos; 4923 syscallarg(off_t) len; 4924 } */ 4925 int fd; 4926 off_t pos, len; 4927 struct file *fp; 4928 struct vnode *vp; 4929 int error; 4930 4931 fd = SCARG(uap, fd); 4932 pos = SCARG(uap, pos); 4933 len = SCARG(uap, len); 4934 4935 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4936 return EINVAL; 4937 } 4938 4939 error = fd_getvnode(fd, &fp); 4940 if (error) { 4941 return error; 4942 } 4943 if ((fp->f_flag & FWRITE) == 0) { 4944 error = EBADF; 4945 goto fail; 4946 } 4947 vp = fp->f_vnode; 4948 4949 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4950 if (vp->v_type == VDIR) { 4951 error = EISDIR; 4952 } else { 4953 error = VOP_FDISCARD(vp, pos, len); 4954 } 4955 VOP_UNLOCK(vp); 4956 4957 fail: 4958 fd_putfile(fd); 4959 return error; 4960 } 4961