1 /* $NetBSD: vfs_syscalls.c,v 1.566 2024/07/04 16:42:37 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.566 2024/07/04 16:42:37 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 /* 169 * Filter event method for EVFILT_FS. 170 */ 171 static struct klist fs_klist; 172 static kmutex_t fs_klist_lock; 173 174 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 175 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 176 177 void 178 vfs_evfilt_fs_init(void) 179 { 180 klist_init(&fs_klist); 181 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 182 } 183 184 static int 185 filt_fsattach(struct knote *kn) 186 { 187 mutex_enter(&fs_klist_lock); 188 kn->kn_flags |= EV_CLEAR; 189 klist_insert(&fs_klist, kn); 190 mutex_exit(&fs_klist_lock); 191 192 return 0; 193 } 194 195 static void 196 filt_fsdetach(struct knote *kn) 197 { 198 mutex_enter(&fs_klist_lock); 199 klist_remove(&fs_klist, kn); 200 mutex_exit(&fs_klist_lock); 201 } 202 203 static int 204 filt_fs(struct knote *kn, long hint) 205 { 206 int rv; 207 208 if (hint & NOTE_SUBMIT) { 209 KASSERT(mutex_owned(&fs_klist_lock)); 210 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 211 } else { 212 mutex_enter(&fs_klist_lock); 213 } 214 215 rv = (kn->kn_fflags != 0); 216 217 if ((hint & NOTE_SUBMIT) == 0) { 218 mutex_exit(&fs_klist_lock); 219 } 220 221 return rv; 222 } 223 224 /* referenced in kern_event.c */ 225 const struct filterops fs_filtops = { 226 .f_flags = FILTEROP_MPSAFE, 227 .f_attach = filt_fsattach, 228 .f_detach = filt_fsdetach, 229 .f_event = filt_fs, 230 }; 231 232 static int 233 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 234 { 235 file_t *dfp; 236 int error; 237 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 238 239 if (fdat != AT_FDCWD && path[0] != '/') { 240 if ((error = fd_getvnode(fdat, &dfp)) != 0) 241 goto out; 242 243 NDAT(ndp, dfp->f_vnode); 244 } 245 246 error = namei(ndp); 247 248 if (fdat != AT_FDCWD && path[0] != '/') 249 fd_putfile(fdat); 250 out: 251 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 252 return error; 253 } 254 255 static int 256 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 257 namei_simple_flags_t sflags, struct vnode **vp_ret) 258 { 259 file_t *dfp; 260 struct vnode *dvp; 261 int error; 262 struct pathbuf *pb; 263 const char *p; 264 265 error = pathbuf_copyin(path, &pb); 266 if (error) { 267 return error; 268 } 269 p = pathbuf_stringcopy_get(pb); 270 271 if (fdat != AT_FDCWD && p[0] != '/') { 272 if ((error = fd_getvnode(fdat, &dfp)) != 0) 273 goto out; 274 275 dvp = dfp->f_vnode; 276 } else { 277 dvp = NULL; 278 } 279 280 error = nameiat_simple(dvp, pb, sflags, vp_ret); 281 282 if (fdat != AT_FDCWD && p[0] != '/') 283 fd_putfile(fdat); 284 285 out: 286 pathbuf_stringcopy_put(pb, p); 287 pathbuf_destroy(pb); 288 289 return error; 290 } 291 292 static int 293 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 294 { 295 int error; 296 297 fp->f_flag = flags & FMASK; 298 fp->f_type = DTYPE_VNODE; 299 fp->f_ops = &vnops; 300 fp->f_vnode = vp; 301 302 if (flags & (O_EXLOCK | O_SHLOCK)) { 303 struct flock lf; 304 int type; 305 306 lf.l_whence = SEEK_SET; 307 lf.l_start = 0; 308 lf.l_len = 0; 309 if (flags & O_EXLOCK) 310 lf.l_type = F_WRLCK; 311 else 312 lf.l_type = F_RDLCK; 313 type = F_FLOCK; 314 if ((flags & FNONBLOCK) == 0) 315 type |= F_WAIT; 316 VOP_UNLOCK(vp); 317 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 318 if (error) { 319 (void) vn_close(vp, fp->f_flag, fp->f_cred); 320 fd_abort(l->l_proc, fp, indx); 321 return error; 322 } 323 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 324 atomic_or_uint(&fp->f_flag, FHASLOCK); 325 } 326 if (flags & O_CLOEXEC) 327 fd_set_exclose(l, indx, true); 328 return 0; 329 } 330 331 static int 332 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 333 void *data, size_t *data_len) 334 { 335 struct mount *mp; 336 int error = 0, saved_flags; 337 338 mp = vp->v_mount; 339 saved_flags = mp->mnt_flag; 340 341 /* We can operate only on VV_ROOT nodes. */ 342 if ((vp->v_vflag & VV_ROOT) == 0) { 343 error = EINVAL; 344 goto out; 345 } 346 347 /* 348 * We only allow the filesystem to be reloaded if it 349 * is currently mounted read-only. Additionally, we 350 * prevent read-write to read-only downgrades. 351 */ 352 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 353 (mp->mnt_flag & MNT_RDONLY) == 0 && 354 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 355 error = EOPNOTSUPP; /* Needs translation */ 356 goto out; 357 } 358 359 /* 360 * Enabling MNT_UNION requires a covered mountpoint and 361 * must not happen on the root mount. 362 */ 363 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 364 error = EOPNOTSUPP; 365 goto out; 366 } 367 368 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 369 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 370 if (error) 371 goto out; 372 373 error = vfs_suspend(mp, 0); 374 if (error) 375 goto out; 376 377 mutex_enter(mp->mnt_updating); 378 379 mp->mnt_flag &= ~MNT_OP_FLAGS; 380 mp->mnt_flag |= flags & MNT_OP_FLAGS; 381 382 /* 383 * Set the mount level flags. 384 */ 385 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 386 if ((flags & MNT_RDONLY)) 387 mp->mnt_iflag |= IMNT_WANTRDONLY; 388 else 389 mp->mnt_iflag |= IMNT_WANTRDWR; 390 } 391 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 392 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 393 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 394 mp->mnt_flag &= ~MNT_RDONLY; 395 396 error = VFS_MOUNT(mp, path, data, data_len); 397 398 if (error && data != NULL) { 399 int error2; 400 401 /* 402 * Update failed; let's try and see if it was an 403 * export request. For compat with 3.0 and earlier. 404 */ 405 error2 = vfs_hooks_reexport(mp, path, data); 406 407 /* 408 * Only update error code if the export request was 409 * understood but some problem occurred while 410 * processing it. 411 */ 412 if (error2 != EJUSTRETURN) 413 error = error2; 414 } 415 416 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 417 mp->mnt_flag |= MNT_RDONLY; 418 if (error) 419 mp->mnt_flag = saved_flags; 420 mp->mnt_flag &= ~MNT_OP_FLAGS; 421 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 422 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 423 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 424 vfs_syncer_add_to_worklist(mp); 425 } else { 426 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 427 vfs_syncer_remove_from_worklist(mp); 428 } 429 mutex_exit(mp->mnt_updating); 430 vfs_resume(mp); 431 432 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 433 (flags & MNT_EXTATTR)) { 434 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 435 NULL, 0, NULL) != 0) { 436 printf("%s: failed to start extattr, error = %d", 437 mp->mnt_stat.f_mntonname, error); 438 mp->mnt_flag &= ~MNT_EXTATTR; 439 } 440 } 441 442 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 443 !(flags & MNT_EXTATTR)) { 444 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 445 NULL, 0, NULL) != 0) { 446 printf("%s: failed to stop extattr, error = %d", 447 mp->mnt_stat.f_mntonname, error); 448 mp->mnt_flag |= MNT_RDONLY; 449 } 450 } 451 out: 452 return (error); 453 } 454 455 static int 456 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 457 struct vfsops **vfsops) 458 { 459 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 460 int error; 461 462 if (type_seg == UIO_USERSPACE) { 463 /* Copy file-system type from userspace. */ 464 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 465 } else { 466 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 467 KASSERT(error == 0); 468 } 469 470 if (error) { 471 /* 472 * Historically, filesystem types were identified by numbers. 473 * If we get an integer for the filesystem type instead of a 474 * string, we check to see if it matches one of the historic 475 * filesystem types. 476 */ 477 u_long fsindex = (u_long)fstype; 478 if (fsindex >= nmountcompatnames || 479 mountcompatnames[fsindex] == NULL) 480 return ENODEV; 481 strlcpy(fstypename, mountcompatnames[fsindex], 482 sizeof(fstypename)); 483 } 484 485 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 486 if (strcmp(fstypename, "ufs") == 0) 487 fstypename[0] = 'f'; 488 489 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 490 return 0; 491 492 /* If we can autoload a vfs module, try again */ 493 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 494 495 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 496 return 0; 497 498 return ENODEV; 499 } 500 501 static int 502 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 503 void *data, size_t *data_len) 504 { 505 struct mount *mp; 506 int error; 507 508 /* If MNT_GETARGS is specified, it should be the only flag. */ 509 if (flags & ~MNT_GETARGS) 510 return EINVAL; 511 512 mp = vp->v_mount; 513 514 /* XXX: probably some notion of "can see" here if we want isolation. */ 515 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 516 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 517 if (error) 518 return error; 519 520 if ((vp->v_vflag & VV_ROOT) == 0) 521 return EINVAL; 522 523 if (vfs_busy(mp)) 524 return EPERM; 525 526 mutex_enter(mp->mnt_updating); 527 mp->mnt_flag &= ~MNT_OP_FLAGS; 528 mp->mnt_flag |= MNT_GETARGS; 529 error = VFS_MOUNT(mp, path, data, data_len); 530 mp->mnt_flag &= ~MNT_OP_FLAGS; 531 mutex_exit(mp->mnt_updating); 532 533 vfs_unbusy(mp); 534 return (error); 535 } 536 537 int 538 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 539 { 540 /* { 541 syscallarg(const char *) type; 542 syscallarg(const char *) path; 543 syscallarg(int) flags; 544 syscallarg(void *) data; 545 syscallarg(size_t) data_len; 546 } */ 547 548 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 549 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 550 SCARG(uap, data_len), retval); 551 } 552 553 int 554 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 555 const char *path, int flags, void *data, enum uio_seg data_seg, 556 size_t data_len, register_t *retval) 557 { 558 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 559 struct vnode *vp; 560 void *data_buf = data; 561 bool vfsopsrele = false; 562 size_t alloc_sz = 0; 563 int error; 564 565 /* 566 * Get vnode to be covered 567 */ 568 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 569 if (error != 0) { 570 vp = NULL; 571 goto done; 572 } 573 574 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 575 vfsops = vp->v_mount->mnt_op; 576 } else { 577 /* 'type' is userspace */ 578 error = mount_get_vfsops(type, type_seg, &vfsops); 579 if (error != 0) 580 goto done; 581 vfsopsrele = true; 582 } 583 584 /* 585 * We allow data to be NULL, even for userspace. Some fs's don't need 586 * it. The others will handle NULL. 587 */ 588 if (data != NULL && data_seg == UIO_USERSPACE) { 589 if (data_len == 0) { 590 /* No length supplied, use default for filesystem */ 591 data_len = vfsops->vfs_min_mount_data; 592 593 /* 594 * Hopefully a longer buffer won't make copyin() fail. 595 * For compatibility with 3.0 and earlier. 596 */ 597 if (flags & MNT_UPDATE 598 && data_len < sizeof (struct mnt_export_args30)) 599 data_len = sizeof (struct mnt_export_args30); 600 } 601 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 602 error = EINVAL; 603 goto done; 604 } 605 alloc_sz = data_len; 606 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 607 608 /* NFS needs the buffer even for mnt_getargs .... */ 609 error = copyin(data, data_buf, data_len); 610 if (error != 0) 611 goto done; 612 } 613 614 if (flags & MNT_GETARGS) { 615 if (data_len == 0) { 616 error = EINVAL; 617 goto done; 618 } 619 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 620 if (error != 0) 621 goto done; 622 if (data_seg == UIO_USERSPACE) 623 error = copyout(data_buf, data, data_len); 624 *retval = data_len; 625 } else if (flags & MNT_UPDATE) { 626 error = mount_update(l, vp, path, flags, data_buf, &data_len); 627 } else { 628 /* Locking is handled internally in mount_domount(). */ 629 KASSERT(vfsopsrele == true); 630 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 631 &data_len); 632 vfsopsrele = false; 633 } 634 if (!error) { 635 mutex_enter(&fs_klist_lock); 636 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 637 mutex_exit(&fs_klist_lock); 638 } 639 640 done: 641 if (vfsopsrele) 642 vfs_delref(vfsops); 643 if (vp != NULL) { 644 vrele(vp); 645 } 646 if (data_buf != data) 647 kmem_free(data_buf, alloc_sz); 648 return (error); 649 } 650 651 /* 652 * Unmount a file system. 653 * 654 * Note: unmount takes a path to the vnode mounted on as argument, 655 * not special file (as before). 656 */ 657 /* ARGSUSED */ 658 int 659 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 660 { 661 /* { 662 syscallarg(const char *) path; 663 syscallarg(int) flags; 664 } */ 665 struct vnode *vp; 666 struct mount *mp; 667 int error; 668 struct pathbuf *pb; 669 struct nameidata nd; 670 671 error = pathbuf_copyin(SCARG(uap, path), &pb); 672 if (error) { 673 return error; 674 } 675 676 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 677 if ((error = namei(&nd)) != 0) { 678 pathbuf_destroy(pb); 679 return error; 680 } 681 vp = nd.ni_vp; 682 pathbuf_destroy(pb); 683 684 mp = vp->v_mount; 685 vfs_ref(mp); 686 VOP_UNLOCK(vp); 687 688 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 689 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 690 if (error) { 691 vrele(vp); 692 vfs_rele(mp); 693 return (error); 694 } 695 696 /* 697 * Don't allow unmounting the root file system. 698 */ 699 if (mp->mnt_flag & MNT_ROOTFS) { 700 vrele(vp); 701 vfs_rele(mp); 702 return (EINVAL); 703 } 704 705 /* 706 * Must be the root of the filesystem 707 */ 708 if ((vp->v_vflag & VV_ROOT) == 0) { 709 vrele(vp); 710 vfs_rele(mp); 711 return (EINVAL); 712 } 713 714 vrele(vp); 715 error = dounmount(mp, SCARG(uap, flags), l); 716 vfs_rele(mp); 717 if (!error) { 718 mutex_enter(&fs_klist_lock); 719 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 720 mutex_exit(&fs_klist_lock); 721 } 722 return error; 723 } 724 725 /* 726 * Sync each mounted filesystem. 727 */ 728 #ifdef DEBUG 729 int syncprt = 0; 730 struct ctldebug debug0 = { "syncprt", &syncprt }; 731 #endif 732 733 void 734 do_sys_sync(struct lwp *l) 735 { 736 mount_iterator_t *iter; 737 struct mount *mp; 738 int asyncflag; 739 740 mountlist_iterator_init(&iter); 741 while ((mp = mountlist_iterator_next(iter)) != NULL) { 742 mutex_enter(mp->mnt_updating); 743 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 744 asyncflag = mp->mnt_flag & MNT_ASYNC; 745 mp->mnt_flag &= ~MNT_ASYNC; 746 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 747 if (asyncflag) 748 mp->mnt_flag |= MNT_ASYNC; 749 } 750 mutex_exit(mp->mnt_updating); 751 } 752 mountlist_iterator_destroy(iter); 753 #ifdef DEBUG 754 if (syncprt) 755 vfs_bufstats(); 756 #endif /* DEBUG */ 757 } 758 759 static bool 760 sync_vnode_filter(void *cookie, vnode_t *vp) 761 { 762 763 if (vp->v_numoutput > 0) { 764 ++*(int *)cookie; 765 } 766 return false; 767 } 768 769 int 770 vfs_syncwait(void) 771 { 772 int nbusy, nbusy_prev, iter; 773 struct vnode_iterator *vniter; 774 mount_iterator_t *mpiter; 775 struct mount *mp; 776 777 for (nbusy_prev = 0, iter = 0; iter < 20;) { 778 nbusy = 0; 779 mountlist_iterator_init(&mpiter); 780 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 781 vnode_t *vp __diagused; 782 vfs_vnode_iterator_init(mp, &vniter); 783 vp = vfs_vnode_iterator_next(vniter, 784 sync_vnode_filter, &nbusy); 785 KASSERT(vp == NULL); 786 vfs_vnode_iterator_destroy(vniter); 787 } 788 mountlist_iterator_destroy(mpiter); 789 790 if (nbusy == 0) 791 break; 792 if (nbusy_prev == 0) 793 nbusy_prev = nbusy; 794 printf("%d ", nbusy); 795 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 796 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 797 iter++; 798 else 799 nbusy_prev = nbusy; 800 } 801 802 if (nbusy) { 803 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 804 printf("giving up\nPrinting vnodes for busy buffers\n"); 805 mountlist_iterator_init(&mpiter); 806 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 807 vnode_t *vp; 808 vfs_vnode_iterator_init(mp, &vniter); 809 vp = vfs_vnode_iterator_next(vniter, 810 NULL, NULL); 811 mutex_enter(vp->v_interlock); 812 if (vp->v_numoutput > 0) 813 vprint(NULL, vp); 814 mutex_exit(vp->v_interlock); 815 vrele(vp); 816 vfs_vnode_iterator_destroy(vniter); 817 } 818 mountlist_iterator_destroy(mpiter); 819 #endif 820 } 821 822 return nbusy; 823 } 824 825 /* ARGSUSED */ 826 int 827 sys_sync(struct lwp *l, const void *v, register_t *retval) 828 { 829 do_sys_sync(l); 830 return (0); 831 } 832 833 834 /* 835 * Access or change filesystem quotas. 836 * 837 * (this is really 14 different calls bundled into one) 838 */ 839 840 static int 841 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 842 { 843 struct quotastat info_k; 844 int error; 845 846 /* ensure any padding bytes are cleared */ 847 memset(&info_k, 0, sizeof(info_k)); 848 849 error = vfs_quotactl_stat(mp, &info_k); 850 if (error) { 851 return error; 852 } 853 854 return copyout(&info_k, info_u, sizeof(info_k)); 855 } 856 857 static int 858 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 859 struct quotaidtypestat *info_u) 860 { 861 struct quotaidtypestat info_k; 862 int error; 863 864 /* ensure any padding bytes are cleared */ 865 memset(&info_k, 0, sizeof(info_k)); 866 867 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 868 if (error) { 869 return error; 870 } 871 872 return copyout(&info_k, info_u, sizeof(info_k)); 873 } 874 875 static int 876 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 877 struct quotaobjtypestat *info_u) 878 { 879 struct quotaobjtypestat info_k; 880 int error; 881 882 /* ensure any padding bytes are cleared */ 883 memset(&info_k, 0, sizeof(info_k)); 884 885 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 886 if (error) { 887 return error; 888 } 889 890 return copyout(&info_k, info_u, sizeof(info_k)); 891 } 892 893 static int 894 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 895 struct quotaval *val_u) 896 { 897 struct quotakey key_k; 898 struct quotaval val_k; 899 int error; 900 901 /* ensure any padding bytes are cleared */ 902 memset(&val_k, 0, sizeof(val_k)); 903 904 error = copyin(key_u, &key_k, sizeof(key_k)); 905 if (error) { 906 return error; 907 } 908 909 error = vfs_quotactl_get(mp, &key_k, &val_k); 910 if (error) { 911 return error; 912 } 913 914 return copyout(&val_k, val_u, sizeof(val_k)); 915 } 916 917 static int 918 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 919 const struct quotaval *val_u) 920 { 921 struct quotakey key_k; 922 struct quotaval val_k; 923 int error; 924 925 error = copyin(key_u, &key_k, sizeof(key_k)); 926 if (error) { 927 return error; 928 } 929 930 error = copyin(val_u, &val_k, sizeof(val_k)); 931 if (error) { 932 return error; 933 } 934 935 return vfs_quotactl_put(mp, &key_k, &val_k); 936 } 937 938 static int 939 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 940 { 941 struct quotakey key_k; 942 int error; 943 944 error = copyin(key_u, &key_k, sizeof(key_k)); 945 if (error) { 946 return error; 947 } 948 949 return vfs_quotactl_del(mp, &key_k); 950 } 951 952 static int 953 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 954 { 955 struct quotakcursor cursor_k; 956 int error; 957 958 /* ensure any padding bytes are cleared */ 959 memset(&cursor_k, 0, sizeof(cursor_k)); 960 961 error = vfs_quotactl_cursoropen(mp, &cursor_k); 962 if (error) { 963 return error; 964 } 965 966 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 967 } 968 969 static int 970 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 971 { 972 struct quotakcursor cursor_k; 973 int error; 974 975 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 976 if (error) { 977 return error; 978 } 979 980 return vfs_quotactl_cursorclose(mp, &cursor_k); 981 } 982 983 static int 984 do_sys_quotactl_cursorskipidtype(struct mount *mp, 985 struct quotakcursor *cursor_u, int idtype) 986 { 987 struct quotakcursor cursor_k; 988 int error; 989 990 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 991 if (error) { 992 return error; 993 } 994 995 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 996 if (error) { 997 return error; 998 } 999 1000 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1001 } 1002 1003 static int 1004 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 1005 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 1006 unsigned *ret_u) 1007 { 1008 #define CGET_STACK_MAX 8 1009 struct quotakcursor cursor_k; 1010 struct quotakey stackkeys[CGET_STACK_MAX]; 1011 struct quotaval stackvals[CGET_STACK_MAX]; 1012 struct quotakey *keys_k; 1013 struct quotaval *vals_k; 1014 unsigned ret_k; 1015 int error; 1016 1017 if (maxnum > 128) { 1018 maxnum = 128; 1019 } 1020 1021 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1022 if (error) { 1023 return error; 1024 } 1025 1026 if (maxnum <= CGET_STACK_MAX) { 1027 keys_k = stackkeys; 1028 vals_k = stackvals; 1029 /* ensure any padding bytes are cleared */ 1030 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1031 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1032 } else { 1033 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1034 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1035 } 1036 1037 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1038 &ret_k); 1039 if (error) { 1040 goto fail; 1041 } 1042 1043 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1044 if (error) { 1045 goto fail; 1046 } 1047 1048 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1049 if (error) { 1050 goto fail; 1051 } 1052 1053 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1054 if (error) { 1055 goto fail; 1056 } 1057 1058 /* do last to maximize the chance of being able to recover a failure */ 1059 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1060 1061 fail: 1062 if (keys_k != stackkeys) { 1063 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1064 } 1065 if (vals_k != stackvals) { 1066 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1067 } 1068 return error; 1069 } 1070 1071 static int 1072 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1073 int *ret_u) 1074 { 1075 struct quotakcursor cursor_k; 1076 int ret_k; 1077 int error; 1078 1079 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1080 if (error) { 1081 return error; 1082 } 1083 1084 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1085 if (error) { 1086 return error; 1087 } 1088 1089 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1090 if (error) { 1091 return error; 1092 } 1093 1094 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1095 } 1096 1097 static int 1098 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1099 { 1100 struct quotakcursor cursor_k; 1101 int error; 1102 1103 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1104 if (error) { 1105 return error; 1106 } 1107 1108 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1109 if (error) { 1110 return error; 1111 } 1112 1113 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1114 } 1115 1116 static int 1117 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1118 { 1119 char *path_k; 1120 int error; 1121 1122 /* XXX this should probably be a struct pathbuf */ 1123 path_k = PNBUF_GET(); 1124 error = copyin(path_u, path_k, PATH_MAX); 1125 if (error) { 1126 PNBUF_PUT(path_k); 1127 return error; 1128 } 1129 1130 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1131 1132 PNBUF_PUT(path_k); 1133 return error; 1134 } 1135 1136 static int 1137 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1138 { 1139 return vfs_quotactl_quotaoff(mp, idtype); 1140 } 1141 1142 int 1143 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1144 { 1145 struct mount *mp; 1146 struct vnode *vp; 1147 int error; 1148 1149 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1150 if (error != 0) 1151 return (error); 1152 mp = vp->v_mount; 1153 1154 switch (args->qc_op) { 1155 case QUOTACTL_STAT: 1156 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1157 break; 1158 case QUOTACTL_IDTYPESTAT: 1159 error = do_sys_quotactl_idtypestat(mp, 1160 args->u.idtypestat.qc_idtype, 1161 args->u.idtypestat.qc_info); 1162 break; 1163 case QUOTACTL_OBJTYPESTAT: 1164 error = do_sys_quotactl_objtypestat(mp, 1165 args->u.objtypestat.qc_objtype, 1166 args->u.objtypestat.qc_info); 1167 break; 1168 case QUOTACTL_GET: 1169 error = do_sys_quotactl_get(mp, 1170 args->u.get.qc_key, 1171 args->u.get.qc_val); 1172 break; 1173 case QUOTACTL_PUT: 1174 error = do_sys_quotactl_put(mp, 1175 args->u.put.qc_key, 1176 args->u.put.qc_val); 1177 break; 1178 case QUOTACTL_DEL: 1179 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1180 break; 1181 case QUOTACTL_CURSOROPEN: 1182 error = do_sys_quotactl_cursoropen(mp, 1183 args->u.cursoropen.qc_cursor); 1184 break; 1185 case QUOTACTL_CURSORCLOSE: 1186 error = do_sys_quotactl_cursorclose(mp, 1187 args->u.cursorclose.qc_cursor); 1188 break; 1189 case QUOTACTL_CURSORSKIPIDTYPE: 1190 error = do_sys_quotactl_cursorskipidtype(mp, 1191 args->u.cursorskipidtype.qc_cursor, 1192 args->u.cursorskipidtype.qc_idtype); 1193 break; 1194 case QUOTACTL_CURSORGET: 1195 error = do_sys_quotactl_cursorget(mp, 1196 args->u.cursorget.qc_cursor, 1197 args->u.cursorget.qc_keys, 1198 args->u.cursorget.qc_vals, 1199 args->u.cursorget.qc_maxnum, 1200 args->u.cursorget.qc_ret); 1201 break; 1202 case QUOTACTL_CURSORATEND: 1203 error = do_sys_quotactl_cursoratend(mp, 1204 args->u.cursoratend.qc_cursor, 1205 args->u.cursoratend.qc_ret); 1206 break; 1207 case QUOTACTL_CURSORREWIND: 1208 error = do_sys_quotactl_cursorrewind(mp, 1209 args->u.cursorrewind.qc_cursor); 1210 break; 1211 case QUOTACTL_QUOTAON: 1212 error = do_sys_quotactl_quotaon(mp, 1213 args->u.quotaon.qc_idtype, 1214 args->u.quotaon.qc_quotafile); 1215 break; 1216 case QUOTACTL_QUOTAOFF: 1217 error = do_sys_quotactl_quotaoff(mp, 1218 args->u.quotaoff.qc_idtype); 1219 break; 1220 default: 1221 error = EINVAL; 1222 break; 1223 } 1224 1225 vrele(vp); 1226 return error; 1227 } 1228 1229 /* ARGSUSED */ 1230 int 1231 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1232 register_t *retval) 1233 { 1234 /* { 1235 syscallarg(const char *) path; 1236 syscallarg(struct quotactl_args *) args; 1237 } */ 1238 struct quotactl_args args; 1239 int error; 1240 1241 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1242 if (error) { 1243 return error; 1244 } 1245 1246 return do_sys_quotactl(SCARG(uap, path), &args); 1247 } 1248 1249 int 1250 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1251 int root) 1252 { 1253 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1254 bool chrooted; 1255 int error = 0; 1256 1257 KASSERT(l == curlwp); 1258 1259 /* 1260 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1261 * since it would imply chroots can be escaped. Just make sure this 1262 * routine is self-consistent. 1263 */ 1264 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1265 1266 /* 1267 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1268 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1269 * overrides MNT_NOWAIT. 1270 */ 1271 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1272 (flags != MNT_WAIT && flags != 0)) { 1273 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1274 } else { 1275 /* Get the filesystem stats now */ 1276 memset(sp, 0, sizeof(*sp)); 1277 if ((error = VFS_STATVFS(mp, sp)) != 0) 1278 return error; 1279 if (!chrooted) 1280 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1281 } 1282 1283 if (chrooted) { 1284 size_t len; 1285 char *bp; 1286 char c; 1287 char *path = PNBUF_GET(); 1288 1289 bp = path + MAXPATHLEN; 1290 *--bp = '\0'; 1291 rw_enter(&cwdi->cwdi_lock, RW_READER); 1292 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1293 MAXPATHLEN / 2, 0, l); 1294 rw_exit(&cwdi->cwdi_lock); 1295 if (error) { 1296 PNBUF_PUT(path); 1297 return error; 1298 } 1299 len = strlen(bp); 1300 if (len != 1) { 1301 /* 1302 * for mount points that are below our root, we can see 1303 * them, so we fix up the pathname and return them. The 1304 * rest we cannot see, so we don't allow viewing the 1305 * data. 1306 */ 1307 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1308 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1309 (void)strlcpy(sp->f_mntonname, 1310 c == '\0' ? "/" : &sp->f_mntonname[len], 1311 sizeof(sp->f_mntonname)); 1312 } else { 1313 if (root) 1314 (void)strlcpy(sp->f_mntonname, "/", 1315 sizeof(sp->f_mntonname)); 1316 else 1317 error = EPERM; 1318 } 1319 } 1320 PNBUF_PUT(path); 1321 } 1322 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1323 return error; 1324 } 1325 1326 /* 1327 * Get filesystem statistics by path. 1328 */ 1329 int 1330 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1331 { 1332 struct mount *mp; 1333 int error; 1334 struct vnode *vp; 1335 1336 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1337 if (error != 0) 1338 return error; 1339 mp = vp->v_mount; 1340 error = dostatvfs(mp, sb, l, flags, 1); 1341 vrele(vp); 1342 return error; 1343 } 1344 1345 /* ARGSUSED */ 1346 int 1347 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1348 { 1349 /* { 1350 syscallarg(const char *) path; 1351 syscallarg(struct statvfs *) buf; 1352 syscallarg(int) flags; 1353 } */ 1354 struct statvfs *sb; 1355 int error; 1356 1357 sb = STATVFSBUF_GET(); 1358 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1359 if (error == 0) 1360 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1361 STATVFSBUF_PUT(sb); 1362 return error; 1363 } 1364 1365 /* 1366 * Get filesystem statistics by fd. 1367 */ 1368 int 1369 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1370 { 1371 file_t *fp; 1372 struct mount *mp; 1373 int error; 1374 1375 /* fd_getvnode() will use the descriptor for us */ 1376 if ((error = fd_getvnode(fd, &fp)) != 0) 1377 return (error); 1378 mp = fp->f_vnode->v_mount; 1379 error = dostatvfs(mp, sb, curlwp, flags, 1); 1380 fd_putfile(fd); 1381 return error; 1382 } 1383 1384 /* ARGSUSED */ 1385 int 1386 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1387 { 1388 /* { 1389 syscallarg(int) fd; 1390 syscallarg(struct statvfs *) buf; 1391 syscallarg(int) flags; 1392 } */ 1393 struct statvfs *sb; 1394 int error; 1395 1396 sb = STATVFSBUF_GET(); 1397 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1398 if (error == 0) 1399 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1400 STATVFSBUF_PUT(sb); 1401 return error; 1402 } 1403 1404 1405 /* 1406 * Get statistics on all filesystems. 1407 */ 1408 int 1409 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1410 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1411 register_t *retval) 1412 { 1413 int root = 0; 1414 mount_iterator_t *iter; 1415 struct proc *p = l->l_proc; 1416 struct mount *mp; 1417 struct statvfs *sb; 1418 size_t count, maxcount; 1419 int error = 0; 1420 1421 sb = STATVFSBUF_GET(); 1422 maxcount = bufsize / entry_sz; 1423 count = 0; 1424 mountlist_iterator_init(&iter); 1425 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1426 if (sfsp && count < maxcount) { 1427 error = dostatvfs(mp, sb, l, flags, 0); 1428 if (error) { 1429 error = 0; 1430 continue; 1431 } 1432 error = copyfn(sb, sfsp, entry_sz); 1433 if (error) 1434 goto out; 1435 sfsp = (char *)sfsp + entry_sz; 1436 root |= strcmp(sb->f_mntonname, "/") == 0; 1437 } 1438 count++; 1439 } 1440 1441 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1442 /* 1443 * fake a root entry 1444 */ 1445 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1446 sb, l, flags, 1); 1447 if (error != 0) 1448 goto out; 1449 if (sfsp) { 1450 error = copyfn(sb, sfsp, entry_sz); 1451 if (error != 0) 1452 goto out; 1453 } 1454 count++; 1455 } 1456 if (sfsp && count > maxcount) 1457 *retval = maxcount; 1458 else 1459 *retval = count; 1460 out: 1461 mountlist_iterator_destroy(iter); 1462 STATVFSBUF_PUT(sb); 1463 return error; 1464 } 1465 1466 int 1467 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1468 register_t *retval) 1469 { 1470 /* { 1471 syscallarg(struct statvfs *) buf; 1472 syscallarg(size_t) bufsize; 1473 syscallarg(int) flags; 1474 } */ 1475 1476 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1477 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1478 } 1479 1480 /* 1481 * Change current working directory to a given file descriptor. 1482 */ 1483 int 1484 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1485 { 1486 struct proc *p = l->l_proc; 1487 struct cwdinfo *cwdi; 1488 struct vnode *vp, *tdp; 1489 struct mount *mp; 1490 file_t *fp; 1491 int error; 1492 1493 /* fd_getvnode() will use the descriptor for us */ 1494 if ((error = fd_getvnode(fd, &fp)) != 0) 1495 return error; 1496 vp = fp->f_vnode; 1497 1498 vref(vp); 1499 vn_lock(vp, LK_SHARED | LK_RETRY); 1500 if (vp->v_type != VDIR) 1501 error = ENOTDIR; 1502 else 1503 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1504 if (error) { 1505 vput(vp); 1506 goto out; 1507 } 1508 while ((mp = vp->v_mountedhere) != NULL) { 1509 error = vfs_busy(mp); 1510 vput(vp); 1511 if (error != 0) 1512 goto out; 1513 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1514 vfs_unbusy(mp); 1515 if (error) 1516 goto out; 1517 vp = tdp; 1518 } 1519 VOP_UNLOCK(vp); 1520 1521 /* 1522 * Disallow changing to a directory not under the process's 1523 * current root directory (if there is one). 1524 */ 1525 cwdi = p->p_cwdi; 1526 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1527 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1528 vrele(vp); 1529 error = EPERM; /* operation not permitted */ 1530 } else { 1531 vrele(cwdi->cwdi_cdir); 1532 cwdi->cwdi_cdir = vp; 1533 } 1534 rw_exit(&cwdi->cwdi_lock); 1535 1536 out: 1537 fd_putfile(fd); 1538 return error; 1539 } 1540 1541 /* 1542 * Change current working directory to a given file descriptor. 1543 */ 1544 /* ARGSUSED */ 1545 int 1546 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1547 { 1548 /* { 1549 syscallarg(int) fd; 1550 } */ 1551 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1552 } 1553 1554 /* 1555 * Change this process's notion of the root directory to a given file 1556 * descriptor. 1557 */ 1558 int 1559 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1560 { 1561 struct vnode *vp; 1562 file_t *fp; 1563 int error, fd = SCARG(uap, fd); 1564 1565 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1566 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1567 return error; 1568 /* fd_getvnode() will use the descriptor for us */ 1569 if ((error = fd_getvnode(fd, &fp)) != 0) 1570 return error; 1571 vp = fp->f_vnode; 1572 vn_lock(vp, LK_SHARED | LK_RETRY); 1573 if (vp->v_type != VDIR) 1574 error = ENOTDIR; 1575 else 1576 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1577 VOP_UNLOCK(vp); 1578 if (error) 1579 goto out; 1580 vref(vp); 1581 change_root(vp); 1582 1583 out: 1584 fd_putfile(fd); 1585 return (error); 1586 } 1587 1588 /* 1589 * Change current working directory (``.''). 1590 */ 1591 int 1592 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1593 register_t *retval) 1594 { 1595 struct proc *p = l->l_proc; 1596 struct cwdinfo * cwdi; 1597 int error; 1598 struct vnode *vp; 1599 1600 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1601 return error; 1602 cwdi = p->p_cwdi; 1603 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1604 vrele(cwdi->cwdi_cdir); 1605 cwdi->cwdi_cdir = vp; 1606 rw_exit(&cwdi->cwdi_lock); 1607 return 0; 1608 } 1609 1610 /* 1611 * Change current working directory (``.''). 1612 */ 1613 /* ARGSUSED */ 1614 int 1615 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1616 { 1617 /* { 1618 syscallarg(const char *) path; 1619 } */ 1620 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1621 } 1622 1623 /* 1624 * Change notion of root (``/'') directory. 1625 */ 1626 /* ARGSUSED */ 1627 int 1628 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1629 { 1630 /* { 1631 syscallarg(const char *) path; 1632 } */ 1633 int error; 1634 struct vnode *vp; 1635 1636 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1637 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1638 return (error); 1639 1640 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1641 if (error == 0) 1642 change_root(vp); 1643 return error; 1644 } 1645 1646 /* 1647 * Common routine for chroot and fchroot. 1648 * NB: callers need to properly authorize the change root operation. 1649 */ 1650 void 1651 change_root(struct vnode *vp) 1652 { 1653 kauth_cred_t ncred; 1654 struct lwp *l = curlwp; 1655 struct proc *p = l->l_proc; 1656 struct cwdinfo *cwdi = p->p_cwdi; 1657 1658 ncred = kauth_cred_alloc(); 1659 1660 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1661 if (cwdi->cwdi_rdir != NULL) 1662 vrele(cwdi->cwdi_rdir); 1663 cwdi->cwdi_rdir = vp; 1664 1665 /* 1666 * Prevent escaping from chroot by putting the root under 1667 * the working directory. Silently chdir to / if we aren't 1668 * already there. 1669 */ 1670 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1671 /* 1672 * XXX would be more failsafe to change directory to a 1673 * deadfs node here instead 1674 */ 1675 vrele(cwdi->cwdi_cdir); 1676 vref(vp); 1677 cwdi->cwdi_cdir = vp; 1678 } 1679 rw_exit(&cwdi->cwdi_lock); 1680 1681 /* Get a write lock on the process credential. */ 1682 proc_crmod_enter(); 1683 1684 kauth_cred_clone(p->p_cred, ncred); 1685 kauth_proc_chroot(ncred, p->p_cwdi); 1686 1687 /* Broadcast our credentials to the process and other LWPs. */ 1688 proc_crmod_leave(ncred, p->p_cred, true); 1689 } 1690 1691 /* 1692 * Common routine for chroot and chdir. 1693 * XXX "where" should be enum uio_seg 1694 */ 1695 int 1696 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1697 { 1698 struct pathbuf *pb; 1699 struct nameidata nd; 1700 int error; 1701 1702 error = pathbuf_maybe_copyin(path, where, &pb); 1703 if (error) { 1704 return error; 1705 } 1706 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1707 if ((error = namei(&nd)) != 0) { 1708 pathbuf_destroy(pb); 1709 return error; 1710 } 1711 *vpp = nd.ni_vp; 1712 pathbuf_destroy(pb); 1713 1714 if ((*vpp)->v_type != VDIR) 1715 error = ENOTDIR; 1716 else 1717 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1718 1719 if (error) 1720 vput(*vpp); 1721 else 1722 VOP_UNLOCK(*vpp); 1723 return (error); 1724 } 1725 1726 /* 1727 * Internals of sys_open - path has already been converted into a pathbuf 1728 * (so we can easily reuse this function from other parts of the kernel, 1729 * like posix_spawn post-processing). 1730 */ 1731 int 1732 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1733 int open_mode, int *fd) 1734 { 1735 struct proc *p = l->l_proc; 1736 struct cwdinfo *cwdi = p->p_cwdi; 1737 file_t *fp; 1738 struct vnode *vp; 1739 int dupfd; 1740 bool dupfd_move; 1741 int flags, cmode; 1742 int indx, error; 1743 1744 if (open_flags & O_SEARCH) { 1745 open_flags &= ~(int)O_SEARCH; 1746 } 1747 1748 /* 1749 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1750 * may be specified. 1751 */ 1752 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1753 return EINVAL; 1754 1755 flags = FFLAGS(open_flags); 1756 if ((flags & (FREAD | FWRITE)) == 0) 1757 return EINVAL; 1758 1759 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1760 return error; 1761 } 1762 1763 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1764 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1765 1766 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1767 &vp, &dupfd_move, &dupfd); 1768 if (error != 0) { 1769 fd_abort(p, fp, indx); 1770 return error; 1771 } 1772 1773 if (vp == NULL) { 1774 fd_abort(p, fp, indx); 1775 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1776 if (error) 1777 return error; 1778 *fd = indx; 1779 } else { 1780 error = open_setfp(l, fp, vp, indx, flags); 1781 if (error) 1782 return error; 1783 VOP_UNLOCK(vp); 1784 *fd = indx; 1785 fd_affix(p, fp, indx); 1786 } 1787 1788 return 0; 1789 } 1790 1791 int 1792 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1793 { 1794 struct pathbuf *pb; 1795 int error, oflags; 1796 1797 oflags = FFLAGS(open_flags); 1798 if ((oflags & (FREAD | FWRITE)) == 0) 1799 return EINVAL; 1800 1801 pb = pathbuf_create(path); 1802 if (pb == NULL) 1803 return ENOMEM; 1804 1805 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1806 pathbuf_destroy(pb); 1807 1808 return error; 1809 } 1810 1811 static int 1812 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1813 int mode, int *fd) 1814 { 1815 file_t *dfp = NULL; 1816 struct vnode *dvp = NULL; 1817 struct pathbuf *pb; 1818 const char *pathstring = NULL; 1819 int error; 1820 1821 if (path == NULL) { 1822 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1823 if (error == ENOSYS) 1824 goto no_compat; 1825 if (error) 1826 return error; 1827 } else { 1828 no_compat: 1829 error = pathbuf_copyin(path, &pb); 1830 if (error) 1831 return error; 1832 } 1833 1834 pathstring = pathbuf_stringcopy_get(pb); 1835 1836 /* 1837 * fdat is ignored if: 1838 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1839 * 2) if path is absolute, then fdat is useless. 1840 */ 1841 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1842 /* fd_getvnode() will use the descriptor for us */ 1843 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1844 goto out; 1845 1846 dvp = dfp->f_vnode; 1847 } 1848 1849 error = do_open(l, dvp, pb, flags, mode, fd); 1850 1851 if (dfp != NULL) 1852 fd_putfile(fdat); 1853 out: 1854 pathbuf_stringcopy_put(pb, pathstring); 1855 pathbuf_destroy(pb); 1856 return error; 1857 } 1858 1859 int 1860 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1861 { 1862 /* { 1863 syscallarg(const char *) path; 1864 syscallarg(int) flags; 1865 syscallarg(int) mode; 1866 } */ 1867 int error; 1868 int fd; 1869 1870 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1871 SCARG(uap, flags), SCARG(uap, mode), &fd); 1872 1873 if (error == 0) 1874 *retval = fd; 1875 1876 return error; 1877 } 1878 1879 int 1880 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1881 { 1882 /* { 1883 syscallarg(int) fd; 1884 syscallarg(const char *) path; 1885 syscallarg(int) oflags; 1886 syscallarg(int) mode; 1887 } */ 1888 int error; 1889 int fd; 1890 1891 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1892 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1893 1894 if (error == 0) 1895 *retval = fd; 1896 1897 return error; 1898 } 1899 1900 static void 1901 vfs__fhfree(fhandle_t *fhp) 1902 { 1903 size_t fhsize; 1904 1905 fhsize = FHANDLE_SIZE(fhp); 1906 kmem_free(fhp, fhsize); 1907 } 1908 1909 /* 1910 * vfs_composefh: compose a filehandle. 1911 */ 1912 1913 int 1914 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1915 { 1916 struct mount *mp; 1917 struct fid *fidp; 1918 int error; 1919 size_t needfhsize; 1920 size_t fidsize; 1921 1922 mp = vp->v_mount; 1923 fidp = NULL; 1924 if (*fh_size < FHANDLE_SIZE_MIN) { 1925 fidsize = 0; 1926 } else { 1927 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1928 if (fhp != NULL) { 1929 memset(fhp, 0, *fh_size); 1930 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1931 fidp = &fhp->fh_fid; 1932 } 1933 } 1934 error = VFS_VPTOFH(vp, fidp, &fidsize); 1935 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1936 if (error == 0 && *fh_size < needfhsize) { 1937 error = E2BIG; 1938 } 1939 *fh_size = needfhsize; 1940 return error; 1941 } 1942 1943 int 1944 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1945 { 1946 struct mount *mp; 1947 fhandle_t *fhp; 1948 size_t fhsize; 1949 size_t fidsize; 1950 int error; 1951 1952 mp = vp->v_mount; 1953 fidsize = 0; 1954 error = VFS_VPTOFH(vp, NULL, &fidsize); 1955 KASSERT(error != 0); 1956 if (error != E2BIG) { 1957 goto out; 1958 } 1959 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1960 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1961 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1962 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1963 if (error == 0) { 1964 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1965 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1966 *fhpp = fhp; 1967 } else { 1968 kmem_free(fhp, fhsize); 1969 } 1970 out: 1971 return error; 1972 } 1973 1974 void 1975 vfs_composefh_free(fhandle_t *fhp) 1976 { 1977 1978 vfs__fhfree(fhp); 1979 } 1980 1981 /* 1982 * vfs_fhtovp: lookup a vnode by a filehandle. 1983 */ 1984 1985 int 1986 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1987 { 1988 struct mount *mp; 1989 int error; 1990 1991 *vpp = NULL; 1992 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1993 if (mp == NULL) { 1994 error = ESTALE; 1995 goto out; 1996 } 1997 if (mp->mnt_op->vfs_fhtovp == NULL) { 1998 error = EOPNOTSUPP; 1999 goto out; 2000 } 2001 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 2002 out: 2003 return error; 2004 } 2005 2006 /* 2007 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 2008 * the needed size. 2009 */ 2010 2011 int 2012 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2013 { 2014 fhandle_t *fhp; 2015 int error; 2016 2017 if (fhsize > FHANDLE_SIZE_MAX) { 2018 return EINVAL; 2019 } 2020 if (fhsize < FHANDLE_SIZE_MIN) { 2021 return EINVAL; 2022 } 2023 again: 2024 fhp = kmem_alloc(fhsize, KM_SLEEP); 2025 error = copyin(ufhp, fhp, fhsize); 2026 if (error == 0) { 2027 /* XXX this check shouldn't be here */ 2028 if (FHANDLE_SIZE(fhp) == fhsize) { 2029 *fhpp = fhp; 2030 return 0; 2031 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2032 /* 2033 * a kludge for nfsv2 padded handles. 2034 */ 2035 size_t sz; 2036 2037 sz = FHANDLE_SIZE(fhp); 2038 kmem_free(fhp, fhsize); 2039 fhsize = sz; 2040 goto again; 2041 } else { 2042 /* 2043 * userland told us wrong size. 2044 */ 2045 error = EINVAL; 2046 } 2047 } 2048 kmem_free(fhp, fhsize); 2049 return error; 2050 } 2051 2052 void 2053 vfs_copyinfh_free(fhandle_t *fhp) 2054 { 2055 2056 vfs__fhfree(fhp); 2057 } 2058 2059 /* 2060 * Get file handle system call 2061 */ 2062 int 2063 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 2064 { 2065 /* { 2066 syscallarg(char *) fname; 2067 syscallarg(fhandle_t *) fhp; 2068 syscallarg(size_t *) fh_size; 2069 } */ 2070 struct vnode *vp; 2071 fhandle_t *fh; 2072 int error; 2073 struct pathbuf *pb; 2074 struct nameidata nd; 2075 size_t sz; 2076 size_t usz; 2077 2078 /* 2079 * Must be super user 2080 */ 2081 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2082 0, NULL, NULL, NULL); 2083 if (error) 2084 return (error); 2085 2086 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2087 if (error) { 2088 return error; 2089 } 2090 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2091 error = namei(&nd); 2092 if (error) { 2093 pathbuf_destroy(pb); 2094 return error; 2095 } 2096 vp = nd.ni_vp; 2097 pathbuf_destroy(pb); 2098 2099 error = vfs_composefh_alloc(vp, &fh); 2100 vput(vp); 2101 if (error != 0) { 2102 return error; 2103 } 2104 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2105 if (error != 0) { 2106 goto out; 2107 } 2108 sz = FHANDLE_SIZE(fh); 2109 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2110 if (error != 0) { 2111 goto out; 2112 } 2113 if (usz >= sz) { 2114 error = copyout(fh, SCARG(uap, fhp), sz); 2115 } else { 2116 error = E2BIG; 2117 } 2118 out: 2119 vfs_composefh_free(fh); 2120 return (error); 2121 } 2122 2123 /* 2124 * Open a file given a file handle. 2125 * 2126 * Check permissions, allocate an open file structure, 2127 * and call the device open routine if any. 2128 */ 2129 2130 int 2131 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2132 register_t *retval) 2133 { 2134 file_t *fp; 2135 struct vnode *vp = NULL; 2136 kauth_cred_t cred = l->l_cred; 2137 file_t *nfp; 2138 int indx, error; 2139 struct vattr va; 2140 fhandle_t *fh; 2141 int flags; 2142 proc_t *p; 2143 2144 p = curproc; 2145 2146 /* 2147 * Must be super user 2148 */ 2149 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2150 0, NULL, NULL, NULL))) 2151 return (error); 2152 2153 if (oflags & O_SEARCH) { 2154 oflags &= ~(int)O_SEARCH; 2155 } 2156 2157 flags = FFLAGS(oflags); 2158 if ((flags & (FREAD | FWRITE)) == 0) 2159 return (EINVAL); 2160 if ((flags & O_CREAT)) 2161 return (EINVAL); 2162 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2163 return (error); 2164 fp = nfp; 2165 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2166 if (error != 0) { 2167 goto bad; 2168 } 2169 error = vfs_fhtovp(fh, &vp); 2170 vfs_copyinfh_free(fh); 2171 if (error != 0) { 2172 goto bad; 2173 } 2174 2175 /* Now do an effective vn_open */ 2176 2177 if (vp->v_type == VSOCK) { 2178 error = EOPNOTSUPP; 2179 goto bad; 2180 } 2181 error = vn_openchk(vp, cred, flags); 2182 if (error != 0) 2183 goto bad; 2184 if (flags & O_TRUNC) { 2185 VOP_UNLOCK(vp); /* XXX */ 2186 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2187 vattr_null(&va); 2188 va.va_size = 0; 2189 error = VOP_SETATTR(vp, &va, cred); 2190 if (error) 2191 goto bad; 2192 } 2193 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2194 goto bad; 2195 if (flags & FWRITE) { 2196 mutex_enter(vp->v_interlock); 2197 vp->v_writecount++; 2198 mutex_exit(vp->v_interlock); 2199 } 2200 2201 /* done with modified vn_open, now finish what sys_open does. */ 2202 if ((error = open_setfp(l, fp, vp, indx, flags))) 2203 return error; 2204 2205 VOP_UNLOCK(vp); 2206 *retval = indx; 2207 fd_affix(p, fp, indx); 2208 return (0); 2209 2210 bad: 2211 fd_abort(p, fp, indx); 2212 if (vp != NULL) 2213 vput(vp); 2214 if (error == EDUPFD || error == EMOVEFD) { 2215 /* XXX should probably close curlwp->l_dupfd */ 2216 error = EOPNOTSUPP; 2217 } 2218 return (error); 2219 } 2220 2221 int 2222 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2223 { 2224 /* { 2225 syscallarg(const void *) fhp; 2226 syscallarg(size_t) fh_size; 2227 syscallarg(int) flags; 2228 } */ 2229 2230 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2231 SCARG(uap, flags), retval); 2232 } 2233 2234 int 2235 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2236 { 2237 int error; 2238 fhandle_t *fh; 2239 struct vnode *vp; 2240 2241 /* 2242 * Must be super user 2243 */ 2244 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2245 0, NULL, NULL, NULL))) 2246 return (error); 2247 2248 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2249 if (error != 0) 2250 return error; 2251 2252 error = vfs_fhtovp(fh, &vp); 2253 vfs_copyinfh_free(fh); 2254 if (error != 0) 2255 return error; 2256 2257 error = vn_stat(vp, sb); 2258 vput(vp); 2259 return error; 2260 } 2261 2262 2263 /* ARGSUSED */ 2264 int 2265 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2266 { 2267 /* { 2268 syscallarg(const void *) fhp; 2269 syscallarg(size_t) fh_size; 2270 syscallarg(struct stat *) sb; 2271 } */ 2272 struct stat sb; 2273 int error; 2274 2275 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2276 if (error) 2277 return error; 2278 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2279 } 2280 2281 int 2282 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2283 int flags) 2284 { 2285 fhandle_t *fh; 2286 struct mount *mp; 2287 struct vnode *vp; 2288 int error; 2289 2290 /* 2291 * Must be super user 2292 */ 2293 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2294 0, NULL, NULL, NULL))) 2295 return error; 2296 2297 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2298 if (error != 0) 2299 return error; 2300 2301 error = vfs_fhtovp(fh, &vp); 2302 vfs_copyinfh_free(fh); 2303 if (error != 0) 2304 return error; 2305 2306 mp = vp->v_mount; 2307 error = dostatvfs(mp, sb, l, flags, 1); 2308 vput(vp); 2309 return error; 2310 } 2311 2312 /* ARGSUSED */ 2313 int 2314 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2315 { 2316 /* { 2317 syscallarg(const void *) fhp; 2318 syscallarg(size_t) fh_size; 2319 syscallarg(struct statvfs *) buf; 2320 syscallarg(int) flags; 2321 } */ 2322 struct statvfs *sb = STATVFSBUF_GET(); 2323 int error; 2324 2325 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2326 SCARG(uap, flags)); 2327 if (error == 0) 2328 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2329 STATVFSBUF_PUT(sb); 2330 return error; 2331 } 2332 2333 int 2334 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2335 dev_t dev) 2336 { 2337 2338 /* 2339 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2340 * in mode and dev=0. 2341 * 2342 * In all the other cases it's implementation defined behavior. 2343 */ 2344 2345 if ((mode & S_IFIFO) && dev == 0) 2346 return do_sys_mkfifoat(l, fdat, pathname, mode); 2347 else 2348 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2349 UIO_USERSPACE); 2350 } 2351 2352 /* 2353 * Create a special file. 2354 */ 2355 /* ARGSUSED */ 2356 int 2357 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2358 register_t *retval) 2359 { 2360 /* { 2361 syscallarg(const char *) path; 2362 syscallarg(mode_t) mode; 2363 syscallarg(dev_t) dev; 2364 } */ 2365 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2366 SCARG(uap, mode), SCARG(uap, dev)); 2367 } 2368 2369 int 2370 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2371 register_t *retval) 2372 { 2373 /* { 2374 syscallarg(int) fd; 2375 syscallarg(const char *) path; 2376 syscallarg(mode_t) mode; 2377 syscallarg(int) pad; 2378 syscallarg(dev_t) dev; 2379 } */ 2380 2381 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2382 SCARG(uap, mode), SCARG(uap, dev)); 2383 } 2384 2385 int 2386 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2387 enum uio_seg seg) 2388 { 2389 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2390 } 2391 2392 int 2393 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2394 dev_t dev, enum uio_seg seg) 2395 { 2396 struct proc *p = l->l_proc; 2397 struct vnode *vp; 2398 struct vattr vattr; 2399 int error, optype; 2400 struct pathbuf *pb; 2401 struct nameidata nd; 2402 const char *pathstring; 2403 2404 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2405 0, NULL, NULL, NULL)) != 0) 2406 return (error); 2407 2408 optype = VOP_MKNOD_DESCOFFSET; 2409 2410 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2411 if (error) { 2412 return error; 2413 } 2414 pathstring = pathbuf_stringcopy_get(pb); 2415 if (pathstring == NULL) { 2416 pathbuf_destroy(pb); 2417 return ENOMEM; 2418 } 2419 2420 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2421 2422 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2423 goto out; 2424 vp = nd.ni_vp; 2425 2426 if (vp != NULL) 2427 error = EEXIST; 2428 else { 2429 vattr_null(&vattr); 2430 /* We will read cwdi->cwdi_cmask unlocked. */ 2431 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2432 vattr.va_rdev = dev; 2433 2434 switch (mode & S_IFMT) { 2435 case S_IFMT: /* used by badsect to flag bad sectors */ 2436 vattr.va_type = VBAD; 2437 break; 2438 case S_IFCHR: 2439 vattr.va_type = VCHR; 2440 break; 2441 case S_IFBLK: 2442 vattr.va_type = VBLK; 2443 break; 2444 case S_IFWHT: 2445 optype = VOP_WHITEOUT_DESCOFFSET; 2446 break; 2447 case S_IFREG: 2448 #if NVERIEXEC > 0 2449 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2450 O_CREAT); 2451 #endif /* NVERIEXEC > 0 */ 2452 vattr.va_type = VREG; 2453 vattr.va_rdev = VNOVAL; 2454 optype = VOP_CREATE_DESCOFFSET; 2455 break; 2456 default: 2457 error = EINVAL; 2458 break; 2459 } 2460 2461 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2462 vattr.va_rdev == VNOVAL) 2463 error = EINVAL; 2464 } 2465 2466 if (!error) { 2467 switch (optype) { 2468 case VOP_WHITEOUT_DESCOFFSET: 2469 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2470 if (error) 2471 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2472 vput(nd.ni_dvp); 2473 break; 2474 2475 case VOP_MKNOD_DESCOFFSET: 2476 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2477 &nd.ni_cnd, &vattr); 2478 if (error == 0) 2479 vrele(nd.ni_vp); 2480 vput(nd.ni_dvp); 2481 break; 2482 2483 case VOP_CREATE_DESCOFFSET: 2484 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2485 &nd.ni_cnd, &vattr); 2486 if (error == 0) 2487 vrele(nd.ni_vp); 2488 vput(nd.ni_dvp); 2489 break; 2490 } 2491 } else { 2492 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2493 if (nd.ni_dvp == vp) 2494 vrele(nd.ni_dvp); 2495 else 2496 vput(nd.ni_dvp); 2497 if (vp) 2498 vrele(vp); 2499 } 2500 out: 2501 pathbuf_stringcopy_put(pb, pathstring); 2502 pathbuf_destroy(pb); 2503 return (error); 2504 } 2505 2506 /* 2507 * Create a named pipe. 2508 */ 2509 /* ARGSUSED */ 2510 int 2511 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2512 { 2513 /* { 2514 syscallarg(const char *) path; 2515 syscallarg(int) mode; 2516 } */ 2517 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2518 } 2519 2520 int 2521 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2522 register_t *retval) 2523 { 2524 /* { 2525 syscallarg(int) fd; 2526 syscallarg(const char *) path; 2527 syscallarg(int) mode; 2528 } */ 2529 2530 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2531 SCARG(uap, mode)); 2532 } 2533 2534 static int 2535 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2536 { 2537 struct proc *p = l->l_proc; 2538 struct vattr vattr; 2539 int error; 2540 struct pathbuf *pb; 2541 struct nameidata nd; 2542 2543 error = pathbuf_copyin(path, &pb); 2544 if (error) { 2545 return error; 2546 } 2547 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2548 2549 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2550 pathbuf_destroy(pb); 2551 return error; 2552 } 2553 if (nd.ni_vp != NULL) { 2554 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2555 if (nd.ni_dvp == nd.ni_vp) 2556 vrele(nd.ni_dvp); 2557 else 2558 vput(nd.ni_dvp); 2559 vrele(nd.ni_vp); 2560 pathbuf_destroy(pb); 2561 return (EEXIST); 2562 } 2563 vattr_null(&vattr); 2564 vattr.va_type = VFIFO; 2565 /* We will read cwdi->cwdi_cmask unlocked. */ 2566 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2567 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2568 if (error == 0) 2569 vrele(nd.ni_vp); 2570 vput(nd.ni_dvp); 2571 pathbuf_destroy(pb); 2572 return (error); 2573 } 2574 2575 /* 2576 * Make a hard file link. 2577 */ 2578 /* ARGSUSED */ 2579 int 2580 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2581 const char *link, int follow, register_t *retval) 2582 { 2583 struct vnode *vp; 2584 struct pathbuf *linkpb; 2585 struct nameidata nd; 2586 namei_simple_flags_t ns_flags; 2587 int error; 2588 2589 if (follow & AT_SYMLINK_FOLLOW) 2590 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2591 else 2592 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2593 2594 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2595 if (error != 0) 2596 return (error); 2597 error = pathbuf_copyin(link, &linkpb); 2598 if (error) { 2599 goto out1; 2600 } 2601 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2602 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2603 goto out2; 2604 if (nd.ni_vp) { 2605 error = EEXIST; 2606 goto abortop; 2607 } 2608 /* Prevent hard links on directories. */ 2609 if (vp->v_type == VDIR) { 2610 error = EPERM; 2611 goto abortop; 2612 } 2613 /* Prevent cross-mount operation. */ 2614 if (nd.ni_dvp->v_mount != vp->v_mount) { 2615 error = EXDEV; 2616 goto abortop; 2617 } 2618 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2619 VOP_UNLOCK(nd.ni_dvp); 2620 vrele(nd.ni_dvp); 2621 out2: 2622 pathbuf_destroy(linkpb); 2623 out1: 2624 vrele(vp); 2625 return (error); 2626 abortop: 2627 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2628 if (nd.ni_dvp == nd.ni_vp) 2629 vrele(nd.ni_dvp); 2630 else 2631 vput(nd.ni_dvp); 2632 if (nd.ni_vp != NULL) 2633 vrele(nd.ni_vp); 2634 goto out2; 2635 } 2636 2637 int 2638 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2639 { 2640 /* { 2641 syscallarg(const char *) path; 2642 syscallarg(const char *) link; 2643 } */ 2644 const char *path = SCARG(uap, path); 2645 const char *link = SCARG(uap, link); 2646 2647 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2648 AT_SYMLINK_FOLLOW, retval); 2649 } 2650 2651 int 2652 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2653 register_t *retval) 2654 { 2655 /* { 2656 syscallarg(int) fd1; 2657 syscallarg(const char *) name1; 2658 syscallarg(int) fd2; 2659 syscallarg(const char *) name2; 2660 syscallarg(int) flags; 2661 } */ 2662 int fd1 = SCARG(uap, fd1); 2663 const char *name1 = SCARG(uap, name1); 2664 int fd2 = SCARG(uap, fd2); 2665 const char *name2 = SCARG(uap, name2); 2666 int follow; 2667 2668 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2669 2670 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2671 } 2672 2673 2674 int 2675 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2676 { 2677 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2678 } 2679 2680 static int 2681 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2682 const char *link, enum uio_seg seg) 2683 { 2684 struct proc *p = curproc; 2685 struct vattr vattr; 2686 char *path; 2687 int error; 2688 size_t len; 2689 struct pathbuf *linkpb; 2690 struct nameidata nd; 2691 2692 KASSERT(l != NULL || fdat == AT_FDCWD); 2693 2694 path = PNBUF_GET(); 2695 if (seg == UIO_USERSPACE) { 2696 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2697 goto out1; 2698 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2699 goto out1; 2700 } else { 2701 len = strlen(patharg) + 1; 2702 KASSERT(len <= MAXPATHLEN); 2703 memcpy(path, patharg, len); 2704 linkpb = pathbuf_create(link); 2705 if (linkpb == NULL) { 2706 error = ENOMEM; 2707 goto out1; 2708 } 2709 } 2710 ktrkuser("symlink-target", path, len - 1); 2711 2712 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2713 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2714 goto out2; 2715 if (nd.ni_vp) { 2716 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2717 if (nd.ni_dvp == nd.ni_vp) 2718 vrele(nd.ni_dvp); 2719 else 2720 vput(nd.ni_dvp); 2721 vrele(nd.ni_vp); 2722 error = EEXIST; 2723 goto out2; 2724 } 2725 vattr_null(&vattr); 2726 vattr.va_type = VLNK; 2727 /* We will read cwdi->cwdi_cmask unlocked. */ 2728 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2729 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2730 if (error == 0) 2731 vrele(nd.ni_vp); 2732 vput(nd.ni_dvp); 2733 out2: 2734 pathbuf_destroy(linkpb); 2735 out1: 2736 PNBUF_PUT(path); 2737 return (error); 2738 } 2739 2740 /* 2741 * Make a symbolic link. 2742 */ 2743 /* ARGSUSED */ 2744 int 2745 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2746 { 2747 /* { 2748 syscallarg(const char *) path; 2749 syscallarg(const char *) link; 2750 } */ 2751 2752 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2753 UIO_USERSPACE); 2754 } 2755 2756 int 2757 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2758 register_t *retval) 2759 { 2760 /* { 2761 syscallarg(const char *) path1; 2762 syscallarg(int) fd; 2763 syscallarg(const char *) path2; 2764 } */ 2765 2766 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2767 SCARG(uap, path2), UIO_USERSPACE); 2768 } 2769 2770 /* 2771 * Delete a whiteout from the filesystem. 2772 */ 2773 /* ARGSUSED */ 2774 int 2775 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2776 { 2777 /* { 2778 syscallarg(const char *) path; 2779 } */ 2780 int error; 2781 struct pathbuf *pb; 2782 struct nameidata nd; 2783 2784 error = pathbuf_copyin(SCARG(uap, path), &pb); 2785 if (error) { 2786 return error; 2787 } 2788 2789 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2790 error = namei(&nd); 2791 if (error) { 2792 pathbuf_destroy(pb); 2793 return (error); 2794 } 2795 2796 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2797 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2798 if (nd.ni_dvp == nd.ni_vp) 2799 vrele(nd.ni_dvp); 2800 else 2801 vput(nd.ni_dvp); 2802 if (nd.ni_vp) 2803 vrele(nd.ni_vp); 2804 pathbuf_destroy(pb); 2805 return (EEXIST); 2806 } 2807 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2808 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2809 vput(nd.ni_dvp); 2810 pathbuf_destroy(pb); 2811 return (error); 2812 } 2813 2814 /* 2815 * Delete a name from the filesystem. 2816 */ 2817 /* ARGSUSED */ 2818 int 2819 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2820 { 2821 /* { 2822 syscallarg(const char *) path; 2823 } */ 2824 2825 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2826 } 2827 2828 int 2829 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2830 register_t *retval) 2831 { 2832 /* { 2833 syscallarg(int) fd; 2834 syscallarg(const char *) path; 2835 syscallarg(int) flag; 2836 } */ 2837 2838 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2839 SCARG(uap, flag), UIO_USERSPACE); 2840 } 2841 2842 int 2843 do_sys_unlink(const char *arg, enum uio_seg seg) 2844 { 2845 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2846 } 2847 2848 static int 2849 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2850 enum uio_seg seg) 2851 { 2852 struct vnode *vp; 2853 int error; 2854 struct pathbuf *pb; 2855 struct nameidata nd; 2856 const char *pathstring; 2857 2858 KASSERT(l != NULL || fdat == AT_FDCWD); 2859 2860 error = pathbuf_maybe_copyin(arg, seg, &pb); 2861 if (error) { 2862 return error; 2863 } 2864 pathstring = pathbuf_stringcopy_get(pb); 2865 if (pathstring == NULL) { 2866 pathbuf_destroy(pb); 2867 return ENOMEM; 2868 } 2869 2870 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2871 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2872 goto out; 2873 vp = nd.ni_vp; 2874 2875 /* 2876 * The root of a mounted filesystem cannot be deleted. 2877 */ 2878 if ((vp->v_vflag & VV_ROOT) != 0) { 2879 error = EBUSY; 2880 goto abort; 2881 } 2882 2883 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2884 error = EBUSY; 2885 goto abort; 2886 } 2887 2888 /* 2889 * No rmdir "." please. 2890 */ 2891 if (nd.ni_dvp == vp) { 2892 error = EINVAL; 2893 goto abort; 2894 } 2895 2896 /* 2897 * AT_REMOVEDIR is required to remove a directory 2898 */ 2899 if (vp->v_type == VDIR) { 2900 if (!(flags & AT_REMOVEDIR)) { 2901 error = EPERM; 2902 goto abort; 2903 } else { 2904 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2905 vput(nd.ni_dvp); 2906 goto out; 2907 } 2908 } 2909 2910 /* 2911 * Starting here we only deal with non directories. 2912 */ 2913 if (flags & AT_REMOVEDIR) { 2914 error = ENOTDIR; 2915 goto abort; 2916 } 2917 2918 #if NVERIEXEC > 0 2919 /* Handle remove requests for veriexec entries. */ 2920 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2921 goto abort; 2922 } 2923 #endif /* NVERIEXEC > 0 */ 2924 2925 #ifdef FILEASSOC 2926 (void)fileassoc_file_delete(vp); 2927 #endif /* FILEASSOC */ 2928 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2929 vput(nd.ni_dvp); 2930 goto out; 2931 2932 abort: 2933 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2934 if (nd.ni_dvp == vp) 2935 vrele(nd.ni_dvp); 2936 else 2937 vput(nd.ni_dvp); 2938 vput(vp); 2939 2940 out: 2941 pathbuf_stringcopy_put(pb, pathstring); 2942 pathbuf_destroy(pb); 2943 return (error); 2944 } 2945 2946 /* 2947 * Reposition read/write file offset. 2948 */ 2949 int 2950 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2951 { 2952 /* { 2953 syscallarg(int) fd; 2954 syscallarg(int) pad; 2955 syscallarg(off_t) offset; 2956 syscallarg(int) whence; 2957 } */ 2958 file_t *fp; 2959 int error, fd; 2960 2961 switch (SCARG(uap, whence)) { 2962 case SEEK_CUR: 2963 case SEEK_END: 2964 case SEEK_SET: 2965 break; 2966 default: 2967 return EINVAL; 2968 } 2969 2970 fd = SCARG(uap, fd); 2971 2972 if ((fp = fd_getfile(fd)) == NULL) 2973 return (EBADF); 2974 2975 if (fp->f_ops->fo_seek == NULL) { 2976 error = ESPIPE; 2977 goto out; 2978 } 2979 2980 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 2981 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 2982 out: 2983 fd_putfile(fd); 2984 return (error); 2985 } 2986 2987 /* 2988 * Positional read system call. 2989 */ 2990 int 2991 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2992 { 2993 /* { 2994 syscallarg(int) fd; 2995 syscallarg(void *) buf; 2996 syscallarg(size_t) nbyte; 2997 syscallarg(off_t) offset; 2998 } */ 2999 file_t *fp; 3000 off_t offset; 3001 int error, fd = SCARG(uap, fd); 3002 3003 if ((fp = fd_getfile(fd)) == NULL) 3004 return (EBADF); 3005 3006 if ((fp->f_flag & FREAD) == 0) { 3007 fd_putfile(fd); 3008 return (EBADF); 3009 } 3010 3011 if (fp->f_ops->fo_seek == NULL) { 3012 error = ESPIPE; 3013 goto out; 3014 } 3015 3016 offset = SCARG(uap, offset); 3017 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3018 if (error) 3019 goto out; 3020 3021 /* dofileread() will unuse the descriptor for us */ 3022 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3023 &offset, 0, retval)); 3024 3025 out: 3026 fd_putfile(fd); 3027 return (error); 3028 } 3029 3030 /* 3031 * Positional scatter read system call. 3032 */ 3033 int 3034 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 3035 { 3036 /* { 3037 syscallarg(int) fd; 3038 syscallarg(const struct iovec *) iovp; 3039 syscallarg(int) iovcnt; 3040 syscallarg(off_t) offset; 3041 } */ 3042 off_t offset = SCARG(uap, offset); 3043 3044 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3045 SCARG(uap, iovcnt), &offset, 0, retval); 3046 } 3047 3048 /* 3049 * Positional write system call. 3050 */ 3051 int 3052 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 3053 { 3054 /* { 3055 syscallarg(int) fd; 3056 syscallarg(const void *) buf; 3057 syscallarg(size_t) nbyte; 3058 syscallarg(off_t) offset; 3059 } */ 3060 file_t *fp; 3061 off_t offset; 3062 int error, fd = SCARG(uap, fd); 3063 3064 if ((fp = fd_getfile(fd)) == NULL) 3065 return (EBADF); 3066 3067 if ((fp->f_flag & FWRITE) == 0) { 3068 fd_putfile(fd); 3069 return (EBADF); 3070 } 3071 3072 if (fp->f_ops->fo_seek == NULL) { 3073 error = ESPIPE; 3074 goto out; 3075 } 3076 3077 offset = SCARG(uap, offset); 3078 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3079 if (error) 3080 goto out; 3081 3082 /* dofilewrite() will unuse the descriptor for us */ 3083 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3084 &offset, 0, retval)); 3085 3086 out: 3087 fd_putfile(fd); 3088 return (error); 3089 } 3090 3091 /* 3092 * Positional gather write system call. 3093 */ 3094 int 3095 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3096 { 3097 /* { 3098 syscallarg(int) fd; 3099 syscallarg(const struct iovec *) iovp; 3100 syscallarg(int) iovcnt; 3101 syscallarg(off_t) offset; 3102 } */ 3103 off_t offset = SCARG(uap, offset); 3104 3105 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3106 SCARG(uap, iovcnt), &offset, 0, retval); 3107 } 3108 3109 /* 3110 * Check access permissions. 3111 */ 3112 int 3113 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3114 { 3115 /* { 3116 syscallarg(const char *) path; 3117 syscallarg(int) flags; 3118 } */ 3119 3120 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3121 SCARG(uap, flags), 0); 3122 } 3123 3124 int 3125 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3126 int mode, int flags) 3127 { 3128 kauth_cred_t cred; 3129 struct vnode *vp; 3130 int error, nd_flag, vmode; 3131 struct pathbuf *pb; 3132 struct nameidata nd; 3133 3134 CTASSERT(F_OK == 0); 3135 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3136 /* nonsense mode */ 3137 return EINVAL; 3138 } 3139 3140 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3141 if (flags & AT_SYMLINK_NOFOLLOW) 3142 nd_flag &= ~FOLLOW; 3143 3144 error = pathbuf_copyin(path, &pb); 3145 if (error) 3146 return error; 3147 3148 NDINIT(&nd, LOOKUP, nd_flag, pb); 3149 3150 /* Override default credentials */ 3151 if (!(flags & AT_EACCESS)) { 3152 cred = kauth_cred_dup(l->l_cred); 3153 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3154 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3155 } else 3156 cred = l->l_cred; 3157 nd.ni_cnd.cn_cred = cred; 3158 3159 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3160 pathbuf_destroy(pb); 3161 goto out; 3162 } 3163 vp = nd.ni_vp; 3164 pathbuf_destroy(pb); 3165 3166 /* Flags == 0 means only check for existence. */ 3167 if (mode) { 3168 vmode = 0; 3169 if (mode & R_OK) 3170 vmode |= VREAD; 3171 if (mode & W_OK) 3172 vmode |= VWRITE; 3173 if (mode & X_OK) 3174 vmode |= VEXEC; 3175 3176 error = VOP_ACCESS(vp, vmode, cred); 3177 if (!error && (vmode & VWRITE)) 3178 error = vn_writechk(vp); 3179 } 3180 vput(vp); 3181 out: 3182 if (!(flags & AT_EACCESS)) 3183 kauth_cred_free(cred); 3184 return (error); 3185 } 3186 3187 int 3188 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3189 register_t *retval) 3190 { 3191 /* { 3192 syscallarg(int) fd; 3193 syscallarg(const char *) path; 3194 syscallarg(int) amode; 3195 syscallarg(int) flag; 3196 } */ 3197 3198 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3199 SCARG(uap, amode), SCARG(uap, flag)); 3200 } 3201 3202 /* 3203 * Common code for all sys_stat functions, including compat versions. 3204 */ 3205 int 3206 do_sys_stat(const char *userpath, unsigned int nd_flag, 3207 struct stat *sb) 3208 { 3209 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3210 } 3211 3212 int 3213 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3214 unsigned int nd_flag, struct stat *sb) 3215 { 3216 int error; 3217 struct pathbuf *pb; 3218 struct nameidata nd; 3219 3220 KASSERT(l != NULL || fdat == AT_FDCWD); 3221 3222 error = pathbuf_copyin(userpath, &pb); 3223 if (error) { 3224 return error; 3225 } 3226 3227 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3228 3229 error = fd_nameiat(l, fdat, &nd); 3230 if (error != 0) { 3231 pathbuf_destroy(pb); 3232 return error; 3233 } 3234 error = vn_stat(nd.ni_vp, sb); 3235 vput(nd.ni_vp); 3236 pathbuf_destroy(pb); 3237 return error; 3238 } 3239 3240 /* 3241 * Get file status; this version follows links. 3242 */ 3243 /* ARGSUSED */ 3244 int 3245 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3246 { 3247 /* { 3248 syscallarg(const char *) path; 3249 syscallarg(struct stat *) ub; 3250 } */ 3251 struct stat sb; 3252 int error; 3253 3254 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3255 if (error) 3256 return error; 3257 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3258 } 3259 3260 /* 3261 * Get file status; this version does not follow links. 3262 */ 3263 /* ARGSUSED */ 3264 int 3265 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3266 { 3267 /* { 3268 syscallarg(const char *) path; 3269 syscallarg(struct stat *) ub; 3270 } */ 3271 struct stat sb; 3272 int error; 3273 3274 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3275 if (error) 3276 return error; 3277 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3278 } 3279 3280 int 3281 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3282 register_t *retval) 3283 { 3284 /* { 3285 syscallarg(int) fd; 3286 syscallarg(const char *) path; 3287 syscallarg(struct stat *) buf; 3288 syscallarg(int) flag; 3289 } */ 3290 unsigned int nd_flag; 3291 struct stat sb; 3292 int error; 3293 3294 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3295 nd_flag = NOFOLLOW; 3296 else 3297 nd_flag = FOLLOW; 3298 3299 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3300 &sb); 3301 if (error) 3302 return error; 3303 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3304 } 3305 3306 static int 3307 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3308 { 3309 int error; 3310 struct pathbuf *pb; 3311 struct nameidata nd; 3312 3313 error = pathbuf_copyin(path, &pb); 3314 if (error) { 3315 return error; 3316 } 3317 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3318 if ((error = namei(&nd)) != 0) { 3319 pathbuf_destroy(pb); 3320 return error; 3321 } 3322 error = VOP_PATHCONF(nd.ni_vp, name, retval); 3323 vput(nd.ni_vp); 3324 pathbuf_destroy(pb); 3325 return error; 3326 } 3327 3328 /* 3329 * Get configurable pathname variables. 3330 */ 3331 /* ARGSUSED */ 3332 int 3333 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3334 register_t *retval) 3335 { 3336 /* { 3337 syscallarg(const char *) path; 3338 syscallarg(int) name; 3339 } */ 3340 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3341 FOLLOW); 3342 } 3343 3344 /* ARGSUSED */ 3345 int 3346 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3347 register_t *retval) 3348 { 3349 /* { 3350 syscallarg(const char *) path; 3351 syscallarg(int) name; 3352 } */ 3353 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3354 NOFOLLOW); 3355 } 3356 3357 /* 3358 * Return target name of a symbolic link. 3359 */ 3360 /* ARGSUSED */ 3361 int 3362 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3363 register_t *retval) 3364 { 3365 /* { 3366 syscallarg(const char *) path; 3367 syscallarg(char *) buf; 3368 syscallarg(size_t) count; 3369 } */ 3370 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3371 SCARG(uap, buf), SCARG(uap, count), retval); 3372 } 3373 3374 static int 3375 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3376 size_t count, register_t *retval) 3377 { 3378 struct vnode *vp; 3379 struct iovec aiov; 3380 struct uio auio; 3381 int error; 3382 struct pathbuf *pb; 3383 struct nameidata nd; 3384 3385 error = pathbuf_copyin(path, &pb); 3386 if (error) { 3387 return error; 3388 } 3389 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3390 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3391 pathbuf_destroy(pb); 3392 return error; 3393 } 3394 vp = nd.ni_vp; 3395 pathbuf_destroy(pb); 3396 if (vp->v_type != VLNK) 3397 error = EINVAL; 3398 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3399 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3400 aiov.iov_base = buf; 3401 aiov.iov_len = count; 3402 auio.uio_iov = &aiov; 3403 auio.uio_iovcnt = 1; 3404 auio.uio_offset = 0; 3405 auio.uio_rw = UIO_READ; 3406 KASSERT(l == curlwp); 3407 auio.uio_vmspace = l->l_proc->p_vmspace; 3408 auio.uio_resid = count; 3409 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3410 *retval = count - auio.uio_resid; 3411 } 3412 vput(vp); 3413 return (error); 3414 } 3415 3416 int 3417 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3418 register_t *retval) 3419 { 3420 /* { 3421 syscallarg(int) fd; 3422 syscallarg(const char *) path; 3423 syscallarg(char *) buf; 3424 syscallarg(size_t) bufsize; 3425 } */ 3426 3427 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3428 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3429 } 3430 3431 /* 3432 * Change flags of a file given a path name. 3433 */ 3434 /* ARGSUSED */ 3435 int 3436 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3437 { 3438 /* { 3439 syscallarg(const char *) path; 3440 syscallarg(u_long) flags; 3441 } */ 3442 struct vnode *vp; 3443 int error; 3444 3445 error = namei_simple_user(SCARG(uap, path), 3446 NSM_FOLLOW_TRYEMULROOT, &vp); 3447 if (error != 0) 3448 return (error); 3449 error = change_flags(vp, SCARG(uap, flags), l); 3450 vput(vp); 3451 return (error); 3452 } 3453 3454 /* 3455 * Change flags of a file given a file descriptor. 3456 */ 3457 /* ARGSUSED */ 3458 int 3459 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3460 { 3461 /* { 3462 syscallarg(int) fd; 3463 syscallarg(u_long) flags; 3464 } */ 3465 struct vnode *vp; 3466 file_t *fp; 3467 int error; 3468 3469 /* fd_getvnode() will use the descriptor for us */ 3470 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3471 return (error); 3472 vp = fp->f_vnode; 3473 error = change_flags(vp, SCARG(uap, flags), l); 3474 VOP_UNLOCK(vp); 3475 fd_putfile(SCARG(uap, fd)); 3476 return (error); 3477 } 3478 3479 /* 3480 * Change flags of a file given a path name; this version does 3481 * not follow links. 3482 */ 3483 int 3484 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3485 { 3486 /* { 3487 syscallarg(const char *) path; 3488 syscallarg(u_long) flags; 3489 } */ 3490 struct vnode *vp; 3491 int error; 3492 3493 error = namei_simple_user(SCARG(uap, path), 3494 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3495 if (error != 0) 3496 return (error); 3497 error = change_flags(vp, SCARG(uap, flags), l); 3498 vput(vp); 3499 return (error); 3500 } 3501 3502 /* 3503 * Common routine to change flags of a file. 3504 */ 3505 int 3506 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3507 { 3508 struct vattr vattr; 3509 int error; 3510 3511 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3512 3513 vattr_null(&vattr); 3514 vattr.va_flags = flags; 3515 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3516 3517 return (error); 3518 } 3519 3520 /* 3521 * Change mode of a file given path name; this version follows links. 3522 */ 3523 /* ARGSUSED */ 3524 int 3525 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3526 { 3527 /* { 3528 syscallarg(const char *) path; 3529 syscallarg(int) mode; 3530 } */ 3531 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3532 SCARG(uap, mode), 0); 3533 } 3534 3535 int 3536 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3537 { 3538 int error; 3539 struct vnode *vp; 3540 namei_simple_flags_t ns_flag; 3541 3542 if (flags & AT_SYMLINK_NOFOLLOW) 3543 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3544 else 3545 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3546 3547 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3548 if (error != 0) 3549 return error; 3550 3551 error = change_mode(vp, mode, l); 3552 3553 vrele(vp); 3554 3555 return (error); 3556 } 3557 3558 /* 3559 * Change mode of a file given a file descriptor. 3560 */ 3561 /* ARGSUSED */ 3562 int 3563 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3564 { 3565 /* { 3566 syscallarg(int) fd; 3567 syscallarg(int) mode; 3568 } */ 3569 file_t *fp; 3570 int error; 3571 3572 /* fd_getvnode() will use the descriptor for us */ 3573 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3574 return (error); 3575 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3576 fd_putfile(SCARG(uap, fd)); 3577 return (error); 3578 } 3579 3580 int 3581 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3582 register_t *retval) 3583 { 3584 /* { 3585 syscallarg(int) fd; 3586 syscallarg(const char *) path; 3587 syscallarg(int) mode; 3588 syscallarg(int) flag; 3589 } */ 3590 3591 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3592 SCARG(uap, mode), SCARG(uap, flag)); 3593 } 3594 3595 /* 3596 * Change mode of a file given path name; this version does not follow links. 3597 */ 3598 /* ARGSUSED */ 3599 int 3600 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3601 { 3602 /* { 3603 syscallarg(const char *) path; 3604 syscallarg(int) mode; 3605 } */ 3606 int error; 3607 struct vnode *vp; 3608 3609 error = namei_simple_user(SCARG(uap, path), 3610 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3611 if (error != 0) 3612 return (error); 3613 3614 error = change_mode(vp, SCARG(uap, mode), l); 3615 3616 vrele(vp); 3617 return (error); 3618 } 3619 3620 /* 3621 * Common routine to set mode given a vnode. 3622 */ 3623 static int 3624 change_mode(struct vnode *vp, int mode, struct lwp *l) 3625 { 3626 struct vattr vattr; 3627 int error; 3628 3629 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3630 vattr_null(&vattr); 3631 vattr.va_mode = mode & ALLPERMS; 3632 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3633 VOP_UNLOCK(vp); 3634 return (error); 3635 } 3636 3637 /* 3638 * Set ownership given a path name; this version follows links. 3639 */ 3640 /* ARGSUSED */ 3641 int 3642 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3643 { 3644 /* { 3645 syscallarg(const char *) path; 3646 syscallarg(uid_t) uid; 3647 syscallarg(gid_t) gid; 3648 } */ 3649 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3650 SCARG(uap, gid), 0); 3651 } 3652 3653 int 3654 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3655 gid_t gid, int flags) 3656 { 3657 int error; 3658 struct vnode *vp; 3659 namei_simple_flags_t ns_flag; 3660 3661 if (flags & AT_SYMLINK_NOFOLLOW) 3662 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3663 else 3664 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3665 3666 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3667 if (error != 0) 3668 return error; 3669 3670 error = change_owner(vp, uid, gid, l, 0); 3671 3672 vrele(vp); 3673 3674 return (error); 3675 } 3676 3677 /* 3678 * Set ownership given a path name; this version follows links. 3679 * Provides POSIX semantics. 3680 */ 3681 /* ARGSUSED */ 3682 int 3683 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3684 { 3685 /* { 3686 syscallarg(const char *) path; 3687 syscallarg(uid_t) uid; 3688 syscallarg(gid_t) gid; 3689 } */ 3690 int error; 3691 struct vnode *vp; 3692 3693 error = namei_simple_user(SCARG(uap, path), 3694 NSM_FOLLOW_TRYEMULROOT, &vp); 3695 if (error != 0) 3696 return (error); 3697 3698 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3699 3700 vrele(vp); 3701 return (error); 3702 } 3703 3704 /* 3705 * Set ownership given a file descriptor. 3706 */ 3707 /* ARGSUSED */ 3708 int 3709 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3710 { 3711 /* { 3712 syscallarg(int) fd; 3713 syscallarg(uid_t) uid; 3714 syscallarg(gid_t) gid; 3715 } */ 3716 int error; 3717 file_t *fp; 3718 3719 /* fd_getvnode() will use the descriptor for us */ 3720 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3721 return (error); 3722 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3723 l, 0); 3724 fd_putfile(SCARG(uap, fd)); 3725 return (error); 3726 } 3727 3728 int 3729 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3730 register_t *retval) 3731 { 3732 /* { 3733 syscallarg(int) fd; 3734 syscallarg(const char *) path; 3735 syscallarg(uid_t) owner; 3736 syscallarg(gid_t) group; 3737 syscallarg(int) flag; 3738 } */ 3739 3740 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3741 SCARG(uap, owner), SCARG(uap, group), 3742 SCARG(uap, flag)); 3743 } 3744 3745 /* 3746 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3747 */ 3748 /* ARGSUSED */ 3749 int 3750 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3751 { 3752 /* { 3753 syscallarg(int) fd; 3754 syscallarg(uid_t) uid; 3755 syscallarg(gid_t) gid; 3756 } */ 3757 int error; 3758 file_t *fp; 3759 3760 /* fd_getvnode() will use the descriptor for us */ 3761 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3762 return (error); 3763 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3764 l, 1); 3765 fd_putfile(SCARG(uap, fd)); 3766 return (error); 3767 } 3768 3769 /* 3770 * Set ownership given a path name; this version does not follow links. 3771 */ 3772 /* ARGSUSED */ 3773 int 3774 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3775 { 3776 /* { 3777 syscallarg(const char *) path; 3778 syscallarg(uid_t) uid; 3779 syscallarg(gid_t) gid; 3780 } */ 3781 int error; 3782 struct vnode *vp; 3783 3784 error = namei_simple_user(SCARG(uap, path), 3785 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3786 if (error != 0) 3787 return (error); 3788 3789 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3790 3791 vrele(vp); 3792 return (error); 3793 } 3794 3795 /* 3796 * Set ownership given a path name; this version does not follow links. 3797 * Provides POSIX/XPG semantics. 3798 */ 3799 /* ARGSUSED */ 3800 int 3801 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3802 { 3803 /* { 3804 syscallarg(const char *) path; 3805 syscallarg(uid_t) uid; 3806 syscallarg(gid_t) gid; 3807 } */ 3808 int error; 3809 struct vnode *vp; 3810 3811 error = namei_simple_user(SCARG(uap, path), 3812 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3813 if (error != 0) 3814 return (error); 3815 3816 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3817 3818 vrele(vp); 3819 return (error); 3820 } 3821 3822 /* 3823 * Common routine to set ownership given a vnode. 3824 */ 3825 static int 3826 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3827 int posix_semantics) 3828 { 3829 struct vattr vattr; 3830 mode_t newmode; 3831 int error; 3832 3833 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3834 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3835 goto out; 3836 3837 #define CHANGED(x) ((int)(x) != -1) 3838 newmode = vattr.va_mode; 3839 if (posix_semantics) { 3840 /* 3841 * POSIX/XPG semantics: if the caller is not the super-user, 3842 * clear set-user-id and set-group-id bits. Both POSIX and 3843 * the XPG consider the behaviour for calls by the super-user 3844 * implementation-defined; we leave the set-user-id and set- 3845 * group-id settings intact in that case. 3846 */ 3847 if (vattr.va_mode & S_ISUID) { 3848 if (kauth_authorize_vnode(l->l_cred, 3849 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3850 newmode &= ~S_ISUID; 3851 } 3852 if (vattr.va_mode & S_ISGID) { 3853 if (kauth_authorize_vnode(l->l_cred, 3854 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3855 newmode &= ~S_ISGID; 3856 } 3857 } else { 3858 /* 3859 * NetBSD semantics: when changing owner and/or group, 3860 * clear the respective bit(s). 3861 */ 3862 if (CHANGED(uid)) 3863 newmode &= ~S_ISUID; 3864 if (CHANGED(gid)) 3865 newmode &= ~S_ISGID; 3866 } 3867 /* Update va_mode iff altered. */ 3868 if (vattr.va_mode == newmode) 3869 newmode = VNOVAL; 3870 3871 vattr_null(&vattr); 3872 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3873 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3874 vattr.va_mode = newmode; 3875 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3876 #undef CHANGED 3877 3878 out: 3879 VOP_UNLOCK(vp); 3880 return (error); 3881 } 3882 3883 /* 3884 * Set the access and modification times given a path name; this 3885 * version follows links. 3886 */ 3887 /* ARGSUSED */ 3888 int 3889 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3890 register_t *retval) 3891 { 3892 /* { 3893 syscallarg(const char *) path; 3894 syscallarg(const struct timeval *) tptr; 3895 } */ 3896 3897 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3898 SCARG(uap, tptr), UIO_USERSPACE); 3899 } 3900 3901 /* 3902 * Set the access and modification times given a file descriptor. 3903 */ 3904 /* ARGSUSED */ 3905 int 3906 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3907 register_t *retval) 3908 { 3909 /* { 3910 syscallarg(int) fd; 3911 syscallarg(const struct timeval *) tptr; 3912 } */ 3913 int error; 3914 file_t *fp; 3915 3916 /* fd_getvnode() will use the descriptor for us */ 3917 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3918 return (error); 3919 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3920 UIO_USERSPACE); 3921 fd_putfile(SCARG(uap, fd)); 3922 return (error); 3923 } 3924 3925 int 3926 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3927 register_t *retval) 3928 { 3929 /* { 3930 syscallarg(int) fd; 3931 syscallarg(const struct timespec *) tptr; 3932 } */ 3933 int error; 3934 file_t *fp; 3935 3936 /* fd_getvnode() will use the descriptor for us */ 3937 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3938 return (error); 3939 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3940 SCARG(uap, tptr), UIO_USERSPACE); 3941 fd_putfile(SCARG(uap, fd)); 3942 return (error); 3943 } 3944 3945 /* 3946 * Set the access and modification times given a path name; this 3947 * version does not follow links. 3948 */ 3949 int 3950 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3951 register_t *retval) 3952 { 3953 /* { 3954 syscallarg(const char *) path; 3955 syscallarg(const struct timeval *) tptr; 3956 } */ 3957 3958 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3959 SCARG(uap, tptr), UIO_USERSPACE); 3960 } 3961 3962 int 3963 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3964 register_t *retval) 3965 { 3966 /* { 3967 syscallarg(int) fd; 3968 syscallarg(const char *) path; 3969 syscallarg(const struct timespec *) tptr; 3970 syscallarg(int) flag; 3971 } */ 3972 int follow; 3973 const struct timespec *tptr; 3974 int error; 3975 3976 tptr = SCARG(uap, tptr); 3977 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3978 3979 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3980 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3981 3982 return error; 3983 } 3984 3985 /* 3986 * Common routine to set access and modification times given a vnode. 3987 */ 3988 int 3989 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3990 const struct timespec *tptr, enum uio_seg seg) 3991 { 3992 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3993 } 3994 3995 int 3996 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3997 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3998 { 3999 struct vattr vattr; 4000 int error, dorele = 0; 4001 namei_simple_flags_t sflags; 4002 bool vanull, setbirthtime; 4003 struct timespec ts[2]; 4004 4005 KASSERT(l != NULL || fdat == AT_FDCWD); 4006 4007 /* 4008 * I have checked all callers and they pass either FOLLOW, 4009 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 4010 * is 0. More to the point, they don't pass anything else. 4011 * Let's keep it that way at least until the namei interfaces 4012 * are fully sanitized. 4013 */ 4014 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4015 sflags = (flag == FOLLOW) ? 4016 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4017 4018 if (tptr == NULL) { 4019 vanull = true; 4020 nanotime(&ts[0]); 4021 ts[1] = ts[0]; 4022 } else { 4023 vanull = false; 4024 if (seg != UIO_SYSSPACE) { 4025 error = copyin(tptr, ts, sizeof (ts)); 4026 if (error != 0) 4027 return error; 4028 } else { 4029 ts[0] = tptr[0]; 4030 ts[1] = tptr[1]; 4031 } 4032 } 4033 4034 if (ts[0].tv_nsec == UTIME_NOW) { 4035 nanotime(&ts[0]); 4036 if (ts[1].tv_nsec == UTIME_NOW) { 4037 vanull = true; 4038 ts[1] = ts[0]; 4039 } 4040 } else if (ts[1].tv_nsec == UTIME_NOW) 4041 nanotime(&ts[1]); 4042 4043 if (vp == NULL) { 4044 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4045 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4046 if (error != 0) 4047 return error; 4048 dorele = 1; 4049 } 4050 4051 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4052 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4053 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4054 vattr_null(&vattr); 4055 4056 if (ts[0].tv_nsec != UTIME_OMIT) 4057 vattr.va_atime = ts[0]; 4058 4059 if (ts[1].tv_nsec != UTIME_OMIT) { 4060 vattr.va_mtime = ts[1]; 4061 if (setbirthtime) 4062 vattr.va_birthtime = ts[1]; 4063 } 4064 4065 if (vanull) 4066 vattr.va_vaflags |= VA_UTIMES_NULL; 4067 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4068 VOP_UNLOCK(vp); 4069 4070 if (dorele != 0) 4071 vrele(vp); 4072 4073 return error; 4074 } 4075 4076 int 4077 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4078 const struct timeval *tptr, enum uio_seg seg) 4079 { 4080 struct timespec ts[2]; 4081 struct timespec *tsptr = NULL; 4082 int error; 4083 4084 if (tptr != NULL) { 4085 struct timeval tv[2]; 4086 4087 if (seg != UIO_SYSSPACE) { 4088 error = copyin(tptr, tv, sizeof(tv)); 4089 if (error != 0) 4090 return error; 4091 tptr = tv; 4092 } 4093 4094 if ((tptr[0].tv_usec == UTIME_NOW) || 4095 (tptr[0].tv_usec == UTIME_OMIT)) 4096 ts[0].tv_nsec = tptr[0].tv_usec; 4097 else { 4098 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4099 return EINVAL; 4100 4101 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4102 } 4103 4104 if ((tptr[1].tv_usec == UTIME_NOW) || 4105 (tptr[1].tv_usec == UTIME_OMIT)) 4106 ts[1].tv_nsec = tptr[1].tv_usec; 4107 else { 4108 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4109 return EINVAL; 4110 4111 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4112 } 4113 4114 tsptr = &ts[0]; 4115 } 4116 4117 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4118 } 4119 4120 /* 4121 * Truncate a file given its path name. 4122 */ 4123 /* ARGSUSED */ 4124 int 4125 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4126 { 4127 /* { 4128 syscallarg(const char *) path; 4129 syscallarg(int) pad; 4130 syscallarg(off_t) length; 4131 } */ 4132 struct vnode *vp; 4133 struct vattr vattr; 4134 int error; 4135 4136 if (SCARG(uap, length) < 0) 4137 return EINVAL; 4138 4139 error = namei_simple_user(SCARG(uap, path), 4140 NSM_FOLLOW_TRYEMULROOT, &vp); 4141 if (error != 0) 4142 return (error); 4143 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4144 if (vp->v_type == VDIR) 4145 error = EISDIR; 4146 else if ((error = vn_writechk(vp)) == 0 && 4147 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4148 vattr_null(&vattr); 4149 vattr.va_size = SCARG(uap, length); 4150 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4151 } 4152 vput(vp); 4153 return (error); 4154 } 4155 4156 /* 4157 * Truncate a file given a file descriptor. 4158 */ 4159 /* ARGSUSED */ 4160 int 4161 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4162 { 4163 /* { 4164 syscallarg(int) fd; 4165 syscallarg(int) pad; 4166 syscallarg(off_t) length; 4167 } */ 4168 file_t *fp; 4169 int error, fd = SCARG(uap, fd); 4170 4171 fp = fd_getfile(fd); 4172 if (fp == NULL) 4173 return EBADF; 4174 if (fp->f_ops->fo_truncate == NULL) 4175 error = EOPNOTSUPP; 4176 else 4177 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4178 4179 fd_putfile(fd); 4180 return error; 4181 } 4182 4183 /* 4184 * Sync an open file. 4185 */ 4186 /* ARGSUSED */ 4187 int 4188 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4189 { 4190 /* { 4191 syscallarg(int) fd; 4192 } */ 4193 struct vnode *vp; 4194 file_t *fp; 4195 int error; 4196 4197 /* fd_getvnode() will use the descriptor for us */ 4198 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4199 return (error); 4200 vp = fp->f_vnode; 4201 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4202 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4203 VOP_UNLOCK(vp); 4204 fd_putfile(SCARG(uap, fd)); 4205 return (error); 4206 } 4207 4208 /* 4209 * Sync a range of file data. API modeled after that found in AIX. 4210 * 4211 * FDATASYNC indicates that we need only save enough metadata to be able 4212 * to re-read the written data. 4213 */ 4214 /* ARGSUSED */ 4215 int 4216 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4217 { 4218 /* { 4219 syscallarg(int) fd; 4220 syscallarg(int) flags; 4221 syscallarg(off_t) start; 4222 syscallarg(off_t) length; 4223 } */ 4224 struct vnode *vp; 4225 file_t *fp; 4226 int flags, nflags; 4227 off_t s, e, len; 4228 int error; 4229 4230 /* fd_getvnode() will use the descriptor for us */ 4231 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4232 return (error); 4233 4234 if ((fp->f_flag & FWRITE) == 0) { 4235 error = EBADF; 4236 goto out; 4237 } 4238 4239 flags = SCARG(uap, flags); 4240 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4241 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4242 error = EINVAL; 4243 goto out; 4244 } 4245 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4246 if (flags & FDATASYNC) 4247 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4248 else 4249 nflags = FSYNC_WAIT; 4250 if (flags & FDISKSYNC) 4251 nflags |= FSYNC_CACHE; 4252 4253 len = SCARG(uap, length); 4254 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4255 if (len) { 4256 s = SCARG(uap, start); 4257 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4258 error = EINVAL; 4259 goto out; 4260 } 4261 e = s + len; 4262 KASSERT(s <= e); 4263 } else { 4264 e = 0; 4265 s = 0; 4266 } 4267 4268 vp = fp->f_vnode; 4269 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4270 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4271 VOP_UNLOCK(vp); 4272 out: 4273 fd_putfile(SCARG(uap, fd)); 4274 return (error); 4275 } 4276 4277 /* 4278 * Sync the data of an open file. 4279 */ 4280 /* ARGSUSED */ 4281 int 4282 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4283 { 4284 /* { 4285 syscallarg(int) fd; 4286 } */ 4287 struct vnode *vp; 4288 file_t *fp; 4289 int error; 4290 4291 /* fd_getvnode() will use the descriptor for us */ 4292 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4293 return (error); 4294 vp = fp->f_vnode; 4295 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4296 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4297 VOP_UNLOCK(vp); 4298 fd_putfile(SCARG(uap, fd)); 4299 return (error); 4300 } 4301 4302 /* 4303 * Rename files, (standard) BSD semantics frontend. 4304 */ 4305 /* ARGSUSED */ 4306 int 4307 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4308 { 4309 /* { 4310 syscallarg(const char *) from; 4311 syscallarg(const char *) to; 4312 } */ 4313 4314 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4315 SCARG(uap, to), UIO_USERSPACE, 0)); 4316 } 4317 4318 int 4319 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4320 register_t *retval) 4321 { 4322 /* { 4323 syscallarg(int) fromfd; 4324 syscallarg(const char *) from; 4325 syscallarg(int) tofd; 4326 syscallarg(const char *) to; 4327 } */ 4328 4329 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4330 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4331 } 4332 4333 /* 4334 * Rename files, POSIX semantics frontend. 4335 */ 4336 /* ARGSUSED */ 4337 int 4338 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4339 { 4340 /* { 4341 syscallarg(const char *) from; 4342 syscallarg(const char *) to; 4343 } */ 4344 4345 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4346 SCARG(uap, to), UIO_USERSPACE, 1)); 4347 } 4348 4349 /* 4350 * Rename files. Source and destination must either both be directories, 4351 * or both not be directories. If target is a directory, it must be empty. 4352 * If `from' and `to' refer to the same object, the value of the `retain' 4353 * argument is used to determine whether `from' will be 4354 * 4355 * (retain == 0) deleted unless `from' and `to' refer to the same 4356 * object in the file system's name space (BSD). 4357 * (retain == 1) always retained (POSIX). 4358 * 4359 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4360 */ 4361 int 4362 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4363 { 4364 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4365 } 4366 4367 static int 4368 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4369 const char *to, enum uio_seg seg, int retain) 4370 { 4371 struct pathbuf *fpb, *tpb; 4372 struct nameidata fnd, tnd; 4373 struct vnode *fdvp, *fvp; 4374 struct vnode *tdvp, *tvp; 4375 struct mount *mp, *tmp; 4376 int error; 4377 4378 KASSERT(l != NULL || fromfd == AT_FDCWD); 4379 KASSERT(l != NULL || tofd == AT_FDCWD); 4380 4381 error = pathbuf_maybe_copyin(from, seg, &fpb); 4382 if (error) 4383 goto out0; 4384 KASSERT(fpb != NULL); 4385 4386 error = pathbuf_maybe_copyin(to, seg, &tpb); 4387 if (error) 4388 goto out1; 4389 KASSERT(tpb != NULL); 4390 4391 /* 4392 * Lookup from. 4393 * 4394 * XXX LOCKPARENT is wrong because we don't actually want it 4395 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4396 * insane, so for the time being we need to leave it like this. 4397 */ 4398 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4399 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4400 goto out2; 4401 4402 /* 4403 * Pull out the important results of the lookup, fdvp and fvp. 4404 * Of course, fvp is bogus because we're about to unlock fdvp. 4405 */ 4406 fdvp = fnd.ni_dvp; 4407 fvp = fnd.ni_vp; 4408 mp = fdvp->v_mount; 4409 KASSERT(fdvp != NULL); 4410 KASSERT(fvp != NULL); 4411 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4412 /* 4413 * Bracket the operation with fstrans_start()/fstrans_done(). 4414 * 4415 * Inside the bracket this file system cannot be unmounted so 4416 * a vnode on this file system cannot change its v_mount. 4417 * A vnode on another file system may still change to dead mount. 4418 */ 4419 fstrans_start(mp); 4420 4421 /* 4422 * Make sure neither fdvp nor fvp is locked. 4423 */ 4424 if (fdvp != fvp) 4425 VOP_UNLOCK(fdvp); 4426 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4427 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4428 4429 /* 4430 * Reject renaming `.' and `..'. Can't do this until after 4431 * namei because we need namei's parsing to find the final 4432 * component name. (namei should just leave us with the final 4433 * component name and not look it up itself, but anyway...) 4434 * 4435 * This was here before because we used to relookup from 4436 * instead of to and relookup requires the caller to check 4437 * this, but now file systems may depend on this check, so we 4438 * must retain it until the file systems are all rototilled. 4439 */ 4440 if (((fnd.ni_cnd.cn_namelen == 1) && 4441 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4442 ((fnd.ni_cnd.cn_namelen == 2) && 4443 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4444 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4445 error = EINVAL; /* XXX EISDIR? */ 4446 goto abort0; 4447 } 4448 4449 /* 4450 * Lookup to. 4451 * 4452 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4453 * fvp here to decide whether to add CREATEDIR is a load of 4454 * bollocks because fvp might be the wrong node by now, since 4455 * fdvp is unlocked. 4456 * 4457 * XXX Why not pass CREATEDIR always? 4458 */ 4459 NDINIT(&tnd, RENAME, 4460 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4461 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4462 tpb); 4463 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4464 goto abort0; 4465 4466 /* 4467 * Pull out the important results of the lookup, tdvp and tvp. 4468 * Of course, tvp is bogus because we're about to unlock tdvp. 4469 */ 4470 tdvp = tnd.ni_dvp; 4471 tvp = tnd.ni_vp; 4472 KASSERT(tdvp != NULL); 4473 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4474 4475 if (fvp->v_type == VDIR) 4476 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4477 /* 4478 * Make sure neither tdvp nor tvp is locked. 4479 */ 4480 if (tdvp != tvp) 4481 VOP_UNLOCK(tdvp); 4482 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4483 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4484 4485 /* 4486 * Reject renaming onto `.' or `..'. relookup is unhappy with 4487 * these, which is why we must do this here. Once upon a time 4488 * we relooked up from instead of to, and consequently didn't 4489 * need this check, but now that we relookup to instead of 4490 * from, we need this; and we shall need it forever forward 4491 * until the VOP_RENAME protocol changes, because file systems 4492 * will no doubt begin to depend on this check. 4493 */ 4494 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4495 error = EISDIR; 4496 goto abort1; 4497 } 4498 if ((tnd.ni_cnd.cn_namelen == 2) && 4499 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4500 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4501 error = EINVAL; 4502 goto abort1; 4503 } 4504 4505 /* 4506 * Make sure the mount points match. Although we don't hold 4507 * any vnode locks, the v_mount on fdvp file system are stable. 4508 * 4509 * Unmounting another file system at an inopportune moment may 4510 * cause tdvp to disappear and change its v_mount to dead. 4511 * 4512 * So in either case different v_mount means cross-device rename. 4513 */ 4514 KASSERT(mp != NULL); 4515 tmp = tdvp->v_mount; 4516 4517 if (mp != tmp) { 4518 error = EXDEV; 4519 goto abort1; 4520 } 4521 4522 /* 4523 * Take the vfs rename lock to avoid cross-directory screw cases. 4524 * Nothing is locked currently, so taking this lock is safe. 4525 */ 4526 error = VFS_RENAMELOCK_ENTER(mp); 4527 if (error) 4528 goto abort1; 4529 4530 /* 4531 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4532 * and nothing is locked except for the vfs rename lock. 4533 * 4534 * The next step is a little rain dance to conform to the 4535 * insane lock protocol, even though it does nothing to ward 4536 * off race conditions. 4537 * 4538 * We need tdvp and tvp to be locked. However, because we have 4539 * unlocked tdvp in order to hold no locks while we take the 4540 * vfs rename lock, tvp may be wrong here, and we can't safely 4541 * lock it even if the sensible file systems will just unlock 4542 * it straight away. Consequently, we must lock tdvp and then 4543 * relookup tvp to get it locked. 4544 * 4545 * Finally, because the VOP_RENAME protocol is brain-damaged 4546 * and various file systems insanely depend on the semantics of 4547 * this brain damage, the lookup of to must be the last lookup 4548 * before VOP_RENAME. 4549 */ 4550 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4551 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4552 if (error) 4553 goto abort2; 4554 4555 /* 4556 * Drop the old tvp and pick up the new one -- which might be 4557 * the same, but that doesn't matter to us. After this, tdvp 4558 * and tvp should both be locked. 4559 */ 4560 if (tvp != NULL) 4561 vrele(tvp); 4562 tvp = tnd.ni_vp; 4563 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4564 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4565 4566 /* 4567 * The old do_sys_rename had various consistency checks here 4568 * involving fvp and tvp. fvp is bogus already here, and tvp 4569 * will become bogus soon in any sensible file system, so the 4570 * only purpose in putting these checks here is to give lip 4571 * service to these screw cases and to acknowledge that they 4572 * exist, not actually to handle them, but here you go 4573 * anyway... 4574 */ 4575 4576 /* 4577 * Acknowledge that directories and non-directories aren't 4578 * supposed to mix. 4579 */ 4580 if (tvp != NULL) { 4581 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4582 error = ENOTDIR; 4583 goto abort3; 4584 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4585 error = EISDIR; 4586 goto abort3; 4587 } 4588 } 4589 4590 /* 4591 * Acknowledge some random screw case, among the dozens that 4592 * might arise. 4593 */ 4594 if (fvp == tdvp) { 4595 error = EINVAL; 4596 goto abort3; 4597 } 4598 4599 /* 4600 * Acknowledge that POSIX has a wacky screw case. 4601 * 4602 * XXX Eventually the retain flag needs to be passed on to 4603 * VOP_RENAME. 4604 */ 4605 if (fvp == tvp) { 4606 if (retain) { 4607 error = 0; 4608 goto abort3; 4609 } else if ((fdvp == tdvp) && 4610 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4611 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4612 fnd.ni_cnd.cn_namelen))) { 4613 error = 0; 4614 goto abort3; 4615 } 4616 } 4617 4618 /* 4619 * Make sure veriexec can screw us up. (But a race can screw 4620 * up veriexec, of course -- remember, fvp and (soon) tvp are 4621 * bogus.) 4622 */ 4623 #if NVERIEXEC > 0 4624 { 4625 char *f1, *f2; 4626 size_t f1_len; 4627 size_t f2_len; 4628 4629 f1_len = fnd.ni_cnd.cn_namelen + 1; 4630 f1 = kmem_alloc(f1_len, KM_SLEEP); 4631 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4632 4633 f2_len = tnd.ni_cnd.cn_namelen + 1; 4634 f2 = kmem_alloc(f2_len, KM_SLEEP); 4635 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4636 4637 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4638 4639 kmem_free(f1, f1_len); 4640 kmem_free(f2, f2_len); 4641 4642 if (error) 4643 goto abort3; 4644 } 4645 #endif /* NVERIEXEC > 0 */ 4646 4647 /* 4648 * All ready. Incant the rename vop. 4649 */ 4650 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4651 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4652 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4653 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4654 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4655 4656 /* 4657 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4658 * tdvp and tvp. But we can't assert any of that. 4659 */ 4660 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4661 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4662 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4663 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4664 4665 /* 4666 * So all we have left to do is to drop the rename lock and 4667 * destroy the pathbufs. 4668 */ 4669 VFS_RENAMELOCK_EXIT(mp); 4670 fstrans_done(mp); 4671 goto out2; 4672 4673 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4674 VOP_UNLOCK(tvp); 4675 abort2: VOP_UNLOCK(tdvp); 4676 VFS_RENAMELOCK_EXIT(mp); 4677 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4678 vrele(tdvp); 4679 if (tvp != NULL) 4680 vrele(tvp); 4681 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4682 vrele(fdvp); 4683 vrele(fvp); 4684 fstrans_done(mp); 4685 out2: pathbuf_destroy(tpb); 4686 out1: pathbuf_destroy(fpb); 4687 out0: return error; 4688 } 4689 4690 /* 4691 * Make a directory file. 4692 */ 4693 /* ARGSUSED */ 4694 int 4695 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4696 { 4697 /* { 4698 syscallarg(const char *) path; 4699 syscallarg(int) mode; 4700 } */ 4701 4702 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4703 SCARG(uap, mode), UIO_USERSPACE); 4704 } 4705 4706 int 4707 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4708 register_t *retval) 4709 { 4710 /* { 4711 syscallarg(int) fd; 4712 syscallarg(const char *) path; 4713 syscallarg(int) mode; 4714 } */ 4715 4716 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4717 SCARG(uap, mode), UIO_USERSPACE); 4718 } 4719 4720 4721 int 4722 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4723 { 4724 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4725 } 4726 4727 static int 4728 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4729 enum uio_seg seg) 4730 { 4731 struct proc *p = curlwp->l_proc; 4732 struct vnode *vp; 4733 struct vattr vattr; 4734 int error; 4735 struct pathbuf *pb; 4736 struct nameidata nd; 4737 4738 KASSERT(l != NULL || fdat == AT_FDCWD); 4739 4740 /* XXX bollocks, should pass in a pathbuf */ 4741 error = pathbuf_maybe_copyin(path, seg, &pb); 4742 if (error) { 4743 return error; 4744 } 4745 4746 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4747 4748 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4749 pathbuf_destroy(pb); 4750 return (error); 4751 } 4752 vp = nd.ni_vp; 4753 if (vp != NULL) { 4754 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4755 if (nd.ni_dvp == vp) 4756 vrele(nd.ni_dvp); 4757 else 4758 vput(nd.ni_dvp); 4759 vrele(vp); 4760 pathbuf_destroy(pb); 4761 return (EEXIST); 4762 } 4763 vattr_null(&vattr); 4764 vattr.va_type = VDIR; 4765 /* We will read cwdi->cwdi_cmask unlocked. */ 4766 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4767 nd.ni_cnd.cn_flags |= WILLBEDIR; 4768 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4769 if (!error) 4770 vrele(nd.ni_vp); 4771 vput(nd.ni_dvp); 4772 pathbuf_destroy(pb); 4773 return (error); 4774 } 4775 4776 /* 4777 * Remove a directory file. 4778 */ 4779 /* ARGSUSED */ 4780 int 4781 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4782 { 4783 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4784 AT_REMOVEDIR, UIO_USERSPACE); 4785 } 4786 4787 /* 4788 * Read a block of directory entries in a file system independent format. 4789 */ 4790 int 4791 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4792 { 4793 /* { 4794 syscallarg(int) fd; 4795 syscallarg(char *) buf; 4796 syscallarg(size_t) count; 4797 } */ 4798 file_t *fp; 4799 int error, done; 4800 4801 /* fd_getvnode() will use the descriptor for us */ 4802 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4803 return (error); 4804 if ((fp->f_flag & FREAD) == 0) { 4805 error = EBADF; 4806 goto out; 4807 } 4808 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4809 SCARG(uap, count), &done, l, 0, 0); 4810 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4811 *retval = done; 4812 out: 4813 fd_putfile(SCARG(uap, fd)); 4814 return (error); 4815 } 4816 4817 /* 4818 * Set the mode mask for creation of filesystem nodes. 4819 */ 4820 int 4821 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4822 { 4823 /* { 4824 syscallarg(mode_t) newmask; 4825 } */ 4826 4827 /* 4828 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4829 * serialization with those reads is required. It's important to 4830 * return a coherent answer for the caller of umask() though, and 4831 * the atomic operation accomplishes that. 4832 */ 4833 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4834 SCARG(uap, newmask) & ALLPERMS); 4835 4836 return (0); 4837 } 4838 4839 int 4840 dorevoke(struct vnode *vp, kauth_cred_t cred) 4841 { 4842 struct vattr vattr; 4843 int error, fs_decision; 4844 4845 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4846 error = VOP_GETATTR(vp, &vattr, cred); 4847 VOP_UNLOCK(vp); 4848 if (error != 0) 4849 return error; 4850 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4851 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4852 fs_decision); 4853 if (!error) 4854 VOP_REVOKE(vp, REVOKEALL); 4855 return (error); 4856 } 4857 4858 /* 4859 * Void all references to file by ripping underlying filesystem 4860 * away from vnode. 4861 */ 4862 /* ARGSUSED */ 4863 int 4864 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4865 { 4866 /* { 4867 syscallarg(const char *) path; 4868 } */ 4869 struct vnode *vp; 4870 int error; 4871 4872 error = namei_simple_user(SCARG(uap, path), 4873 NSM_FOLLOW_TRYEMULROOT, &vp); 4874 if (error != 0) 4875 return (error); 4876 error = dorevoke(vp, l->l_cred); 4877 vrele(vp); 4878 return (error); 4879 } 4880 4881 /* 4882 * Allocate backing store for a file, filling a hole without having to 4883 * explicitly write anything out. 4884 */ 4885 /* ARGSUSED */ 4886 int 4887 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4888 register_t *retval) 4889 { 4890 /* { 4891 syscallarg(int) fd; 4892 syscallarg(off_t) pos; 4893 syscallarg(off_t) len; 4894 } */ 4895 int fd; 4896 off_t pos, len; 4897 struct file *fp; 4898 struct vnode *vp; 4899 int error; 4900 4901 fd = SCARG(uap, fd); 4902 pos = SCARG(uap, pos); 4903 len = SCARG(uap, len); 4904 4905 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4906 *retval = EINVAL; 4907 return 0; 4908 } 4909 4910 error = fd_getvnode(fd, &fp); 4911 if (error) { 4912 *retval = error; 4913 return 0; 4914 } 4915 if ((fp->f_flag & FWRITE) == 0) { 4916 error = EBADF; 4917 goto fail; 4918 } 4919 vp = fp->f_vnode; 4920 4921 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4922 if (vp->v_type == VDIR) { 4923 error = EISDIR; 4924 } else { 4925 error = VOP_FALLOCATE(vp, pos, len); 4926 } 4927 VOP_UNLOCK(vp); 4928 4929 fail: 4930 fd_putfile(fd); 4931 *retval = error; 4932 return 0; 4933 } 4934 4935 /* 4936 * Deallocate backing store for a file, creating a hole. Also used for 4937 * invoking TRIM on disks. 4938 */ 4939 /* ARGSUSED */ 4940 int 4941 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4942 register_t *retval) 4943 { 4944 /* { 4945 syscallarg(int) fd; 4946 syscallarg(off_t) pos; 4947 syscallarg(off_t) len; 4948 } */ 4949 int fd; 4950 off_t pos, len; 4951 struct file *fp; 4952 struct vnode *vp; 4953 int error; 4954 4955 fd = SCARG(uap, fd); 4956 pos = SCARG(uap, pos); 4957 len = SCARG(uap, len); 4958 4959 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4960 return EINVAL; 4961 } 4962 4963 error = fd_getvnode(fd, &fp); 4964 if (error) { 4965 return error; 4966 } 4967 if ((fp->f_flag & FWRITE) == 0) { 4968 error = EBADF; 4969 goto fail; 4970 } 4971 vp = fp->f_vnode; 4972 4973 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4974 if (vp->v_type == VDIR) { 4975 error = EISDIR; 4976 } else { 4977 error = VOP_FDISCARD(vp, pos, len); 4978 } 4979 VOP_UNLOCK(vp); 4980 4981 fail: 4982 fd_putfile(fd); 4983 return error; 4984 } 4985