1 /* $NetBSD: vfs_syscalls.c,v 1.568 2024/08/11 13:43:20 bad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.568 2024/08/11 13:43:20 bad Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 /* 169 * Filter event method for EVFILT_FS. 170 */ 171 static struct klist fs_klist; 172 static kmutex_t fs_klist_lock; 173 174 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 175 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 176 177 void 178 vfs_evfilt_fs_init(void) 179 { 180 klist_init(&fs_klist); 181 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 182 } 183 184 static int 185 filt_fsattach(struct knote *kn) 186 { 187 mutex_enter(&fs_klist_lock); 188 kn->kn_flags |= EV_CLEAR; 189 klist_insert(&fs_klist, kn); 190 mutex_exit(&fs_klist_lock); 191 192 return 0; 193 } 194 195 static void 196 filt_fsdetach(struct knote *kn) 197 { 198 mutex_enter(&fs_klist_lock); 199 klist_remove(&fs_klist, kn); 200 mutex_exit(&fs_klist_lock); 201 } 202 203 static int 204 filt_fs(struct knote *kn, long hint) 205 { 206 int rv; 207 208 if (hint & NOTE_SUBMIT) { 209 KASSERT(mutex_owned(&fs_klist_lock)); 210 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 211 } else { 212 mutex_enter(&fs_klist_lock); 213 } 214 215 rv = (kn->kn_fflags != 0); 216 217 if ((hint & NOTE_SUBMIT) == 0) { 218 mutex_exit(&fs_klist_lock); 219 } 220 221 return rv; 222 } 223 224 /* referenced in kern_event.c */ 225 const struct filterops fs_filtops = { 226 .f_flags = FILTEROP_MPSAFE, 227 .f_attach = filt_fsattach, 228 .f_detach = filt_fsdetach, 229 .f_event = filt_fs, 230 }; 231 232 static int 233 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 234 { 235 file_t *dfp; 236 int error; 237 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 238 239 if (fdat != AT_FDCWD && path[0] != '/') { 240 if ((error = fd_getvnode(fdat, &dfp)) != 0) 241 goto out; 242 243 NDAT(ndp, dfp->f_vnode); 244 } 245 246 error = namei(ndp); 247 248 if (fdat != AT_FDCWD && path[0] != '/') 249 fd_putfile(fdat); 250 out: 251 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 252 return error; 253 } 254 255 static int 256 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 257 namei_simple_flags_t sflags, struct vnode **vp_ret) 258 { 259 file_t *dfp; 260 struct vnode *dvp; 261 int error; 262 struct pathbuf *pb; 263 const char *p; 264 265 error = pathbuf_copyin(path, &pb); 266 if (error) { 267 return error; 268 } 269 p = pathbuf_stringcopy_get(pb); 270 271 if (fdat != AT_FDCWD && p[0] != '/') { 272 if ((error = fd_getvnode(fdat, &dfp)) != 0) 273 goto out; 274 275 dvp = dfp->f_vnode; 276 } else { 277 dvp = NULL; 278 } 279 280 error = nameiat_simple(dvp, pb, sflags, vp_ret); 281 282 if (fdat != AT_FDCWD && p[0] != '/') 283 fd_putfile(fdat); 284 285 out: 286 pathbuf_stringcopy_put(pb, p); 287 pathbuf_destroy(pb); 288 289 return error; 290 } 291 292 static int 293 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 294 { 295 int error; 296 297 fp->f_flag = flags & FMASK; 298 fp->f_type = DTYPE_VNODE; 299 fp->f_ops = &vnops; 300 fp->f_vnode = vp; 301 302 if (flags & (O_EXLOCK | O_SHLOCK)) { 303 struct flock lf; 304 int type; 305 306 lf.l_whence = SEEK_SET; 307 lf.l_start = 0; 308 lf.l_len = 0; 309 if (flags & O_EXLOCK) 310 lf.l_type = F_WRLCK; 311 else 312 lf.l_type = F_RDLCK; 313 type = F_FLOCK; 314 if ((flags & FNONBLOCK) == 0) 315 type |= F_WAIT; 316 VOP_UNLOCK(vp); 317 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 318 if (error) { 319 (void) vn_close(vp, fp->f_flag, fp->f_cred); 320 fd_abort(l->l_proc, fp, indx); 321 return error; 322 } 323 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 324 atomic_or_uint(&fp->f_flag, FHASLOCK); 325 } 326 if (flags & O_CLOEXEC) 327 fd_set_exclose(l, indx, true); 328 return 0; 329 } 330 331 static int 332 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 333 void *data, size_t *data_len) 334 { 335 struct mount *mp; 336 int error = 0, saved_flags; 337 338 mp = vp->v_mount; 339 saved_flags = mp->mnt_flag; 340 341 /* We can operate only on VV_ROOT nodes. */ 342 if ((vp->v_vflag & VV_ROOT) == 0) { 343 error = EINVAL; 344 goto out; 345 } 346 347 /* 348 * We only allow the filesystem to be reloaded if it 349 * is currently mounted read-only. Additionally, we 350 * prevent read-write to read-only downgrades. 351 */ 352 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 353 (mp->mnt_flag & MNT_RDONLY) == 0 && 354 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 355 error = EOPNOTSUPP; /* Needs translation */ 356 goto out; 357 } 358 359 /* 360 * Enabling MNT_UNION requires a covered mountpoint and 361 * must not happen on the root mount. 362 */ 363 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 364 error = EOPNOTSUPP; 365 goto out; 366 } 367 368 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 369 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 370 if (error) 371 goto out; 372 373 error = vfs_suspend(mp, 0); 374 if (error) 375 goto out; 376 377 mutex_enter(mp->mnt_updating); 378 379 mp->mnt_flag &= ~MNT_OP_FLAGS; 380 mp->mnt_flag |= flags & MNT_OP_FLAGS; 381 382 /* 383 * Set the mount level flags. 384 */ 385 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 386 if ((flags & MNT_RDONLY)) 387 mp->mnt_iflag |= IMNT_WANTRDONLY; 388 else 389 mp->mnt_iflag |= IMNT_WANTRDWR; 390 } 391 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 392 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 393 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 394 mp->mnt_flag &= ~MNT_RDONLY; 395 396 error = VFS_MOUNT(mp, path, data, data_len); 397 398 if (error && data != NULL) { 399 int error2; 400 401 /* 402 * Update failed; let's try and see if it was an 403 * export request. For compat with 3.0 and earlier. 404 */ 405 error2 = vfs_hooks_reexport(mp, path, data); 406 407 /* 408 * Only update error code if the export request was 409 * understood but some problem occurred while 410 * processing it. 411 */ 412 if (error2 != EJUSTRETURN) 413 error = error2; 414 } 415 416 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 417 mp->mnt_flag |= MNT_RDONLY; 418 if (error) 419 mp->mnt_flag = saved_flags; 420 mp->mnt_flag &= ~MNT_OP_FLAGS; 421 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 422 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 423 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 424 vfs_syncer_add_to_worklist(mp); 425 } else { 426 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 427 vfs_syncer_remove_from_worklist(mp); 428 } 429 mutex_exit(mp->mnt_updating); 430 vfs_resume(mp); 431 432 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 433 (flags & MNT_EXTATTR)) { 434 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 435 NULL, 0, NULL) != 0) { 436 printf("%s: failed to start extattr, error = %d", 437 mp->mnt_stat.f_mntonname, error); 438 mp->mnt_flag &= ~MNT_EXTATTR; 439 } 440 } 441 442 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 443 !(flags & MNT_EXTATTR)) { 444 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 445 NULL, 0, NULL) != 0) { 446 printf("%s: failed to stop extattr, error = %d", 447 mp->mnt_stat.f_mntonname, error); 448 mp->mnt_flag |= MNT_RDONLY; 449 } 450 } 451 out: 452 return (error); 453 } 454 455 static int 456 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 457 struct vfsops **vfsops) 458 { 459 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 460 int error; 461 462 if (type_seg == UIO_USERSPACE) { 463 /* Copy file-system type from userspace. */ 464 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 465 } else { 466 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 467 KASSERT(error == 0); 468 } 469 470 if (error) { 471 /* 472 * Historically, filesystem types were identified by numbers. 473 * If we get an integer for the filesystem type instead of a 474 * string, we check to see if it matches one of the historic 475 * filesystem types. 476 */ 477 u_long fsindex = (u_long)fstype; 478 if (fsindex >= nmountcompatnames || 479 mountcompatnames[fsindex] == NULL) 480 return ENODEV; 481 strlcpy(fstypename, mountcompatnames[fsindex], 482 sizeof(fstypename)); 483 } 484 485 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 486 if (strcmp(fstypename, "ufs") == 0) 487 fstypename[0] = 'f'; 488 489 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 490 return 0; 491 492 /* If we can autoload a vfs module, try again */ 493 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 494 495 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 496 return 0; 497 498 return ENODEV; 499 } 500 501 static int 502 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 503 void *data, size_t *data_len) 504 { 505 struct mount *mp; 506 int error; 507 508 /* If MNT_GETARGS is specified, it should be the only flag. */ 509 if (flags & ~MNT_GETARGS) 510 return EINVAL; 511 512 mp = vp->v_mount; 513 514 /* XXX: probably some notion of "can see" here if we want isolation. */ 515 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 516 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 517 if (error) 518 return error; 519 520 if ((vp->v_vflag & VV_ROOT) == 0) 521 return EINVAL; 522 523 if (vfs_busy(mp)) 524 return EPERM; 525 526 mutex_enter(mp->mnt_updating); 527 mp->mnt_flag &= ~MNT_OP_FLAGS; 528 mp->mnt_flag |= MNT_GETARGS; 529 error = VFS_MOUNT(mp, path, data, data_len); 530 mp->mnt_flag &= ~MNT_OP_FLAGS; 531 mutex_exit(mp->mnt_updating); 532 533 vfs_unbusy(mp); 534 return (error); 535 } 536 537 int 538 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 539 { 540 /* { 541 syscallarg(const char *) type; 542 syscallarg(const char *) path; 543 syscallarg(int) flags; 544 syscallarg(void *) data; 545 syscallarg(size_t) data_len; 546 } */ 547 548 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 549 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 550 SCARG(uap, data_len), retval); 551 } 552 553 int 554 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 555 const char *path, int flags, void *data, enum uio_seg data_seg, 556 size_t data_len, register_t *retval) 557 { 558 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 559 struct vnode *vp; 560 void *data_buf = data; 561 bool vfsopsrele = false; 562 size_t alloc_sz = 0; 563 int error; 564 565 /* 566 * Get vnode to be covered 567 */ 568 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 569 if (error != 0) { 570 vp = NULL; 571 goto done; 572 } 573 574 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 575 vfsops = vp->v_mount->mnt_op; 576 } else { 577 /* 'type' is userspace */ 578 error = mount_get_vfsops(type, type_seg, &vfsops); 579 if (error != 0) 580 goto done; 581 vfsopsrele = true; 582 } 583 584 /* 585 * We allow data to be NULL, even for userspace. Some fs's don't need 586 * it. The others will handle NULL. 587 */ 588 if (data != NULL && data_seg == UIO_USERSPACE) { 589 if (data_len == 0) { 590 /* No length supplied, use default for filesystem */ 591 data_len = vfsops->vfs_min_mount_data; 592 593 /* 594 * Hopefully a longer buffer won't make copyin() fail. 595 * For compatibility with 3.0 and earlier. 596 */ 597 if (flags & MNT_UPDATE 598 && data_len < sizeof (struct mnt_export_args30)) 599 data_len = sizeof (struct mnt_export_args30); 600 } 601 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 602 error = EINVAL; 603 goto done; 604 } 605 alloc_sz = data_len; 606 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 607 608 /* NFS needs the buffer even for mnt_getargs .... */ 609 error = copyin(data, data_buf, data_len); 610 if (error != 0) 611 goto done; 612 } 613 614 if (flags & MNT_GETARGS) { 615 if (data_len == 0) { 616 error = EINVAL; 617 goto done; 618 } 619 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 620 if (error != 0) 621 goto done; 622 if (data_seg == UIO_USERSPACE) 623 error = copyout(data_buf, data, data_len); 624 *retval = data_len; 625 } else if (flags & MNT_UPDATE) { 626 error = mount_update(l, vp, path, flags, data_buf, &data_len); 627 } else { 628 /* Locking is handled internally in mount_domount(). */ 629 KASSERT(vfsopsrele == true); 630 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 631 &data_len); 632 vfsopsrele = false; 633 } 634 if (!error) { 635 mutex_enter(&fs_klist_lock); 636 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 637 mutex_exit(&fs_klist_lock); 638 } 639 640 done: 641 if (vfsopsrele) 642 vfs_delref(vfsops); 643 if (vp != NULL) { 644 vrele(vp); 645 } 646 if (data_buf != data) 647 kmem_free(data_buf, alloc_sz); 648 return (error); 649 } 650 651 /* 652 * Unmount a file system. 653 * 654 * Note: unmount takes a path to the vnode mounted on as argument, 655 * not special file (as before). 656 */ 657 /* ARGSUSED */ 658 int 659 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 660 { 661 /* { 662 syscallarg(const char *) path; 663 syscallarg(int) flags; 664 } */ 665 struct vnode *vp; 666 struct mount *mp; 667 int error; 668 struct pathbuf *pb; 669 struct nameidata nd; 670 671 error = pathbuf_copyin(SCARG(uap, path), &pb); 672 if (error) { 673 return error; 674 } 675 676 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 677 if ((error = namei(&nd)) != 0) { 678 pathbuf_destroy(pb); 679 return error; 680 } 681 vp = nd.ni_vp; 682 pathbuf_destroy(pb); 683 684 mp = vp->v_mount; 685 vfs_ref(mp); 686 VOP_UNLOCK(vp); 687 688 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 689 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 690 if (error) { 691 vrele(vp); 692 vfs_rele(mp); 693 return (error); 694 } 695 696 /* 697 * Don't allow unmounting the root file system. 698 */ 699 if (mp->mnt_flag & MNT_ROOTFS) { 700 vrele(vp); 701 vfs_rele(mp); 702 return (EINVAL); 703 } 704 705 /* 706 * Must be the root of the filesystem 707 */ 708 if ((vp->v_vflag & VV_ROOT) == 0) { 709 vrele(vp); 710 vfs_rele(mp); 711 return (EINVAL); 712 } 713 714 vrele(vp); 715 error = dounmount(mp, SCARG(uap, flags), l); 716 vfs_rele(mp); 717 if (!error) { 718 mutex_enter(&fs_klist_lock); 719 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 720 mutex_exit(&fs_klist_lock); 721 } 722 return error; 723 } 724 725 /* 726 * Sync each mounted filesystem. 727 */ 728 #ifdef DEBUG 729 int syncprt = 0; 730 struct ctldebug debug0 = { "syncprt", &syncprt }; 731 #endif 732 733 void 734 do_sys_sync(struct lwp *l) 735 { 736 mount_iterator_t *iter; 737 struct mount *mp; 738 int asyncflag; 739 740 mountlist_iterator_init(&iter); 741 while ((mp = mountlist_iterator_next(iter)) != NULL) { 742 mutex_enter(mp->mnt_updating); 743 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 744 /* 745 * Temporarily clear the MNT_ASYNC flags so that 746 * bwrite() doesnt convert the sync writes to 747 * delayed writes. 748 */ 749 asyncflag = mp->mnt_flag & MNT_ASYNC; 750 mp->mnt_flag &= ~MNT_ASYNC; 751 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 752 mp->mnt_flag |= asyncflag; 753 } 754 mutex_exit(mp->mnt_updating); 755 } 756 mountlist_iterator_destroy(iter); 757 #ifdef DEBUG 758 if (syncprt) 759 vfs_bufstats(); 760 #endif /* DEBUG */ 761 } 762 763 static bool 764 sync_vnode_filter(void *cookie, vnode_t *vp) 765 { 766 767 if (vp->v_numoutput > 0) { 768 ++*(int *)cookie; 769 } 770 return false; 771 } 772 773 int 774 vfs_syncwait(void) 775 { 776 int nbusy, nbusy_prev, iter; 777 struct vnode_iterator *vniter; 778 mount_iterator_t *mpiter; 779 struct mount *mp; 780 781 for (nbusy_prev = 0, iter = 0; iter < 20;) { 782 nbusy = 0; 783 mountlist_iterator_init(&mpiter); 784 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 785 vnode_t *vp __diagused; 786 vfs_vnode_iterator_init(mp, &vniter); 787 vp = vfs_vnode_iterator_next(vniter, 788 sync_vnode_filter, &nbusy); 789 KASSERT(vp == NULL); 790 vfs_vnode_iterator_destroy(vniter); 791 } 792 mountlist_iterator_destroy(mpiter); 793 794 if (nbusy == 0) 795 break; 796 if (nbusy_prev == 0) 797 nbusy_prev = nbusy; 798 printf("%d ", nbusy); 799 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 800 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 801 iter++; 802 else 803 nbusy_prev = nbusy; 804 } 805 806 if (nbusy) { 807 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 808 printf("giving up\nPrinting vnodes for busy buffers\n"); 809 mountlist_iterator_init(&mpiter); 810 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 811 vnode_t *vp; 812 vfs_vnode_iterator_init(mp, &vniter); 813 vp = vfs_vnode_iterator_next(vniter, 814 NULL, NULL); 815 mutex_enter(vp->v_interlock); 816 if (vp->v_numoutput > 0) 817 vprint(NULL, vp); 818 mutex_exit(vp->v_interlock); 819 vrele(vp); 820 vfs_vnode_iterator_destroy(vniter); 821 } 822 mountlist_iterator_destroy(mpiter); 823 #endif 824 } 825 826 return nbusy; 827 } 828 829 /* ARGSUSED */ 830 int 831 sys_sync(struct lwp *l, const void *v, register_t *retval) 832 { 833 do_sys_sync(l); 834 return (0); 835 } 836 837 838 /* 839 * Access or change filesystem quotas. 840 * 841 * (this is really 14 different calls bundled into one) 842 */ 843 844 static int 845 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 846 { 847 struct quotastat info_k; 848 int error; 849 850 /* ensure any padding bytes are cleared */ 851 memset(&info_k, 0, sizeof(info_k)); 852 853 error = vfs_quotactl_stat(mp, &info_k); 854 if (error) { 855 return error; 856 } 857 858 return copyout(&info_k, info_u, sizeof(info_k)); 859 } 860 861 static int 862 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 863 struct quotaidtypestat *info_u) 864 { 865 struct quotaidtypestat info_k; 866 int error; 867 868 /* ensure any padding bytes are cleared */ 869 memset(&info_k, 0, sizeof(info_k)); 870 871 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 872 if (error) { 873 return error; 874 } 875 876 return copyout(&info_k, info_u, sizeof(info_k)); 877 } 878 879 static int 880 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 881 struct quotaobjtypestat *info_u) 882 { 883 struct quotaobjtypestat info_k; 884 int error; 885 886 /* ensure any padding bytes are cleared */ 887 memset(&info_k, 0, sizeof(info_k)); 888 889 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 890 if (error) { 891 return error; 892 } 893 894 return copyout(&info_k, info_u, sizeof(info_k)); 895 } 896 897 static int 898 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 899 struct quotaval *val_u) 900 { 901 struct quotakey key_k; 902 struct quotaval val_k; 903 int error; 904 905 /* ensure any padding bytes are cleared */ 906 memset(&val_k, 0, sizeof(val_k)); 907 908 error = copyin(key_u, &key_k, sizeof(key_k)); 909 if (error) { 910 return error; 911 } 912 913 error = vfs_quotactl_get(mp, &key_k, &val_k); 914 if (error) { 915 return error; 916 } 917 918 return copyout(&val_k, val_u, sizeof(val_k)); 919 } 920 921 static int 922 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 923 const struct quotaval *val_u) 924 { 925 struct quotakey key_k; 926 struct quotaval val_k; 927 int error; 928 929 error = copyin(key_u, &key_k, sizeof(key_k)); 930 if (error) { 931 return error; 932 } 933 934 error = copyin(val_u, &val_k, sizeof(val_k)); 935 if (error) { 936 return error; 937 } 938 939 return vfs_quotactl_put(mp, &key_k, &val_k); 940 } 941 942 static int 943 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 944 { 945 struct quotakey key_k; 946 int error; 947 948 error = copyin(key_u, &key_k, sizeof(key_k)); 949 if (error) { 950 return error; 951 } 952 953 return vfs_quotactl_del(mp, &key_k); 954 } 955 956 static int 957 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 958 { 959 struct quotakcursor cursor_k; 960 int error; 961 962 /* ensure any padding bytes are cleared */ 963 memset(&cursor_k, 0, sizeof(cursor_k)); 964 965 error = vfs_quotactl_cursoropen(mp, &cursor_k); 966 if (error) { 967 return error; 968 } 969 970 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 971 } 972 973 static int 974 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 975 { 976 struct quotakcursor cursor_k; 977 int error; 978 979 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 980 if (error) { 981 return error; 982 } 983 984 return vfs_quotactl_cursorclose(mp, &cursor_k); 985 } 986 987 static int 988 do_sys_quotactl_cursorskipidtype(struct mount *mp, 989 struct quotakcursor *cursor_u, int idtype) 990 { 991 struct quotakcursor cursor_k; 992 int error; 993 994 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 995 if (error) { 996 return error; 997 } 998 999 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 1000 if (error) { 1001 return error; 1002 } 1003 1004 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1005 } 1006 1007 static int 1008 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 1009 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 1010 unsigned *ret_u) 1011 { 1012 #define CGET_STACK_MAX 8 1013 struct quotakcursor cursor_k; 1014 struct quotakey stackkeys[CGET_STACK_MAX]; 1015 struct quotaval stackvals[CGET_STACK_MAX]; 1016 struct quotakey *keys_k; 1017 struct quotaval *vals_k; 1018 unsigned ret_k; 1019 int error; 1020 1021 if (maxnum > 128) { 1022 maxnum = 128; 1023 } 1024 1025 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1026 if (error) { 1027 return error; 1028 } 1029 1030 if (maxnum <= CGET_STACK_MAX) { 1031 keys_k = stackkeys; 1032 vals_k = stackvals; 1033 /* ensure any padding bytes are cleared */ 1034 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1035 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1036 } else { 1037 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1038 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1039 } 1040 1041 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1042 &ret_k); 1043 if (error) { 1044 goto fail; 1045 } 1046 1047 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1048 if (error) { 1049 goto fail; 1050 } 1051 1052 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1053 if (error) { 1054 goto fail; 1055 } 1056 1057 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1058 if (error) { 1059 goto fail; 1060 } 1061 1062 /* do last to maximize the chance of being able to recover a failure */ 1063 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1064 1065 fail: 1066 if (keys_k != stackkeys) { 1067 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1068 } 1069 if (vals_k != stackvals) { 1070 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1071 } 1072 return error; 1073 } 1074 1075 static int 1076 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1077 int *ret_u) 1078 { 1079 struct quotakcursor cursor_k; 1080 int ret_k; 1081 int error; 1082 1083 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1084 if (error) { 1085 return error; 1086 } 1087 1088 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1089 if (error) { 1090 return error; 1091 } 1092 1093 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1094 if (error) { 1095 return error; 1096 } 1097 1098 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1099 } 1100 1101 static int 1102 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1103 { 1104 struct quotakcursor cursor_k; 1105 int error; 1106 1107 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1108 if (error) { 1109 return error; 1110 } 1111 1112 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1113 if (error) { 1114 return error; 1115 } 1116 1117 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1118 } 1119 1120 static int 1121 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1122 { 1123 char *path_k; 1124 int error; 1125 1126 /* XXX this should probably be a struct pathbuf */ 1127 path_k = PNBUF_GET(); 1128 error = copyin(path_u, path_k, PATH_MAX); 1129 if (error) { 1130 PNBUF_PUT(path_k); 1131 return error; 1132 } 1133 1134 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1135 1136 PNBUF_PUT(path_k); 1137 return error; 1138 } 1139 1140 static int 1141 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1142 { 1143 return vfs_quotactl_quotaoff(mp, idtype); 1144 } 1145 1146 int 1147 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1148 { 1149 struct mount *mp; 1150 struct vnode *vp; 1151 int error; 1152 1153 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1154 if (error != 0) 1155 return (error); 1156 mp = vp->v_mount; 1157 1158 switch (args->qc_op) { 1159 case QUOTACTL_STAT: 1160 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1161 break; 1162 case QUOTACTL_IDTYPESTAT: 1163 error = do_sys_quotactl_idtypestat(mp, 1164 args->u.idtypestat.qc_idtype, 1165 args->u.idtypestat.qc_info); 1166 break; 1167 case QUOTACTL_OBJTYPESTAT: 1168 error = do_sys_quotactl_objtypestat(mp, 1169 args->u.objtypestat.qc_objtype, 1170 args->u.objtypestat.qc_info); 1171 break; 1172 case QUOTACTL_GET: 1173 error = do_sys_quotactl_get(mp, 1174 args->u.get.qc_key, 1175 args->u.get.qc_val); 1176 break; 1177 case QUOTACTL_PUT: 1178 error = do_sys_quotactl_put(mp, 1179 args->u.put.qc_key, 1180 args->u.put.qc_val); 1181 break; 1182 case QUOTACTL_DEL: 1183 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1184 break; 1185 case QUOTACTL_CURSOROPEN: 1186 error = do_sys_quotactl_cursoropen(mp, 1187 args->u.cursoropen.qc_cursor); 1188 break; 1189 case QUOTACTL_CURSORCLOSE: 1190 error = do_sys_quotactl_cursorclose(mp, 1191 args->u.cursorclose.qc_cursor); 1192 break; 1193 case QUOTACTL_CURSORSKIPIDTYPE: 1194 error = do_sys_quotactl_cursorskipidtype(mp, 1195 args->u.cursorskipidtype.qc_cursor, 1196 args->u.cursorskipidtype.qc_idtype); 1197 break; 1198 case QUOTACTL_CURSORGET: 1199 error = do_sys_quotactl_cursorget(mp, 1200 args->u.cursorget.qc_cursor, 1201 args->u.cursorget.qc_keys, 1202 args->u.cursorget.qc_vals, 1203 args->u.cursorget.qc_maxnum, 1204 args->u.cursorget.qc_ret); 1205 break; 1206 case QUOTACTL_CURSORATEND: 1207 error = do_sys_quotactl_cursoratend(mp, 1208 args->u.cursoratend.qc_cursor, 1209 args->u.cursoratend.qc_ret); 1210 break; 1211 case QUOTACTL_CURSORREWIND: 1212 error = do_sys_quotactl_cursorrewind(mp, 1213 args->u.cursorrewind.qc_cursor); 1214 break; 1215 case QUOTACTL_QUOTAON: 1216 error = do_sys_quotactl_quotaon(mp, 1217 args->u.quotaon.qc_idtype, 1218 args->u.quotaon.qc_quotafile); 1219 break; 1220 case QUOTACTL_QUOTAOFF: 1221 error = do_sys_quotactl_quotaoff(mp, 1222 args->u.quotaoff.qc_idtype); 1223 break; 1224 default: 1225 error = EINVAL; 1226 break; 1227 } 1228 1229 vrele(vp); 1230 return error; 1231 } 1232 1233 /* ARGSUSED */ 1234 int 1235 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1236 register_t *retval) 1237 { 1238 /* { 1239 syscallarg(const char *) path; 1240 syscallarg(struct quotactl_args *) args; 1241 } */ 1242 struct quotactl_args args; 1243 int error; 1244 1245 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1246 if (error) { 1247 return error; 1248 } 1249 1250 return do_sys_quotactl(SCARG(uap, path), &args); 1251 } 1252 1253 int 1254 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1255 int root) 1256 { 1257 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1258 bool chrooted; 1259 int error = 0; 1260 1261 KASSERT(l == curlwp); 1262 1263 /* 1264 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1265 * since it would imply chroots can be escaped. Just make sure this 1266 * routine is self-consistent. 1267 */ 1268 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1269 1270 /* 1271 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1272 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1273 * overrides MNT_NOWAIT. 1274 */ 1275 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1276 (flags != MNT_WAIT && flags != 0)) { 1277 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1278 } else { 1279 /* Get the filesystem stats now */ 1280 memset(sp, 0, sizeof(*sp)); 1281 if ((error = VFS_STATVFS(mp, sp)) != 0) 1282 return error; 1283 if (!chrooted) 1284 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1285 } 1286 1287 if (chrooted) { 1288 size_t len; 1289 char *bp; 1290 char c; 1291 char *path = PNBUF_GET(); 1292 1293 bp = path + MAXPATHLEN; 1294 *--bp = '\0'; 1295 rw_enter(&cwdi->cwdi_lock, RW_READER); 1296 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1297 MAXPATHLEN / 2, 0, l); 1298 rw_exit(&cwdi->cwdi_lock); 1299 if (error) { 1300 PNBUF_PUT(path); 1301 return error; 1302 } 1303 len = strlen(bp); 1304 if (len != 1) { 1305 /* 1306 * for mount points that are below our root, we can see 1307 * them, so we fix up the pathname and return them. The 1308 * rest we cannot see, so we don't allow viewing the 1309 * data. 1310 */ 1311 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1312 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1313 (void)strlcpy(sp->f_mntonname, 1314 c == '\0' ? "/" : &sp->f_mntonname[len], 1315 sizeof(sp->f_mntonname)); 1316 } else { 1317 if (root) 1318 (void)strlcpy(sp->f_mntonname, "/", 1319 sizeof(sp->f_mntonname)); 1320 else 1321 error = EPERM; 1322 } 1323 } 1324 PNBUF_PUT(path); 1325 } 1326 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1327 return error; 1328 } 1329 1330 /* 1331 * Get filesystem statistics by path. 1332 */ 1333 int 1334 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1335 { 1336 struct mount *mp; 1337 int error; 1338 struct vnode *vp; 1339 1340 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1341 if (error != 0) 1342 return error; 1343 mp = vp->v_mount; 1344 error = dostatvfs(mp, sb, l, flags, 1); 1345 vrele(vp); 1346 return error; 1347 } 1348 1349 /* ARGSUSED */ 1350 int 1351 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1352 { 1353 /* { 1354 syscallarg(const char *) path; 1355 syscallarg(struct statvfs *) buf; 1356 syscallarg(int) flags; 1357 } */ 1358 struct statvfs *sb; 1359 int error; 1360 1361 sb = STATVFSBUF_GET(); 1362 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1363 if (error == 0) 1364 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1365 STATVFSBUF_PUT(sb); 1366 return error; 1367 } 1368 1369 /* 1370 * Get filesystem statistics by fd. 1371 */ 1372 int 1373 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1374 { 1375 file_t *fp; 1376 struct mount *mp; 1377 int error; 1378 1379 /* fd_getvnode() will use the descriptor for us */ 1380 if ((error = fd_getvnode(fd, &fp)) != 0) 1381 return (error); 1382 mp = fp->f_vnode->v_mount; 1383 error = dostatvfs(mp, sb, curlwp, flags, 1); 1384 fd_putfile(fd); 1385 return error; 1386 } 1387 1388 /* ARGSUSED */ 1389 int 1390 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1391 { 1392 /* { 1393 syscallarg(int) fd; 1394 syscallarg(struct statvfs *) buf; 1395 syscallarg(int) flags; 1396 } */ 1397 struct statvfs *sb; 1398 int error; 1399 1400 sb = STATVFSBUF_GET(); 1401 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1402 if (error == 0) 1403 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1404 STATVFSBUF_PUT(sb); 1405 return error; 1406 } 1407 1408 1409 /* 1410 * Get statistics on all filesystems. 1411 */ 1412 int 1413 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1414 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1415 register_t *retval) 1416 { 1417 int root = 0; 1418 mount_iterator_t *iter; 1419 struct proc *p = l->l_proc; 1420 struct mount *mp; 1421 struct statvfs *sb; 1422 size_t count, maxcount; 1423 int error = 0; 1424 1425 sb = STATVFSBUF_GET(); 1426 maxcount = bufsize / entry_sz; 1427 count = 0; 1428 mountlist_iterator_init(&iter); 1429 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1430 if (sfsp && count < maxcount) { 1431 error = dostatvfs(mp, sb, l, flags, 0); 1432 if (error) { 1433 error = 0; 1434 continue; 1435 } 1436 error = copyfn(sb, sfsp, entry_sz); 1437 if (error) 1438 goto out; 1439 sfsp = (char *)sfsp + entry_sz; 1440 root |= strcmp(sb->f_mntonname, "/") == 0; 1441 } 1442 count++; 1443 } 1444 1445 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1446 /* 1447 * fake a root entry 1448 */ 1449 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1450 sb, l, flags, 1); 1451 if (error != 0) 1452 goto out; 1453 if (sfsp) { 1454 error = copyfn(sb, sfsp, entry_sz); 1455 if (error != 0) 1456 goto out; 1457 } 1458 count++; 1459 } 1460 if (sfsp && count > maxcount) 1461 *retval = maxcount; 1462 else 1463 *retval = count; 1464 out: 1465 mountlist_iterator_destroy(iter); 1466 STATVFSBUF_PUT(sb); 1467 return error; 1468 } 1469 1470 int 1471 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1472 register_t *retval) 1473 { 1474 /* { 1475 syscallarg(struct statvfs *) buf; 1476 syscallarg(size_t) bufsize; 1477 syscallarg(int) flags; 1478 } */ 1479 1480 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1481 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1482 } 1483 1484 /* 1485 * Change current working directory to a given file descriptor. 1486 */ 1487 int 1488 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1489 { 1490 struct proc *p = l->l_proc; 1491 struct cwdinfo *cwdi; 1492 struct vnode *vp, *tdp; 1493 struct mount *mp; 1494 file_t *fp; 1495 int error; 1496 1497 /* fd_getvnode() will use the descriptor for us */ 1498 if ((error = fd_getvnode(fd, &fp)) != 0) 1499 return error; 1500 vp = fp->f_vnode; 1501 1502 vref(vp); 1503 vn_lock(vp, LK_SHARED | LK_RETRY); 1504 if (vp->v_type != VDIR) 1505 error = ENOTDIR; 1506 else 1507 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1508 if (error) { 1509 vput(vp); 1510 goto out; 1511 } 1512 while ((mp = vp->v_mountedhere) != NULL) { 1513 error = vfs_busy(mp); 1514 vput(vp); 1515 if (error != 0) 1516 goto out; 1517 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1518 vfs_unbusy(mp); 1519 if (error) 1520 goto out; 1521 vp = tdp; 1522 } 1523 VOP_UNLOCK(vp); 1524 1525 /* 1526 * Disallow changing to a directory not under the process's 1527 * current root directory (if there is one). 1528 */ 1529 cwdi = p->p_cwdi; 1530 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1531 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1532 vrele(vp); 1533 error = EPERM; /* operation not permitted */ 1534 } else { 1535 vrele(cwdi->cwdi_cdir); 1536 cwdi->cwdi_cdir = vp; 1537 } 1538 rw_exit(&cwdi->cwdi_lock); 1539 1540 out: 1541 fd_putfile(fd); 1542 return error; 1543 } 1544 1545 /* 1546 * Change current working directory to a given file descriptor. 1547 */ 1548 /* ARGSUSED */ 1549 int 1550 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1551 { 1552 /* { 1553 syscallarg(int) fd; 1554 } */ 1555 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1556 } 1557 1558 /* 1559 * Change this process's notion of the root directory to a given file 1560 * descriptor. 1561 */ 1562 int 1563 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1564 { 1565 struct vnode *vp; 1566 file_t *fp; 1567 int error, fd = SCARG(uap, fd); 1568 1569 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1570 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1571 return error; 1572 /* fd_getvnode() will use the descriptor for us */ 1573 if ((error = fd_getvnode(fd, &fp)) != 0) 1574 return error; 1575 vp = fp->f_vnode; 1576 vn_lock(vp, LK_SHARED | LK_RETRY); 1577 if (vp->v_type != VDIR) 1578 error = ENOTDIR; 1579 else 1580 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1581 VOP_UNLOCK(vp); 1582 if (error) 1583 goto out; 1584 vref(vp); 1585 change_root(vp); 1586 1587 out: 1588 fd_putfile(fd); 1589 return (error); 1590 } 1591 1592 /* 1593 * Change current working directory (``.''). 1594 */ 1595 int 1596 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1597 register_t *retval) 1598 { 1599 struct proc *p = l->l_proc; 1600 struct cwdinfo * cwdi; 1601 int error; 1602 struct vnode *vp; 1603 1604 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1605 return error; 1606 cwdi = p->p_cwdi; 1607 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1608 vrele(cwdi->cwdi_cdir); 1609 cwdi->cwdi_cdir = vp; 1610 rw_exit(&cwdi->cwdi_lock); 1611 return 0; 1612 } 1613 1614 /* 1615 * Change current working directory (``.''). 1616 */ 1617 /* ARGSUSED */ 1618 int 1619 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1620 { 1621 /* { 1622 syscallarg(const char *) path; 1623 } */ 1624 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1625 } 1626 1627 /* 1628 * Change notion of root (``/'') directory. 1629 */ 1630 /* ARGSUSED */ 1631 int 1632 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1633 { 1634 /* { 1635 syscallarg(const char *) path; 1636 } */ 1637 int error; 1638 struct vnode *vp; 1639 1640 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1641 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1642 return (error); 1643 1644 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1645 if (error == 0) 1646 change_root(vp); 1647 return error; 1648 } 1649 1650 /* 1651 * Common routine for chroot and fchroot. 1652 * NB: callers need to properly authorize the change root operation. 1653 */ 1654 void 1655 change_root(struct vnode *vp) 1656 { 1657 kauth_cred_t ncred; 1658 struct lwp *l = curlwp; 1659 struct proc *p = l->l_proc; 1660 struct cwdinfo *cwdi = p->p_cwdi; 1661 1662 ncred = kauth_cred_alloc(); 1663 1664 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1665 if (cwdi->cwdi_rdir != NULL) 1666 vrele(cwdi->cwdi_rdir); 1667 cwdi->cwdi_rdir = vp; 1668 1669 /* 1670 * Prevent escaping from chroot by putting the root under 1671 * the working directory. Silently chdir to / if we aren't 1672 * already there. 1673 */ 1674 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1675 /* 1676 * XXX would be more failsafe to change directory to a 1677 * deadfs node here instead 1678 */ 1679 vrele(cwdi->cwdi_cdir); 1680 vref(vp); 1681 cwdi->cwdi_cdir = vp; 1682 } 1683 rw_exit(&cwdi->cwdi_lock); 1684 1685 /* Get a write lock on the process credential. */ 1686 proc_crmod_enter(); 1687 1688 kauth_cred_clone(p->p_cred, ncred); 1689 kauth_proc_chroot(ncred, p->p_cwdi); 1690 1691 /* Broadcast our credentials to the process and other LWPs. */ 1692 proc_crmod_leave(ncred, p->p_cred, true); 1693 } 1694 1695 /* 1696 * Common routine for chroot and chdir. 1697 * XXX "where" should be enum uio_seg 1698 */ 1699 int 1700 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1701 { 1702 struct pathbuf *pb; 1703 struct nameidata nd; 1704 int error; 1705 1706 error = pathbuf_maybe_copyin(path, where, &pb); 1707 if (error) { 1708 return error; 1709 } 1710 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1711 if ((error = namei(&nd)) != 0) { 1712 pathbuf_destroy(pb); 1713 return error; 1714 } 1715 *vpp = nd.ni_vp; 1716 pathbuf_destroy(pb); 1717 1718 if ((*vpp)->v_type != VDIR) 1719 error = ENOTDIR; 1720 else 1721 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1722 1723 if (error) 1724 vput(*vpp); 1725 else 1726 VOP_UNLOCK(*vpp); 1727 return (error); 1728 } 1729 1730 /* 1731 * Internals of sys_open - path has already been converted into a pathbuf 1732 * (so we can easily reuse this function from other parts of the kernel, 1733 * like posix_spawn post-processing). 1734 */ 1735 int 1736 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1737 int open_mode, int *fd) 1738 { 1739 struct proc *p = l->l_proc; 1740 struct cwdinfo *cwdi = p->p_cwdi; 1741 file_t *fp; 1742 struct vnode *vp; 1743 int dupfd; 1744 bool dupfd_move; 1745 int flags, cmode; 1746 int indx, error; 1747 1748 if (open_flags & O_SEARCH) { 1749 open_flags &= ~(int)O_SEARCH; 1750 } 1751 1752 /* 1753 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1754 * may be specified. 1755 */ 1756 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1757 return EINVAL; 1758 1759 flags = FFLAGS(open_flags); 1760 if ((flags & (FREAD | FWRITE)) == 0) 1761 return EINVAL; 1762 1763 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1764 return error; 1765 } 1766 1767 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1768 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1769 1770 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1771 &vp, &dupfd_move, &dupfd); 1772 if (error != 0) { 1773 fd_abort(p, fp, indx); 1774 return error; 1775 } 1776 1777 if (vp == NULL) { 1778 fd_abort(p, fp, indx); 1779 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1780 if (error) 1781 return error; 1782 *fd = indx; 1783 } else { 1784 error = open_setfp(l, fp, vp, indx, flags); 1785 if (error) 1786 return error; 1787 VOP_UNLOCK(vp); 1788 *fd = indx; 1789 fd_affix(p, fp, indx); 1790 } 1791 1792 return 0; 1793 } 1794 1795 int 1796 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1797 { 1798 struct pathbuf *pb; 1799 int error, oflags; 1800 1801 oflags = FFLAGS(open_flags); 1802 if ((oflags & (FREAD | FWRITE)) == 0) 1803 return EINVAL; 1804 1805 pb = pathbuf_create(path); 1806 if (pb == NULL) 1807 return ENOMEM; 1808 1809 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1810 pathbuf_destroy(pb); 1811 1812 return error; 1813 } 1814 1815 static int 1816 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1817 int mode, int *fd) 1818 { 1819 file_t *dfp = NULL; 1820 struct vnode *dvp = NULL; 1821 struct pathbuf *pb; 1822 const char *pathstring = NULL; 1823 int error; 1824 1825 if (path == NULL) { 1826 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1827 if (error == ENOSYS) 1828 goto no_compat; 1829 if (error) 1830 return error; 1831 } else { 1832 no_compat: 1833 error = pathbuf_copyin(path, &pb); 1834 if (error) 1835 return error; 1836 } 1837 1838 pathstring = pathbuf_stringcopy_get(pb); 1839 1840 /* 1841 * fdat is ignored if: 1842 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1843 * 2) if path is absolute, then fdat is useless. 1844 */ 1845 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1846 /* fd_getvnode() will use the descriptor for us */ 1847 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1848 goto out; 1849 1850 dvp = dfp->f_vnode; 1851 } 1852 1853 error = do_open(l, dvp, pb, flags, mode, fd); 1854 1855 if (dfp != NULL) 1856 fd_putfile(fdat); 1857 out: 1858 pathbuf_stringcopy_put(pb, pathstring); 1859 pathbuf_destroy(pb); 1860 return error; 1861 } 1862 1863 int 1864 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1865 { 1866 /* { 1867 syscallarg(const char *) path; 1868 syscallarg(int) flags; 1869 syscallarg(int) mode; 1870 } */ 1871 int error; 1872 int fd; 1873 1874 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1875 SCARG(uap, flags), SCARG(uap, mode), &fd); 1876 1877 if (error == 0) 1878 *retval = fd; 1879 1880 return error; 1881 } 1882 1883 int 1884 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1885 { 1886 /* { 1887 syscallarg(int) fd; 1888 syscallarg(const char *) path; 1889 syscallarg(int) oflags; 1890 syscallarg(int) mode; 1891 } */ 1892 int error; 1893 int fd; 1894 1895 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1896 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1897 1898 if (error == 0) 1899 *retval = fd; 1900 1901 return error; 1902 } 1903 1904 static void 1905 vfs__fhfree(fhandle_t *fhp) 1906 { 1907 size_t fhsize; 1908 1909 fhsize = FHANDLE_SIZE(fhp); 1910 kmem_free(fhp, fhsize); 1911 } 1912 1913 /* 1914 * vfs_composefh: compose a filehandle. 1915 */ 1916 1917 int 1918 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1919 { 1920 struct mount *mp; 1921 struct fid *fidp; 1922 int error; 1923 size_t needfhsize; 1924 size_t fidsize; 1925 1926 mp = vp->v_mount; 1927 fidp = NULL; 1928 if (*fh_size < FHANDLE_SIZE_MIN) { 1929 fidsize = 0; 1930 } else { 1931 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1932 if (fhp != NULL) { 1933 memset(fhp, 0, *fh_size); 1934 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1935 fidp = &fhp->fh_fid; 1936 } 1937 } 1938 error = VFS_VPTOFH(vp, fidp, &fidsize); 1939 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1940 if (error == 0 && *fh_size < needfhsize) { 1941 error = E2BIG; 1942 } 1943 *fh_size = needfhsize; 1944 return error; 1945 } 1946 1947 int 1948 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1949 { 1950 struct mount *mp; 1951 fhandle_t *fhp; 1952 size_t fhsize; 1953 size_t fidsize; 1954 int error; 1955 1956 mp = vp->v_mount; 1957 fidsize = 0; 1958 error = VFS_VPTOFH(vp, NULL, &fidsize); 1959 KASSERT(error != 0); 1960 if (error != E2BIG) { 1961 goto out; 1962 } 1963 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1964 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1965 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1966 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1967 if (error == 0) { 1968 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1969 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1970 *fhpp = fhp; 1971 } else { 1972 kmem_free(fhp, fhsize); 1973 } 1974 out: 1975 return error; 1976 } 1977 1978 void 1979 vfs_composefh_free(fhandle_t *fhp) 1980 { 1981 1982 vfs__fhfree(fhp); 1983 } 1984 1985 /* 1986 * vfs_fhtovp: lookup a vnode by a filehandle. 1987 */ 1988 1989 int 1990 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1991 { 1992 struct mount *mp; 1993 int error; 1994 1995 *vpp = NULL; 1996 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1997 if (mp == NULL) { 1998 error = ESTALE; 1999 goto out; 2000 } 2001 if (mp->mnt_op->vfs_fhtovp == NULL) { 2002 error = EOPNOTSUPP; 2003 goto out; 2004 } 2005 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 2006 out: 2007 return error; 2008 } 2009 2010 /* 2011 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 2012 * the needed size. 2013 */ 2014 2015 int 2016 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2017 { 2018 fhandle_t *fhp; 2019 int error; 2020 2021 if (fhsize > FHANDLE_SIZE_MAX) { 2022 return EINVAL; 2023 } 2024 if (fhsize < FHANDLE_SIZE_MIN) { 2025 return EINVAL; 2026 } 2027 again: 2028 fhp = kmem_alloc(fhsize, KM_SLEEP); 2029 error = copyin(ufhp, fhp, fhsize); 2030 if (error == 0) { 2031 /* XXX this check shouldn't be here */ 2032 if (FHANDLE_SIZE(fhp) == fhsize) { 2033 *fhpp = fhp; 2034 return 0; 2035 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2036 /* 2037 * a kludge for nfsv2 padded handles. 2038 */ 2039 size_t sz; 2040 2041 sz = FHANDLE_SIZE(fhp); 2042 kmem_free(fhp, fhsize); 2043 fhsize = sz; 2044 goto again; 2045 } else { 2046 /* 2047 * userland told us wrong size. 2048 */ 2049 error = EINVAL; 2050 } 2051 } 2052 kmem_free(fhp, fhsize); 2053 return error; 2054 } 2055 2056 void 2057 vfs_copyinfh_free(fhandle_t *fhp) 2058 { 2059 2060 vfs__fhfree(fhp); 2061 } 2062 2063 /* 2064 * Get file handle system call 2065 */ 2066 int 2067 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 2068 { 2069 /* { 2070 syscallarg(char *) fname; 2071 syscallarg(fhandle_t *) fhp; 2072 syscallarg(size_t *) fh_size; 2073 } */ 2074 struct vnode *vp; 2075 fhandle_t *fh; 2076 int error; 2077 struct pathbuf *pb; 2078 struct nameidata nd; 2079 size_t sz; 2080 size_t usz; 2081 2082 /* 2083 * Must be super user 2084 */ 2085 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2086 0, NULL, NULL, NULL); 2087 if (error) 2088 return (error); 2089 2090 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2091 if (error) { 2092 return error; 2093 } 2094 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2095 error = namei(&nd); 2096 if (error) { 2097 pathbuf_destroy(pb); 2098 return error; 2099 } 2100 vp = nd.ni_vp; 2101 pathbuf_destroy(pb); 2102 2103 error = vfs_composefh_alloc(vp, &fh); 2104 vput(vp); 2105 if (error != 0) { 2106 return error; 2107 } 2108 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2109 if (error != 0) { 2110 goto out; 2111 } 2112 sz = FHANDLE_SIZE(fh); 2113 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2114 if (error != 0) { 2115 goto out; 2116 } 2117 if (usz >= sz) { 2118 error = copyout(fh, SCARG(uap, fhp), sz); 2119 } else { 2120 error = E2BIG; 2121 } 2122 out: 2123 vfs_composefh_free(fh); 2124 return (error); 2125 } 2126 2127 /* 2128 * Open a file given a file handle. 2129 * 2130 * Check permissions, allocate an open file structure, 2131 * and call the device open routine if any. 2132 */ 2133 2134 int 2135 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2136 register_t *retval) 2137 { 2138 file_t *fp; 2139 struct vnode *vp = NULL; 2140 kauth_cred_t cred = l->l_cred; 2141 file_t *nfp; 2142 int indx, error; 2143 struct vattr va; 2144 fhandle_t *fh; 2145 int flags; 2146 proc_t *p; 2147 2148 p = curproc; 2149 2150 /* 2151 * Must be super user 2152 */ 2153 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2154 0, NULL, NULL, NULL))) 2155 return (error); 2156 2157 if (oflags & O_SEARCH) { 2158 oflags &= ~(int)O_SEARCH; 2159 } 2160 2161 flags = FFLAGS(oflags); 2162 if ((flags & (FREAD | FWRITE)) == 0) 2163 return (EINVAL); 2164 if ((flags & O_CREAT)) 2165 return (EINVAL); 2166 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2167 return (error); 2168 fp = nfp; 2169 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2170 if (error != 0) { 2171 goto bad; 2172 } 2173 error = vfs_fhtovp(fh, &vp); 2174 vfs_copyinfh_free(fh); 2175 if (error != 0) { 2176 goto bad; 2177 } 2178 2179 /* Now do an effective vn_open */ 2180 2181 if (vp->v_type == VSOCK) { 2182 error = EOPNOTSUPP; 2183 goto bad; 2184 } 2185 error = vn_openchk(vp, cred, flags); 2186 if (error != 0) 2187 goto bad; 2188 if (flags & O_TRUNC) { 2189 VOP_UNLOCK(vp); /* XXX */ 2190 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2191 vattr_null(&va); 2192 va.va_size = 0; 2193 error = VOP_SETATTR(vp, &va, cred); 2194 if (error) 2195 goto bad; 2196 } 2197 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2198 goto bad; 2199 if (flags & FWRITE) { 2200 mutex_enter(vp->v_interlock); 2201 vp->v_writecount++; 2202 mutex_exit(vp->v_interlock); 2203 } 2204 2205 /* done with modified vn_open, now finish what sys_open does. */ 2206 if ((error = open_setfp(l, fp, vp, indx, flags))) 2207 return error; 2208 2209 VOP_UNLOCK(vp); 2210 *retval = indx; 2211 fd_affix(p, fp, indx); 2212 return (0); 2213 2214 bad: 2215 fd_abort(p, fp, indx); 2216 if (vp != NULL) 2217 vput(vp); 2218 if (error == EDUPFD || error == EMOVEFD) { 2219 /* XXX should probably close curlwp->l_dupfd */ 2220 error = EOPNOTSUPP; 2221 } 2222 return (error); 2223 } 2224 2225 int 2226 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2227 { 2228 /* { 2229 syscallarg(const void *) fhp; 2230 syscallarg(size_t) fh_size; 2231 syscallarg(int) flags; 2232 } */ 2233 2234 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2235 SCARG(uap, flags), retval); 2236 } 2237 2238 int 2239 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2240 { 2241 int error; 2242 fhandle_t *fh; 2243 struct vnode *vp; 2244 2245 /* 2246 * Must be super user 2247 */ 2248 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2249 0, NULL, NULL, NULL))) 2250 return (error); 2251 2252 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2253 if (error != 0) 2254 return error; 2255 2256 error = vfs_fhtovp(fh, &vp); 2257 vfs_copyinfh_free(fh); 2258 if (error != 0) 2259 return error; 2260 2261 error = vn_stat(vp, sb); 2262 vput(vp); 2263 return error; 2264 } 2265 2266 2267 /* ARGSUSED */ 2268 int 2269 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2270 { 2271 /* { 2272 syscallarg(const void *) fhp; 2273 syscallarg(size_t) fh_size; 2274 syscallarg(struct stat *) sb; 2275 } */ 2276 struct stat sb; 2277 int error; 2278 2279 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2280 if (error) 2281 return error; 2282 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2283 } 2284 2285 int 2286 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2287 int flags) 2288 { 2289 fhandle_t *fh; 2290 struct mount *mp; 2291 struct vnode *vp; 2292 int error; 2293 2294 /* 2295 * Must be super user 2296 */ 2297 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2298 0, NULL, NULL, NULL))) 2299 return error; 2300 2301 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2302 if (error != 0) 2303 return error; 2304 2305 error = vfs_fhtovp(fh, &vp); 2306 vfs_copyinfh_free(fh); 2307 if (error != 0) 2308 return error; 2309 2310 mp = vp->v_mount; 2311 error = dostatvfs(mp, sb, l, flags, 1); 2312 vput(vp); 2313 return error; 2314 } 2315 2316 /* ARGSUSED */ 2317 int 2318 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2319 { 2320 /* { 2321 syscallarg(const void *) fhp; 2322 syscallarg(size_t) fh_size; 2323 syscallarg(struct statvfs *) buf; 2324 syscallarg(int) flags; 2325 } */ 2326 struct statvfs *sb = STATVFSBUF_GET(); 2327 int error; 2328 2329 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2330 SCARG(uap, flags)); 2331 if (error == 0) 2332 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2333 STATVFSBUF_PUT(sb); 2334 return error; 2335 } 2336 2337 int 2338 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2339 dev_t dev) 2340 { 2341 2342 /* 2343 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2344 * in mode and dev=0. 2345 * 2346 * In all the other cases it's implementation defined behavior. 2347 */ 2348 2349 if ((mode & S_IFIFO) && dev == 0) 2350 return do_sys_mkfifoat(l, fdat, pathname, mode); 2351 else 2352 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2353 UIO_USERSPACE); 2354 } 2355 2356 /* 2357 * Create a special file. 2358 */ 2359 /* ARGSUSED */ 2360 int 2361 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2362 register_t *retval) 2363 { 2364 /* { 2365 syscallarg(const char *) path; 2366 syscallarg(mode_t) mode; 2367 syscallarg(dev_t) dev; 2368 } */ 2369 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2370 SCARG(uap, mode), SCARG(uap, dev)); 2371 } 2372 2373 int 2374 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2375 register_t *retval) 2376 { 2377 /* { 2378 syscallarg(int) fd; 2379 syscallarg(const char *) path; 2380 syscallarg(mode_t) mode; 2381 syscallarg(int) pad; 2382 syscallarg(dev_t) dev; 2383 } */ 2384 2385 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2386 SCARG(uap, mode), SCARG(uap, dev)); 2387 } 2388 2389 int 2390 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2391 enum uio_seg seg) 2392 { 2393 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2394 } 2395 2396 int 2397 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2398 dev_t dev, enum uio_seg seg) 2399 { 2400 struct proc *p = l->l_proc; 2401 struct vnode *vp; 2402 struct vattr vattr; 2403 int error, optype; 2404 struct pathbuf *pb; 2405 struct nameidata nd; 2406 const char *pathstring; 2407 2408 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2409 0, NULL, NULL, NULL)) != 0) 2410 return (error); 2411 2412 optype = VOP_MKNOD_DESCOFFSET; 2413 2414 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2415 if (error) { 2416 return error; 2417 } 2418 pathstring = pathbuf_stringcopy_get(pb); 2419 if (pathstring == NULL) { 2420 pathbuf_destroy(pb); 2421 return ENOMEM; 2422 } 2423 2424 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2425 2426 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2427 goto out; 2428 vp = nd.ni_vp; 2429 2430 if (vp != NULL) 2431 error = EEXIST; 2432 else { 2433 vattr_null(&vattr); 2434 /* We will read cwdi->cwdi_cmask unlocked. */ 2435 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2436 vattr.va_rdev = dev; 2437 2438 switch (mode & S_IFMT) { 2439 case S_IFMT: /* used by badsect to flag bad sectors */ 2440 vattr.va_type = VBAD; 2441 break; 2442 case S_IFCHR: 2443 vattr.va_type = VCHR; 2444 break; 2445 case S_IFBLK: 2446 vattr.va_type = VBLK; 2447 break; 2448 case S_IFWHT: 2449 optype = VOP_WHITEOUT_DESCOFFSET; 2450 break; 2451 case S_IFREG: 2452 #if NVERIEXEC > 0 2453 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2454 O_CREAT); 2455 #endif /* NVERIEXEC > 0 */ 2456 vattr.va_type = VREG; 2457 vattr.va_rdev = VNOVAL; 2458 optype = VOP_CREATE_DESCOFFSET; 2459 break; 2460 default: 2461 error = EINVAL; 2462 break; 2463 } 2464 2465 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2466 vattr.va_rdev == VNOVAL) 2467 error = EINVAL; 2468 } 2469 2470 if (!error) { 2471 switch (optype) { 2472 case VOP_WHITEOUT_DESCOFFSET: 2473 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2474 if (error) 2475 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2476 vput(nd.ni_dvp); 2477 break; 2478 2479 case VOP_MKNOD_DESCOFFSET: 2480 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2481 &nd.ni_cnd, &vattr); 2482 if (error == 0) 2483 vrele(nd.ni_vp); 2484 vput(nd.ni_dvp); 2485 break; 2486 2487 case VOP_CREATE_DESCOFFSET: 2488 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2489 &nd.ni_cnd, &vattr); 2490 if (error == 0) 2491 vrele(nd.ni_vp); 2492 vput(nd.ni_dvp); 2493 break; 2494 } 2495 } else { 2496 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2497 if (nd.ni_dvp == vp) 2498 vrele(nd.ni_dvp); 2499 else 2500 vput(nd.ni_dvp); 2501 if (vp) 2502 vrele(vp); 2503 } 2504 out: 2505 pathbuf_stringcopy_put(pb, pathstring); 2506 pathbuf_destroy(pb); 2507 return (error); 2508 } 2509 2510 /* 2511 * Create a named pipe. 2512 */ 2513 /* ARGSUSED */ 2514 int 2515 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2516 { 2517 /* { 2518 syscallarg(const char *) path; 2519 syscallarg(int) mode; 2520 } */ 2521 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2522 } 2523 2524 int 2525 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2526 register_t *retval) 2527 { 2528 /* { 2529 syscallarg(int) fd; 2530 syscallarg(const char *) path; 2531 syscallarg(int) mode; 2532 } */ 2533 2534 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2535 SCARG(uap, mode)); 2536 } 2537 2538 static int 2539 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2540 { 2541 struct proc *p = l->l_proc; 2542 struct vattr vattr; 2543 int error; 2544 struct pathbuf *pb; 2545 struct nameidata nd; 2546 2547 error = pathbuf_copyin(path, &pb); 2548 if (error) { 2549 return error; 2550 } 2551 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2552 2553 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2554 pathbuf_destroy(pb); 2555 return error; 2556 } 2557 if (nd.ni_vp != NULL) { 2558 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2559 if (nd.ni_dvp == nd.ni_vp) 2560 vrele(nd.ni_dvp); 2561 else 2562 vput(nd.ni_dvp); 2563 vrele(nd.ni_vp); 2564 pathbuf_destroy(pb); 2565 return (EEXIST); 2566 } 2567 vattr_null(&vattr); 2568 vattr.va_type = VFIFO; 2569 /* We will read cwdi->cwdi_cmask unlocked. */ 2570 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2571 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2572 if (error == 0) 2573 vrele(nd.ni_vp); 2574 vput(nd.ni_dvp); 2575 pathbuf_destroy(pb); 2576 return (error); 2577 } 2578 2579 /* 2580 * Make a hard file link. 2581 */ 2582 /* ARGSUSED */ 2583 int 2584 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2585 const char *link, int follow, register_t *retval) 2586 { 2587 struct vnode *vp; 2588 struct pathbuf *linkpb; 2589 struct nameidata nd; 2590 namei_simple_flags_t ns_flags; 2591 int error; 2592 2593 if (follow & AT_SYMLINK_FOLLOW) 2594 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2595 else 2596 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2597 2598 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2599 if (error != 0) 2600 return (error); 2601 error = pathbuf_copyin(link, &linkpb); 2602 if (error) { 2603 goto out1; 2604 } 2605 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2606 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2607 goto out2; 2608 if (nd.ni_vp) { 2609 error = EEXIST; 2610 goto abortop; 2611 } 2612 /* Prevent hard links on directories. */ 2613 if (vp->v_type == VDIR) { 2614 error = EPERM; 2615 goto abortop; 2616 } 2617 /* Prevent cross-mount operation. */ 2618 if (nd.ni_dvp->v_mount != vp->v_mount) { 2619 error = EXDEV; 2620 goto abortop; 2621 } 2622 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2623 VOP_UNLOCK(nd.ni_dvp); 2624 vrele(nd.ni_dvp); 2625 out2: 2626 pathbuf_destroy(linkpb); 2627 out1: 2628 vrele(vp); 2629 return (error); 2630 abortop: 2631 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2632 if (nd.ni_dvp == nd.ni_vp) 2633 vrele(nd.ni_dvp); 2634 else 2635 vput(nd.ni_dvp); 2636 if (nd.ni_vp != NULL) 2637 vrele(nd.ni_vp); 2638 goto out2; 2639 } 2640 2641 int 2642 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2643 { 2644 /* { 2645 syscallarg(const char *) path; 2646 syscallarg(const char *) link; 2647 } */ 2648 const char *path = SCARG(uap, path); 2649 const char *link = SCARG(uap, link); 2650 2651 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2652 AT_SYMLINK_FOLLOW, retval); 2653 } 2654 2655 int 2656 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2657 register_t *retval) 2658 { 2659 /* { 2660 syscallarg(int) fd1; 2661 syscallarg(const char *) name1; 2662 syscallarg(int) fd2; 2663 syscallarg(const char *) name2; 2664 syscallarg(int) flags; 2665 } */ 2666 int fd1 = SCARG(uap, fd1); 2667 const char *name1 = SCARG(uap, name1); 2668 int fd2 = SCARG(uap, fd2); 2669 const char *name2 = SCARG(uap, name2); 2670 int follow; 2671 2672 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2673 2674 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2675 } 2676 2677 2678 int 2679 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2680 { 2681 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2682 } 2683 2684 static int 2685 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2686 const char *link, enum uio_seg seg) 2687 { 2688 struct proc *p = curproc; 2689 struct vattr vattr; 2690 char *path; 2691 int error; 2692 size_t len; 2693 struct pathbuf *linkpb; 2694 struct nameidata nd; 2695 2696 KASSERT(l != NULL || fdat == AT_FDCWD); 2697 2698 path = PNBUF_GET(); 2699 if (seg == UIO_USERSPACE) { 2700 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2701 goto out1; 2702 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2703 goto out1; 2704 } else { 2705 len = strlen(patharg) + 1; 2706 KASSERT(len <= MAXPATHLEN); 2707 memcpy(path, patharg, len); 2708 linkpb = pathbuf_create(link); 2709 if (linkpb == NULL) { 2710 error = ENOMEM; 2711 goto out1; 2712 } 2713 } 2714 ktrkuser("symlink-target", path, len - 1); 2715 2716 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2717 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2718 goto out2; 2719 if (nd.ni_vp) { 2720 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2721 if (nd.ni_dvp == nd.ni_vp) 2722 vrele(nd.ni_dvp); 2723 else 2724 vput(nd.ni_dvp); 2725 vrele(nd.ni_vp); 2726 error = EEXIST; 2727 goto out2; 2728 } 2729 vattr_null(&vattr); 2730 vattr.va_type = VLNK; 2731 /* We will read cwdi->cwdi_cmask unlocked. */ 2732 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2733 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2734 if (error == 0) 2735 vrele(nd.ni_vp); 2736 vput(nd.ni_dvp); 2737 out2: 2738 pathbuf_destroy(linkpb); 2739 out1: 2740 PNBUF_PUT(path); 2741 return (error); 2742 } 2743 2744 /* 2745 * Make a symbolic link. 2746 */ 2747 /* ARGSUSED */ 2748 int 2749 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2750 { 2751 /* { 2752 syscallarg(const char *) path; 2753 syscallarg(const char *) link; 2754 } */ 2755 2756 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2757 UIO_USERSPACE); 2758 } 2759 2760 int 2761 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2762 register_t *retval) 2763 { 2764 /* { 2765 syscallarg(const char *) path1; 2766 syscallarg(int) fd; 2767 syscallarg(const char *) path2; 2768 } */ 2769 2770 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2771 SCARG(uap, path2), UIO_USERSPACE); 2772 } 2773 2774 /* 2775 * Delete a whiteout from the filesystem. 2776 */ 2777 /* ARGSUSED */ 2778 int 2779 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2780 { 2781 /* { 2782 syscallarg(const char *) path; 2783 } */ 2784 int error; 2785 struct pathbuf *pb; 2786 struct nameidata nd; 2787 2788 error = pathbuf_copyin(SCARG(uap, path), &pb); 2789 if (error) { 2790 return error; 2791 } 2792 2793 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2794 error = namei(&nd); 2795 if (error) { 2796 pathbuf_destroy(pb); 2797 return (error); 2798 } 2799 2800 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2801 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2802 if (nd.ni_dvp == nd.ni_vp) 2803 vrele(nd.ni_dvp); 2804 else 2805 vput(nd.ni_dvp); 2806 if (nd.ni_vp) 2807 vrele(nd.ni_vp); 2808 pathbuf_destroy(pb); 2809 return (EEXIST); 2810 } 2811 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2812 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2813 vput(nd.ni_dvp); 2814 pathbuf_destroy(pb); 2815 return (error); 2816 } 2817 2818 /* 2819 * Delete a name from the filesystem. 2820 */ 2821 /* ARGSUSED */ 2822 int 2823 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2824 { 2825 /* { 2826 syscallarg(const char *) path; 2827 } */ 2828 2829 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2830 } 2831 2832 int 2833 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2834 register_t *retval) 2835 { 2836 /* { 2837 syscallarg(int) fd; 2838 syscallarg(const char *) path; 2839 syscallarg(int) flag; 2840 } */ 2841 2842 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2843 SCARG(uap, flag), UIO_USERSPACE); 2844 } 2845 2846 int 2847 do_sys_unlink(const char *arg, enum uio_seg seg) 2848 { 2849 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2850 } 2851 2852 static int 2853 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2854 enum uio_seg seg) 2855 { 2856 struct vnode *vp; 2857 int error; 2858 struct pathbuf *pb; 2859 struct nameidata nd; 2860 const char *pathstring; 2861 2862 KASSERT(l != NULL || fdat == AT_FDCWD); 2863 2864 error = pathbuf_maybe_copyin(arg, seg, &pb); 2865 if (error) { 2866 return error; 2867 } 2868 pathstring = pathbuf_stringcopy_get(pb); 2869 if (pathstring == NULL) { 2870 pathbuf_destroy(pb); 2871 return ENOMEM; 2872 } 2873 2874 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2875 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2876 goto out; 2877 vp = nd.ni_vp; 2878 2879 /* 2880 * The root of a mounted filesystem cannot be deleted. 2881 */ 2882 if ((vp->v_vflag & VV_ROOT) != 0) { 2883 error = EBUSY; 2884 goto abort; 2885 } 2886 2887 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2888 error = EBUSY; 2889 goto abort; 2890 } 2891 2892 /* 2893 * No rmdir "." please. 2894 */ 2895 if (nd.ni_dvp == vp) { 2896 error = EINVAL; 2897 goto abort; 2898 } 2899 2900 /* 2901 * AT_REMOVEDIR is required to remove a directory 2902 */ 2903 if (vp->v_type == VDIR) { 2904 if (!(flags & AT_REMOVEDIR)) { 2905 error = EPERM; 2906 goto abort; 2907 } else { 2908 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2909 vput(nd.ni_dvp); 2910 goto out; 2911 } 2912 } 2913 2914 /* 2915 * Starting here we only deal with non directories. 2916 */ 2917 if (flags & AT_REMOVEDIR) { 2918 error = ENOTDIR; 2919 goto abort; 2920 } 2921 2922 #if NVERIEXEC > 0 2923 /* Handle remove requests for veriexec entries. */ 2924 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2925 goto abort; 2926 } 2927 #endif /* NVERIEXEC > 0 */ 2928 2929 #ifdef FILEASSOC 2930 (void)fileassoc_file_delete(vp); 2931 #endif /* FILEASSOC */ 2932 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2933 vput(nd.ni_dvp); 2934 goto out; 2935 2936 abort: 2937 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2938 if (nd.ni_dvp == vp) 2939 vrele(nd.ni_dvp); 2940 else 2941 vput(nd.ni_dvp); 2942 vput(vp); 2943 2944 out: 2945 pathbuf_stringcopy_put(pb, pathstring); 2946 pathbuf_destroy(pb); 2947 return (error); 2948 } 2949 2950 /* 2951 * Reposition read/write file offset. 2952 */ 2953 int 2954 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2955 { 2956 /* { 2957 syscallarg(int) fd; 2958 syscallarg(int) pad; 2959 syscallarg(off_t) offset; 2960 syscallarg(int) whence; 2961 } */ 2962 file_t *fp; 2963 int error, fd; 2964 2965 switch (SCARG(uap, whence)) { 2966 case SEEK_CUR: 2967 case SEEK_END: 2968 case SEEK_SET: 2969 break; 2970 default: 2971 return EINVAL; 2972 } 2973 2974 fd = SCARG(uap, fd); 2975 2976 if ((fp = fd_getfile(fd)) == NULL) 2977 return (EBADF); 2978 2979 if (fp->f_ops->fo_seek == NULL) { 2980 error = ESPIPE; 2981 goto out; 2982 } 2983 2984 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 2985 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 2986 out: 2987 fd_putfile(fd); 2988 return (error); 2989 } 2990 2991 /* 2992 * Positional read system call. 2993 */ 2994 int 2995 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2996 { 2997 /* { 2998 syscallarg(int) fd; 2999 syscallarg(void *) buf; 3000 syscallarg(size_t) nbyte; 3001 syscallarg(off_t) offset; 3002 } */ 3003 file_t *fp; 3004 off_t offset; 3005 int error, fd = SCARG(uap, fd); 3006 3007 if ((fp = fd_getfile(fd)) == NULL) 3008 return (EBADF); 3009 3010 if ((fp->f_flag & FREAD) == 0) { 3011 fd_putfile(fd); 3012 return (EBADF); 3013 } 3014 3015 if (fp->f_ops->fo_seek == NULL) { 3016 error = ESPIPE; 3017 goto out; 3018 } 3019 3020 offset = SCARG(uap, offset); 3021 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3022 if (error) 3023 goto out; 3024 3025 /* dofileread() will unuse the descriptor for us */ 3026 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3027 &offset, 0, retval)); 3028 3029 out: 3030 fd_putfile(fd); 3031 return (error); 3032 } 3033 3034 /* 3035 * Positional scatter read system call. 3036 */ 3037 int 3038 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 3039 { 3040 /* { 3041 syscallarg(int) fd; 3042 syscallarg(const struct iovec *) iovp; 3043 syscallarg(int) iovcnt; 3044 syscallarg(off_t) offset; 3045 } */ 3046 off_t offset = SCARG(uap, offset); 3047 3048 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3049 SCARG(uap, iovcnt), &offset, 0, retval); 3050 } 3051 3052 /* 3053 * Positional write system call. 3054 */ 3055 int 3056 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 3057 { 3058 /* { 3059 syscallarg(int) fd; 3060 syscallarg(const void *) buf; 3061 syscallarg(size_t) nbyte; 3062 syscallarg(off_t) offset; 3063 } */ 3064 file_t *fp; 3065 off_t offset; 3066 int error, fd = SCARG(uap, fd); 3067 3068 if ((fp = fd_getfile(fd)) == NULL) 3069 return (EBADF); 3070 3071 if ((fp->f_flag & FWRITE) == 0) { 3072 fd_putfile(fd); 3073 return (EBADF); 3074 } 3075 3076 if (fp->f_ops->fo_seek == NULL) { 3077 error = ESPIPE; 3078 goto out; 3079 } 3080 3081 offset = SCARG(uap, offset); 3082 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3083 if (error) 3084 goto out; 3085 3086 /* dofilewrite() will unuse the descriptor for us */ 3087 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3088 &offset, 0, retval)); 3089 3090 out: 3091 fd_putfile(fd); 3092 return (error); 3093 } 3094 3095 /* 3096 * Positional gather write system call. 3097 */ 3098 int 3099 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3100 { 3101 /* { 3102 syscallarg(int) fd; 3103 syscallarg(const struct iovec *) iovp; 3104 syscallarg(int) iovcnt; 3105 syscallarg(off_t) offset; 3106 } */ 3107 off_t offset = SCARG(uap, offset); 3108 3109 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3110 SCARG(uap, iovcnt), &offset, 0, retval); 3111 } 3112 3113 /* 3114 * Check access permissions. 3115 */ 3116 int 3117 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3118 { 3119 /* { 3120 syscallarg(const char *) path; 3121 syscallarg(int) flags; 3122 } */ 3123 3124 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3125 SCARG(uap, flags), 0); 3126 } 3127 3128 int 3129 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3130 int mode, int flags) 3131 { 3132 kauth_cred_t cred; 3133 struct vnode *vp; 3134 int error, nd_flag, vmode; 3135 struct pathbuf *pb; 3136 struct nameidata nd; 3137 3138 CTASSERT(F_OK == 0); 3139 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3140 /* nonsense mode */ 3141 return EINVAL; 3142 } 3143 3144 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3145 if (flags & AT_SYMLINK_NOFOLLOW) 3146 nd_flag &= ~FOLLOW; 3147 3148 error = pathbuf_copyin(path, &pb); 3149 if (error) 3150 return error; 3151 3152 NDINIT(&nd, LOOKUP, nd_flag, pb); 3153 3154 /* Override default credentials */ 3155 if (!(flags & AT_EACCESS)) { 3156 cred = kauth_cred_dup(l->l_cred); 3157 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3158 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3159 } else 3160 cred = l->l_cred; 3161 nd.ni_cnd.cn_cred = cred; 3162 3163 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3164 pathbuf_destroy(pb); 3165 goto out; 3166 } 3167 vp = nd.ni_vp; 3168 pathbuf_destroy(pb); 3169 3170 /* Flags == 0 means only check for existence. */ 3171 if (mode) { 3172 vmode = 0; 3173 if (mode & R_OK) 3174 vmode |= VREAD; 3175 if (mode & W_OK) 3176 vmode |= VWRITE; 3177 if (mode & X_OK) 3178 vmode |= VEXEC; 3179 3180 error = VOP_ACCESS(vp, vmode, cred); 3181 if (!error && (vmode & VWRITE)) 3182 error = vn_writechk(vp); 3183 } 3184 vput(vp); 3185 out: 3186 if (!(flags & AT_EACCESS)) 3187 kauth_cred_free(cred); 3188 return (error); 3189 } 3190 3191 int 3192 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3193 register_t *retval) 3194 { 3195 /* { 3196 syscallarg(int) fd; 3197 syscallarg(const char *) path; 3198 syscallarg(int) amode; 3199 syscallarg(int) flag; 3200 } */ 3201 3202 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3203 SCARG(uap, amode), SCARG(uap, flag)); 3204 } 3205 3206 /* 3207 * Common code for all sys_stat functions, including compat versions. 3208 */ 3209 int 3210 do_sys_stat(const char *userpath, unsigned int nd_flag, 3211 struct stat *sb) 3212 { 3213 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3214 } 3215 3216 int 3217 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3218 unsigned int nd_flag, struct stat *sb) 3219 { 3220 int error; 3221 struct pathbuf *pb; 3222 struct nameidata nd; 3223 3224 KASSERT(l != NULL || fdat == AT_FDCWD); 3225 3226 error = pathbuf_copyin(userpath, &pb); 3227 if (error) { 3228 return error; 3229 } 3230 3231 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3232 3233 error = fd_nameiat(l, fdat, &nd); 3234 if (error != 0) { 3235 pathbuf_destroy(pb); 3236 return error; 3237 } 3238 error = vn_stat(nd.ni_vp, sb); 3239 vput(nd.ni_vp); 3240 pathbuf_destroy(pb); 3241 return error; 3242 } 3243 3244 /* 3245 * Get file status; this version follows links. 3246 */ 3247 /* ARGSUSED */ 3248 int 3249 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3250 { 3251 /* { 3252 syscallarg(const char *) path; 3253 syscallarg(struct stat *) ub; 3254 } */ 3255 struct stat sb; 3256 int error; 3257 3258 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3259 if (error) 3260 return error; 3261 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3262 } 3263 3264 /* 3265 * Get file status; this version does not follow links. 3266 */ 3267 /* ARGSUSED */ 3268 int 3269 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3270 { 3271 /* { 3272 syscallarg(const char *) path; 3273 syscallarg(struct stat *) ub; 3274 } */ 3275 struct stat sb; 3276 int error; 3277 3278 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3279 if (error) 3280 return error; 3281 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3282 } 3283 3284 int 3285 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3286 register_t *retval) 3287 { 3288 /* { 3289 syscallarg(int) fd; 3290 syscallarg(const char *) path; 3291 syscallarg(struct stat *) buf; 3292 syscallarg(int) flag; 3293 } */ 3294 unsigned int nd_flag; 3295 struct stat sb; 3296 int error; 3297 3298 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3299 nd_flag = NOFOLLOW; 3300 else 3301 nd_flag = FOLLOW; 3302 3303 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3304 &sb); 3305 if (error) 3306 return error; 3307 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3308 } 3309 3310 static int 3311 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3312 { 3313 int error; 3314 struct pathbuf *pb; 3315 struct nameidata nd; 3316 3317 error = pathbuf_copyin(path, &pb); 3318 if (error) { 3319 return error; 3320 } 3321 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3322 if ((error = namei(&nd)) != 0) { 3323 pathbuf_destroy(pb); 3324 return error; 3325 } 3326 error = VOP_PATHCONF(nd.ni_vp, name, retval); 3327 vput(nd.ni_vp); 3328 pathbuf_destroy(pb); 3329 return error; 3330 } 3331 3332 /* 3333 * Get configurable pathname variables. 3334 */ 3335 /* ARGSUSED */ 3336 int 3337 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3338 register_t *retval) 3339 { 3340 /* { 3341 syscallarg(const char *) path; 3342 syscallarg(int) name; 3343 } */ 3344 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3345 FOLLOW); 3346 } 3347 3348 /* ARGSUSED */ 3349 int 3350 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3351 register_t *retval) 3352 { 3353 /* { 3354 syscallarg(const char *) path; 3355 syscallarg(int) name; 3356 } */ 3357 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3358 NOFOLLOW); 3359 } 3360 3361 /* 3362 * Return target name of a symbolic link. 3363 */ 3364 /* ARGSUSED */ 3365 int 3366 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3367 register_t *retval) 3368 { 3369 /* { 3370 syscallarg(const char *) path; 3371 syscallarg(char *) buf; 3372 syscallarg(size_t) count; 3373 } */ 3374 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3375 SCARG(uap, buf), SCARG(uap, count), retval); 3376 } 3377 3378 static int 3379 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3380 size_t count, register_t *retval) 3381 { 3382 struct vnode *vp; 3383 struct iovec aiov; 3384 struct uio auio; 3385 int error; 3386 struct pathbuf *pb; 3387 struct nameidata nd; 3388 3389 error = pathbuf_copyin(path, &pb); 3390 if (error) { 3391 return error; 3392 } 3393 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3394 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3395 pathbuf_destroy(pb); 3396 return error; 3397 } 3398 vp = nd.ni_vp; 3399 pathbuf_destroy(pb); 3400 if (vp->v_type != VLNK) 3401 error = EINVAL; 3402 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3403 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3404 aiov.iov_base = buf; 3405 aiov.iov_len = count; 3406 auio.uio_iov = &aiov; 3407 auio.uio_iovcnt = 1; 3408 auio.uio_offset = 0; 3409 auio.uio_rw = UIO_READ; 3410 KASSERT(l == curlwp); 3411 auio.uio_vmspace = l->l_proc->p_vmspace; 3412 auio.uio_resid = count; 3413 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3414 *retval = count - auio.uio_resid; 3415 } 3416 vput(vp); 3417 return (error); 3418 } 3419 3420 int 3421 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3422 register_t *retval) 3423 { 3424 /* { 3425 syscallarg(int) fd; 3426 syscallarg(const char *) path; 3427 syscallarg(char *) buf; 3428 syscallarg(size_t) bufsize; 3429 } */ 3430 3431 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3432 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3433 } 3434 3435 /* 3436 * Change flags of a file given a path name. 3437 */ 3438 /* ARGSUSED */ 3439 int 3440 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3441 { 3442 /* { 3443 syscallarg(const char *) path; 3444 syscallarg(u_long) flags; 3445 } */ 3446 struct vnode *vp; 3447 int error; 3448 3449 error = namei_simple_user(SCARG(uap, path), 3450 NSM_FOLLOW_TRYEMULROOT, &vp); 3451 if (error != 0) 3452 return (error); 3453 error = change_flags(vp, SCARG(uap, flags), l); 3454 vput(vp); 3455 return (error); 3456 } 3457 3458 /* 3459 * Change flags of a file given a file descriptor. 3460 */ 3461 /* ARGSUSED */ 3462 int 3463 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3464 { 3465 /* { 3466 syscallarg(int) fd; 3467 syscallarg(u_long) flags; 3468 } */ 3469 struct vnode *vp; 3470 file_t *fp; 3471 int error; 3472 3473 /* fd_getvnode() will use the descriptor for us */ 3474 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3475 return (error); 3476 vp = fp->f_vnode; 3477 error = change_flags(vp, SCARG(uap, flags), l); 3478 VOP_UNLOCK(vp); 3479 fd_putfile(SCARG(uap, fd)); 3480 return (error); 3481 } 3482 3483 /* 3484 * Change flags of a file given a path name; this version does 3485 * not follow links. 3486 */ 3487 int 3488 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3489 { 3490 /* { 3491 syscallarg(const char *) path; 3492 syscallarg(u_long) flags; 3493 } */ 3494 struct vnode *vp; 3495 int error; 3496 3497 error = namei_simple_user(SCARG(uap, path), 3498 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3499 if (error != 0) 3500 return (error); 3501 error = change_flags(vp, SCARG(uap, flags), l); 3502 vput(vp); 3503 return (error); 3504 } 3505 3506 /* 3507 * Common routine to change flags of a file. 3508 */ 3509 int 3510 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3511 { 3512 struct vattr vattr; 3513 int error; 3514 3515 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3516 3517 vattr_null(&vattr); 3518 vattr.va_flags = flags; 3519 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3520 3521 return (error); 3522 } 3523 3524 /* 3525 * Change mode of a file given path name; this version follows links. 3526 */ 3527 /* ARGSUSED */ 3528 int 3529 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3530 { 3531 /* { 3532 syscallarg(const char *) path; 3533 syscallarg(int) mode; 3534 } */ 3535 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3536 SCARG(uap, mode), 0); 3537 } 3538 3539 int 3540 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3541 { 3542 int error; 3543 struct vnode *vp; 3544 namei_simple_flags_t ns_flag; 3545 3546 if (flags & AT_SYMLINK_NOFOLLOW) 3547 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3548 else 3549 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3550 3551 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3552 if (error != 0) 3553 return error; 3554 3555 error = change_mode(vp, mode, l); 3556 3557 vrele(vp); 3558 3559 return (error); 3560 } 3561 3562 /* 3563 * Change mode of a file given a file descriptor. 3564 */ 3565 /* ARGSUSED */ 3566 int 3567 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3568 { 3569 /* { 3570 syscallarg(int) fd; 3571 syscallarg(int) mode; 3572 } */ 3573 file_t *fp; 3574 int error; 3575 3576 /* fd_getvnode() will use the descriptor for us */ 3577 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3578 return (error); 3579 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3580 fd_putfile(SCARG(uap, fd)); 3581 return (error); 3582 } 3583 3584 int 3585 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3586 register_t *retval) 3587 { 3588 /* { 3589 syscallarg(int) fd; 3590 syscallarg(const char *) path; 3591 syscallarg(int) mode; 3592 syscallarg(int) flag; 3593 } */ 3594 3595 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3596 SCARG(uap, mode), SCARG(uap, flag)); 3597 } 3598 3599 /* 3600 * Change mode of a file given path name; this version does not follow links. 3601 */ 3602 /* ARGSUSED */ 3603 int 3604 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3605 { 3606 /* { 3607 syscallarg(const char *) path; 3608 syscallarg(int) mode; 3609 } */ 3610 int error; 3611 struct vnode *vp; 3612 3613 error = namei_simple_user(SCARG(uap, path), 3614 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3615 if (error != 0) 3616 return (error); 3617 3618 error = change_mode(vp, SCARG(uap, mode), l); 3619 3620 vrele(vp); 3621 return (error); 3622 } 3623 3624 /* 3625 * Common routine to set mode given a vnode. 3626 */ 3627 static int 3628 change_mode(struct vnode *vp, int mode, struct lwp *l) 3629 { 3630 struct vattr vattr; 3631 int error; 3632 3633 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3634 vattr_null(&vattr); 3635 vattr.va_mode = mode & ALLPERMS; 3636 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3637 VOP_UNLOCK(vp); 3638 return (error); 3639 } 3640 3641 /* 3642 * Set ownership given a path name; this version follows links. 3643 */ 3644 /* ARGSUSED */ 3645 int 3646 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3647 { 3648 /* { 3649 syscallarg(const char *) path; 3650 syscallarg(uid_t) uid; 3651 syscallarg(gid_t) gid; 3652 } */ 3653 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3654 SCARG(uap, gid), 0); 3655 } 3656 3657 int 3658 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3659 gid_t gid, int flags) 3660 { 3661 int error; 3662 struct vnode *vp; 3663 namei_simple_flags_t ns_flag; 3664 3665 if (flags & AT_SYMLINK_NOFOLLOW) 3666 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3667 else 3668 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3669 3670 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3671 if (error != 0) 3672 return error; 3673 3674 error = change_owner(vp, uid, gid, l, 0); 3675 3676 vrele(vp); 3677 3678 return (error); 3679 } 3680 3681 /* 3682 * Set ownership given a path name; this version follows links. 3683 * Provides POSIX semantics. 3684 */ 3685 /* ARGSUSED */ 3686 int 3687 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3688 { 3689 /* { 3690 syscallarg(const char *) path; 3691 syscallarg(uid_t) uid; 3692 syscallarg(gid_t) gid; 3693 } */ 3694 int error; 3695 struct vnode *vp; 3696 3697 error = namei_simple_user(SCARG(uap, path), 3698 NSM_FOLLOW_TRYEMULROOT, &vp); 3699 if (error != 0) 3700 return (error); 3701 3702 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3703 3704 vrele(vp); 3705 return (error); 3706 } 3707 3708 /* 3709 * Set ownership given a file descriptor. 3710 */ 3711 /* ARGSUSED */ 3712 int 3713 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3714 { 3715 /* { 3716 syscallarg(int) fd; 3717 syscallarg(uid_t) uid; 3718 syscallarg(gid_t) gid; 3719 } */ 3720 int error; 3721 file_t *fp; 3722 3723 /* fd_getvnode() will use the descriptor for us */ 3724 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3725 return (error); 3726 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3727 l, 0); 3728 fd_putfile(SCARG(uap, fd)); 3729 return (error); 3730 } 3731 3732 int 3733 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3734 register_t *retval) 3735 { 3736 /* { 3737 syscallarg(int) fd; 3738 syscallarg(const char *) path; 3739 syscallarg(uid_t) owner; 3740 syscallarg(gid_t) group; 3741 syscallarg(int) flag; 3742 } */ 3743 3744 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3745 SCARG(uap, owner), SCARG(uap, group), 3746 SCARG(uap, flag)); 3747 } 3748 3749 /* 3750 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3751 */ 3752 /* ARGSUSED */ 3753 int 3754 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3755 { 3756 /* { 3757 syscallarg(int) fd; 3758 syscallarg(uid_t) uid; 3759 syscallarg(gid_t) gid; 3760 } */ 3761 int error; 3762 file_t *fp; 3763 3764 /* fd_getvnode() will use the descriptor for us */ 3765 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3766 return (error); 3767 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3768 l, 1); 3769 fd_putfile(SCARG(uap, fd)); 3770 return (error); 3771 } 3772 3773 /* 3774 * Set ownership given a path name; this version does not follow links. 3775 */ 3776 /* ARGSUSED */ 3777 int 3778 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3779 { 3780 /* { 3781 syscallarg(const char *) path; 3782 syscallarg(uid_t) uid; 3783 syscallarg(gid_t) gid; 3784 } */ 3785 int error; 3786 struct vnode *vp; 3787 3788 error = namei_simple_user(SCARG(uap, path), 3789 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3790 if (error != 0) 3791 return (error); 3792 3793 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3794 3795 vrele(vp); 3796 return (error); 3797 } 3798 3799 /* 3800 * Set ownership given a path name; this version does not follow links. 3801 * Provides POSIX/XPG semantics. 3802 */ 3803 /* ARGSUSED */ 3804 int 3805 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3806 { 3807 /* { 3808 syscallarg(const char *) path; 3809 syscallarg(uid_t) uid; 3810 syscallarg(gid_t) gid; 3811 } */ 3812 int error; 3813 struct vnode *vp; 3814 3815 error = namei_simple_user(SCARG(uap, path), 3816 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3817 if (error != 0) 3818 return (error); 3819 3820 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3821 3822 vrele(vp); 3823 return (error); 3824 } 3825 3826 /* 3827 * Common routine to set ownership given a vnode. 3828 */ 3829 static int 3830 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3831 int posix_semantics) 3832 { 3833 struct vattr vattr; 3834 mode_t newmode; 3835 int error; 3836 3837 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3838 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3839 goto out; 3840 3841 #define CHANGED(x) ((int)(x) != -1) 3842 newmode = vattr.va_mode; 3843 if (posix_semantics) { 3844 /* 3845 * POSIX/XPG semantics: if the caller is not the super-user, 3846 * clear set-user-id and set-group-id bits. Both POSIX and 3847 * the XPG consider the behaviour for calls by the super-user 3848 * implementation-defined; we leave the set-user-id and set- 3849 * group-id settings intact in that case. 3850 */ 3851 if (vattr.va_mode & S_ISUID) { 3852 if (kauth_authorize_vnode(l->l_cred, 3853 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3854 newmode &= ~S_ISUID; 3855 } 3856 if (vattr.va_mode & S_ISGID) { 3857 if (kauth_authorize_vnode(l->l_cred, 3858 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3859 newmode &= ~S_ISGID; 3860 } 3861 } else { 3862 /* 3863 * NetBSD semantics: when changing owner and/or group, 3864 * clear the respective bit(s). 3865 */ 3866 if (CHANGED(uid)) 3867 newmode &= ~S_ISUID; 3868 if (CHANGED(gid)) 3869 newmode &= ~S_ISGID; 3870 } 3871 /* Update va_mode iff altered. */ 3872 if (vattr.va_mode == newmode) 3873 newmode = VNOVAL; 3874 3875 vattr_null(&vattr); 3876 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3877 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3878 vattr.va_mode = newmode; 3879 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3880 #undef CHANGED 3881 3882 out: 3883 VOP_UNLOCK(vp); 3884 return (error); 3885 } 3886 3887 /* 3888 * Set the access and modification times given a path name; this 3889 * version follows links. 3890 */ 3891 /* ARGSUSED */ 3892 int 3893 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3894 register_t *retval) 3895 { 3896 /* { 3897 syscallarg(const char *) path; 3898 syscallarg(const struct timeval *) tptr; 3899 } */ 3900 3901 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3902 SCARG(uap, tptr), UIO_USERSPACE); 3903 } 3904 3905 /* 3906 * Set the access and modification times given a file descriptor. 3907 */ 3908 /* ARGSUSED */ 3909 int 3910 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3911 register_t *retval) 3912 { 3913 /* { 3914 syscallarg(int) fd; 3915 syscallarg(const struct timeval *) tptr; 3916 } */ 3917 int error; 3918 file_t *fp; 3919 3920 /* fd_getvnode() will use the descriptor for us */ 3921 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3922 return (error); 3923 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3924 UIO_USERSPACE); 3925 fd_putfile(SCARG(uap, fd)); 3926 return (error); 3927 } 3928 3929 int 3930 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3931 register_t *retval) 3932 { 3933 /* { 3934 syscallarg(int) fd; 3935 syscallarg(const struct timespec *) tptr; 3936 } */ 3937 int error; 3938 file_t *fp; 3939 3940 /* fd_getvnode() will use the descriptor for us */ 3941 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3942 return (error); 3943 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3944 SCARG(uap, tptr), UIO_USERSPACE); 3945 fd_putfile(SCARG(uap, fd)); 3946 return (error); 3947 } 3948 3949 /* 3950 * Set the access and modification times given a path name; this 3951 * version does not follow links. 3952 */ 3953 int 3954 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3955 register_t *retval) 3956 { 3957 /* { 3958 syscallarg(const char *) path; 3959 syscallarg(const struct timeval *) tptr; 3960 } */ 3961 3962 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3963 SCARG(uap, tptr), UIO_USERSPACE); 3964 } 3965 3966 int 3967 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3968 register_t *retval) 3969 { 3970 /* { 3971 syscallarg(int) fd; 3972 syscallarg(const char *) path; 3973 syscallarg(const struct timespec *) tptr; 3974 syscallarg(int) flag; 3975 } */ 3976 int follow; 3977 const struct timespec *tptr; 3978 int error; 3979 3980 tptr = SCARG(uap, tptr); 3981 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3982 3983 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3984 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3985 3986 return error; 3987 } 3988 3989 /* 3990 * Common routine to set access and modification times given a vnode. 3991 */ 3992 int 3993 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3994 const struct timespec *tptr, enum uio_seg seg) 3995 { 3996 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3997 } 3998 3999 int 4000 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 4001 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 4002 { 4003 struct vattr vattr; 4004 int error, dorele = 0; 4005 namei_simple_flags_t sflags; 4006 bool vanull, setbirthtime; 4007 struct timespec ts[2]; 4008 4009 KASSERT(l != NULL || fdat == AT_FDCWD); 4010 4011 /* 4012 * I have checked all callers and they pass either FOLLOW, 4013 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 4014 * is 0. More to the point, they don't pass anything else. 4015 * Let's keep it that way at least until the namei interfaces 4016 * are fully sanitized. 4017 */ 4018 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4019 sflags = (flag == FOLLOW) ? 4020 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4021 4022 if (tptr == NULL) { 4023 vanull = true; 4024 nanotime(&ts[0]); 4025 ts[1] = ts[0]; 4026 } else { 4027 vanull = false; 4028 if (seg != UIO_SYSSPACE) { 4029 error = copyin(tptr, ts, sizeof (ts)); 4030 if (error != 0) 4031 return error; 4032 } else { 4033 ts[0] = tptr[0]; 4034 ts[1] = tptr[1]; 4035 } 4036 } 4037 4038 if (ts[0].tv_nsec == UTIME_NOW) { 4039 nanotime(&ts[0]); 4040 if (ts[1].tv_nsec == UTIME_NOW) { 4041 vanull = true; 4042 ts[1] = ts[0]; 4043 } 4044 } else if (ts[1].tv_nsec == UTIME_NOW) 4045 nanotime(&ts[1]); 4046 4047 if (vp == NULL) { 4048 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4049 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4050 if (error != 0) 4051 return error; 4052 dorele = 1; 4053 } 4054 4055 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4056 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4057 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4058 vattr_null(&vattr); 4059 4060 if (ts[0].tv_nsec != UTIME_OMIT) 4061 vattr.va_atime = ts[0]; 4062 4063 if (ts[1].tv_nsec != UTIME_OMIT) { 4064 vattr.va_mtime = ts[1]; 4065 if (setbirthtime) 4066 vattr.va_birthtime = ts[1]; 4067 } 4068 4069 if (vanull) 4070 vattr.va_vaflags |= VA_UTIMES_NULL; 4071 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4072 VOP_UNLOCK(vp); 4073 4074 if (dorele != 0) 4075 vrele(vp); 4076 4077 return error; 4078 } 4079 4080 int 4081 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4082 const struct timeval *tptr, enum uio_seg seg) 4083 { 4084 struct timespec ts[2]; 4085 struct timespec *tsptr = NULL; 4086 int error; 4087 4088 if (tptr != NULL) { 4089 struct timeval tv[2]; 4090 4091 if (seg != UIO_SYSSPACE) { 4092 error = copyin(tptr, tv, sizeof(tv)); 4093 if (error != 0) 4094 return error; 4095 tptr = tv; 4096 } 4097 4098 if ((tptr[0].tv_usec == UTIME_NOW) || 4099 (tptr[0].tv_usec == UTIME_OMIT)) 4100 ts[0].tv_nsec = tptr[0].tv_usec; 4101 else { 4102 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4103 return EINVAL; 4104 4105 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4106 } 4107 4108 if ((tptr[1].tv_usec == UTIME_NOW) || 4109 (tptr[1].tv_usec == UTIME_OMIT)) 4110 ts[1].tv_nsec = tptr[1].tv_usec; 4111 else { 4112 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4113 return EINVAL; 4114 4115 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4116 } 4117 4118 tsptr = &ts[0]; 4119 } 4120 4121 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4122 } 4123 4124 /* 4125 * Truncate a file given its path name. 4126 */ 4127 /* ARGSUSED */ 4128 int 4129 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4130 { 4131 /* { 4132 syscallarg(const char *) path; 4133 syscallarg(int) pad; 4134 syscallarg(off_t) length; 4135 } */ 4136 struct vnode *vp; 4137 struct vattr vattr; 4138 int error; 4139 4140 if (SCARG(uap, length) < 0) 4141 return EINVAL; 4142 4143 error = namei_simple_user(SCARG(uap, path), 4144 NSM_FOLLOW_TRYEMULROOT, &vp); 4145 if (error != 0) 4146 return (error); 4147 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4148 if (vp->v_type == VDIR) 4149 error = EISDIR; 4150 else if ((error = vn_writechk(vp)) == 0 && 4151 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4152 vattr_null(&vattr); 4153 vattr.va_size = SCARG(uap, length); 4154 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4155 } 4156 vput(vp); 4157 return (error); 4158 } 4159 4160 /* 4161 * Truncate a file given a file descriptor. 4162 */ 4163 /* ARGSUSED */ 4164 int 4165 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4166 { 4167 /* { 4168 syscallarg(int) fd; 4169 syscallarg(int) pad; 4170 syscallarg(off_t) length; 4171 } */ 4172 file_t *fp; 4173 int error, fd = SCARG(uap, fd); 4174 4175 fp = fd_getfile(fd); 4176 if (fp == NULL) 4177 return EBADF; 4178 if (fp->f_ops->fo_truncate == NULL) 4179 error = EOPNOTSUPP; 4180 else 4181 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4182 4183 fd_putfile(fd); 4184 return error; 4185 } 4186 4187 /* 4188 * Sync an open file. 4189 */ 4190 /* ARGSUSED */ 4191 int 4192 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4193 { 4194 /* { 4195 syscallarg(int) fd; 4196 } */ 4197 struct vnode *vp; 4198 file_t *fp; 4199 int error; 4200 4201 /* fd_getvnode() will use the descriptor for us */ 4202 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4203 return (error); 4204 vp = fp->f_vnode; 4205 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4206 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4207 VOP_UNLOCK(vp); 4208 fd_putfile(SCARG(uap, fd)); 4209 return (error); 4210 } 4211 4212 /* 4213 * Sync a range of file data. API modeled after that found in AIX. 4214 * 4215 * FDATASYNC indicates that we need only save enough metadata to be able 4216 * to re-read the written data. 4217 */ 4218 /* ARGSUSED */ 4219 int 4220 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4221 { 4222 /* { 4223 syscallarg(int) fd; 4224 syscallarg(int) flags; 4225 syscallarg(off_t) start; 4226 syscallarg(off_t) length; 4227 } */ 4228 struct vnode *vp; 4229 file_t *fp; 4230 int flags, nflags; 4231 off_t s, e, len; 4232 int error; 4233 4234 /* fd_getvnode() will use the descriptor for us */ 4235 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4236 return (error); 4237 4238 if ((fp->f_flag & FWRITE) == 0) { 4239 error = EBADF; 4240 goto out; 4241 } 4242 4243 flags = SCARG(uap, flags); 4244 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4245 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4246 error = EINVAL; 4247 goto out; 4248 } 4249 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4250 if (flags & FDATASYNC) 4251 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4252 else 4253 nflags = FSYNC_WAIT; 4254 if (flags & FDISKSYNC) 4255 nflags |= FSYNC_CACHE; 4256 4257 len = SCARG(uap, length); 4258 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4259 if (len) { 4260 s = SCARG(uap, start); 4261 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4262 error = EINVAL; 4263 goto out; 4264 } 4265 e = s + len; 4266 KASSERT(s <= e); 4267 } else { 4268 e = 0; 4269 s = 0; 4270 } 4271 4272 vp = fp->f_vnode; 4273 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4274 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4275 VOP_UNLOCK(vp); 4276 out: 4277 fd_putfile(SCARG(uap, fd)); 4278 return (error); 4279 } 4280 4281 /* 4282 * Sync the data of an open file. 4283 */ 4284 /* ARGSUSED */ 4285 int 4286 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4287 { 4288 /* { 4289 syscallarg(int) fd; 4290 } */ 4291 struct vnode *vp; 4292 file_t *fp; 4293 int error; 4294 4295 /* fd_getvnode() will use the descriptor for us */ 4296 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4297 return (error); 4298 vp = fp->f_vnode; 4299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4300 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4301 VOP_UNLOCK(vp); 4302 fd_putfile(SCARG(uap, fd)); 4303 return (error); 4304 } 4305 4306 /* 4307 * Rename files, (standard) BSD semantics frontend. 4308 */ 4309 /* ARGSUSED */ 4310 int 4311 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4312 { 4313 /* { 4314 syscallarg(const char *) from; 4315 syscallarg(const char *) to; 4316 } */ 4317 4318 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4319 SCARG(uap, to), UIO_USERSPACE, 0)); 4320 } 4321 4322 int 4323 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4324 register_t *retval) 4325 { 4326 /* { 4327 syscallarg(int) fromfd; 4328 syscallarg(const char *) from; 4329 syscallarg(int) tofd; 4330 syscallarg(const char *) to; 4331 } */ 4332 4333 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4334 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4335 } 4336 4337 /* 4338 * Rename files, POSIX semantics frontend. 4339 */ 4340 /* ARGSUSED */ 4341 int 4342 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4343 { 4344 /* { 4345 syscallarg(const char *) from; 4346 syscallarg(const char *) to; 4347 } */ 4348 4349 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4350 SCARG(uap, to), UIO_USERSPACE, 1)); 4351 } 4352 4353 /* 4354 * Rename files. Source and destination must either both be directories, 4355 * or both not be directories. If target is a directory, it must be empty. 4356 * If `from' and `to' refer to the same object, the value of the `retain' 4357 * argument is used to determine whether `from' will be 4358 * 4359 * (retain == 0) deleted unless `from' and `to' refer to the same 4360 * object in the file system's name space (BSD). 4361 * (retain == 1) always retained (POSIX). 4362 * 4363 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4364 */ 4365 int 4366 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4367 { 4368 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4369 } 4370 4371 static int 4372 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4373 const char *to, enum uio_seg seg, int retain) 4374 { 4375 struct pathbuf *fpb, *tpb; 4376 struct nameidata fnd, tnd; 4377 struct vnode *fdvp, *fvp; 4378 struct vnode *tdvp, *tvp; 4379 struct mount *mp, *tmp; 4380 int error; 4381 4382 KASSERT(l != NULL || fromfd == AT_FDCWD); 4383 KASSERT(l != NULL || tofd == AT_FDCWD); 4384 4385 error = pathbuf_maybe_copyin(from, seg, &fpb); 4386 if (error) 4387 goto out0; 4388 KASSERT(fpb != NULL); 4389 4390 error = pathbuf_maybe_copyin(to, seg, &tpb); 4391 if (error) 4392 goto out1; 4393 KASSERT(tpb != NULL); 4394 4395 /* 4396 * Lookup from. 4397 * 4398 * XXX LOCKPARENT is wrong because we don't actually want it 4399 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4400 * insane, so for the time being we need to leave it like this. 4401 */ 4402 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4403 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4404 goto out2; 4405 4406 /* 4407 * Pull out the important results of the lookup, fdvp and fvp. 4408 * Of course, fvp is bogus because we're about to unlock fdvp. 4409 */ 4410 fdvp = fnd.ni_dvp; 4411 fvp = fnd.ni_vp; 4412 mp = fdvp->v_mount; 4413 KASSERT(fdvp != NULL); 4414 KASSERT(fvp != NULL); 4415 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4416 /* 4417 * Bracket the operation with fstrans_start()/fstrans_done(). 4418 * 4419 * Inside the bracket this file system cannot be unmounted so 4420 * a vnode on this file system cannot change its v_mount. 4421 * A vnode on another file system may still change to dead mount. 4422 */ 4423 fstrans_start(mp); 4424 4425 /* 4426 * Make sure neither fdvp nor fvp is locked. 4427 */ 4428 if (fdvp != fvp) 4429 VOP_UNLOCK(fdvp); 4430 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4431 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4432 4433 /* 4434 * Reject renaming `.' and `..'. Can't do this until after 4435 * namei because we need namei's parsing to find the final 4436 * component name. (namei should just leave us with the final 4437 * component name and not look it up itself, but anyway...) 4438 * 4439 * This was here before because we used to relookup from 4440 * instead of to and relookup requires the caller to check 4441 * this, but now file systems may depend on this check, so we 4442 * must retain it until the file systems are all rototilled. 4443 */ 4444 if (((fnd.ni_cnd.cn_namelen == 1) && 4445 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4446 ((fnd.ni_cnd.cn_namelen == 2) && 4447 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4448 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4449 error = EINVAL; /* XXX EISDIR? */ 4450 goto abort0; 4451 } 4452 4453 /* 4454 * Lookup to. 4455 * 4456 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4457 * fvp here to decide whether to add CREATEDIR is a load of 4458 * bollocks because fvp might be the wrong node by now, since 4459 * fdvp is unlocked. 4460 * 4461 * XXX Why not pass CREATEDIR always? 4462 */ 4463 NDINIT(&tnd, RENAME, 4464 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4465 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4466 tpb); 4467 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4468 goto abort0; 4469 4470 /* 4471 * Pull out the important results of the lookup, tdvp and tvp. 4472 * Of course, tvp is bogus because we're about to unlock tdvp. 4473 */ 4474 tdvp = tnd.ni_dvp; 4475 tvp = tnd.ni_vp; 4476 KASSERT(tdvp != NULL); 4477 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4478 4479 if (fvp->v_type == VDIR) 4480 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4481 /* 4482 * Make sure neither tdvp nor tvp is locked. 4483 */ 4484 if (tdvp != tvp) 4485 VOP_UNLOCK(tdvp); 4486 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4487 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4488 4489 /* 4490 * Reject renaming onto `.' or `..'. relookup is unhappy with 4491 * these, which is why we must do this here. Once upon a time 4492 * we relooked up from instead of to, and consequently didn't 4493 * need this check, but now that we relookup to instead of 4494 * from, we need this; and we shall need it forever forward 4495 * until the VOP_RENAME protocol changes, because file systems 4496 * will no doubt begin to depend on this check. 4497 */ 4498 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4499 error = EISDIR; 4500 goto abort1; 4501 } 4502 if ((tnd.ni_cnd.cn_namelen == 2) && 4503 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4504 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4505 error = EINVAL; 4506 goto abort1; 4507 } 4508 4509 /* 4510 * Make sure the mount points match. Although we don't hold 4511 * any vnode locks, the v_mount on fdvp file system are stable. 4512 * 4513 * Unmounting another file system at an inopportune moment may 4514 * cause tdvp to disappear and change its v_mount to dead. 4515 * 4516 * So in either case different v_mount means cross-device rename. 4517 */ 4518 KASSERT(mp != NULL); 4519 tmp = tdvp->v_mount; 4520 4521 if (mp != tmp) { 4522 error = EXDEV; 4523 goto abort1; 4524 } 4525 4526 /* 4527 * Take the vfs rename lock to avoid cross-directory screw cases. 4528 * Nothing is locked currently, so taking this lock is safe. 4529 */ 4530 error = VFS_RENAMELOCK_ENTER(mp); 4531 if (error) 4532 goto abort1; 4533 4534 /* 4535 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4536 * and nothing is locked except for the vfs rename lock. 4537 * 4538 * The next step is a little rain dance to conform to the 4539 * insane lock protocol, even though it does nothing to ward 4540 * off race conditions. 4541 * 4542 * We need tdvp and tvp to be locked. However, because we have 4543 * unlocked tdvp in order to hold no locks while we take the 4544 * vfs rename lock, tvp may be wrong here, and we can't safely 4545 * lock it even if the sensible file systems will just unlock 4546 * it straight away. Consequently, we must lock tdvp and then 4547 * relookup tvp to get it locked. 4548 * 4549 * Finally, because the VOP_RENAME protocol is brain-damaged 4550 * and various file systems insanely depend on the semantics of 4551 * this brain damage, the lookup of to must be the last lookup 4552 * before VOP_RENAME. 4553 */ 4554 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4555 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4556 if (error) 4557 goto abort2; 4558 4559 /* 4560 * Drop the old tvp and pick up the new one -- which might be 4561 * the same, but that doesn't matter to us. After this, tdvp 4562 * and tvp should both be locked. 4563 */ 4564 if (tvp != NULL) 4565 vrele(tvp); 4566 tvp = tnd.ni_vp; 4567 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4568 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4569 4570 /* 4571 * The old do_sys_rename had various consistency checks here 4572 * involving fvp and tvp. fvp is bogus already here, and tvp 4573 * will become bogus soon in any sensible file system, so the 4574 * only purpose in putting these checks here is to give lip 4575 * service to these screw cases and to acknowledge that they 4576 * exist, not actually to handle them, but here you go 4577 * anyway... 4578 */ 4579 4580 /* 4581 * Acknowledge that directories and non-directories aren't 4582 * supposed to mix. 4583 */ 4584 if (tvp != NULL) { 4585 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4586 error = ENOTDIR; 4587 goto abort3; 4588 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4589 error = EISDIR; 4590 goto abort3; 4591 } 4592 } 4593 4594 /* 4595 * Acknowledge some random screw case, among the dozens that 4596 * might arise. 4597 */ 4598 if (fvp == tdvp) { 4599 error = EINVAL; 4600 goto abort3; 4601 } 4602 4603 /* 4604 * Acknowledge that POSIX has a wacky screw case. 4605 * 4606 * XXX Eventually the retain flag needs to be passed on to 4607 * VOP_RENAME. 4608 */ 4609 if (fvp == tvp) { 4610 if (retain) { 4611 error = 0; 4612 goto abort3; 4613 } else if ((fdvp == tdvp) && 4614 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4615 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4616 fnd.ni_cnd.cn_namelen))) { 4617 error = 0; 4618 goto abort3; 4619 } 4620 } 4621 4622 /* 4623 * Make sure veriexec can screw us up. (But a race can screw 4624 * up veriexec, of course -- remember, fvp and (soon) tvp are 4625 * bogus.) 4626 */ 4627 #if NVERIEXEC > 0 4628 { 4629 char *f1, *f2; 4630 size_t f1_len; 4631 size_t f2_len; 4632 4633 f1_len = fnd.ni_cnd.cn_namelen + 1; 4634 f1 = kmem_alloc(f1_len, KM_SLEEP); 4635 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4636 4637 f2_len = tnd.ni_cnd.cn_namelen + 1; 4638 f2 = kmem_alloc(f2_len, KM_SLEEP); 4639 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4640 4641 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4642 4643 kmem_free(f1, f1_len); 4644 kmem_free(f2, f2_len); 4645 4646 if (error) 4647 goto abort3; 4648 } 4649 #endif /* NVERIEXEC > 0 */ 4650 4651 /* 4652 * All ready. Incant the rename vop. 4653 */ 4654 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4655 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4656 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4657 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4658 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4659 4660 /* 4661 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4662 * tdvp and tvp. But we can't assert any of that. 4663 */ 4664 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4665 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4666 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4667 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4668 4669 /* 4670 * So all we have left to do is to drop the rename lock and 4671 * destroy the pathbufs. 4672 */ 4673 VFS_RENAMELOCK_EXIT(mp); 4674 fstrans_done(mp); 4675 goto out2; 4676 4677 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4678 VOP_UNLOCK(tvp); 4679 abort2: VOP_UNLOCK(tdvp); 4680 VFS_RENAMELOCK_EXIT(mp); 4681 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4682 vrele(tdvp); 4683 if (tvp != NULL) 4684 vrele(tvp); 4685 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4686 vrele(fdvp); 4687 vrele(fvp); 4688 fstrans_done(mp); 4689 out2: pathbuf_destroy(tpb); 4690 out1: pathbuf_destroy(fpb); 4691 out0: return error; 4692 } 4693 4694 /* 4695 * Make a directory file. 4696 */ 4697 /* ARGSUSED */ 4698 int 4699 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4700 { 4701 /* { 4702 syscallarg(const char *) path; 4703 syscallarg(int) mode; 4704 } */ 4705 4706 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4707 SCARG(uap, mode), UIO_USERSPACE); 4708 } 4709 4710 int 4711 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4712 register_t *retval) 4713 { 4714 /* { 4715 syscallarg(int) fd; 4716 syscallarg(const char *) path; 4717 syscallarg(int) mode; 4718 } */ 4719 4720 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4721 SCARG(uap, mode), UIO_USERSPACE); 4722 } 4723 4724 4725 int 4726 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4727 { 4728 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4729 } 4730 4731 static int 4732 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4733 enum uio_seg seg) 4734 { 4735 struct proc *p = curlwp->l_proc; 4736 struct vnode *vp; 4737 struct vattr vattr; 4738 int error; 4739 struct pathbuf *pb; 4740 struct nameidata nd; 4741 4742 KASSERT(l != NULL || fdat == AT_FDCWD); 4743 4744 /* XXX bollocks, should pass in a pathbuf */ 4745 error = pathbuf_maybe_copyin(path, seg, &pb); 4746 if (error) { 4747 return error; 4748 } 4749 4750 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4751 4752 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4753 pathbuf_destroy(pb); 4754 return (error); 4755 } 4756 vp = nd.ni_vp; 4757 if (vp != NULL) { 4758 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4759 if (nd.ni_dvp == vp) 4760 vrele(nd.ni_dvp); 4761 else 4762 vput(nd.ni_dvp); 4763 vrele(vp); 4764 pathbuf_destroy(pb); 4765 return (EEXIST); 4766 } 4767 vattr_null(&vattr); 4768 vattr.va_type = VDIR; 4769 /* We will read cwdi->cwdi_cmask unlocked. */ 4770 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4771 nd.ni_cnd.cn_flags |= WILLBEDIR; 4772 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4773 if (!error) 4774 vrele(nd.ni_vp); 4775 vput(nd.ni_dvp); 4776 pathbuf_destroy(pb); 4777 return (error); 4778 } 4779 4780 /* 4781 * Remove a directory file. 4782 */ 4783 /* ARGSUSED */ 4784 int 4785 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4786 { 4787 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4788 AT_REMOVEDIR, UIO_USERSPACE); 4789 } 4790 4791 /* 4792 * Read a block of directory entries in a file system independent format. 4793 */ 4794 int 4795 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4796 { 4797 /* { 4798 syscallarg(int) fd; 4799 syscallarg(char *) buf; 4800 syscallarg(size_t) count; 4801 } */ 4802 file_t *fp; 4803 int error, done; 4804 4805 /* fd_getvnode() will use the descriptor for us */ 4806 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4807 return (error); 4808 if ((fp->f_flag & FREAD) == 0) { 4809 error = EBADF; 4810 goto out; 4811 } 4812 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4813 SCARG(uap, count), &done, l, 0, 0); 4814 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4815 *retval = done; 4816 out: 4817 fd_putfile(SCARG(uap, fd)); 4818 return (error); 4819 } 4820 4821 /* 4822 * Set the mode mask for creation of filesystem nodes. 4823 */ 4824 int 4825 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4826 { 4827 /* { 4828 syscallarg(mode_t) newmask; 4829 } */ 4830 4831 /* 4832 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4833 * serialization with those reads is required. It's important to 4834 * return a coherent answer for the caller of umask() though, and 4835 * the atomic operation accomplishes that. 4836 */ 4837 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4838 SCARG(uap, newmask) & ALLPERMS); 4839 4840 return (0); 4841 } 4842 4843 int 4844 dorevoke(struct vnode *vp, kauth_cred_t cred) 4845 { 4846 struct vattr vattr; 4847 int error, fs_decision; 4848 4849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4850 error = VOP_GETATTR(vp, &vattr, cred); 4851 VOP_UNLOCK(vp); 4852 if (error != 0) 4853 return error; 4854 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4855 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4856 fs_decision); 4857 if (!error) 4858 VOP_REVOKE(vp, REVOKEALL); 4859 return (error); 4860 } 4861 4862 /* 4863 * Void all references to file by ripping underlying filesystem 4864 * away from vnode. 4865 */ 4866 /* ARGSUSED */ 4867 int 4868 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4869 { 4870 /* { 4871 syscallarg(const char *) path; 4872 } */ 4873 struct vnode *vp; 4874 int error; 4875 4876 error = namei_simple_user(SCARG(uap, path), 4877 NSM_FOLLOW_TRYEMULROOT, &vp); 4878 if (error != 0) 4879 return (error); 4880 error = dorevoke(vp, l->l_cred); 4881 vrele(vp); 4882 return (error); 4883 } 4884 4885 /* 4886 * Allocate backing store for a file, filling a hole without having to 4887 * explicitly write anything out. 4888 */ 4889 /* ARGSUSED */ 4890 int 4891 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4892 register_t *retval) 4893 { 4894 /* { 4895 syscallarg(int) fd; 4896 syscallarg(off_t) pos; 4897 syscallarg(off_t) len; 4898 } */ 4899 int fd; 4900 off_t pos, len; 4901 struct file *fp; 4902 struct vnode *vp; 4903 int error; 4904 4905 fd = SCARG(uap, fd); 4906 pos = SCARG(uap, pos); 4907 len = SCARG(uap, len); 4908 4909 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4910 *retval = EINVAL; 4911 return 0; 4912 } 4913 4914 error = fd_getvnode(fd, &fp); 4915 if (error) { 4916 *retval = error; 4917 return 0; 4918 } 4919 if ((fp->f_flag & FWRITE) == 0) { 4920 error = EBADF; 4921 goto fail; 4922 } 4923 vp = fp->f_vnode; 4924 4925 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4926 if (vp->v_type == VDIR) { 4927 error = EISDIR; 4928 } else { 4929 error = VOP_FALLOCATE(vp, pos, len); 4930 } 4931 VOP_UNLOCK(vp); 4932 4933 fail: 4934 fd_putfile(fd); 4935 *retval = error; 4936 return 0; 4937 } 4938 4939 /* 4940 * Deallocate backing store for a file, creating a hole. Also used for 4941 * invoking TRIM on disks. 4942 */ 4943 /* ARGSUSED */ 4944 int 4945 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4946 register_t *retval) 4947 { 4948 /* { 4949 syscallarg(int) fd; 4950 syscallarg(off_t) pos; 4951 syscallarg(off_t) len; 4952 } */ 4953 int fd; 4954 off_t pos, len; 4955 struct file *fp; 4956 struct vnode *vp; 4957 int error; 4958 4959 fd = SCARG(uap, fd); 4960 pos = SCARG(uap, pos); 4961 len = SCARG(uap, len); 4962 4963 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4964 return EINVAL; 4965 } 4966 4967 error = fd_getvnode(fd, &fp); 4968 if (error) { 4969 return error; 4970 } 4971 if ((fp->f_flag & FWRITE) == 0) { 4972 error = EBADF; 4973 goto fail; 4974 } 4975 vp = fp->f_vnode; 4976 4977 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4978 if (vp->v_type == VDIR) { 4979 error = EISDIR; 4980 } else { 4981 error = VOP_FDISCARD(vp, pos, len); 4982 } 4983 VOP_UNLOCK(vp); 4984 4985 fail: 4986 fd_putfile(fd); 4987 return error; 4988 } 4989