1 /* $NetBSD: vfs_syscalls.c,v 1.528 2019/05/13 08:17:30 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.528 2019/05/13 08:17:30 hannken Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const int nmountcompatnames = __arraycount(mountcompatnames); 167 168 static int 169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 170 { 171 file_t *dfp; 172 int error; 173 174 if (fdat != AT_FDCWD) { 175 if ((error = fd_getvnode(fdat, &dfp)) != 0) 176 goto out; 177 178 NDAT(ndp, dfp->f_vnode); 179 } 180 181 error = namei(ndp); 182 183 if (fdat != AT_FDCWD) 184 fd_putfile(fdat); 185 out: 186 return error; 187 } 188 189 static int 190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 191 namei_simple_flags_t sflags, struct vnode **vp_ret) 192 { 193 file_t *dfp; 194 struct vnode *dvp; 195 int error; 196 197 if (fdat != AT_FDCWD) { 198 if ((error = fd_getvnode(fdat, &dfp)) != 0) 199 goto out; 200 201 dvp = dfp->f_vnode; 202 } else { 203 dvp = NULL; 204 } 205 206 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 207 208 if (fdat != AT_FDCWD) 209 fd_putfile(fdat); 210 out: 211 return error; 212 } 213 214 static int 215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 216 { 217 int error; 218 219 fp->f_flag = flags & FMASK; 220 fp->f_type = DTYPE_VNODE; 221 fp->f_ops = &vnops; 222 fp->f_vnode = vp; 223 224 if (flags & (O_EXLOCK | O_SHLOCK)) { 225 struct flock lf; 226 int type; 227 228 lf.l_whence = SEEK_SET; 229 lf.l_start = 0; 230 lf.l_len = 0; 231 if (flags & O_EXLOCK) 232 lf.l_type = F_WRLCK; 233 else 234 lf.l_type = F_RDLCK; 235 type = F_FLOCK; 236 if ((flags & FNONBLOCK) == 0) 237 type |= F_WAIT; 238 VOP_UNLOCK(vp); 239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 240 if (error) { 241 (void) vn_close(vp, fp->f_flag, fp->f_cred); 242 fd_abort(l->l_proc, fp, indx); 243 return error; 244 } 245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 246 atomic_or_uint(&fp->f_flag, FHASLOCK); 247 } 248 if (flags & O_CLOEXEC) 249 fd_set_exclose(l, indx, true); 250 return 0; 251 } 252 253 static int 254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 255 void *data, size_t *data_len) 256 { 257 struct mount *mp; 258 int error = 0, saved_flags; 259 260 mp = vp->v_mount; 261 saved_flags = mp->mnt_flag; 262 263 /* We can operate only on VV_ROOT nodes. */ 264 if ((vp->v_vflag & VV_ROOT) == 0) { 265 error = EINVAL; 266 goto out; 267 } 268 269 /* 270 * We only allow the filesystem to be reloaded if it 271 * is currently mounted read-only. Additionally, we 272 * prevent read-write to read-only downgrades. 273 */ 274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 275 (mp->mnt_flag & MNT_RDONLY) == 0 && 276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 277 error = EOPNOTSUPP; /* Needs translation */ 278 goto out; 279 } 280 281 /* 282 * Enabling MNT_UNION requires a covered mountpoint and 283 * must not happen on the root mount. 284 */ 285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 286 error = EOPNOTSUPP; 287 goto out; 288 } 289 290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 292 if (error) 293 goto out; 294 295 error = vfs_suspend(mp, 0); 296 if (error) 297 goto out; 298 299 mutex_enter(&mp->mnt_updating); 300 301 mp->mnt_flag &= ~MNT_OP_FLAGS; 302 mp->mnt_flag |= flags & MNT_OP_FLAGS; 303 304 /* 305 * Set the mount level flags. 306 */ 307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 308 if ((flags & MNT_RDONLY)) 309 mp->mnt_iflag |= IMNT_WANTRDONLY; 310 else 311 mp->mnt_iflag |= IMNT_WANTRDWR; 312 } 313 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 315 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 316 mp->mnt_flag &= ~MNT_RDONLY; 317 318 error = VFS_MOUNT(mp, path, data, data_len); 319 320 if (error && data != NULL) { 321 int error2; 322 323 /* 324 * Update failed; let's try and see if it was an 325 * export request. For compat with 3.0 and earlier. 326 */ 327 error2 = vfs_hooks_reexport(mp, path, data); 328 329 /* 330 * Only update error code if the export request was 331 * understood but some problem occurred while 332 * processing it. 333 */ 334 if (error2 != EJUSTRETURN) 335 error = error2; 336 } 337 338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 339 mp->mnt_flag |= MNT_RDONLY; 340 if (error) 341 mp->mnt_flag = saved_flags; 342 mp->mnt_flag &= ~MNT_OP_FLAGS; 343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 346 vfs_syncer_add_to_worklist(mp); 347 } else { 348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 349 vfs_syncer_remove_from_worklist(mp); 350 } 351 mutex_exit(&mp->mnt_updating); 352 vfs_resume(mp); 353 354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 355 (flags & MNT_EXTATTR)) { 356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 357 NULL, 0, NULL) != 0) { 358 printf("%s: failed to start extattr, error = %d", 359 mp->mnt_stat.f_mntonname, error); 360 mp->mnt_flag &= ~MNT_EXTATTR; 361 } 362 } 363 364 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 365 !(flags & MNT_EXTATTR)) { 366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 367 NULL, 0, NULL) != 0) { 368 printf("%s: failed to stop extattr, error = %d", 369 mp->mnt_stat.f_mntonname, error); 370 mp->mnt_flag |= MNT_RDONLY; 371 } 372 } 373 out: 374 return (error); 375 } 376 377 static int 378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 379 struct vfsops **vfsops) 380 { 381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 382 int error; 383 384 if (type_seg == UIO_USERSPACE) { 385 /* Copy file-system type from userspace. */ 386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 387 } else { 388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 389 KASSERT(error == 0); 390 } 391 392 if (error) { 393 /* 394 * Historically, filesystem types were identified by numbers. 395 * If we get an integer for the filesystem type instead of a 396 * string, we check to see if it matches one of the historic 397 * filesystem types. 398 */ 399 u_long fsindex = (u_long)fstype; 400 if (fsindex >= nmountcompatnames || 401 mountcompatnames[fsindex] == NULL) 402 return ENODEV; 403 strlcpy(fstypename, mountcompatnames[fsindex], 404 sizeof(fstypename)); 405 } 406 407 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 408 if (strcmp(fstypename, "ufs") == 0) 409 fstypename[0] = 'f'; 410 411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 412 return 0; 413 414 /* If we can autoload a vfs module, try again */ 415 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 416 417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 418 return 0; 419 420 return ENODEV; 421 } 422 423 static int 424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 425 void *data, size_t *data_len) 426 { 427 struct mount *mp; 428 int error; 429 430 /* If MNT_GETARGS is specified, it should be the only flag. */ 431 if (flags & ~MNT_GETARGS) 432 return EINVAL; 433 434 mp = vp->v_mount; 435 436 /* XXX: probably some notion of "can see" here if we want isolation. */ 437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 439 if (error) 440 return error; 441 442 if ((vp->v_vflag & VV_ROOT) == 0) 443 return EINVAL; 444 445 if (vfs_busy(mp)) 446 return EPERM; 447 448 mutex_enter(&mp->mnt_updating); 449 mp->mnt_flag &= ~MNT_OP_FLAGS; 450 mp->mnt_flag |= MNT_GETARGS; 451 error = VFS_MOUNT(mp, path, data, data_len); 452 mp->mnt_flag &= ~MNT_OP_FLAGS; 453 mutex_exit(&mp->mnt_updating); 454 455 vfs_unbusy(mp); 456 return (error); 457 } 458 459 int 460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 461 { 462 /* { 463 syscallarg(const char *) type; 464 syscallarg(const char *) path; 465 syscallarg(int) flags; 466 syscallarg(void *) data; 467 syscallarg(size_t) data_len; 468 } */ 469 470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 472 SCARG(uap, data_len), retval); 473 } 474 475 int 476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 477 const char *path, int flags, void *data, enum uio_seg data_seg, 478 size_t data_len, register_t *retval) 479 { 480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 481 struct vnode *vp; 482 void *data_buf = data; 483 bool vfsopsrele = false; 484 size_t alloc_sz = 0; 485 int error; 486 487 /* 488 * Get vnode to be covered 489 */ 490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 491 if (error != 0) { 492 vp = NULL; 493 goto done; 494 } 495 496 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 497 vfsops = vp->v_mount->mnt_op; 498 } else { 499 /* 'type' is userspace */ 500 error = mount_get_vfsops(type, type_seg, &vfsops); 501 if (error != 0) 502 goto done; 503 vfsopsrele = true; 504 } 505 506 /* 507 * We allow data to be NULL, even for userspace. Some fs's don't need 508 * it. The others will handle NULL. 509 */ 510 if (data != NULL && data_seg == UIO_USERSPACE) { 511 if (data_len == 0) { 512 /* No length supplied, use default for filesystem */ 513 data_len = vfsops->vfs_min_mount_data; 514 515 /* 516 * Hopefully a longer buffer won't make copyin() fail. 517 * For compatibility with 3.0 and earlier. 518 */ 519 if (flags & MNT_UPDATE 520 && data_len < sizeof (struct mnt_export_args30)) 521 data_len = sizeof (struct mnt_export_args30); 522 } 523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 524 error = EINVAL; 525 goto done; 526 } 527 alloc_sz = data_len; 528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 529 530 /* NFS needs the buffer even for mnt_getargs .... */ 531 error = copyin(data, data_buf, data_len); 532 if (error != 0) 533 goto done; 534 } 535 536 if (flags & MNT_GETARGS) { 537 if (data_len == 0) { 538 error = EINVAL; 539 goto done; 540 } 541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 542 if (error != 0) 543 goto done; 544 if (data_seg == UIO_USERSPACE) 545 error = copyout(data_buf, data, data_len); 546 *retval = data_len; 547 } else if (flags & MNT_UPDATE) { 548 error = mount_update(l, vp, path, flags, data_buf, &data_len); 549 } else { 550 /* Locking is handled internally in mount_domount(). */ 551 KASSERT(vfsopsrele == true); 552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 553 &data_len); 554 vfsopsrele = false; 555 } 556 if (!error) 557 KNOTE(&fs_klist, VQ_MOUNT); 558 559 done: 560 if (vfsopsrele) 561 vfs_delref(vfsops); 562 if (vp != NULL) { 563 vrele(vp); 564 } 565 if (data_buf != data) 566 kmem_free(data_buf, alloc_sz); 567 return (error); 568 } 569 570 /* 571 * Unmount a file system. 572 * 573 * Note: unmount takes a path to the vnode mounted on as argument, 574 * not special file (as before). 575 */ 576 /* ARGSUSED */ 577 int 578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 579 { 580 /* { 581 syscallarg(const char *) path; 582 syscallarg(int) flags; 583 } */ 584 struct vnode *vp; 585 struct mount *mp; 586 int error; 587 struct pathbuf *pb; 588 struct nameidata nd; 589 590 error = pathbuf_copyin(SCARG(uap, path), &pb); 591 if (error) { 592 return error; 593 } 594 595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 596 if ((error = namei(&nd)) != 0) { 597 pathbuf_destroy(pb); 598 return error; 599 } 600 vp = nd.ni_vp; 601 pathbuf_destroy(pb); 602 603 mp = vp->v_mount; 604 vfs_ref(mp); 605 VOP_UNLOCK(vp); 606 607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 609 if (error) { 610 vrele(vp); 611 vfs_rele(mp); 612 return (error); 613 } 614 615 /* 616 * Don't allow unmounting the root file system. 617 */ 618 if (mp->mnt_flag & MNT_ROOTFS) { 619 vrele(vp); 620 vfs_rele(mp); 621 return (EINVAL); 622 } 623 624 /* 625 * Must be the root of the filesystem 626 */ 627 if ((vp->v_vflag & VV_ROOT) == 0) { 628 vrele(vp); 629 vfs_rele(mp); 630 return (EINVAL); 631 } 632 633 vrele(vp); 634 error = dounmount(mp, SCARG(uap, flags), l); 635 vfs_rele(mp); 636 if (!error) 637 KNOTE(&fs_klist, VQ_UNMOUNT); 638 return error; 639 } 640 641 /* 642 * Sync each mounted filesystem. 643 */ 644 #ifdef DEBUG 645 int syncprt = 0; 646 struct ctldebug debug0 = { "syncprt", &syncprt }; 647 #endif 648 649 void 650 do_sys_sync(struct lwp *l) 651 { 652 mount_iterator_t *iter; 653 struct mount *mp; 654 int asyncflag; 655 656 mountlist_iterator_init(&iter); 657 while ((mp = mountlist_iterator_next(iter)) != NULL) { 658 mutex_enter(&mp->mnt_updating); 659 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 660 asyncflag = mp->mnt_flag & MNT_ASYNC; 661 mp->mnt_flag &= ~MNT_ASYNC; 662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 663 if (asyncflag) 664 mp->mnt_flag |= MNT_ASYNC; 665 } 666 mutex_exit(&mp->mnt_updating); 667 } 668 mountlist_iterator_destroy(iter); 669 #ifdef DEBUG 670 if (syncprt) 671 vfs_bufstats(); 672 #endif /* DEBUG */ 673 } 674 675 /* ARGSUSED */ 676 int 677 sys_sync(struct lwp *l, const void *v, register_t *retval) 678 { 679 do_sys_sync(l); 680 return (0); 681 } 682 683 684 /* 685 * Access or change filesystem quotas. 686 * 687 * (this is really 14 different calls bundled into one) 688 */ 689 690 static int 691 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 692 { 693 struct quotastat info_k; 694 int error; 695 696 /* ensure any padding bytes are cleared */ 697 memset(&info_k, 0, sizeof(info_k)); 698 699 error = vfs_quotactl_stat(mp, &info_k); 700 if (error) { 701 return error; 702 } 703 704 return copyout(&info_k, info_u, sizeof(info_k)); 705 } 706 707 static int 708 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 709 struct quotaidtypestat *info_u) 710 { 711 struct quotaidtypestat info_k; 712 int error; 713 714 /* ensure any padding bytes are cleared */ 715 memset(&info_k, 0, sizeof(info_k)); 716 717 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 718 if (error) { 719 return error; 720 } 721 722 return copyout(&info_k, info_u, sizeof(info_k)); 723 } 724 725 static int 726 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 727 struct quotaobjtypestat *info_u) 728 { 729 struct quotaobjtypestat info_k; 730 int error; 731 732 /* ensure any padding bytes are cleared */ 733 memset(&info_k, 0, sizeof(info_k)); 734 735 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 736 if (error) { 737 return error; 738 } 739 740 return copyout(&info_k, info_u, sizeof(info_k)); 741 } 742 743 static int 744 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 745 struct quotaval *val_u) 746 { 747 struct quotakey key_k; 748 struct quotaval val_k; 749 int error; 750 751 /* ensure any padding bytes are cleared */ 752 memset(&val_k, 0, sizeof(val_k)); 753 754 error = copyin(key_u, &key_k, sizeof(key_k)); 755 if (error) { 756 return error; 757 } 758 759 error = vfs_quotactl_get(mp, &key_k, &val_k); 760 if (error) { 761 return error; 762 } 763 764 return copyout(&val_k, val_u, sizeof(val_k)); 765 } 766 767 static int 768 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 769 const struct quotaval *val_u) 770 { 771 struct quotakey key_k; 772 struct quotaval val_k; 773 int error; 774 775 error = copyin(key_u, &key_k, sizeof(key_k)); 776 if (error) { 777 return error; 778 } 779 780 error = copyin(val_u, &val_k, sizeof(val_k)); 781 if (error) { 782 return error; 783 } 784 785 return vfs_quotactl_put(mp, &key_k, &val_k); 786 } 787 788 static int 789 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 790 { 791 struct quotakey key_k; 792 int error; 793 794 error = copyin(key_u, &key_k, sizeof(key_k)); 795 if (error) { 796 return error; 797 } 798 799 return vfs_quotactl_del(mp, &key_k); 800 } 801 802 static int 803 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 804 { 805 struct quotakcursor cursor_k; 806 int error; 807 808 /* ensure any padding bytes are cleared */ 809 memset(&cursor_k, 0, sizeof(cursor_k)); 810 811 error = vfs_quotactl_cursoropen(mp, &cursor_k); 812 if (error) { 813 return error; 814 } 815 816 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 817 } 818 819 static int 820 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 821 { 822 struct quotakcursor cursor_k; 823 int error; 824 825 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 826 if (error) { 827 return error; 828 } 829 830 return vfs_quotactl_cursorclose(mp, &cursor_k); 831 } 832 833 static int 834 do_sys_quotactl_cursorskipidtype(struct mount *mp, 835 struct quotakcursor *cursor_u, int idtype) 836 { 837 struct quotakcursor cursor_k; 838 int error; 839 840 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 841 if (error) { 842 return error; 843 } 844 845 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 846 if (error) { 847 return error; 848 } 849 850 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 851 } 852 853 static int 854 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 855 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 856 unsigned *ret_u) 857 { 858 #define CGET_STACK_MAX 8 859 struct quotakcursor cursor_k; 860 struct quotakey stackkeys[CGET_STACK_MAX]; 861 struct quotaval stackvals[CGET_STACK_MAX]; 862 struct quotakey *keys_k; 863 struct quotaval *vals_k; 864 unsigned ret_k; 865 int error; 866 867 if (maxnum > 128) { 868 maxnum = 128; 869 } 870 871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 872 if (error) { 873 return error; 874 } 875 876 if (maxnum <= CGET_STACK_MAX) { 877 keys_k = stackkeys; 878 vals_k = stackvals; 879 /* ensure any padding bytes are cleared */ 880 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 881 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 882 } else { 883 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 884 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 885 } 886 887 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 888 &ret_k); 889 if (error) { 890 goto fail; 891 } 892 893 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 894 if (error) { 895 goto fail; 896 } 897 898 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 899 if (error) { 900 goto fail; 901 } 902 903 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 904 if (error) { 905 goto fail; 906 } 907 908 /* do last to maximize the chance of being able to recover a failure */ 909 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 910 911 fail: 912 if (keys_k != stackkeys) { 913 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 914 } 915 if (vals_k != stackvals) { 916 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 917 } 918 return error; 919 } 920 921 static int 922 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 923 int *ret_u) 924 { 925 struct quotakcursor cursor_k; 926 int ret_k; 927 int error; 928 929 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 930 if (error) { 931 return error; 932 } 933 934 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 935 if (error) { 936 return error; 937 } 938 939 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 940 if (error) { 941 return error; 942 } 943 944 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 945 } 946 947 static int 948 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 949 { 950 struct quotakcursor cursor_k; 951 int error; 952 953 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 954 if (error) { 955 return error; 956 } 957 958 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 959 if (error) { 960 return error; 961 } 962 963 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 964 } 965 966 static int 967 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 968 { 969 char *path_k; 970 int error; 971 972 /* XXX this should probably be a struct pathbuf */ 973 path_k = PNBUF_GET(); 974 error = copyin(path_u, path_k, PATH_MAX); 975 if (error) { 976 PNBUF_PUT(path_k); 977 return error; 978 } 979 980 error = vfs_quotactl_quotaon(mp, idtype, path_k); 981 982 PNBUF_PUT(path_k); 983 return error; 984 } 985 986 static int 987 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 988 { 989 return vfs_quotactl_quotaoff(mp, idtype); 990 } 991 992 int 993 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 994 { 995 struct mount *mp; 996 struct vnode *vp; 997 int error; 998 999 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1000 if (error != 0) 1001 return (error); 1002 mp = vp->v_mount; 1003 1004 switch (args->qc_op) { 1005 case QUOTACTL_STAT: 1006 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1007 break; 1008 case QUOTACTL_IDTYPESTAT: 1009 error = do_sys_quotactl_idtypestat(mp, 1010 args->u.idtypestat.qc_idtype, 1011 args->u.idtypestat.qc_info); 1012 break; 1013 case QUOTACTL_OBJTYPESTAT: 1014 error = do_sys_quotactl_objtypestat(mp, 1015 args->u.objtypestat.qc_objtype, 1016 args->u.objtypestat.qc_info); 1017 break; 1018 case QUOTACTL_GET: 1019 error = do_sys_quotactl_get(mp, 1020 args->u.get.qc_key, 1021 args->u.get.qc_val); 1022 break; 1023 case QUOTACTL_PUT: 1024 error = do_sys_quotactl_put(mp, 1025 args->u.put.qc_key, 1026 args->u.put.qc_val); 1027 break; 1028 case QUOTACTL_DEL: 1029 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1030 break; 1031 case QUOTACTL_CURSOROPEN: 1032 error = do_sys_quotactl_cursoropen(mp, 1033 args->u.cursoropen.qc_cursor); 1034 break; 1035 case QUOTACTL_CURSORCLOSE: 1036 error = do_sys_quotactl_cursorclose(mp, 1037 args->u.cursorclose.qc_cursor); 1038 break; 1039 case QUOTACTL_CURSORSKIPIDTYPE: 1040 error = do_sys_quotactl_cursorskipidtype(mp, 1041 args->u.cursorskipidtype.qc_cursor, 1042 args->u.cursorskipidtype.qc_idtype); 1043 break; 1044 case QUOTACTL_CURSORGET: 1045 error = do_sys_quotactl_cursorget(mp, 1046 args->u.cursorget.qc_cursor, 1047 args->u.cursorget.qc_keys, 1048 args->u.cursorget.qc_vals, 1049 args->u.cursorget.qc_maxnum, 1050 args->u.cursorget.qc_ret); 1051 break; 1052 case QUOTACTL_CURSORATEND: 1053 error = do_sys_quotactl_cursoratend(mp, 1054 args->u.cursoratend.qc_cursor, 1055 args->u.cursoratend.qc_ret); 1056 break; 1057 case QUOTACTL_CURSORREWIND: 1058 error = do_sys_quotactl_cursorrewind(mp, 1059 args->u.cursorrewind.qc_cursor); 1060 break; 1061 case QUOTACTL_QUOTAON: 1062 error = do_sys_quotactl_quotaon(mp, 1063 args->u.quotaon.qc_idtype, 1064 args->u.quotaon.qc_quotafile); 1065 break; 1066 case QUOTACTL_QUOTAOFF: 1067 error = do_sys_quotactl_quotaoff(mp, 1068 args->u.quotaoff.qc_idtype); 1069 break; 1070 default: 1071 error = EINVAL; 1072 break; 1073 } 1074 1075 vrele(vp); 1076 return error; 1077 } 1078 1079 /* ARGSUSED */ 1080 int 1081 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1082 register_t *retval) 1083 { 1084 /* { 1085 syscallarg(const char *) path; 1086 syscallarg(struct quotactl_args *) args; 1087 } */ 1088 struct quotactl_args args; 1089 int error; 1090 1091 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1092 if (error) { 1093 return error; 1094 } 1095 1096 return do_sys_quotactl(SCARG(uap, path), &args); 1097 } 1098 1099 int 1100 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1101 int root) 1102 { 1103 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1104 int error = 0; 1105 1106 /* 1107 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1108 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1109 * overrides MNT_NOWAIT. 1110 */ 1111 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1112 (flags != MNT_WAIT && flags != 0)) { 1113 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1114 goto done; 1115 } 1116 1117 /* Get the filesystem stats now */ 1118 memset(sp, 0, sizeof(*sp)); 1119 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1120 return error; 1121 } 1122 1123 if (cwdi->cwdi_rdir == NULL) 1124 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1125 done: 1126 if (cwdi->cwdi_rdir != NULL) { 1127 size_t len; 1128 char *bp; 1129 char c; 1130 char *path = PNBUF_GET(); 1131 1132 bp = path + MAXPATHLEN; 1133 *--bp = '\0'; 1134 rw_enter(&cwdi->cwdi_lock, RW_READER); 1135 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1136 MAXPATHLEN / 2, 0, l); 1137 rw_exit(&cwdi->cwdi_lock); 1138 if (error) { 1139 PNBUF_PUT(path); 1140 return error; 1141 } 1142 len = strlen(bp); 1143 if (len != 1) { 1144 /* 1145 * for mount points that are below our root, we can see 1146 * them, so we fix up the pathname and return them. The 1147 * rest we cannot see, so we don't allow viewing the 1148 * data. 1149 */ 1150 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1151 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1152 (void)strlcpy(sp->f_mntonname, 1153 c == '\0' ? "/" : &sp->f_mntonname[len], 1154 sizeof(sp->f_mntonname)); 1155 } else { 1156 if (root) 1157 (void)strlcpy(sp->f_mntonname, "/", 1158 sizeof(sp->f_mntonname)); 1159 else 1160 error = EPERM; 1161 } 1162 } 1163 PNBUF_PUT(path); 1164 } 1165 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1166 return error; 1167 } 1168 1169 /* 1170 * Get filesystem statistics by path. 1171 */ 1172 int 1173 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1174 { 1175 struct mount *mp; 1176 int error; 1177 struct vnode *vp; 1178 1179 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1180 if (error != 0) 1181 return error; 1182 mp = vp->v_mount; 1183 error = dostatvfs(mp, sb, l, flags, 1); 1184 vrele(vp); 1185 return error; 1186 } 1187 1188 /* ARGSUSED */ 1189 int 1190 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1191 { 1192 /* { 1193 syscallarg(const char *) path; 1194 syscallarg(struct statvfs *) buf; 1195 syscallarg(int) flags; 1196 } */ 1197 struct statvfs *sb; 1198 int error; 1199 1200 sb = STATVFSBUF_GET(); 1201 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1202 if (error == 0) 1203 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1204 STATVFSBUF_PUT(sb); 1205 return error; 1206 } 1207 1208 /* 1209 * Get filesystem statistics by fd. 1210 */ 1211 int 1212 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1213 { 1214 file_t *fp; 1215 struct mount *mp; 1216 int error; 1217 1218 /* fd_getvnode() will use the descriptor for us */ 1219 if ((error = fd_getvnode(fd, &fp)) != 0) 1220 return (error); 1221 mp = fp->f_vnode->v_mount; 1222 error = dostatvfs(mp, sb, curlwp, flags, 1); 1223 fd_putfile(fd); 1224 return error; 1225 } 1226 1227 /* ARGSUSED */ 1228 int 1229 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1230 { 1231 /* { 1232 syscallarg(int) fd; 1233 syscallarg(struct statvfs *) buf; 1234 syscallarg(int) flags; 1235 } */ 1236 struct statvfs *sb; 1237 int error; 1238 1239 sb = STATVFSBUF_GET(); 1240 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1241 if (error == 0) 1242 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1243 STATVFSBUF_PUT(sb); 1244 return error; 1245 } 1246 1247 1248 /* 1249 * Get statistics on all filesystems. 1250 */ 1251 int 1252 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1253 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1254 register_t *retval) 1255 { 1256 int root = 0; 1257 mount_iterator_t *iter; 1258 struct proc *p = l->l_proc; 1259 struct mount *mp; 1260 struct statvfs *sb; 1261 size_t count, maxcount; 1262 int error = 0; 1263 1264 sb = STATVFSBUF_GET(); 1265 maxcount = bufsize / entry_sz; 1266 count = 0; 1267 mountlist_iterator_init(&iter); 1268 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1269 if (sfsp && count < maxcount) { 1270 error = dostatvfs(mp, sb, l, flags, 0); 1271 if (error) { 1272 error = 0; 1273 continue; 1274 } 1275 error = copyfn(sb, sfsp, entry_sz); 1276 if (error) 1277 goto out; 1278 sfsp = (char *)sfsp + entry_sz; 1279 root |= strcmp(sb->f_mntonname, "/") == 0; 1280 } 1281 count++; 1282 } 1283 1284 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1285 /* 1286 * fake a root entry 1287 */ 1288 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1289 sb, l, flags, 1); 1290 if (error != 0) 1291 goto out; 1292 if (sfsp) { 1293 error = copyfn(sb, sfsp, entry_sz); 1294 if (error != 0) 1295 goto out; 1296 } 1297 count++; 1298 } 1299 if (sfsp && count > maxcount) 1300 *retval = maxcount; 1301 else 1302 *retval = count; 1303 out: 1304 mountlist_iterator_destroy(iter); 1305 STATVFSBUF_PUT(sb); 1306 return error; 1307 } 1308 1309 int 1310 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1311 { 1312 /* { 1313 syscallarg(struct statvfs *) buf; 1314 syscallarg(size_t) bufsize; 1315 syscallarg(int) flags; 1316 } */ 1317 1318 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1319 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1320 } 1321 1322 /* 1323 * Change current working directory to a given file descriptor. 1324 */ 1325 /* ARGSUSED */ 1326 int 1327 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1328 { 1329 /* { 1330 syscallarg(int) fd; 1331 } */ 1332 struct proc *p = l->l_proc; 1333 struct cwdinfo *cwdi; 1334 struct vnode *vp, *tdp; 1335 struct mount *mp; 1336 file_t *fp; 1337 int error, fd; 1338 1339 /* fd_getvnode() will use the descriptor for us */ 1340 fd = SCARG(uap, fd); 1341 if ((error = fd_getvnode(fd, &fp)) != 0) 1342 return (error); 1343 vp = fp->f_vnode; 1344 1345 vref(vp); 1346 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1347 if (vp->v_type != VDIR) 1348 error = ENOTDIR; 1349 else 1350 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1351 if (error) { 1352 vput(vp); 1353 goto out; 1354 } 1355 while ((mp = vp->v_mountedhere) != NULL) { 1356 error = vfs_busy(mp); 1357 vput(vp); 1358 if (error != 0) 1359 goto out; 1360 error = VFS_ROOT(mp, &tdp); 1361 vfs_unbusy(mp); 1362 if (error) 1363 goto out; 1364 vp = tdp; 1365 } 1366 VOP_UNLOCK(vp); 1367 1368 /* 1369 * Disallow changing to a directory not under the process's 1370 * current root directory (if there is one). 1371 */ 1372 cwdi = p->p_cwdi; 1373 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1374 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1375 vrele(vp); 1376 error = EPERM; /* operation not permitted */ 1377 } else { 1378 vrele(cwdi->cwdi_cdir); 1379 cwdi->cwdi_cdir = vp; 1380 } 1381 rw_exit(&cwdi->cwdi_lock); 1382 1383 out: 1384 fd_putfile(fd); 1385 return (error); 1386 } 1387 1388 /* 1389 * Change this process's notion of the root directory to a given file 1390 * descriptor. 1391 */ 1392 int 1393 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1394 { 1395 struct proc *p = l->l_proc; 1396 struct vnode *vp; 1397 file_t *fp; 1398 int error, fd = SCARG(uap, fd); 1399 1400 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1401 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1402 return error; 1403 /* fd_getvnode() will use the descriptor for us */ 1404 if ((error = fd_getvnode(fd, &fp)) != 0) 1405 return error; 1406 vp = fp->f_vnode; 1407 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1408 if (vp->v_type != VDIR) 1409 error = ENOTDIR; 1410 else 1411 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1412 VOP_UNLOCK(vp); 1413 if (error) 1414 goto out; 1415 vref(vp); 1416 1417 change_root(p->p_cwdi, vp, l); 1418 1419 out: 1420 fd_putfile(fd); 1421 return (error); 1422 } 1423 1424 /* 1425 * Change current working directory (``.''). 1426 */ 1427 /* ARGSUSED */ 1428 int 1429 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1430 { 1431 /* { 1432 syscallarg(const char *) path; 1433 } */ 1434 struct proc *p = l->l_proc; 1435 struct cwdinfo *cwdi; 1436 int error; 1437 struct vnode *vp; 1438 1439 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1440 &vp, l)) != 0) 1441 return (error); 1442 cwdi = p->p_cwdi; 1443 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1444 vrele(cwdi->cwdi_cdir); 1445 cwdi->cwdi_cdir = vp; 1446 rw_exit(&cwdi->cwdi_lock); 1447 return (0); 1448 } 1449 1450 /* 1451 * Change notion of root (``/'') directory. 1452 */ 1453 /* ARGSUSED */ 1454 int 1455 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1456 { 1457 /* { 1458 syscallarg(const char *) path; 1459 } */ 1460 struct proc *p = l->l_proc; 1461 int error; 1462 struct vnode *vp; 1463 1464 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1465 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1466 return (error); 1467 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1468 &vp, l)) != 0) 1469 return (error); 1470 1471 change_root(p->p_cwdi, vp, l); 1472 1473 return (0); 1474 } 1475 1476 /* 1477 * Common routine for chroot and fchroot. 1478 * NB: callers need to properly authorize the change root operation. 1479 */ 1480 void 1481 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1482 { 1483 struct proc *p = l->l_proc; 1484 kauth_cred_t ncred; 1485 1486 ncred = kauth_cred_alloc(); 1487 1488 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1489 if (cwdi->cwdi_rdir != NULL) 1490 vrele(cwdi->cwdi_rdir); 1491 cwdi->cwdi_rdir = vp; 1492 1493 /* 1494 * Prevent escaping from chroot by putting the root under 1495 * the working directory. Silently chdir to / if we aren't 1496 * already there. 1497 */ 1498 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1499 /* 1500 * XXX would be more failsafe to change directory to a 1501 * deadfs node here instead 1502 */ 1503 vrele(cwdi->cwdi_cdir); 1504 vref(vp); 1505 cwdi->cwdi_cdir = vp; 1506 } 1507 rw_exit(&cwdi->cwdi_lock); 1508 1509 /* Get a write lock on the process credential. */ 1510 proc_crmod_enter(); 1511 1512 kauth_cred_clone(p->p_cred, ncred); 1513 kauth_proc_chroot(ncred, p->p_cwdi); 1514 1515 /* Broadcast our credentials to the process and other LWPs. */ 1516 proc_crmod_leave(ncred, p->p_cred, true); 1517 } 1518 1519 /* 1520 * Common routine for chroot and chdir. 1521 * XXX "where" should be enum uio_seg 1522 */ 1523 int 1524 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1525 { 1526 struct pathbuf *pb; 1527 struct nameidata nd; 1528 int error; 1529 1530 error = pathbuf_maybe_copyin(path, where, &pb); 1531 if (error) { 1532 return error; 1533 } 1534 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1535 if ((error = namei(&nd)) != 0) { 1536 pathbuf_destroy(pb); 1537 return error; 1538 } 1539 *vpp = nd.ni_vp; 1540 pathbuf_destroy(pb); 1541 1542 if ((*vpp)->v_type != VDIR) 1543 error = ENOTDIR; 1544 else 1545 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1546 1547 if (error) 1548 vput(*vpp); 1549 else 1550 VOP_UNLOCK(*vpp); 1551 return (error); 1552 } 1553 1554 /* 1555 * Internals of sys_open - path has already been converted into a pathbuf 1556 * (so we can easily reuse this function from other parts of the kernel, 1557 * like posix_spawn post-processing). 1558 */ 1559 int 1560 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1561 int open_mode, int *fd) 1562 { 1563 struct proc *p = l->l_proc; 1564 struct cwdinfo *cwdi = p->p_cwdi; 1565 file_t *fp; 1566 struct vnode *vp; 1567 int flags, cmode; 1568 int indx, error; 1569 struct nameidata nd; 1570 1571 if (open_flags & O_SEARCH) { 1572 open_flags &= ~(int)O_SEARCH; 1573 } 1574 1575 flags = FFLAGS(open_flags); 1576 if ((flags & (FREAD | FWRITE)) == 0) 1577 return EINVAL; 1578 1579 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1580 return error; 1581 } 1582 1583 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1584 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1585 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1586 if (dvp != NULL) 1587 NDAT(&nd, dvp); 1588 1589 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1590 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1591 fd_abort(p, fp, indx); 1592 if ((error == EDUPFD || error == EMOVEFD) && 1593 l->l_dupfd >= 0 && /* XXX from fdopen */ 1594 (error = 1595 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1596 *fd = indx; 1597 return 0; 1598 } 1599 if (error == ERESTART) 1600 error = EINTR; 1601 return error; 1602 } 1603 1604 l->l_dupfd = 0; 1605 vp = nd.ni_vp; 1606 1607 if ((error = open_setfp(l, fp, vp, indx, flags))) 1608 return error; 1609 1610 VOP_UNLOCK(vp); 1611 *fd = indx; 1612 fd_affix(p, fp, indx); 1613 return 0; 1614 } 1615 1616 int 1617 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1618 { 1619 struct pathbuf *pb; 1620 int error, oflags; 1621 1622 oflags = FFLAGS(open_flags); 1623 if ((oflags & (FREAD | FWRITE)) == 0) 1624 return EINVAL; 1625 1626 pb = pathbuf_create(path); 1627 if (pb == NULL) 1628 return ENOMEM; 1629 1630 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1631 pathbuf_destroy(pb); 1632 1633 return error; 1634 } 1635 1636 static int 1637 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1638 int mode, int *fd) 1639 { 1640 file_t *dfp = NULL; 1641 struct vnode *dvp = NULL; 1642 struct pathbuf *pb; 1643 const char *pathstring = NULL; 1644 int error; 1645 1646 if (path == NULL) { 1647 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1648 if (error == ENOSYS) 1649 goto no_compat; 1650 if (error) 1651 return error; 1652 } else { 1653 no_compat: 1654 error = pathbuf_copyin(path, &pb); 1655 if (error) 1656 return error; 1657 } 1658 1659 pathstring = pathbuf_stringcopy_get(pb); 1660 1661 /* 1662 * fdat is ignored if: 1663 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1664 * 2) if path is absolute, then fdat is useless. 1665 */ 1666 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1667 /* fd_getvnode() will use the descriptor for us */ 1668 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1669 goto out; 1670 1671 dvp = dfp->f_vnode; 1672 } 1673 1674 error = do_open(l, dvp, pb, flags, mode, fd); 1675 1676 if (dfp != NULL) 1677 fd_putfile(fdat); 1678 out: 1679 pathbuf_stringcopy_put(pb, pathstring); 1680 pathbuf_destroy(pb); 1681 return error; 1682 } 1683 1684 int 1685 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1686 { 1687 /* { 1688 syscallarg(const char *) path; 1689 syscallarg(int) flags; 1690 syscallarg(int) mode; 1691 } */ 1692 int error; 1693 int fd; 1694 1695 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1696 SCARG(uap, flags), SCARG(uap, mode), &fd); 1697 1698 if (error == 0) 1699 *retval = fd; 1700 1701 return error; 1702 } 1703 1704 int 1705 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1706 { 1707 /* { 1708 syscallarg(int) fd; 1709 syscallarg(const char *) path; 1710 syscallarg(int) oflags; 1711 syscallarg(int) mode; 1712 } */ 1713 int error; 1714 int fd; 1715 1716 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1717 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1718 1719 if (error == 0) 1720 *retval = fd; 1721 1722 return error; 1723 } 1724 1725 static void 1726 vfs__fhfree(fhandle_t *fhp) 1727 { 1728 size_t fhsize; 1729 1730 fhsize = FHANDLE_SIZE(fhp); 1731 kmem_free(fhp, fhsize); 1732 } 1733 1734 /* 1735 * vfs_composefh: compose a filehandle. 1736 */ 1737 1738 int 1739 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1740 { 1741 struct mount *mp; 1742 struct fid *fidp; 1743 int error; 1744 size_t needfhsize; 1745 size_t fidsize; 1746 1747 mp = vp->v_mount; 1748 fidp = NULL; 1749 if (*fh_size < FHANDLE_SIZE_MIN) { 1750 fidsize = 0; 1751 } else { 1752 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1753 if (fhp != NULL) { 1754 memset(fhp, 0, *fh_size); 1755 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1756 fidp = &fhp->fh_fid; 1757 } 1758 } 1759 error = VFS_VPTOFH(vp, fidp, &fidsize); 1760 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1761 if (error == 0 && *fh_size < needfhsize) { 1762 error = E2BIG; 1763 } 1764 *fh_size = needfhsize; 1765 return error; 1766 } 1767 1768 int 1769 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1770 { 1771 struct mount *mp; 1772 fhandle_t *fhp; 1773 size_t fhsize; 1774 size_t fidsize; 1775 int error; 1776 1777 mp = vp->v_mount; 1778 fidsize = 0; 1779 error = VFS_VPTOFH(vp, NULL, &fidsize); 1780 KASSERT(error != 0); 1781 if (error != E2BIG) { 1782 goto out; 1783 } 1784 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1785 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1786 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1787 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1788 if (error == 0) { 1789 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1790 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1791 *fhpp = fhp; 1792 } else { 1793 kmem_free(fhp, fhsize); 1794 } 1795 out: 1796 return error; 1797 } 1798 1799 void 1800 vfs_composefh_free(fhandle_t *fhp) 1801 { 1802 1803 vfs__fhfree(fhp); 1804 } 1805 1806 /* 1807 * vfs_fhtovp: lookup a vnode by a filehandle. 1808 */ 1809 1810 int 1811 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1812 { 1813 struct mount *mp; 1814 int error; 1815 1816 *vpp = NULL; 1817 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1818 if (mp == NULL) { 1819 error = ESTALE; 1820 goto out; 1821 } 1822 if (mp->mnt_op->vfs_fhtovp == NULL) { 1823 error = EOPNOTSUPP; 1824 goto out; 1825 } 1826 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1827 out: 1828 return error; 1829 } 1830 1831 /* 1832 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1833 * the needed size. 1834 */ 1835 1836 int 1837 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1838 { 1839 fhandle_t *fhp; 1840 int error; 1841 1842 if (fhsize > FHANDLE_SIZE_MAX) { 1843 return EINVAL; 1844 } 1845 if (fhsize < FHANDLE_SIZE_MIN) { 1846 return EINVAL; 1847 } 1848 again: 1849 fhp = kmem_alloc(fhsize, KM_SLEEP); 1850 error = copyin(ufhp, fhp, fhsize); 1851 if (error == 0) { 1852 /* XXX this check shouldn't be here */ 1853 if (FHANDLE_SIZE(fhp) == fhsize) { 1854 *fhpp = fhp; 1855 return 0; 1856 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1857 /* 1858 * a kludge for nfsv2 padded handles. 1859 */ 1860 size_t sz; 1861 1862 sz = FHANDLE_SIZE(fhp); 1863 kmem_free(fhp, fhsize); 1864 fhsize = sz; 1865 goto again; 1866 } else { 1867 /* 1868 * userland told us wrong size. 1869 */ 1870 error = EINVAL; 1871 } 1872 } 1873 kmem_free(fhp, fhsize); 1874 return error; 1875 } 1876 1877 void 1878 vfs_copyinfh_free(fhandle_t *fhp) 1879 { 1880 1881 vfs__fhfree(fhp); 1882 } 1883 1884 /* 1885 * Get file handle system call 1886 */ 1887 int 1888 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1889 { 1890 /* { 1891 syscallarg(char *) fname; 1892 syscallarg(fhandle_t *) fhp; 1893 syscallarg(size_t *) fh_size; 1894 } */ 1895 struct vnode *vp; 1896 fhandle_t *fh; 1897 int error; 1898 struct pathbuf *pb; 1899 struct nameidata nd; 1900 size_t sz; 1901 size_t usz; 1902 1903 /* 1904 * Must be super user 1905 */ 1906 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1907 0, NULL, NULL, NULL); 1908 if (error) 1909 return (error); 1910 1911 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1912 if (error) { 1913 return error; 1914 } 1915 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1916 error = namei(&nd); 1917 if (error) { 1918 pathbuf_destroy(pb); 1919 return error; 1920 } 1921 vp = nd.ni_vp; 1922 pathbuf_destroy(pb); 1923 1924 error = vfs_composefh_alloc(vp, &fh); 1925 vput(vp); 1926 if (error != 0) { 1927 return error; 1928 } 1929 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1930 if (error != 0) { 1931 goto out; 1932 } 1933 sz = FHANDLE_SIZE(fh); 1934 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1935 if (error != 0) { 1936 goto out; 1937 } 1938 if (usz >= sz) { 1939 error = copyout(fh, SCARG(uap, fhp), sz); 1940 } else { 1941 error = E2BIG; 1942 } 1943 out: 1944 vfs_composefh_free(fh); 1945 return (error); 1946 } 1947 1948 /* 1949 * Open a file given a file handle. 1950 * 1951 * Check permissions, allocate an open file structure, 1952 * and call the device open routine if any. 1953 */ 1954 1955 int 1956 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1957 register_t *retval) 1958 { 1959 file_t *fp; 1960 struct vnode *vp = NULL; 1961 kauth_cred_t cred = l->l_cred; 1962 file_t *nfp; 1963 int indx, error; 1964 struct vattr va; 1965 fhandle_t *fh; 1966 int flags; 1967 proc_t *p; 1968 1969 p = curproc; 1970 1971 /* 1972 * Must be super user 1973 */ 1974 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1975 0, NULL, NULL, NULL))) 1976 return (error); 1977 1978 if (oflags & O_SEARCH) { 1979 oflags &= ~(int)O_SEARCH; 1980 } 1981 1982 flags = FFLAGS(oflags); 1983 if ((flags & (FREAD | FWRITE)) == 0) 1984 return (EINVAL); 1985 if ((flags & O_CREAT)) 1986 return (EINVAL); 1987 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1988 return (error); 1989 fp = nfp; 1990 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1991 if (error != 0) { 1992 goto bad; 1993 } 1994 error = vfs_fhtovp(fh, &vp); 1995 vfs_copyinfh_free(fh); 1996 if (error != 0) { 1997 goto bad; 1998 } 1999 2000 /* Now do an effective vn_open */ 2001 2002 if (vp->v_type == VSOCK) { 2003 error = EOPNOTSUPP; 2004 goto bad; 2005 } 2006 error = vn_openchk(vp, cred, flags); 2007 if (error != 0) 2008 goto bad; 2009 if (flags & O_TRUNC) { 2010 VOP_UNLOCK(vp); /* XXX */ 2011 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2012 vattr_null(&va); 2013 va.va_size = 0; 2014 error = VOP_SETATTR(vp, &va, cred); 2015 if (error) 2016 goto bad; 2017 } 2018 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2019 goto bad; 2020 if (flags & FWRITE) { 2021 mutex_enter(vp->v_interlock); 2022 vp->v_writecount++; 2023 mutex_exit(vp->v_interlock); 2024 } 2025 2026 /* done with modified vn_open, now finish what sys_open does. */ 2027 if ((error = open_setfp(l, fp, vp, indx, flags))) 2028 return error; 2029 2030 VOP_UNLOCK(vp); 2031 *retval = indx; 2032 fd_affix(p, fp, indx); 2033 return (0); 2034 2035 bad: 2036 fd_abort(p, fp, indx); 2037 if (vp != NULL) 2038 vput(vp); 2039 return (error); 2040 } 2041 2042 int 2043 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2044 { 2045 /* { 2046 syscallarg(const void *) fhp; 2047 syscallarg(size_t) fh_size; 2048 syscallarg(int) flags; 2049 } */ 2050 2051 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2052 SCARG(uap, flags), retval); 2053 } 2054 2055 int 2056 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2057 { 2058 int error; 2059 fhandle_t *fh; 2060 struct vnode *vp; 2061 2062 /* 2063 * Must be super user 2064 */ 2065 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2066 0, NULL, NULL, NULL))) 2067 return (error); 2068 2069 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2070 if (error != 0) 2071 return error; 2072 2073 error = vfs_fhtovp(fh, &vp); 2074 vfs_copyinfh_free(fh); 2075 if (error != 0) 2076 return error; 2077 2078 error = vn_stat(vp, sb); 2079 vput(vp); 2080 return error; 2081 } 2082 2083 2084 /* ARGSUSED */ 2085 int 2086 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2087 { 2088 /* { 2089 syscallarg(const void *) fhp; 2090 syscallarg(size_t) fh_size; 2091 syscallarg(struct stat *) sb; 2092 } */ 2093 struct stat sb; 2094 int error; 2095 2096 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2097 if (error) 2098 return error; 2099 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2100 } 2101 2102 int 2103 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2104 int flags) 2105 { 2106 fhandle_t *fh; 2107 struct mount *mp; 2108 struct vnode *vp; 2109 int error; 2110 2111 /* 2112 * Must be super user 2113 */ 2114 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2115 0, NULL, NULL, NULL))) 2116 return error; 2117 2118 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2119 if (error != 0) 2120 return error; 2121 2122 error = vfs_fhtovp(fh, &vp); 2123 vfs_copyinfh_free(fh); 2124 if (error != 0) 2125 return error; 2126 2127 mp = vp->v_mount; 2128 error = dostatvfs(mp, sb, l, flags, 1); 2129 vput(vp); 2130 return error; 2131 } 2132 2133 /* ARGSUSED */ 2134 int 2135 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2136 { 2137 /* { 2138 syscallarg(const void *) fhp; 2139 syscallarg(size_t) fh_size; 2140 syscallarg(struct statvfs *) buf; 2141 syscallarg(int) flags; 2142 } */ 2143 struct statvfs *sb = STATVFSBUF_GET(); 2144 int error; 2145 2146 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2147 SCARG(uap, flags)); 2148 if (error == 0) 2149 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2150 STATVFSBUF_PUT(sb); 2151 return error; 2152 } 2153 2154 /* 2155 * Create a special file. 2156 */ 2157 /* ARGSUSED */ 2158 int 2159 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2160 register_t *retval) 2161 { 2162 /* { 2163 syscallarg(const char *) path; 2164 syscallarg(mode_t) mode; 2165 syscallarg(dev_t) dev; 2166 } */ 2167 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2168 SCARG(uap, dev), retval, UIO_USERSPACE); 2169 } 2170 2171 int 2172 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2173 register_t *retval) 2174 { 2175 /* { 2176 syscallarg(int) fd; 2177 syscallarg(const char *) path; 2178 syscallarg(mode_t) mode; 2179 syscallarg(int) pad; 2180 syscallarg(dev_t) dev; 2181 } */ 2182 2183 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2184 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2185 } 2186 2187 int 2188 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2189 register_t *retval, enum uio_seg seg) 2190 { 2191 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2192 } 2193 2194 int 2195 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2196 dev_t dev, register_t *retval, enum uio_seg seg) 2197 { 2198 struct proc *p = l->l_proc; 2199 struct vnode *vp; 2200 struct vattr vattr; 2201 int error, optype; 2202 struct pathbuf *pb; 2203 struct nameidata nd; 2204 const char *pathstring; 2205 2206 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2207 0, NULL, NULL, NULL)) != 0) 2208 return (error); 2209 2210 optype = VOP_MKNOD_DESCOFFSET; 2211 2212 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2213 if (error) { 2214 return error; 2215 } 2216 pathstring = pathbuf_stringcopy_get(pb); 2217 if (pathstring == NULL) { 2218 pathbuf_destroy(pb); 2219 return ENOMEM; 2220 } 2221 2222 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2223 2224 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2225 goto out; 2226 vp = nd.ni_vp; 2227 2228 if (vp != NULL) 2229 error = EEXIST; 2230 else { 2231 vattr_null(&vattr); 2232 /* We will read cwdi->cwdi_cmask unlocked. */ 2233 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2234 vattr.va_rdev = dev; 2235 2236 switch (mode & S_IFMT) { 2237 case S_IFMT: /* used by badsect to flag bad sectors */ 2238 vattr.va_type = VBAD; 2239 break; 2240 case S_IFCHR: 2241 vattr.va_type = VCHR; 2242 break; 2243 case S_IFBLK: 2244 vattr.va_type = VBLK; 2245 break; 2246 case S_IFWHT: 2247 optype = VOP_WHITEOUT_DESCOFFSET; 2248 break; 2249 case S_IFREG: 2250 #if NVERIEXEC > 0 2251 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2252 O_CREAT); 2253 #endif /* NVERIEXEC > 0 */ 2254 vattr.va_type = VREG; 2255 vattr.va_rdev = VNOVAL; 2256 optype = VOP_CREATE_DESCOFFSET; 2257 break; 2258 default: 2259 error = EINVAL; 2260 break; 2261 } 2262 } 2263 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2264 && vattr.va_rdev == VNOVAL) 2265 error = EINVAL; 2266 if (!error) { 2267 switch (optype) { 2268 case VOP_WHITEOUT_DESCOFFSET: 2269 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2270 if (error) 2271 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2272 vput(nd.ni_dvp); 2273 break; 2274 2275 case VOP_MKNOD_DESCOFFSET: 2276 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2277 &nd.ni_cnd, &vattr); 2278 if (error == 0) 2279 vrele(nd.ni_vp); 2280 vput(nd.ni_dvp); 2281 break; 2282 2283 case VOP_CREATE_DESCOFFSET: 2284 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2285 &nd.ni_cnd, &vattr); 2286 if (error == 0) 2287 vrele(nd.ni_vp); 2288 vput(nd.ni_dvp); 2289 break; 2290 } 2291 } else { 2292 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2293 if (nd.ni_dvp == vp) 2294 vrele(nd.ni_dvp); 2295 else 2296 vput(nd.ni_dvp); 2297 if (vp) 2298 vrele(vp); 2299 } 2300 out: 2301 pathbuf_stringcopy_put(pb, pathstring); 2302 pathbuf_destroy(pb); 2303 return (error); 2304 } 2305 2306 /* 2307 * Create a named pipe. 2308 */ 2309 /* ARGSUSED */ 2310 int 2311 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2312 { 2313 /* { 2314 syscallarg(const char *) path; 2315 syscallarg(int) mode; 2316 } */ 2317 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2318 } 2319 2320 int 2321 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2322 register_t *retval) 2323 { 2324 /* { 2325 syscallarg(int) fd; 2326 syscallarg(const char *) path; 2327 syscallarg(int) mode; 2328 } */ 2329 2330 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2331 SCARG(uap, mode)); 2332 } 2333 2334 static int 2335 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2336 { 2337 struct proc *p = l->l_proc; 2338 struct vattr vattr; 2339 int error; 2340 struct pathbuf *pb; 2341 struct nameidata nd; 2342 2343 error = pathbuf_copyin(path, &pb); 2344 if (error) { 2345 return error; 2346 } 2347 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2348 2349 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2350 pathbuf_destroy(pb); 2351 return error; 2352 } 2353 if (nd.ni_vp != NULL) { 2354 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2355 if (nd.ni_dvp == nd.ni_vp) 2356 vrele(nd.ni_dvp); 2357 else 2358 vput(nd.ni_dvp); 2359 vrele(nd.ni_vp); 2360 pathbuf_destroy(pb); 2361 return (EEXIST); 2362 } 2363 vattr_null(&vattr); 2364 vattr.va_type = VFIFO; 2365 /* We will read cwdi->cwdi_cmask unlocked. */ 2366 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2367 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2368 if (error == 0) 2369 vrele(nd.ni_vp); 2370 vput(nd.ni_dvp); 2371 pathbuf_destroy(pb); 2372 return (error); 2373 } 2374 2375 /* 2376 * Make a hard file link. 2377 */ 2378 /* ARGSUSED */ 2379 int 2380 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2381 const char *link, int follow, register_t *retval) 2382 { 2383 struct vnode *vp; 2384 struct pathbuf *linkpb; 2385 struct nameidata nd; 2386 namei_simple_flags_t ns_flags; 2387 int error; 2388 2389 if (follow & AT_SYMLINK_FOLLOW) 2390 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2391 else 2392 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2393 2394 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2395 if (error != 0) 2396 return (error); 2397 error = pathbuf_copyin(link, &linkpb); 2398 if (error) { 2399 goto out1; 2400 } 2401 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2402 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2403 goto out2; 2404 if (nd.ni_vp) { 2405 error = EEXIST; 2406 goto abortop; 2407 } 2408 /* Prevent hard links on directories. */ 2409 if (vp->v_type == VDIR) { 2410 error = EPERM; 2411 goto abortop; 2412 } 2413 /* Prevent cross-mount operation. */ 2414 if (nd.ni_dvp->v_mount != vp->v_mount) { 2415 error = EXDEV; 2416 goto abortop; 2417 } 2418 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2419 VOP_UNLOCK(nd.ni_dvp); 2420 vrele(nd.ni_dvp); 2421 out2: 2422 pathbuf_destroy(linkpb); 2423 out1: 2424 vrele(vp); 2425 return (error); 2426 abortop: 2427 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2428 if (nd.ni_dvp == nd.ni_vp) 2429 vrele(nd.ni_dvp); 2430 else 2431 vput(nd.ni_dvp); 2432 if (nd.ni_vp != NULL) 2433 vrele(nd.ni_vp); 2434 goto out2; 2435 } 2436 2437 int 2438 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2439 { 2440 /* { 2441 syscallarg(const char *) path; 2442 syscallarg(const char *) link; 2443 } */ 2444 const char *path = SCARG(uap, path); 2445 const char *link = SCARG(uap, link); 2446 2447 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2448 AT_SYMLINK_FOLLOW, retval); 2449 } 2450 2451 int 2452 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2453 register_t *retval) 2454 { 2455 /* { 2456 syscallarg(int) fd1; 2457 syscallarg(const char *) name1; 2458 syscallarg(int) fd2; 2459 syscallarg(const char *) name2; 2460 syscallarg(int) flags; 2461 } */ 2462 int fd1 = SCARG(uap, fd1); 2463 const char *name1 = SCARG(uap, name1); 2464 int fd2 = SCARG(uap, fd2); 2465 const char *name2 = SCARG(uap, name2); 2466 int follow; 2467 2468 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2469 2470 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2471 } 2472 2473 2474 int 2475 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2476 { 2477 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2478 } 2479 2480 static int 2481 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2482 const char *link, enum uio_seg seg) 2483 { 2484 struct proc *p = curproc; 2485 struct vattr vattr; 2486 char *path; 2487 int error; 2488 size_t len; 2489 struct pathbuf *linkpb; 2490 struct nameidata nd; 2491 2492 KASSERT(l != NULL || fdat == AT_FDCWD); 2493 2494 path = PNBUF_GET(); 2495 if (seg == UIO_USERSPACE) { 2496 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2497 goto out1; 2498 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2499 goto out1; 2500 } else { 2501 len = strlen(patharg) + 1; 2502 KASSERT(len <= MAXPATHLEN); 2503 memcpy(path, patharg, len); 2504 linkpb = pathbuf_create(link); 2505 if (linkpb == NULL) { 2506 error = ENOMEM; 2507 goto out1; 2508 } 2509 } 2510 ktrkuser("symlink-target", path, len - 1); 2511 2512 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2513 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2514 goto out2; 2515 if (nd.ni_vp) { 2516 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2517 if (nd.ni_dvp == nd.ni_vp) 2518 vrele(nd.ni_dvp); 2519 else 2520 vput(nd.ni_dvp); 2521 vrele(nd.ni_vp); 2522 error = EEXIST; 2523 goto out2; 2524 } 2525 vattr_null(&vattr); 2526 vattr.va_type = VLNK; 2527 /* We will read cwdi->cwdi_cmask unlocked. */ 2528 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2529 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2530 if (error == 0) 2531 vrele(nd.ni_vp); 2532 vput(nd.ni_dvp); 2533 out2: 2534 pathbuf_destroy(linkpb); 2535 out1: 2536 PNBUF_PUT(path); 2537 return (error); 2538 } 2539 2540 /* 2541 * Make a symbolic link. 2542 */ 2543 /* ARGSUSED */ 2544 int 2545 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2546 { 2547 /* { 2548 syscallarg(const char *) path; 2549 syscallarg(const char *) link; 2550 } */ 2551 2552 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2553 UIO_USERSPACE); 2554 } 2555 2556 int 2557 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2558 register_t *retval) 2559 { 2560 /* { 2561 syscallarg(const char *) path1; 2562 syscallarg(int) fd; 2563 syscallarg(const char *) path2; 2564 } */ 2565 2566 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2567 SCARG(uap, path2), UIO_USERSPACE); 2568 } 2569 2570 /* 2571 * Delete a whiteout from the filesystem. 2572 */ 2573 /* ARGSUSED */ 2574 int 2575 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2576 { 2577 /* { 2578 syscallarg(const char *) path; 2579 } */ 2580 int error; 2581 struct pathbuf *pb; 2582 struct nameidata nd; 2583 2584 error = pathbuf_copyin(SCARG(uap, path), &pb); 2585 if (error) { 2586 return error; 2587 } 2588 2589 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2590 error = namei(&nd); 2591 if (error) { 2592 pathbuf_destroy(pb); 2593 return (error); 2594 } 2595 2596 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2597 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2598 if (nd.ni_dvp == nd.ni_vp) 2599 vrele(nd.ni_dvp); 2600 else 2601 vput(nd.ni_dvp); 2602 if (nd.ni_vp) 2603 vrele(nd.ni_vp); 2604 pathbuf_destroy(pb); 2605 return (EEXIST); 2606 } 2607 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2608 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2609 vput(nd.ni_dvp); 2610 pathbuf_destroy(pb); 2611 return (error); 2612 } 2613 2614 /* 2615 * Delete a name from the filesystem. 2616 */ 2617 /* ARGSUSED */ 2618 int 2619 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2620 { 2621 /* { 2622 syscallarg(const char *) path; 2623 } */ 2624 2625 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2626 } 2627 2628 int 2629 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2630 register_t *retval) 2631 { 2632 /* { 2633 syscallarg(int) fd; 2634 syscallarg(const char *) path; 2635 syscallarg(int) flag; 2636 } */ 2637 2638 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2639 SCARG(uap, flag), UIO_USERSPACE); 2640 } 2641 2642 int 2643 do_sys_unlink(const char *arg, enum uio_seg seg) 2644 { 2645 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2646 } 2647 2648 static int 2649 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2650 enum uio_seg seg) 2651 { 2652 struct vnode *vp; 2653 int error; 2654 struct pathbuf *pb; 2655 struct nameidata nd; 2656 const char *pathstring; 2657 2658 KASSERT(l != NULL || fdat == AT_FDCWD); 2659 2660 error = pathbuf_maybe_copyin(arg, seg, &pb); 2661 if (error) { 2662 return error; 2663 } 2664 pathstring = pathbuf_stringcopy_get(pb); 2665 if (pathstring == NULL) { 2666 pathbuf_destroy(pb); 2667 return ENOMEM; 2668 } 2669 2670 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2671 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2672 goto out; 2673 vp = nd.ni_vp; 2674 2675 /* 2676 * The root of a mounted filesystem cannot be deleted. 2677 */ 2678 if ((vp->v_vflag & VV_ROOT) != 0) { 2679 error = EBUSY; 2680 goto abort; 2681 } 2682 2683 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2684 error = EBUSY; 2685 goto abort; 2686 } 2687 2688 /* 2689 * No rmdir "." please. 2690 */ 2691 if (nd.ni_dvp == vp) { 2692 error = EINVAL; 2693 goto abort; 2694 } 2695 2696 /* 2697 * AT_REMOVEDIR is required to remove a directory 2698 */ 2699 if (vp->v_type == VDIR) { 2700 if (!(flags & AT_REMOVEDIR)) { 2701 error = EPERM; 2702 goto abort; 2703 } else { 2704 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2705 vput(nd.ni_dvp); 2706 goto out; 2707 } 2708 } 2709 2710 /* 2711 * Starting here we only deal with non directories. 2712 */ 2713 if (flags & AT_REMOVEDIR) { 2714 error = ENOTDIR; 2715 goto abort; 2716 } 2717 2718 #if NVERIEXEC > 0 2719 /* Handle remove requests for veriexec entries. */ 2720 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2721 goto abort; 2722 } 2723 #endif /* NVERIEXEC > 0 */ 2724 2725 #ifdef FILEASSOC 2726 (void)fileassoc_file_delete(vp); 2727 #endif /* FILEASSOC */ 2728 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2729 vput(nd.ni_dvp); 2730 goto out; 2731 2732 abort: 2733 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2734 if (nd.ni_dvp == vp) 2735 vrele(nd.ni_dvp); 2736 else 2737 vput(nd.ni_dvp); 2738 vput(vp); 2739 2740 out: 2741 pathbuf_stringcopy_put(pb, pathstring); 2742 pathbuf_destroy(pb); 2743 return (error); 2744 } 2745 2746 /* 2747 * Reposition read/write file offset. 2748 */ 2749 int 2750 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2751 { 2752 /* { 2753 syscallarg(int) fd; 2754 syscallarg(int) pad; 2755 syscallarg(off_t) offset; 2756 syscallarg(int) whence; 2757 } */ 2758 kauth_cred_t cred = l->l_cred; 2759 file_t *fp; 2760 struct vnode *vp; 2761 struct vattr vattr; 2762 off_t newoff; 2763 int error, fd; 2764 2765 fd = SCARG(uap, fd); 2766 2767 if ((fp = fd_getfile(fd)) == NULL) 2768 return (EBADF); 2769 2770 vp = fp->f_vnode; 2771 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2772 error = ESPIPE; 2773 goto out; 2774 } 2775 2776 vn_lock(vp, LK_SHARED | LK_RETRY); 2777 2778 switch (SCARG(uap, whence)) { 2779 case SEEK_CUR: 2780 newoff = fp->f_offset + SCARG(uap, offset); 2781 break; 2782 case SEEK_END: 2783 error = VOP_GETATTR(vp, &vattr, cred); 2784 if (error) { 2785 VOP_UNLOCK(vp); 2786 goto out; 2787 } 2788 newoff = SCARG(uap, offset) + vattr.va_size; 2789 break; 2790 case SEEK_SET: 2791 newoff = SCARG(uap, offset); 2792 break; 2793 default: 2794 error = EINVAL; 2795 VOP_UNLOCK(vp); 2796 goto out; 2797 } 2798 VOP_UNLOCK(vp); 2799 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2800 *(off_t *)retval = fp->f_offset = newoff; 2801 } 2802 out: 2803 fd_putfile(fd); 2804 return (error); 2805 } 2806 2807 /* 2808 * Positional read system call. 2809 */ 2810 int 2811 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2812 { 2813 /* { 2814 syscallarg(int) fd; 2815 syscallarg(void *) buf; 2816 syscallarg(size_t) nbyte; 2817 syscallarg(off_t) offset; 2818 } */ 2819 file_t *fp; 2820 struct vnode *vp; 2821 off_t offset; 2822 int error, fd = SCARG(uap, fd); 2823 2824 if ((fp = fd_getfile(fd)) == NULL) 2825 return (EBADF); 2826 2827 if ((fp->f_flag & FREAD) == 0) { 2828 fd_putfile(fd); 2829 return (EBADF); 2830 } 2831 2832 vp = fp->f_vnode; 2833 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2834 error = ESPIPE; 2835 goto out; 2836 } 2837 2838 offset = SCARG(uap, offset); 2839 2840 /* 2841 * XXX This works because no file systems actually 2842 * XXX take any action on the seek operation. 2843 */ 2844 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2845 goto out; 2846 2847 /* dofileread() will unuse the descriptor for us */ 2848 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2849 &offset, 0, retval)); 2850 2851 out: 2852 fd_putfile(fd); 2853 return (error); 2854 } 2855 2856 /* 2857 * Positional scatter read system call. 2858 */ 2859 int 2860 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2861 { 2862 /* { 2863 syscallarg(int) fd; 2864 syscallarg(const struct iovec *) iovp; 2865 syscallarg(int) iovcnt; 2866 syscallarg(off_t) offset; 2867 } */ 2868 off_t offset = SCARG(uap, offset); 2869 2870 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2871 SCARG(uap, iovcnt), &offset, 0, retval); 2872 } 2873 2874 /* 2875 * Positional write system call. 2876 */ 2877 int 2878 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2879 { 2880 /* { 2881 syscallarg(int) fd; 2882 syscallarg(const void *) buf; 2883 syscallarg(size_t) nbyte; 2884 syscallarg(off_t) offset; 2885 } */ 2886 file_t *fp; 2887 struct vnode *vp; 2888 off_t offset; 2889 int error, fd = SCARG(uap, fd); 2890 2891 if ((fp = fd_getfile(fd)) == NULL) 2892 return (EBADF); 2893 2894 if ((fp->f_flag & FWRITE) == 0) { 2895 fd_putfile(fd); 2896 return (EBADF); 2897 } 2898 2899 vp = fp->f_vnode; 2900 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2901 error = ESPIPE; 2902 goto out; 2903 } 2904 2905 offset = SCARG(uap, offset); 2906 2907 /* 2908 * XXX This works because no file systems actually 2909 * XXX take any action on the seek operation. 2910 */ 2911 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2912 goto out; 2913 2914 /* dofilewrite() will unuse the descriptor for us */ 2915 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2916 &offset, 0, retval)); 2917 2918 out: 2919 fd_putfile(fd); 2920 return (error); 2921 } 2922 2923 /* 2924 * Positional gather write system call. 2925 */ 2926 int 2927 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2928 { 2929 /* { 2930 syscallarg(int) fd; 2931 syscallarg(const struct iovec *) iovp; 2932 syscallarg(int) iovcnt; 2933 syscallarg(off_t) offset; 2934 } */ 2935 off_t offset = SCARG(uap, offset); 2936 2937 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2938 SCARG(uap, iovcnt), &offset, 0, retval); 2939 } 2940 2941 /* 2942 * Check access permissions. 2943 */ 2944 int 2945 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2946 { 2947 /* { 2948 syscallarg(const char *) path; 2949 syscallarg(int) flags; 2950 } */ 2951 2952 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2953 SCARG(uap, flags), 0); 2954 } 2955 2956 int 2957 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2958 int mode, int flags) 2959 { 2960 kauth_cred_t cred; 2961 struct vnode *vp; 2962 int error, nd_flag, vmode; 2963 struct pathbuf *pb; 2964 struct nameidata nd; 2965 2966 CTASSERT(F_OK == 0); 2967 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2968 /* nonsense mode */ 2969 return EINVAL; 2970 } 2971 2972 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2973 if (flags & AT_SYMLINK_NOFOLLOW) 2974 nd_flag &= ~FOLLOW; 2975 2976 error = pathbuf_copyin(path, &pb); 2977 if (error) 2978 return error; 2979 2980 NDINIT(&nd, LOOKUP, nd_flag, pb); 2981 2982 /* Override default credentials */ 2983 cred = kauth_cred_dup(l->l_cred); 2984 if (!(flags & AT_EACCESS)) { 2985 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2986 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2987 } 2988 nd.ni_cnd.cn_cred = cred; 2989 2990 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2991 pathbuf_destroy(pb); 2992 goto out; 2993 } 2994 vp = nd.ni_vp; 2995 pathbuf_destroy(pb); 2996 2997 /* Flags == 0 means only check for existence. */ 2998 if (mode) { 2999 vmode = 0; 3000 if (mode & R_OK) 3001 vmode |= VREAD; 3002 if (mode & W_OK) 3003 vmode |= VWRITE; 3004 if (mode & X_OK) 3005 vmode |= VEXEC; 3006 3007 error = VOP_ACCESS(vp, vmode, cred); 3008 if (!error && (vmode & VWRITE)) 3009 error = vn_writechk(vp); 3010 } 3011 vput(vp); 3012 out: 3013 kauth_cred_free(cred); 3014 return (error); 3015 } 3016 3017 int 3018 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3019 register_t *retval) 3020 { 3021 /* { 3022 syscallarg(int) fd; 3023 syscallarg(const char *) path; 3024 syscallarg(int) amode; 3025 syscallarg(int) flag; 3026 } */ 3027 3028 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3029 SCARG(uap, amode), SCARG(uap, flag)); 3030 } 3031 3032 /* 3033 * Common code for all sys_stat functions, including compat versions. 3034 */ 3035 int 3036 do_sys_stat(const char *userpath, unsigned int nd_flag, 3037 struct stat *sb) 3038 { 3039 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3040 } 3041 3042 int 3043 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3044 unsigned int nd_flag, struct stat *sb) 3045 { 3046 int error; 3047 struct pathbuf *pb; 3048 struct nameidata nd; 3049 3050 KASSERT(l != NULL || fdat == AT_FDCWD); 3051 3052 error = pathbuf_copyin(userpath, &pb); 3053 if (error) { 3054 return error; 3055 } 3056 3057 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3058 3059 error = fd_nameiat(l, fdat, &nd); 3060 if (error != 0) { 3061 pathbuf_destroy(pb); 3062 return error; 3063 } 3064 error = vn_stat(nd.ni_vp, sb); 3065 vput(nd.ni_vp); 3066 pathbuf_destroy(pb); 3067 return error; 3068 } 3069 3070 /* 3071 * Get file status; this version follows links. 3072 */ 3073 /* ARGSUSED */ 3074 int 3075 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3076 { 3077 /* { 3078 syscallarg(const char *) path; 3079 syscallarg(struct stat *) ub; 3080 } */ 3081 struct stat sb; 3082 int error; 3083 3084 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3085 if (error) 3086 return error; 3087 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3088 } 3089 3090 /* 3091 * Get file status; this version does not follow links. 3092 */ 3093 /* ARGSUSED */ 3094 int 3095 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3096 { 3097 /* { 3098 syscallarg(const char *) path; 3099 syscallarg(struct stat *) ub; 3100 } */ 3101 struct stat sb; 3102 int error; 3103 3104 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3105 if (error) 3106 return error; 3107 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3108 } 3109 3110 int 3111 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3112 register_t *retval) 3113 { 3114 /* { 3115 syscallarg(int) fd; 3116 syscallarg(const char *) path; 3117 syscallarg(struct stat *) buf; 3118 syscallarg(int) flag; 3119 } */ 3120 unsigned int nd_flag; 3121 struct stat sb; 3122 int error; 3123 3124 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3125 nd_flag = NOFOLLOW; 3126 else 3127 nd_flag = FOLLOW; 3128 3129 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3130 &sb); 3131 if (error) 3132 return error; 3133 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3134 } 3135 3136 /* 3137 * Get configurable pathname variables. 3138 */ 3139 /* ARGSUSED */ 3140 int 3141 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3142 { 3143 /* { 3144 syscallarg(const char *) path; 3145 syscallarg(int) name; 3146 } */ 3147 int error; 3148 struct pathbuf *pb; 3149 struct nameidata nd; 3150 3151 error = pathbuf_copyin(SCARG(uap, path), &pb); 3152 if (error) { 3153 return error; 3154 } 3155 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3156 if ((error = namei(&nd)) != 0) { 3157 pathbuf_destroy(pb); 3158 return (error); 3159 } 3160 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3161 vput(nd.ni_vp); 3162 pathbuf_destroy(pb); 3163 return (error); 3164 } 3165 3166 /* 3167 * Return target name of a symbolic link. 3168 */ 3169 /* ARGSUSED */ 3170 int 3171 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3172 register_t *retval) 3173 { 3174 /* { 3175 syscallarg(const char *) path; 3176 syscallarg(char *) buf; 3177 syscallarg(size_t) count; 3178 } */ 3179 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3180 SCARG(uap, buf), SCARG(uap, count), retval); 3181 } 3182 3183 static int 3184 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3185 size_t count, register_t *retval) 3186 { 3187 struct vnode *vp; 3188 struct iovec aiov; 3189 struct uio auio; 3190 int error; 3191 struct pathbuf *pb; 3192 struct nameidata nd; 3193 3194 error = pathbuf_copyin(path, &pb); 3195 if (error) { 3196 return error; 3197 } 3198 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3199 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3200 pathbuf_destroy(pb); 3201 return error; 3202 } 3203 vp = nd.ni_vp; 3204 pathbuf_destroy(pb); 3205 if (vp->v_type != VLNK) 3206 error = EINVAL; 3207 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3208 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3209 aiov.iov_base = buf; 3210 aiov.iov_len = count; 3211 auio.uio_iov = &aiov; 3212 auio.uio_iovcnt = 1; 3213 auio.uio_offset = 0; 3214 auio.uio_rw = UIO_READ; 3215 KASSERT(l == curlwp); 3216 auio.uio_vmspace = l->l_proc->p_vmspace; 3217 auio.uio_resid = count; 3218 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3219 *retval = count - auio.uio_resid; 3220 } 3221 vput(vp); 3222 return (error); 3223 } 3224 3225 int 3226 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3227 register_t *retval) 3228 { 3229 /* { 3230 syscallarg(int) fd; 3231 syscallarg(const char *) path; 3232 syscallarg(char *) buf; 3233 syscallarg(size_t) bufsize; 3234 } */ 3235 3236 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3237 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3238 } 3239 3240 /* 3241 * Change flags of a file given a path name. 3242 */ 3243 /* ARGSUSED */ 3244 int 3245 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3246 { 3247 /* { 3248 syscallarg(const char *) path; 3249 syscallarg(u_long) flags; 3250 } */ 3251 struct vnode *vp; 3252 int error; 3253 3254 error = namei_simple_user(SCARG(uap, path), 3255 NSM_FOLLOW_TRYEMULROOT, &vp); 3256 if (error != 0) 3257 return (error); 3258 error = change_flags(vp, SCARG(uap, flags), l); 3259 vput(vp); 3260 return (error); 3261 } 3262 3263 /* 3264 * Change flags of a file given a file descriptor. 3265 */ 3266 /* ARGSUSED */ 3267 int 3268 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3269 { 3270 /* { 3271 syscallarg(int) fd; 3272 syscallarg(u_long) flags; 3273 } */ 3274 struct vnode *vp; 3275 file_t *fp; 3276 int error; 3277 3278 /* fd_getvnode() will use the descriptor for us */ 3279 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3280 return (error); 3281 vp = fp->f_vnode; 3282 error = change_flags(vp, SCARG(uap, flags), l); 3283 VOP_UNLOCK(vp); 3284 fd_putfile(SCARG(uap, fd)); 3285 return (error); 3286 } 3287 3288 /* 3289 * Change flags of a file given a path name; this version does 3290 * not follow links. 3291 */ 3292 int 3293 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3294 { 3295 /* { 3296 syscallarg(const char *) path; 3297 syscallarg(u_long) flags; 3298 } */ 3299 struct vnode *vp; 3300 int error; 3301 3302 error = namei_simple_user(SCARG(uap, path), 3303 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3304 if (error != 0) 3305 return (error); 3306 error = change_flags(vp, SCARG(uap, flags), l); 3307 vput(vp); 3308 return (error); 3309 } 3310 3311 /* 3312 * Common routine to change flags of a file. 3313 */ 3314 int 3315 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3316 { 3317 struct vattr vattr; 3318 int error; 3319 3320 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3321 3322 vattr_null(&vattr); 3323 vattr.va_flags = flags; 3324 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3325 3326 return (error); 3327 } 3328 3329 /* 3330 * Change mode of a file given path name; this version follows links. 3331 */ 3332 /* ARGSUSED */ 3333 int 3334 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3335 { 3336 /* { 3337 syscallarg(const char *) path; 3338 syscallarg(int) mode; 3339 } */ 3340 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3341 SCARG(uap, mode), 0); 3342 } 3343 3344 int 3345 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3346 { 3347 int error; 3348 struct vnode *vp; 3349 namei_simple_flags_t ns_flag; 3350 3351 if (flags & AT_SYMLINK_NOFOLLOW) 3352 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3353 else 3354 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3355 3356 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3357 if (error != 0) 3358 return error; 3359 3360 error = change_mode(vp, mode, l); 3361 3362 vrele(vp); 3363 3364 return (error); 3365 } 3366 3367 /* 3368 * Change mode of a file given a file descriptor. 3369 */ 3370 /* ARGSUSED */ 3371 int 3372 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3373 { 3374 /* { 3375 syscallarg(int) fd; 3376 syscallarg(int) mode; 3377 } */ 3378 file_t *fp; 3379 int error; 3380 3381 /* fd_getvnode() will use the descriptor for us */ 3382 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3383 return (error); 3384 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3385 fd_putfile(SCARG(uap, fd)); 3386 return (error); 3387 } 3388 3389 int 3390 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3391 register_t *retval) 3392 { 3393 /* { 3394 syscallarg(int) fd; 3395 syscallarg(const char *) path; 3396 syscallarg(int) mode; 3397 syscallarg(int) flag; 3398 } */ 3399 3400 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3401 SCARG(uap, mode), SCARG(uap, flag)); 3402 } 3403 3404 /* 3405 * Change mode of a file given path name; this version does not follow links. 3406 */ 3407 /* ARGSUSED */ 3408 int 3409 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3410 { 3411 /* { 3412 syscallarg(const char *) path; 3413 syscallarg(int) mode; 3414 } */ 3415 int error; 3416 struct vnode *vp; 3417 3418 error = namei_simple_user(SCARG(uap, path), 3419 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3420 if (error != 0) 3421 return (error); 3422 3423 error = change_mode(vp, SCARG(uap, mode), l); 3424 3425 vrele(vp); 3426 return (error); 3427 } 3428 3429 /* 3430 * Common routine to set mode given a vnode. 3431 */ 3432 static int 3433 change_mode(struct vnode *vp, int mode, struct lwp *l) 3434 { 3435 struct vattr vattr; 3436 int error; 3437 3438 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3439 vattr_null(&vattr); 3440 vattr.va_mode = mode & ALLPERMS; 3441 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3442 VOP_UNLOCK(vp); 3443 return (error); 3444 } 3445 3446 /* 3447 * Set ownership given a path name; this version follows links. 3448 */ 3449 /* ARGSUSED */ 3450 int 3451 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3452 { 3453 /* { 3454 syscallarg(const char *) path; 3455 syscallarg(uid_t) uid; 3456 syscallarg(gid_t) gid; 3457 } */ 3458 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3459 SCARG(uap, gid), 0); 3460 } 3461 3462 int 3463 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3464 gid_t gid, int flags) 3465 { 3466 int error; 3467 struct vnode *vp; 3468 namei_simple_flags_t ns_flag; 3469 3470 if (flags & AT_SYMLINK_NOFOLLOW) 3471 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3472 else 3473 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3474 3475 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3476 if (error != 0) 3477 return error; 3478 3479 error = change_owner(vp, uid, gid, l, 0); 3480 3481 vrele(vp); 3482 3483 return (error); 3484 } 3485 3486 /* 3487 * Set ownership given a path name; this version follows links. 3488 * Provides POSIX semantics. 3489 */ 3490 /* ARGSUSED */ 3491 int 3492 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3493 { 3494 /* { 3495 syscallarg(const char *) path; 3496 syscallarg(uid_t) uid; 3497 syscallarg(gid_t) gid; 3498 } */ 3499 int error; 3500 struct vnode *vp; 3501 3502 error = namei_simple_user(SCARG(uap, path), 3503 NSM_FOLLOW_TRYEMULROOT, &vp); 3504 if (error != 0) 3505 return (error); 3506 3507 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3508 3509 vrele(vp); 3510 return (error); 3511 } 3512 3513 /* 3514 * Set ownership given a file descriptor. 3515 */ 3516 /* ARGSUSED */ 3517 int 3518 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3519 { 3520 /* { 3521 syscallarg(int) fd; 3522 syscallarg(uid_t) uid; 3523 syscallarg(gid_t) gid; 3524 } */ 3525 int error; 3526 file_t *fp; 3527 3528 /* fd_getvnode() will use the descriptor for us */ 3529 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3530 return (error); 3531 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3532 l, 0); 3533 fd_putfile(SCARG(uap, fd)); 3534 return (error); 3535 } 3536 3537 int 3538 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3539 register_t *retval) 3540 { 3541 /* { 3542 syscallarg(int) fd; 3543 syscallarg(const char *) path; 3544 syscallarg(uid_t) owner; 3545 syscallarg(gid_t) group; 3546 syscallarg(int) flag; 3547 } */ 3548 3549 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3550 SCARG(uap, owner), SCARG(uap, group), 3551 SCARG(uap, flag)); 3552 } 3553 3554 /* 3555 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3556 */ 3557 /* ARGSUSED */ 3558 int 3559 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3560 { 3561 /* { 3562 syscallarg(int) fd; 3563 syscallarg(uid_t) uid; 3564 syscallarg(gid_t) gid; 3565 } */ 3566 int error; 3567 file_t *fp; 3568 3569 /* fd_getvnode() will use the descriptor for us */ 3570 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3571 return (error); 3572 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3573 l, 1); 3574 fd_putfile(SCARG(uap, fd)); 3575 return (error); 3576 } 3577 3578 /* 3579 * Set ownership given a path name; this version does not follow links. 3580 */ 3581 /* ARGSUSED */ 3582 int 3583 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3584 { 3585 /* { 3586 syscallarg(const char *) path; 3587 syscallarg(uid_t) uid; 3588 syscallarg(gid_t) gid; 3589 } */ 3590 int error; 3591 struct vnode *vp; 3592 3593 error = namei_simple_user(SCARG(uap, path), 3594 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3595 if (error != 0) 3596 return (error); 3597 3598 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3599 3600 vrele(vp); 3601 return (error); 3602 } 3603 3604 /* 3605 * Set ownership given a path name; this version does not follow links. 3606 * Provides POSIX/XPG semantics. 3607 */ 3608 /* ARGSUSED */ 3609 int 3610 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3611 { 3612 /* { 3613 syscallarg(const char *) path; 3614 syscallarg(uid_t) uid; 3615 syscallarg(gid_t) gid; 3616 } */ 3617 int error; 3618 struct vnode *vp; 3619 3620 error = namei_simple_user(SCARG(uap, path), 3621 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3622 if (error != 0) 3623 return (error); 3624 3625 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3626 3627 vrele(vp); 3628 return (error); 3629 } 3630 3631 /* 3632 * Common routine to set ownership given a vnode. 3633 */ 3634 static int 3635 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3636 int posix_semantics) 3637 { 3638 struct vattr vattr; 3639 mode_t newmode; 3640 int error; 3641 3642 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3643 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3644 goto out; 3645 3646 #define CHANGED(x) ((int)(x) != -1) 3647 newmode = vattr.va_mode; 3648 if (posix_semantics) { 3649 /* 3650 * POSIX/XPG semantics: if the caller is not the super-user, 3651 * clear set-user-id and set-group-id bits. Both POSIX and 3652 * the XPG consider the behaviour for calls by the super-user 3653 * implementation-defined; we leave the set-user-id and set- 3654 * group-id settings intact in that case. 3655 */ 3656 if (vattr.va_mode & S_ISUID) { 3657 if (kauth_authorize_vnode(l->l_cred, 3658 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3659 newmode &= ~S_ISUID; 3660 } 3661 if (vattr.va_mode & S_ISGID) { 3662 if (kauth_authorize_vnode(l->l_cred, 3663 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3664 newmode &= ~S_ISGID; 3665 } 3666 } else { 3667 /* 3668 * NetBSD semantics: when changing owner and/or group, 3669 * clear the respective bit(s). 3670 */ 3671 if (CHANGED(uid)) 3672 newmode &= ~S_ISUID; 3673 if (CHANGED(gid)) 3674 newmode &= ~S_ISGID; 3675 } 3676 /* Update va_mode iff altered. */ 3677 if (vattr.va_mode == newmode) 3678 newmode = VNOVAL; 3679 3680 vattr_null(&vattr); 3681 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3682 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3683 vattr.va_mode = newmode; 3684 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3685 #undef CHANGED 3686 3687 out: 3688 VOP_UNLOCK(vp); 3689 return (error); 3690 } 3691 3692 /* 3693 * Set the access and modification times given a path name; this 3694 * version follows links. 3695 */ 3696 /* ARGSUSED */ 3697 int 3698 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3699 register_t *retval) 3700 { 3701 /* { 3702 syscallarg(const char *) path; 3703 syscallarg(const struct timeval *) tptr; 3704 } */ 3705 3706 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3707 SCARG(uap, tptr), UIO_USERSPACE); 3708 } 3709 3710 /* 3711 * Set the access and modification times given a file descriptor. 3712 */ 3713 /* ARGSUSED */ 3714 int 3715 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3716 register_t *retval) 3717 { 3718 /* { 3719 syscallarg(int) fd; 3720 syscallarg(const struct timeval *) tptr; 3721 } */ 3722 int error; 3723 file_t *fp; 3724 3725 /* fd_getvnode() will use the descriptor for us */ 3726 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3727 return (error); 3728 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3729 UIO_USERSPACE); 3730 fd_putfile(SCARG(uap, fd)); 3731 return (error); 3732 } 3733 3734 int 3735 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3736 register_t *retval) 3737 { 3738 /* { 3739 syscallarg(int) fd; 3740 syscallarg(const struct timespec *) tptr; 3741 } */ 3742 int error; 3743 file_t *fp; 3744 3745 /* fd_getvnode() will use the descriptor for us */ 3746 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3747 return (error); 3748 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3749 SCARG(uap, tptr), UIO_USERSPACE); 3750 fd_putfile(SCARG(uap, fd)); 3751 return (error); 3752 } 3753 3754 /* 3755 * Set the access and modification times given a path name; this 3756 * version does not follow links. 3757 */ 3758 int 3759 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3760 register_t *retval) 3761 { 3762 /* { 3763 syscallarg(const char *) path; 3764 syscallarg(const struct timeval *) tptr; 3765 } */ 3766 3767 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3768 SCARG(uap, tptr), UIO_USERSPACE); 3769 } 3770 3771 int 3772 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3773 register_t *retval) 3774 { 3775 /* { 3776 syscallarg(int) fd; 3777 syscallarg(const char *) path; 3778 syscallarg(const struct timespec *) tptr; 3779 syscallarg(int) flag; 3780 } */ 3781 int follow; 3782 const struct timespec *tptr; 3783 int error; 3784 3785 tptr = SCARG(uap, tptr); 3786 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3787 3788 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3789 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3790 3791 return error; 3792 } 3793 3794 /* 3795 * Common routine to set access and modification times given a vnode. 3796 */ 3797 int 3798 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3799 const struct timespec *tptr, enum uio_seg seg) 3800 { 3801 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3802 } 3803 3804 int 3805 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3806 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3807 { 3808 struct vattr vattr; 3809 int error, dorele = 0; 3810 namei_simple_flags_t sflags; 3811 bool vanull, setbirthtime; 3812 struct timespec ts[2]; 3813 3814 KASSERT(l != NULL || fdat == AT_FDCWD); 3815 3816 /* 3817 * I have checked all callers and they pass either FOLLOW, 3818 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3819 * is 0. More to the point, they don't pass anything else. 3820 * Let's keep it that way at least until the namei interfaces 3821 * are fully sanitized. 3822 */ 3823 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3824 sflags = (flag == FOLLOW) ? 3825 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3826 3827 if (tptr == NULL) { 3828 vanull = true; 3829 nanotime(&ts[0]); 3830 ts[1] = ts[0]; 3831 } else { 3832 vanull = false; 3833 if (seg != UIO_SYSSPACE) { 3834 error = copyin(tptr, ts, sizeof (ts)); 3835 if (error != 0) 3836 return error; 3837 } else { 3838 ts[0] = tptr[0]; 3839 ts[1] = tptr[1]; 3840 } 3841 } 3842 3843 if (ts[0].tv_nsec == UTIME_NOW) { 3844 nanotime(&ts[0]); 3845 if (ts[1].tv_nsec == UTIME_NOW) { 3846 vanull = true; 3847 ts[1] = ts[0]; 3848 } 3849 } else if (ts[1].tv_nsec == UTIME_NOW) 3850 nanotime(&ts[1]); 3851 3852 if (vp == NULL) { 3853 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3854 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3855 if (error != 0) 3856 return error; 3857 dorele = 1; 3858 } 3859 3860 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3861 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3862 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3863 vattr_null(&vattr); 3864 3865 if (ts[0].tv_nsec != UTIME_OMIT) 3866 vattr.va_atime = ts[0]; 3867 3868 if (ts[1].tv_nsec != UTIME_OMIT) { 3869 vattr.va_mtime = ts[1]; 3870 if (setbirthtime) 3871 vattr.va_birthtime = ts[1]; 3872 } 3873 3874 if (vanull) 3875 vattr.va_vaflags |= VA_UTIMES_NULL; 3876 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3877 VOP_UNLOCK(vp); 3878 3879 if (dorele != 0) 3880 vrele(vp); 3881 3882 return error; 3883 } 3884 3885 int 3886 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3887 const struct timeval *tptr, enum uio_seg seg) 3888 { 3889 struct timespec ts[2]; 3890 struct timespec *tsptr = NULL; 3891 int error; 3892 3893 if (tptr != NULL) { 3894 struct timeval tv[2]; 3895 3896 if (seg != UIO_SYSSPACE) { 3897 error = copyin(tptr, tv, sizeof (tv)); 3898 if (error != 0) 3899 return error; 3900 tptr = tv; 3901 } 3902 3903 if ((tv[0].tv_usec == UTIME_NOW) || 3904 (tv[0].tv_usec == UTIME_OMIT)) 3905 ts[0].tv_nsec = tv[0].tv_usec; 3906 else 3907 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3908 3909 if ((tv[1].tv_usec == UTIME_NOW) || 3910 (tv[1].tv_usec == UTIME_OMIT)) 3911 ts[1].tv_nsec = tv[1].tv_usec; 3912 else 3913 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3914 3915 tsptr = &ts[0]; 3916 } 3917 3918 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3919 } 3920 3921 /* 3922 * Truncate a file given its path name. 3923 */ 3924 /* ARGSUSED */ 3925 int 3926 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3927 { 3928 /* { 3929 syscallarg(const char *) path; 3930 syscallarg(int) pad; 3931 syscallarg(off_t) length; 3932 } */ 3933 struct vnode *vp; 3934 struct vattr vattr; 3935 int error; 3936 3937 if (SCARG(uap, length) < 0) 3938 return EINVAL; 3939 3940 error = namei_simple_user(SCARG(uap, path), 3941 NSM_FOLLOW_TRYEMULROOT, &vp); 3942 if (error != 0) 3943 return (error); 3944 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3945 if (vp->v_type == VDIR) 3946 error = EISDIR; 3947 else if ((error = vn_writechk(vp)) == 0 && 3948 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3949 vattr_null(&vattr); 3950 vattr.va_size = SCARG(uap, length); 3951 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3952 } 3953 vput(vp); 3954 return (error); 3955 } 3956 3957 /* 3958 * Truncate a file given a file descriptor. 3959 */ 3960 /* ARGSUSED */ 3961 int 3962 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3963 { 3964 /* { 3965 syscallarg(int) fd; 3966 syscallarg(int) pad; 3967 syscallarg(off_t) length; 3968 } */ 3969 struct vattr vattr; 3970 struct vnode *vp; 3971 file_t *fp; 3972 int error; 3973 3974 if (SCARG(uap, length) < 0) 3975 return EINVAL; 3976 3977 /* fd_getvnode() will use the descriptor for us */ 3978 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3979 return (error); 3980 if ((fp->f_flag & FWRITE) == 0) { 3981 error = EINVAL; 3982 goto out; 3983 } 3984 vp = fp->f_vnode; 3985 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3986 if (vp->v_type == VDIR) 3987 error = EISDIR; 3988 else if ((error = vn_writechk(vp)) == 0) { 3989 vattr_null(&vattr); 3990 vattr.va_size = SCARG(uap, length); 3991 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3992 } 3993 VOP_UNLOCK(vp); 3994 out: 3995 fd_putfile(SCARG(uap, fd)); 3996 return (error); 3997 } 3998 3999 /* 4000 * Sync an open file. 4001 */ 4002 /* ARGSUSED */ 4003 int 4004 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4005 { 4006 /* { 4007 syscallarg(int) fd; 4008 } */ 4009 struct vnode *vp; 4010 file_t *fp; 4011 int error; 4012 4013 /* fd_getvnode() will use the descriptor for us */ 4014 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4015 return (error); 4016 vp = fp->f_vnode; 4017 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4018 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4019 VOP_UNLOCK(vp); 4020 fd_putfile(SCARG(uap, fd)); 4021 return (error); 4022 } 4023 4024 /* 4025 * Sync a range of file data. API modeled after that found in AIX. 4026 * 4027 * FDATASYNC indicates that we need only save enough metadata to be able 4028 * to re-read the written data. Note we duplicate AIX's requirement that 4029 * the file be open for writing. 4030 */ 4031 /* ARGSUSED */ 4032 int 4033 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4034 { 4035 /* { 4036 syscallarg(int) fd; 4037 syscallarg(int) flags; 4038 syscallarg(off_t) start; 4039 syscallarg(off_t) length; 4040 } */ 4041 struct vnode *vp; 4042 file_t *fp; 4043 int flags, nflags; 4044 off_t s, e, len; 4045 int error; 4046 4047 /* fd_getvnode() will use the descriptor for us */ 4048 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4049 return (error); 4050 4051 if ((fp->f_flag & FWRITE) == 0) { 4052 error = EBADF; 4053 goto out; 4054 } 4055 4056 flags = SCARG(uap, flags); 4057 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4058 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4059 error = EINVAL; 4060 goto out; 4061 } 4062 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4063 if (flags & FDATASYNC) 4064 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4065 else 4066 nflags = FSYNC_WAIT; 4067 if (flags & FDISKSYNC) 4068 nflags |= FSYNC_CACHE; 4069 4070 len = SCARG(uap, length); 4071 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4072 if (len) { 4073 s = SCARG(uap, start); 4074 e = s + len; 4075 if (e < s) { 4076 error = EINVAL; 4077 goto out; 4078 } 4079 } else { 4080 e = 0; 4081 s = 0; 4082 } 4083 4084 vp = fp->f_vnode; 4085 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4086 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4087 VOP_UNLOCK(vp); 4088 out: 4089 fd_putfile(SCARG(uap, fd)); 4090 return (error); 4091 } 4092 4093 /* 4094 * Sync the data of an open file. 4095 */ 4096 /* ARGSUSED */ 4097 int 4098 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4099 { 4100 /* { 4101 syscallarg(int) fd; 4102 } */ 4103 struct vnode *vp; 4104 file_t *fp; 4105 int error; 4106 4107 /* fd_getvnode() will use the descriptor for us */ 4108 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4109 return (error); 4110 if ((fp->f_flag & FWRITE) == 0) { 4111 fd_putfile(SCARG(uap, fd)); 4112 return (EBADF); 4113 } 4114 vp = fp->f_vnode; 4115 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4116 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4117 VOP_UNLOCK(vp); 4118 fd_putfile(SCARG(uap, fd)); 4119 return (error); 4120 } 4121 4122 /* 4123 * Rename files, (standard) BSD semantics frontend. 4124 */ 4125 /* ARGSUSED */ 4126 int 4127 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4128 { 4129 /* { 4130 syscallarg(const char *) from; 4131 syscallarg(const char *) to; 4132 } */ 4133 4134 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4135 SCARG(uap, to), UIO_USERSPACE, 0)); 4136 } 4137 4138 int 4139 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4140 register_t *retval) 4141 { 4142 /* { 4143 syscallarg(int) fromfd; 4144 syscallarg(const char *) from; 4145 syscallarg(int) tofd; 4146 syscallarg(const char *) to; 4147 } */ 4148 4149 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4150 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4151 } 4152 4153 /* 4154 * Rename files, POSIX semantics frontend. 4155 */ 4156 /* ARGSUSED */ 4157 int 4158 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4159 { 4160 /* { 4161 syscallarg(const char *) from; 4162 syscallarg(const char *) to; 4163 } */ 4164 4165 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4166 SCARG(uap, to), UIO_USERSPACE, 1)); 4167 } 4168 4169 /* 4170 * Rename files. Source and destination must either both be directories, 4171 * or both not be directories. If target is a directory, it must be empty. 4172 * If `from' and `to' refer to the same object, the value of the `retain' 4173 * argument is used to determine whether `from' will be 4174 * 4175 * (retain == 0) deleted unless `from' and `to' refer to the same 4176 * object in the file system's name space (BSD). 4177 * (retain == 1) always retained (POSIX). 4178 * 4179 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4180 */ 4181 int 4182 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4183 { 4184 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4185 } 4186 4187 static int 4188 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4189 const char *to, enum uio_seg seg, int retain) 4190 { 4191 struct pathbuf *fpb, *tpb; 4192 struct nameidata fnd, tnd; 4193 struct vnode *fdvp, *fvp; 4194 struct vnode *tdvp, *tvp; 4195 struct mount *mp, *tmp; 4196 int error; 4197 4198 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4199 4200 error = pathbuf_maybe_copyin(from, seg, &fpb); 4201 if (error) 4202 goto out0; 4203 KASSERT(fpb != NULL); 4204 4205 error = pathbuf_maybe_copyin(to, seg, &tpb); 4206 if (error) 4207 goto out1; 4208 KASSERT(tpb != NULL); 4209 4210 /* 4211 * Lookup from. 4212 * 4213 * XXX LOCKPARENT is wrong because we don't actually want it 4214 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4215 * insane, so for the time being we need to leave it like this. 4216 */ 4217 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4218 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4219 goto out2; 4220 4221 /* 4222 * Pull out the important results of the lookup, fdvp and fvp. 4223 * Of course, fvp is bogus because we're about to unlock fdvp. 4224 */ 4225 fdvp = fnd.ni_dvp; 4226 fvp = fnd.ni_vp; 4227 mp = fdvp->v_mount; 4228 KASSERT(fdvp != NULL); 4229 KASSERT(fvp != NULL); 4230 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4231 /* 4232 * Bracket the operation with fstrans_start()/fstrans_done(). 4233 * 4234 * Inside the bracket this file system cannot be unmounted so 4235 * a vnode on this file system cannot change its v_mount. 4236 * A vnode on another file system may still change to dead mount. 4237 */ 4238 fstrans_start(mp); 4239 4240 /* 4241 * Make sure neither fdvp nor fvp is locked. 4242 */ 4243 if (fdvp != fvp) 4244 VOP_UNLOCK(fdvp); 4245 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4246 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4247 4248 /* 4249 * Reject renaming `.' and `..'. Can't do this until after 4250 * namei because we need namei's parsing to find the final 4251 * component name. (namei should just leave us with the final 4252 * component name and not look it up itself, but anyway...) 4253 * 4254 * This was here before because we used to relookup from 4255 * instead of to and relookup requires the caller to check 4256 * this, but now file systems may depend on this check, so we 4257 * must retain it until the file systems are all rototilled. 4258 */ 4259 if (((fnd.ni_cnd.cn_namelen == 1) && 4260 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4261 ((fnd.ni_cnd.cn_namelen == 2) && 4262 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4263 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4264 error = EINVAL; /* XXX EISDIR? */ 4265 goto abort0; 4266 } 4267 4268 /* 4269 * Lookup to. 4270 * 4271 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4272 * fvp here to decide whether to add CREATEDIR is a load of 4273 * bollocks because fvp might be the wrong node by now, since 4274 * fdvp is unlocked. 4275 * 4276 * XXX Why not pass CREATEDIR always? 4277 */ 4278 NDINIT(&tnd, RENAME, 4279 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4280 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4281 tpb); 4282 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4283 goto abort0; 4284 4285 /* 4286 * Pull out the important results of the lookup, tdvp and tvp. 4287 * Of course, tvp is bogus because we're about to unlock tdvp. 4288 */ 4289 tdvp = tnd.ni_dvp; 4290 tvp = tnd.ni_vp; 4291 KASSERT(tdvp != NULL); 4292 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4293 4294 /* 4295 * Make sure neither tdvp nor tvp is locked. 4296 */ 4297 if (tdvp != tvp) 4298 VOP_UNLOCK(tdvp); 4299 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4300 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4301 4302 /* 4303 * Reject renaming onto `.' or `..'. relookup is unhappy with 4304 * these, which is why we must do this here. Once upon a time 4305 * we relooked up from instead of to, and consequently didn't 4306 * need this check, but now that we relookup to instead of 4307 * from, we need this; and we shall need it forever forward 4308 * until the VOP_RENAME protocol changes, because file systems 4309 * will no doubt begin to depend on this check. 4310 */ 4311 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4312 error = EISDIR; 4313 goto abort1; 4314 } 4315 if ((tnd.ni_cnd.cn_namelen == 2) && 4316 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4317 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4318 error = EINVAL; 4319 goto abort1; 4320 } 4321 4322 /* 4323 * Make sure the mount points match. Although we don't hold 4324 * any vnode locks, the v_mount on fdvp file system are stable. 4325 * 4326 * Unmounting another file system at an inopportune moment may 4327 * cause tdvp to disappear and change its v_mount to dead. 4328 * 4329 * So in either case different v_mount means cross-device rename. 4330 */ 4331 KASSERT(mp != NULL); 4332 tmp = tdvp->v_mount; 4333 4334 if (mp != tmp) { 4335 error = EXDEV; 4336 goto abort1; 4337 } 4338 4339 /* 4340 * Take the vfs rename lock to avoid cross-directory screw cases. 4341 * Nothing is locked currently, so taking this lock is safe. 4342 */ 4343 error = VFS_RENAMELOCK_ENTER(mp); 4344 if (error) 4345 goto abort1; 4346 4347 /* 4348 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4349 * and nothing is locked except for the vfs rename lock. 4350 * 4351 * The next step is a little rain dance to conform to the 4352 * insane lock protocol, even though it does nothing to ward 4353 * off race conditions. 4354 * 4355 * We need tdvp and tvp to be locked. However, because we have 4356 * unlocked tdvp in order to hold no locks while we take the 4357 * vfs rename lock, tvp may be wrong here, and we can't safely 4358 * lock it even if the sensible file systems will just unlock 4359 * it straight away. Consequently, we must lock tdvp and then 4360 * relookup tvp to get it locked. 4361 * 4362 * Finally, because the VOP_RENAME protocol is brain-damaged 4363 * and various file systems insanely depend on the semantics of 4364 * this brain damage, the lookup of to must be the last lookup 4365 * before VOP_RENAME. 4366 */ 4367 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4368 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4369 if (error) 4370 goto abort2; 4371 4372 /* 4373 * Drop the old tvp and pick up the new one -- which might be 4374 * the same, but that doesn't matter to us. After this, tdvp 4375 * and tvp should both be locked. 4376 */ 4377 if (tvp != NULL) 4378 vrele(tvp); 4379 tvp = tnd.ni_vp; 4380 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4381 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4382 4383 /* 4384 * The old do_sys_rename had various consistency checks here 4385 * involving fvp and tvp. fvp is bogus already here, and tvp 4386 * will become bogus soon in any sensible file system, so the 4387 * only purpose in putting these checks here is to give lip 4388 * service to these screw cases and to acknowledge that they 4389 * exist, not actually to handle them, but here you go 4390 * anyway... 4391 */ 4392 4393 /* 4394 * Acknowledge that directories and non-directories aren't 4395 * suposed to mix. 4396 */ 4397 if (tvp != NULL) { 4398 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4399 error = ENOTDIR; 4400 goto abort3; 4401 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4402 error = EISDIR; 4403 goto abort3; 4404 } 4405 } 4406 4407 /* 4408 * Acknowledge some random screw case, among the dozens that 4409 * might arise. 4410 */ 4411 if (fvp == tdvp) { 4412 error = EINVAL; 4413 goto abort3; 4414 } 4415 4416 /* 4417 * Acknowledge that POSIX has a wacky screw case. 4418 * 4419 * XXX Eventually the retain flag needs to be passed on to 4420 * VOP_RENAME. 4421 */ 4422 if (fvp == tvp) { 4423 if (retain) { 4424 error = 0; 4425 goto abort3; 4426 } else if ((fdvp == tdvp) && 4427 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4428 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4429 fnd.ni_cnd.cn_namelen))) { 4430 error = 0; 4431 goto abort3; 4432 } 4433 } 4434 4435 /* 4436 * Make sure veriexec can screw us up. (But a race can screw 4437 * up veriexec, of course -- remember, fvp and (soon) tvp are 4438 * bogus.) 4439 */ 4440 #if NVERIEXEC > 0 4441 { 4442 char *f1, *f2; 4443 size_t f1_len; 4444 size_t f2_len; 4445 4446 f1_len = fnd.ni_cnd.cn_namelen + 1; 4447 f1 = kmem_alloc(f1_len, KM_SLEEP); 4448 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4449 4450 f2_len = tnd.ni_cnd.cn_namelen + 1; 4451 f2 = kmem_alloc(f2_len, KM_SLEEP); 4452 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4453 4454 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4455 4456 kmem_free(f1, f1_len); 4457 kmem_free(f2, f2_len); 4458 4459 if (error) 4460 goto abort3; 4461 } 4462 #endif /* NVERIEXEC > 0 */ 4463 4464 /* 4465 * All ready. Incant the rename vop. 4466 */ 4467 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4468 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4469 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4470 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4471 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4472 4473 /* 4474 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4475 * tdvp and tvp. But we can't assert any of that. 4476 */ 4477 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4478 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4479 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4480 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4481 4482 /* 4483 * So all we have left to do is to drop the rename lock and 4484 * destroy the pathbufs. 4485 */ 4486 VFS_RENAMELOCK_EXIT(mp); 4487 fstrans_done(mp); 4488 goto out2; 4489 4490 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4491 VOP_UNLOCK(tvp); 4492 abort2: VOP_UNLOCK(tdvp); 4493 VFS_RENAMELOCK_EXIT(mp); 4494 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4495 vrele(tdvp); 4496 if (tvp != NULL) 4497 vrele(tvp); 4498 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4499 vrele(fdvp); 4500 vrele(fvp); 4501 fstrans_done(mp); 4502 out2: pathbuf_destroy(tpb); 4503 out1: pathbuf_destroy(fpb); 4504 out0: return error; 4505 } 4506 4507 /* 4508 * Make a directory file. 4509 */ 4510 /* ARGSUSED */ 4511 int 4512 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4513 { 4514 /* { 4515 syscallarg(const char *) path; 4516 syscallarg(int) mode; 4517 } */ 4518 4519 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4520 SCARG(uap, mode), UIO_USERSPACE); 4521 } 4522 4523 int 4524 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4525 register_t *retval) 4526 { 4527 /* { 4528 syscallarg(int) fd; 4529 syscallarg(const char *) path; 4530 syscallarg(int) mode; 4531 } */ 4532 4533 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4534 SCARG(uap, mode), UIO_USERSPACE); 4535 } 4536 4537 4538 int 4539 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4540 { 4541 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4542 } 4543 4544 static int 4545 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4546 enum uio_seg seg) 4547 { 4548 struct proc *p = curlwp->l_proc; 4549 struct vnode *vp; 4550 struct vattr vattr; 4551 int error; 4552 struct pathbuf *pb; 4553 struct nameidata nd; 4554 4555 KASSERT(l != NULL || fdat == AT_FDCWD); 4556 4557 /* XXX bollocks, should pass in a pathbuf */ 4558 error = pathbuf_maybe_copyin(path, seg, &pb); 4559 if (error) { 4560 return error; 4561 } 4562 4563 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4564 4565 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4566 pathbuf_destroy(pb); 4567 return (error); 4568 } 4569 vp = nd.ni_vp; 4570 if (vp != NULL) { 4571 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4572 if (nd.ni_dvp == vp) 4573 vrele(nd.ni_dvp); 4574 else 4575 vput(nd.ni_dvp); 4576 vrele(vp); 4577 pathbuf_destroy(pb); 4578 return (EEXIST); 4579 } 4580 vattr_null(&vattr); 4581 vattr.va_type = VDIR; 4582 /* We will read cwdi->cwdi_cmask unlocked. */ 4583 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4584 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4585 if (!error) 4586 vrele(nd.ni_vp); 4587 vput(nd.ni_dvp); 4588 pathbuf_destroy(pb); 4589 return (error); 4590 } 4591 4592 /* 4593 * Remove a directory file. 4594 */ 4595 /* ARGSUSED */ 4596 int 4597 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4598 { 4599 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4600 AT_REMOVEDIR, UIO_USERSPACE); 4601 } 4602 4603 /* 4604 * Read a block of directory entries in a file system independent format. 4605 */ 4606 int 4607 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4608 { 4609 /* { 4610 syscallarg(int) fd; 4611 syscallarg(char *) buf; 4612 syscallarg(size_t) count; 4613 } */ 4614 file_t *fp; 4615 int error, done; 4616 4617 /* fd_getvnode() will use the descriptor for us */ 4618 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4619 return (error); 4620 if ((fp->f_flag & FREAD) == 0) { 4621 error = EBADF; 4622 goto out; 4623 } 4624 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4625 SCARG(uap, count), &done, l, 0, 0); 4626 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4627 *retval = done; 4628 out: 4629 fd_putfile(SCARG(uap, fd)); 4630 return (error); 4631 } 4632 4633 /* 4634 * Set the mode mask for creation of filesystem nodes. 4635 */ 4636 int 4637 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4638 { 4639 /* { 4640 syscallarg(mode_t) newmask; 4641 } */ 4642 struct proc *p = l->l_proc; 4643 struct cwdinfo *cwdi; 4644 4645 /* 4646 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4647 * important is that we serialize changes to the mask. The 4648 * rw_exit() will issue a write memory barrier on our behalf, 4649 * and force the changes out to other CPUs (as it must use an 4650 * atomic operation, draining the local CPU's store buffers). 4651 */ 4652 cwdi = p->p_cwdi; 4653 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4654 *retval = cwdi->cwdi_cmask; 4655 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4656 rw_exit(&cwdi->cwdi_lock); 4657 4658 return (0); 4659 } 4660 4661 int 4662 dorevoke(struct vnode *vp, kauth_cred_t cred) 4663 { 4664 struct vattr vattr; 4665 int error, fs_decision; 4666 4667 vn_lock(vp, LK_SHARED | LK_RETRY); 4668 error = VOP_GETATTR(vp, &vattr, cred); 4669 VOP_UNLOCK(vp); 4670 if (error != 0) 4671 return error; 4672 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4673 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4674 fs_decision); 4675 if (!error) 4676 VOP_REVOKE(vp, REVOKEALL); 4677 return (error); 4678 } 4679 4680 /* 4681 * Void all references to file by ripping underlying filesystem 4682 * away from vnode. 4683 */ 4684 /* ARGSUSED */ 4685 int 4686 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4687 { 4688 /* { 4689 syscallarg(const char *) path; 4690 } */ 4691 struct vnode *vp; 4692 int error; 4693 4694 error = namei_simple_user(SCARG(uap, path), 4695 NSM_FOLLOW_TRYEMULROOT, &vp); 4696 if (error != 0) 4697 return (error); 4698 error = dorevoke(vp, l->l_cred); 4699 vrele(vp); 4700 return (error); 4701 } 4702 4703 /* 4704 * Allocate backing store for a file, filling a hole without having to 4705 * explicitly write anything out. 4706 */ 4707 /* ARGSUSED */ 4708 int 4709 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4710 register_t *retval) 4711 { 4712 /* { 4713 syscallarg(int) fd; 4714 syscallarg(off_t) pos; 4715 syscallarg(off_t) len; 4716 } */ 4717 int fd; 4718 off_t pos, len; 4719 struct file *fp; 4720 struct vnode *vp; 4721 int error; 4722 4723 fd = SCARG(uap, fd); 4724 pos = SCARG(uap, pos); 4725 len = SCARG(uap, len); 4726 4727 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4728 *retval = EINVAL; 4729 return 0; 4730 } 4731 4732 error = fd_getvnode(fd, &fp); 4733 if (error) { 4734 *retval = error; 4735 return 0; 4736 } 4737 if ((fp->f_flag & FWRITE) == 0) { 4738 error = EBADF; 4739 goto fail; 4740 } 4741 vp = fp->f_vnode; 4742 4743 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4744 if (vp->v_type == VDIR) { 4745 error = EISDIR; 4746 } else { 4747 error = VOP_FALLOCATE(vp, pos, len); 4748 } 4749 VOP_UNLOCK(vp); 4750 4751 fail: 4752 fd_putfile(fd); 4753 *retval = error; 4754 return 0; 4755 } 4756 4757 /* 4758 * Deallocate backing store for a file, creating a hole. Also used for 4759 * invoking TRIM on disks. 4760 */ 4761 /* ARGSUSED */ 4762 int 4763 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4764 register_t *retval) 4765 { 4766 /* { 4767 syscallarg(int) fd; 4768 syscallarg(off_t) pos; 4769 syscallarg(off_t) len; 4770 } */ 4771 int fd; 4772 off_t pos, len; 4773 struct file *fp; 4774 struct vnode *vp; 4775 int error; 4776 4777 fd = SCARG(uap, fd); 4778 pos = SCARG(uap, pos); 4779 len = SCARG(uap, len); 4780 4781 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4782 return EINVAL; 4783 } 4784 4785 error = fd_getvnode(fd, &fp); 4786 if (error) { 4787 return error; 4788 } 4789 if ((fp->f_flag & FWRITE) == 0) { 4790 error = EBADF; 4791 goto fail; 4792 } 4793 vp = fp->f_vnode; 4794 4795 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4796 if (vp->v_type == VDIR) { 4797 error = EISDIR; 4798 } else { 4799 error = VOP_FDISCARD(vp, pos, len); 4800 } 4801 VOP_UNLOCK(vp); 4802 4803 fail: 4804 fd_putfile(fd); 4805 return error; 4806 } 4807