1 /* $NetBSD: vfs_syscalls.c,v 1.518 2018/01/09 03:31:13 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.518 2018/01/09 03:31:13 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 112 #include <miscfs/genfs/genfs.h> 113 #include <miscfs/specfs/specdev.h> 114 115 #include <nfs/rpcv2.h> 116 #include <nfs/nfsproto.h> 117 #include <nfs/nfs.h> 118 #include <nfs/nfs_var.h> 119 120 /* XXX this shouldn't be here */ 121 #ifndef OFF_T_MAX 122 #define OFF_T_MAX __type_max(off_t) 123 #endif 124 125 static int change_flags(struct vnode *, u_long, struct lwp *); 126 static int change_mode(struct vnode *, int, struct lwp *); 127 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 128 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 129 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 130 enum uio_seg); 131 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 132 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 133 enum uio_seg); 134 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 135 enum uio_seg, int); 136 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 137 size_t, register_t *); 138 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 139 140 static int fd_nameiat(struct lwp *, int, struct nameidata *); 141 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 142 namei_simple_flags_t, struct vnode **); 143 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const int nmountcompatnames = __arraycount(mountcompatnames); 167 168 static int 169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 170 { 171 file_t *dfp; 172 int error; 173 174 if (fdat != AT_FDCWD) { 175 if ((error = fd_getvnode(fdat, &dfp)) != 0) 176 goto out; 177 178 NDAT(ndp, dfp->f_vnode); 179 } 180 181 error = namei(ndp); 182 183 if (fdat != AT_FDCWD) 184 fd_putfile(fdat); 185 out: 186 return error; 187 } 188 189 static int 190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 191 namei_simple_flags_t sflags, struct vnode **vp_ret) 192 { 193 file_t *dfp; 194 struct vnode *dvp; 195 int error; 196 197 if (fdat != AT_FDCWD) { 198 if ((error = fd_getvnode(fdat, &dfp)) != 0) 199 goto out; 200 201 dvp = dfp->f_vnode; 202 } else { 203 dvp = NULL; 204 } 205 206 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 207 208 if (fdat != AT_FDCWD) 209 fd_putfile(fdat); 210 out: 211 return error; 212 } 213 214 static int 215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 216 { 217 int error; 218 219 fp->f_flag = flags & FMASK; 220 fp->f_type = DTYPE_VNODE; 221 fp->f_ops = &vnops; 222 fp->f_vnode = vp; 223 224 if (flags & (O_EXLOCK | O_SHLOCK)) { 225 struct flock lf; 226 int type; 227 228 lf.l_whence = SEEK_SET; 229 lf.l_start = 0; 230 lf.l_len = 0; 231 if (flags & O_EXLOCK) 232 lf.l_type = F_WRLCK; 233 else 234 lf.l_type = F_RDLCK; 235 type = F_FLOCK; 236 if ((flags & FNONBLOCK) == 0) 237 type |= F_WAIT; 238 VOP_UNLOCK(vp); 239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 240 if (error) { 241 (void) vn_close(vp, fp->f_flag, fp->f_cred); 242 fd_abort(l->l_proc, fp, indx); 243 return error; 244 } 245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 246 atomic_or_uint(&fp->f_flag, FHASLOCK); 247 } 248 if (flags & O_CLOEXEC) 249 fd_set_exclose(l, indx, true); 250 return 0; 251 } 252 253 static int 254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 255 void *data, size_t *data_len) 256 { 257 struct mount *mp; 258 int error = 0, saved_flags; 259 260 mp = vp->v_mount; 261 saved_flags = mp->mnt_flag; 262 263 /* We can operate only on VV_ROOT nodes. */ 264 if ((vp->v_vflag & VV_ROOT) == 0) { 265 error = EINVAL; 266 goto out; 267 } 268 269 /* 270 * We only allow the filesystem to be reloaded if it 271 * is currently mounted read-only. Additionally, we 272 * prevent read-write to read-only downgrades. 273 */ 274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 275 (mp->mnt_flag & MNT_RDONLY) == 0 && 276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 277 error = EOPNOTSUPP; /* Needs translation */ 278 goto out; 279 } 280 281 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 282 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 283 if (error) 284 goto out; 285 286 error = vfs_suspend(mp, 0); 287 if (error) 288 goto out; 289 290 mutex_enter(&mp->mnt_updating); 291 292 mp->mnt_flag &= ~MNT_OP_FLAGS; 293 mp->mnt_flag |= flags & MNT_OP_FLAGS; 294 295 /* 296 * Set the mount level flags. 297 */ 298 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 299 if ((flags & MNT_RDONLY)) 300 mp->mnt_iflag |= IMNT_WANTRDONLY; 301 else 302 mp->mnt_iflag |= IMNT_WANTRDWR; 303 } 304 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 305 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 306 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 307 mp->mnt_flag &= ~MNT_RDONLY; 308 309 error = VFS_MOUNT(mp, path, data, data_len); 310 311 if (error && data != NULL) { 312 int error2; 313 314 /* 315 * Update failed; let's try and see if it was an 316 * export request. For compat with 3.0 and earlier. 317 */ 318 error2 = vfs_hooks_reexport(mp, path, data); 319 320 /* 321 * Only update error code if the export request was 322 * understood but some problem occurred while 323 * processing it. 324 */ 325 if (error2 != EJUSTRETURN) 326 error = error2; 327 } 328 329 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 330 mp->mnt_flag |= MNT_RDONLY; 331 if (error) 332 mp->mnt_flag = saved_flags; 333 mp->mnt_flag &= ~MNT_OP_FLAGS; 334 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 335 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 336 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 337 vfs_syncer_add_to_worklist(mp); 338 } else { 339 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 340 vfs_syncer_remove_from_worklist(mp); 341 } 342 mutex_exit(&mp->mnt_updating); 343 vfs_resume(mp); 344 345 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 346 (flags & MNT_EXTATTR)) { 347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 348 NULL, 0, NULL) != 0) { 349 printf("%s: failed to start extattr, error = %d", 350 mp->mnt_stat.f_mntonname, error); 351 mp->mnt_flag &= ~MNT_EXTATTR; 352 } 353 } 354 355 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 356 !(flags & MNT_EXTATTR)) { 357 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 358 NULL, 0, NULL) != 0) { 359 printf("%s: failed to stop extattr, error = %d", 360 mp->mnt_stat.f_mntonname, error); 361 mp->mnt_flag |= MNT_RDONLY; 362 } 363 } 364 out: 365 return (error); 366 } 367 368 static int 369 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 370 struct vfsops **vfsops) 371 { 372 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 373 int error; 374 375 if (type_seg == UIO_USERSPACE) { 376 /* Copy file-system type from userspace. */ 377 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 378 } else { 379 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 380 KASSERT(error == 0); 381 } 382 383 if (error) { 384 /* 385 * Historically, filesystem types were identified by numbers. 386 * If we get an integer for the filesystem type instead of a 387 * string, we check to see if it matches one of the historic 388 * filesystem types. 389 */ 390 u_long fsindex = (u_long)fstype; 391 if (fsindex >= nmountcompatnames || 392 mountcompatnames[fsindex] == NULL) 393 return ENODEV; 394 strlcpy(fstypename, mountcompatnames[fsindex], 395 sizeof(fstypename)); 396 } 397 398 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 399 if (strcmp(fstypename, "ufs") == 0) 400 fstypename[0] = 'f'; 401 402 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 403 return 0; 404 405 /* If we can autoload a vfs module, try again */ 406 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 407 408 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 409 return 0; 410 411 return ENODEV; 412 } 413 414 static int 415 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 416 void *data, size_t *data_len) 417 { 418 struct mount *mp; 419 int error; 420 421 /* If MNT_GETARGS is specified, it should be the only flag. */ 422 if (flags & ~MNT_GETARGS) 423 return EINVAL; 424 425 mp = vp->v_mount; 426 427 /* XXX: probably some notion of "can see" here if we want isolation. */ 428 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 429 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 430 if (error) 431 return error; 432 433 if ((vp->v_vflag & VV_ROOT) == 0) 434 return EINVAL; 435 436 if (vfs_busy(mp)) 437 return EPERM; 438 439 mutex_enter(&mp->mnt_updating); 440 mp->mnt_flag &= ~MNT_OP_FLAGS; 441 mp->mnt_flag |= MNT_GETARGS; 442 error = VFS_MOUNT(mp, path, data, data_len); 443 mp->mnt_flag &= ~MNT_OP_FLAGS; 444 mutex_exit(&mp->mnt_updating); 445 446 vfs_unbusy(mp); 447 return (error); 448 } 449 450 int 451 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 452 { 453 /* { 454 syscallarg(const char *) type; 455 syscallarg(const char *) path; 456 syscallarg(int) flags; 457 syscallarg(void *) data; 458 syscallarg(size_t) data_len; 459 } */ 460 461 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 462 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 463 SCARG(uap, data_len), retval); 464 } 465 466 int 467 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 468 const char *path, int flags, void *data, enum uio_seg data_seg, 469 size_t data_len, register_t *retval) 470 { 471 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 472 struct vnode *vp; 473 void *data_buf = data; 474 bool vfsopsrele = false; 475 size_t alloc_sz = 0; 476 int error; 477 478 /* 479 * Get vnode to be covered 480 */ 481 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 482 if (error != 0) { 483 vp = NULL; 484 goto done; 485 } 486 487 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 488 vfsops = vp->v_mount->mnt_op; 489 } else { 490 /* 'type' is userspace */ 491 error = mount_get_vfsops(type, type_seg, &vfsops); 492 if (error != 0) 493 goto done; 494 vfsopsrele = true; 495 } 496 497 /* 498 * We allow data to be NULL, even for userspace. Some fs's don't need 499 * it. The others will handle NULL. 500 */ 501 if (data != NULL && data_seg == UIO_USERSPACE) { 502 if (data_len == 0) { 503 /* No length supplied, use default for filesystem */ 504 data_len = vfsops->vfs_min_mount_data; 505 506 /* 507 * Hopefully a longer buffer won't make copyin() fail. 508 * For compatibility with 3.0 and earlier. 509 */ 510 if (flags & MNT_UPDATE 511 && data_len < sizeof (struct mnt_export_args30)) 512 data_len = sizeof (struct mnt_export_args30); 513 } 514 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 515 error = EINVAL; 516 goto done; 517 } 518 alloc_sz = data_len; 519 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 520 521 /* NFS needs the buffer even for mnt_getargs .... */ 522 error = copyin(data, data_buf, data_len); 523 if (error != 0) 524 goto done; 525 } 526 527 if (flags & MNT_GETARGS) { 528 if (data_len == 0) { 529 error = EINVAL; 530 goto done; 531 } 532 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 533 if (error != 0) 534 goto done; 535 if (data_seg == UIO_USERSPACE) 536 error = copyout(data_buf, data, data_len); 537 *retval = data_len; 538 } else if (flags & MNT_UPDATE) { 539 error = mount_update(l, vp, path, flags, data_buf, &data_len); 540 } else { 541 /* Locking is handled internally in mount_domount(). */ 542 KASSERT(vfsopsrele == true); 543 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 544 &data_len); 545 vfsopsrele = false; 546 } 547 if (!error) 548 KNOTE(&fs_klist, VQ_MOUNT); 549 550 done: 551 if (vfsopsrele) 552 vfs_delref(vfsops); 553 if (vp != NULL) { 554 vrele(vp); 555 } 556 if (data_buf != data) 557 kmem_free(data_buf, alloc_sz); 558 return (error); 559 } 560 561 /* 562 * Unmount a file system. 563 * 564 * Note: unmount takes a path to the vnode mounted on as argument, 565 * not special file (as before). 566 */ 567 /* ARGSUSED */ 568 int 569 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 570 { 571 /* { 572 syscallarg(const char *) path; 573 syscallarg(int) flags; 574 } */ 575 struct vnode *vp; 576 struct mount *mp; 577 int error; 578 struct pathbuf *pb; 579 struct nameidata nd; 580 581 error = pathbuf_copyin(SCARG(uap, path), &pb); 582 if (error) { 583 return error; 584 } 585 586 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 587 if ((error = namei(&nd)) != 0) { 588 pathbuf_destroy(pb); 589 return error; 590 } 591 vp = nd.ni_vp; 592 pathbuf_destroy(pb); 593 594 mp = vp->v_mount; 595 vfs_ref(mp); 596 VOP_UNLOCK(vp); 597 598 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 599 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 600 if (error) { 601 vrele(vp); 602 vfs_rele(mp); 603 return (error); 604 } 605 606 /* 607 * Don't allow unmounting the root file system. 608 */ 609 if (mp->mnt_flag & MNT_ROOTFS) { 610 vrele(vp); 611 vfs_rele(mp); 612 return (EINVAL); 613 } 614 615 /* 616 * Must be the root of the filesystem 617 */ 618 if ((vp->v_vflag & VV_ROOT) == 0) { 619 vrele(vp); 620 vfs_rele(mp); 621 return (EINVAL); 622 } 623 624 vrele(vp); 625 error = dounmount(mp, SCARG(uap, flags), l); 626 vfs_rele(mp); 627 if (!error) 628 KNOTE(&fs_klist, VQ_UNMOUNT); 629 return error; 630 } 631 632 /* 633 * Sync each mounted filesystem. 634 */ 635 #ifdef DEBUG 636 int syncprt = 0; 637 struct ctldebug debug0 = { "syncprt", &syncprt }; 638 #endif 639 640 void 641 do_sys_sync(struct lwp *l) 642 { 643 mount_iterator_t *iter; 644 struct mount *mp; 645 int asyncflag; 646 647 mountlist_iterator_init(&iter); 648 while ((mp = mountlist_iterator_next(iter)) != NULL) { 649 mutex_enter(&mp->mnt_updating); 650 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 651 asyncflag = mp->mnt_flag & MNT_ASYNC; 652 mp->mnt_flag &= ~MNT_ASYNC; 653 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 654 if (asyncflag) 655 mp->mnt_flag |= MNT_ASYNC; 656 } 657 mutex_exit(&mp->mnt_updating); 658 } 659 mountlist_iterator_destroy(iter); 660 #ifdef DEBUG 661 if (syncprt) 662 vfs_bufstats(); 663 #endif /* DEBUG */ 664 } 665 666 /* ARGSUSED */ 667 int 668 sys_sync(struct lwp *l, const void *v, register_t *retval) 669 { 670 do_sys_sync(l); 671 return (0); 672 } 673 674 675 /* 676 * Access or change filesystem quotas. 677 * 678 * (this is really 14 different calls bundled into one) 679 */ 680 681 static int 682 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 683 { 684 struct quotastat info_k; 685 int error; 686 687 /* ensure any padding bytes are cleared */ 688 memset(&info_k, 0, sizeof(info_k)); 689 690 error = vfs_quotactl_stat(mp, &info_k); 691 if (error) { 692 return error; 693 } 694 695 return copyout(&info_k, info_u, sizeof(info_k)); 696 } 697 698 static int 699 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 700 struct quotaidtypestat *info_u) 701 { 702 struct quotaidtypestat info_k; 703 int error; 704 705 /* ensure any padding bytes are cleared */ 706 memset(&info_k, 0, sizeof(info_k)); 707 708 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 709 if (error) { 710 return error; 711 } 712 713 return copyout(&info_k, info_u, sizeof(info_k)); 714 } 715 716 static int 717 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 718 struct quotaobjtypestat *info_u) 719 { 720 struct quotaobjtypestat info_k; 721 int error; 722 723 /* ensure any padding bytes are cleared */ 724 memset(&info_k, 0, sizeof(info_k)); 725 726 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 727 if (error) { 728 return error; 729 } 730 731 return copyout(&info_k, info_u, sizeof(info_k)); 732 } 733 734 static int 735 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 736 struct quotaval *val_u) 737 { 738 struct quotakey key_k; 739 struct quotaval val_k; 740 int error; 741 742 /* ensure any padding bytes are cleared */ 743 memset(&val_k, 0, sizeof(val_k)); 744 745 error = copyin(key_u, &key_k, sizeof(key_k)); 746 if (error) { 747 return error; 748 } 749 750 error = vfs_quotactl_get(mp, &key_k, &val_k); 751 if (error) { 752 return error; 753 } 754 755 return copyout(&val_k, val_u, sizeof(val_k)); 756 } 757 758 static int 759 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 760 const struct quotaval *val_u) 761 { 762 struct quotakey key_k; 763 struct quotaval val_k; 764 int error; 765 766 error = copyin(key_u, &key_k, sizeof(key_k)); 767 if (error) { 768 return error; 769 } 770 771 error = copyin(val_u, &val_k, sizeof(val_k)); 772 if (error) { 773 return error; 774 } 775 776 return vfs_quotactl_put(mp, &key_k, &val_k); 777 } 778 779 static int 780 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 781 { 782 struct quotakey key_k; 783 int error; 784 785 error = copyin(key_u, &key_k, sizeof(key_k)); 786 if (error) { 787 return error; 788 } 789 790 return vfs_quotactl_del(mp, &key_k); 791 } 792 793 static int 794 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 795 { 796 struct quotakcursor cursor_k; 797 int error; 798 799 /* ensure any padding bytes are cleared */ 800 memset(&cursor_k, 0, sizeof(cursor_k)); 801 802 error = vfs_quotactl_cursoropen(mp, &cursor_k); 803 if (error) { 804 return error; 805 } 806 807 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 808 } 809 810 static int 811 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 812 { 813 struct quotakcursor cursor_k; 814 int error; 815 816 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 817 if (error) { 818 return error; 819 } 820 821 return vfs_quotactl_cursorclose(mp, &cursor_k); 822 } 823 824 static int 825 do_sys_quotactl_cursorskipidtype(struct mount *mp, 826 struct quotakcursor *cursor_u, int idtype) 827 { 828 struct quotakcursor cursor_k; 829 int error; 830 831 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 832 if (error) { 833 return error; 834 } 835 836 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 837 if (error) { 838 return error; 839 } 840 841 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 842 } 843 844 static int 845 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 846 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 847 unsigned *ret_u) 848 { 849 #define CGET_STACK_MAX 8 850 struct quotakcursor cursor_k; 851 struct quotakey stackkeys[CGET_STACK_MAX]; 852 struct quotaval stackvals[CGET_STACK_MAX]; 853 struct quotakey *keys_k; 854 struct quotaval *vals_k; 855 unsigned ret_k; 856 int error; 857 858 if (maxnum > 128) { 859 maxnum = 128; 860 } 861 862 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 863 if (error) { 864 return error; 865 } 866 867 if (maxnum <= CGET_STACK_MAX) { 868 keys_k = stackkeys; 869 vals_k = stackvals; 870 /* ensure any padding bytes are cleared */ 871 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 872 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 873 } else { 874 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 875 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 876 } 877 878 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 879 &ret_k); 880 if (error) { 881 goto fail; 882 } 883 884 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 885 if (error) { 886 goto fail; 887 } 888 889 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 890 if (error) { 891 goto fail; 892 } 893 894 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 895 if (error) { 896 goto fail; 897 } 898 899 /* do last to maximize the chance of being able to recover a failure */ 900 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 901 902 fail: 903 if (keys_k != stackkeys) { 904 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 905 } 906 if (vals_k != stackvals) { 907 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 908 } 909 return error; 910 } 911 912 static int 913 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 914 int *ret_u) 915 { 916 struct quotakcursor cursor_k; 917 int ret_k; 918 int error; 919 920 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 921 if (error) { 922 return error; 923 } 924 925 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 926 if (error) { 927 return error; 928 } 929 930 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 931 if (error) { 932 return error; 933 } 934 935 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 936 } 937 938 static int 939 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 940 { 941 struct quotakcursor cursor_k; 942 int error; 943 944 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 945 if (error) { 946 return error; 947 } 948 949 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 950 if (error) { 951 return error; 952 } 953 954 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 955 } 956 957 static int 958 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 959 { 960 char *path_k; 961 int error; 962 963 /* XXX this should probably be a struct pathbuf */ 964 path_k = PNBUF_GET(); 965 error = copyin(path_u, path_k, PATH_MAX); 966 if (error) { 967 PNBUF_PUT(path_k); 968 return error; 969 } 970 971 error = vfs_quotactl_quotaon(mp, idtype, path_k); 972 973 PNBUF_PUT(path_k); 974 return error; 975 } 976 977 static int 978 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 979 { 980 return vfs_quotactl_quotaoff(mp, idtype); 981 } 982 983 int 984 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 985 { 986 struct mount *mp; 987 struct vnode *vp; 988 int error; 989 990 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 991 if (error != 0) 992 return (error); 993 mp = vp->v_mount; 994 995 switch (args->qc_op) { 996 case QUOTACTL_STAT: 997 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 998 break; 999 case QUOTACTL_IDTYPESTAT: 1000 error = do_sys_quotactl_idtypestat(mp, 1001 args->u.idtypestat.qc_idtype, 1002 args->u.idtypestat.qc_info); 1003 break; 1004 case QUOTACTL_OBJTYPESTAT: 1005 error = do_sys_quotactl_objtypestat(mp, 1006 args->u.objtypestat.qc_objtype, 1007 args->u.objtypestat.qc_info); 1008 break; 1009 case QUOTACTL_GET: 1010 error = do_sys_quotactl_get(mp, 1011 args->u.get.qc_key, 1012 args->u.get.qc_val); 1013 break; 1014 case QUOTACTL_PUT: 1015 error = do_sys_quotactl_put(mp, 1016 args->u.put.qc_key, 1017 args->u.put.qc_val); 1018 break; 1019 case QUOTACTL_DEL: 1020 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1021 break; 1022 case QUOTACTL_CURSOROPEN: 1023 error = do_sys_quotactl_cursoropen(mp, 1024 args->u.cursoropen.qc_cursor); 1025 break; 1026 case QUOTACTL_CURSORCLOSE: 1027 error = do_sys_quotactl_cursorclose(mp, 1028 args->u.cursorclose.qc_cursor); 1029 break; 1030 case QUOTACTL_CURSORSKIPIDTYPE: 1031 error = do_sys_quotactl_cursorskipidtype(mp, 1032 args->u.cursorskipidtype.qc_cursor, 1033 args->u.cursorskipidtype.qc_idtype); 1034 break; 1035 case QUOTACTL_CURSORGET: 1036 error = do_sys_quotactl_cursorget(mp, 1037 args->u.cursorget.qc_cursor, 1038 args->u.cursorget.qc_keys, 1039 args->u.cursorget.qc_vals, 1040 args->u.cursorget.qc_maxnum, 1041 args->u.cursorget.qc_ret); 1042 break; 1043 case QUOTACTL_CURSORATEND: 1044 error = do_sys_quotactl_cursoratend(mp, 1045 args->u.cursoratend.qc_cursor, 1046 args->u.cursoratend.qc_ret); 1047 break; 1048 case QUOTACTL_CURSORREWIND: 1049 error = do_sys_quotactl_cursorrewind(mp, 1050 args->u.cursorrewind.qc_cursor); 1051 break; 1052 case QUOTACTL_QUOTAON: 1053 error = do_sys_quotactl_quotaon(mp, 1054 args->u.quotaon.qc_idtype, 1055 args->u.quotaon.qc_quotafile); 1056 break; 1057 case QUOTACTL_QUOTAOFF: 1058 error = do_sys_quotactl_quotaoff(mp, 1059 args->u.quotaoff.qc_idtype); 1060 break; 1061 default: 1062 error = EINVAL; 1063 break; 1064 } 1065 1066 vrele(vp); 1067 return error; 1068 } 1069 1070 /* ARGSUSED */ 1071 int 1072 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1073 register_t *retval) 1074 { 1075 /* { 1076 syscallarg(const char *) path; 1077 syscallarg(struct quotactl_args *) args; 1078 } */ 1079 struct quotactl_args args; 1080 int error; 1081 1082 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1083 if (error) { 1084 return error; 1085 } 1086 1087 return do_sys_quotactl(SCARG(uap, path), &args); 1088 } 1089 1090 int 1091 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1092 int root) 1093 { 1094 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1095 int error = 0; 1096 1097 /* 1098 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1099 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1100 * overrides MNT_NOWAIT. 1101 */ 1102 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1103 (flags != MNT_WAIT && flags != 0)) { 1104 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1105 goto done; 1106 } 1107 1108 /* Get the filesystem stats now */ 1109 memset(sp, 0, sizeof(*sp)); 1110 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1111 return error; 1112 } 1113 1114 if (cwdi->cwdi_rdir == NULL) 1115 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1116 done: 1117 if (cwdi->cwdi_rdir != NULL) { 1118 size_t len; 1119 char *bp; 1120 char c; 1121 char *path = PNBUF_GET(); 1122 1123 bp = path + MAXPATHLEN; 1124 *--bp = '\0'; 1125 rw_enter(&cwdi->cwdi_lock, RW_READER); 1126 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1127 MAXPATHLEN / 2, 0, l); 1128 rw_exit(&cwdi->cwdi_lock); 1129 if (error) { 1130 PNBUF_PUT(path); 1131 return error; 1132 } 1133 len = strlen(bp); 1134 if (len != 1) { 1135 /* 1136 * for mount points that are below our root, we can see 1137 * them, so we fix up the pathname and return them. The 1138 * rest we cannot see, so we don't allow viewing the 1139 * data. 1140 */ 1141 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1142 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1143 (void)strlcpy(sp->f_mntonname, 1144 c == '\0' ? "/" : &sp->f_mntonname[len], 1145 sizeof(sp->f_mntonname)); 1146 } else { 1147 if (root) 1148 (void)strlcpy(sp->f_mntonname, "/", 1149 sizeof(sp->f_mntonname)); 1150 else 1151 error = EPERM; 1152 } 1153 } 1154 PNBUF_PUT(path); 1155 } 1156 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1157 return error; 1158 } 1159 1160 /* 1161 * Get filesystem statistics by path. 1162 */ 1163 int 1164 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1165 { 1166 struct mount *mp; 1167 int error; 1168 struct vnode *vp; 1169 1170 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1171 if (error != 0) 1172 return error; 1173 mp = vp->v_mount; 1174 error = dostatvfs(mp, sb, l, flags, 1); 1175 vrele(vp); 1176 return error; 1177 } 1178 1179 /* ARGSUSED */ 1180 int 1181 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1182 { 1183 /* { 1184 syscallarg(const char *) path; 1185 syscallarg(struct statvfs *) buf; 1186 syscallarg(int) flags; 1187 } */ 1188 struct statvfs *sb; 1189 int error; 1190 1191 sb = STATVFSBUF_GET(); 1192 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1193 if (error == 0) 1194 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1195 STATVFSBUF_PUT(sb); 1196 return error; 1197 } 1198 1199 /* 1200 * Get filesystem statistics by fd. 1201 */ 1202 int 1203 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1204 { 1205 file_t *fp; 1206 struct mount *mp; 1207 int error; 1208 1209 /* fd_getvnode() will use the descriptor for us */ 1210 if ((error = fd_getvnode(fd, &fp)) != 0) 1211 return (error); 1212 mp = fp->f_vnode->v_mount; 1213 error = dostatvfs(mp, sb, curlwp, flags, 1); 1214 fd_putfile(fd); 1215 return error; 1216 } 1217 1218 /* ARGSUSED */ 1219 int 1220 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1221 { 1222 /* { 1223 syscallarg(int) fd; 1224 syscallarg(struct statvfs *) buf; 1225 syscallarg(int) flags; 1226 } */ 1227 struct statvfs *sb; 1228 int error; 1229 1230 sb = STATVFSBUF_GET(); 1231 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1232 if (error == 0) 1233 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1234 STATVFSBUF_PUT(sb); 1235 return error; 1236 } 1237 1238 1239 /* 1240 * Get statistics on all filesystems. 1241 */ 1242 int 1243 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1244 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1245 register_t *retval) 1246 { 1247 int root = 0; 1248 mount_iterator_t *iter; 1249 struct proc *p = l->l_proc; 1250 struct mount *mp; 1251 struct statvfs *sb; 1252 size_t count, maxcount; 1253 int error = 0; 1254 1255 sb = STATVFSBUF_GET(); 1256 maxcount = bufsize / entry_sz; 1257 count = 0; 1258 mountlist_iterator_init(&iter); 1259 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1260 if (sfsp && count < maxcount) { 1261 error = dostatvfs(mp, sb, l, flags, 0); 1262 if (error) { 1263 error = 0; 1264 continue; 1265 } 1266 error = copyfn(sb, sfsp, entry_sz); 1267 if (error) 1268 goto out; 1269 sfsp = (char *)sfsp + entry_sz; 1270 root |= strcmp(sb->f_mntonname, "/") == 0; 1271 } 1272 count++; 1273 } 1274 1275 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1276 /* 1277 * fake a root entry 1278 */ 1279 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1280 sb, l, flags, 1); 1281 if (error != 0) 1282 goto out; 1283 if (sfsp) { 1284 error = copyfn(sb, sfsp, entry_sz); 1285 if (error != 0) 1286 goto out; 1287 } 1288 count++; 1289 } 1290 if (sfsp && count > maxcount) 1291 *retval = maxcount; 1292 else 1293 *retval = count; 1294 out: 1295 mountlist_iterator_destroy(iter); 1296 STATVFSBUF_PUT(sb); 1297 return error; 1298 } 1299 1300 int 1301 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1302 { 1303 /* { 1304 syscallarg(struct statvfs *) buf; 1305 syscallarg(size_t) bufsize; 1306 syscallarg(int) flags; 1307 } */ 1308 1309 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1310 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1311 } 1312 1313 /* 1314 * Change current working directory to a given file descriptor. 1315 */ 1316 /* ARGSUSED */ 1317 int 1318 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1319 { 1320 /* { 1321 syscallarg(int) fd; 1322 } */ 1323 struct proc *p = l->l_proc; 1324 struct cwdinfo *cwdi; 1325 struct vnode *vp, *tdp; 1326 struct mount *mp; 1327 file_t *fp; 1328 int error, fd; 1329 1330 /* fd_getvnode() will use the descriptor for us */ 1331 fd = SCARG(uap, fd); 1332 if ((error = fd_getvnode(fd, &fp)) != 0) 1333 return (error); 1334 vp = fp->f_vnode; 1335 1336 vref(vp); 1337 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1338 if (vp->v_type != VDIR) 1339 error = ENOTDIR; 1340 else 1341 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1342 if (error) { 1343 vput(vp); 1344 goto out; 1345 } 1346 while ((mp = vp->v_mountedhere) != NULL) { 1347 error = vfs_busy(mp); 1348 vput(vp); 1349 if (error != 0) 1350 goto out; 1351 error = VFS_ROOT(mp, &tdp); 1352 vfs_unbusy(mp); 1353 if (error) 1354 goto out; 1355 vp = tdp; 1356 } 1357 VOP_UNLOCK(vp); 1358 1359 /* 1360 * Disallow changing to a directory not under the process's 1361 * current root directory (if there is one). 1362 */ 1363 cwdi = p->p_cwdi; 1364 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1365 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1366 vrele(vp); 1367 error = EPERM; /* operation not permitted */ 1368 } else { 1369 vrele(cwdi->cwdi_cdir); 1370 cwdi->cwdi_cdir = vp; 1371 } 1372 rw_exit(&cwdi->cwdi_lock); 1373 1374 out: 1375 fd_putfile(fd); 1376 return (error); 1377 } 1378 1379 /* 1380 * Change this process's notion of the root directory to a given file 1381 * descriptor. 1382 */ 1383 int 1384 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1385 { 1386 struct proc *p = l->l_proc; 1387 struct vnode *vp; 1388 file_t *fp; 1389 int error, fd = SCARG(uap, fd); 1390 1391 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1392 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1393 return error; 1394 /* fd_getvnode() will use the descriptor for us */ 1395 if ((error = fd_getvnode(fd, &fp)) != 0) 1396 return error; 1397 vp = fp->f_vnode; 1398 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1399 if (vp->v_type != VDIR) 1400 error = ENOTDIR; 1401 else 1402 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1403 VOP_UNLOCK(vp); 1404 if (error) 1405 goto out; 1406 vref(vp); 1407 1408 change_root(p->p_cwdi, vp, l); 1409 1410 out: 1411 fd_putfile(fd); 1412 return (error); 1413 } 1414 1415 /* 1416 * Change current working directory (``.''). 1417 */ 1418 /* ARGSUSED */ 1419 int 1420 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1421 { 1422 /* { 1423 syscallarg(const char *) path; 1424 } */ 1425 struct proc *p = l->l_proc; 1426 struct cwdinfo *cwdi; 1427 int error; 1428 struct vnode *vp; 1429 1430 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1431 &vp, l)) != 0) 1432 return (error); 1433 cwdi = p->p_cwdi; 1434 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1435 vrele(cwdi->cwdi_cdir); 1436 cwdi->cwdi_cdir = vp; 1437 rw_exit(&cwdi->cwdi_lock); 1438 return (0); 1439 } 1440 1441 /* 1442 * Change notion of root (``/'') directory. 1443 */ 1444 /* ARGSUSED */ 1445 int 1446 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1447 { 1448 /* { 1449 syscallarg(const char *) path; 1450 } */ 1451 struct proc *p = l->l_proc; 1452 int error; 1453 struct vnode *vp; 1454 1455 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1456 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1457 return (error); 1458 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1459 &vp, l)) != 0) 1460 return (error); 1461 1462 change_root(p->p_cwdi, vp, l); 1463 1464 return (0); 1465 } 1466 1467 /* 1468 * Common routine for chroot and fchroot. 1469 * NB: callers need to properly authorize the change root operation. 1470 */ 1471 void 1472 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1473 { 1474 struct proc *p = l->l_proc; 1475 kauth_cred_t ncred; 1476 1477 ncred = kauth_cred_alloc(); 1478 1479 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1480 if (cwdi->cwdi_rdir != NULL) 1481 vrele(cwdi->cwdi_rdir); 1482 cwdi->cwdi_rdir = vp; 1483 1484 /* 1485 * Prevent escaping from chroot by putting the root under 1486 * the working directory. Silently chdir to / if we aren't 1487 * already there. 1488 */ 1489 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1490 /* 1491 * XXX would be more failsafe to change directory to a 1492 * deadfs node here instead 1493 */ 1494 vrele(cwdi->cwdi_cdir); 1495 vref(vp); 1496 cwdi->cwdi_cdir = vp; 1497 } 1498 rw_exit(&cwdi->cwdi_lock); 1499 1500 /* Get a write lock on the process credential. */ 1501 proc_crmod_enter(); 1502 1503 kauth_cred_clone(p->p_cred, ncred); 1504 kauth_proc_chroot(ncred, p->p_cwdi); 1505 1506 /* Broadcast our credentials to the process and other LWPs. */ 1507 proc_crmod_leave(ncred, p->p_cred, true); 1508 } 1509 1510 /* 1511 * Common routine for chroot and chdir. 1512 * XXX "where" should be enum uio_seg 1513 */ 1514 int 1515 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1516 { 1517 struct pathbuf *pb; 1518 struct nameidata nd; 1519 int error; 1520 1521 error = pathbuf_maybe_copyin(path, where, &pb); 1522 if (error) { 1523 return error; 1524 } 1525 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1526 if ((error = namei(&nd)) != 0) { 1527 pathbuf_destroy(pb); 1528 return error; 1529 } 1530 *vpp = nd.ni_vp; 1531 pathbuf_destroy(pb); 1532 1533 if ((*vpp)->v_type != VDIR) 1534 error = ENOTDIR; 1535 else 1536 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1537 1538 if (error) 1539 vput(*vpp); 1540 else 1541 VOP_UNLOCK(*vpp); 1542 return (error); 1543 } 1544 1545 /* 1546 * Internals of sys_open - path has already been converted into a pathbuf 1547 * (so we can easily reuse this function from other parts of the kernel, 1548 * like posix_spawn post-processing). 1549 */ 1550 int 1551 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1552 int open_mode, int *fd) 1553 { 1554 struct proc *p = l->l_proc; 1555 struct cwdinfo *cwdi = p->p_cwdi; 1556 file_t *fp; 1557 struct vnode *vp; 1558 int flags, cmode; 1559 int indx, error; 1560 struct nameidata nd; 1561 1562 if (open_flags & O_SEARCH) { 1563 open_flags &= ~(int)O_SEARCH; 1564 } 1565 1566 flags = FFLAGS(open_flags); 1567 if ((flags & (FREAD | FWRITE)) == 0) 1568 return EINVAL; 1569 1570 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1571 return error; 1572 } 1573 1574 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1575 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1576 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1577 if (dvp != NULL) 1578 NDAT(&nd, dvp); 1579 1580 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1581 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1582 fd_abort(p, fp, indx); 1583 if ((error == EDUPFD || error == EMOVEFD) && 1584 l->l_dupfd >= 0 && /* XXX from fdopen */ 1585 (error = 1586 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1587 *fd = indx; 1588 return 0; 1589 } 1590 if (error == ERESTART) 1591 error = EINTR; 1592 return error; 1593 } 1594 1595 l->l_dupfd = 0; 1596 vp = nd.ni_vp; 1597 1598 if ((error = open_setfp(l, fp, vp, indx, flags))) 1599 return error; 1600 1601 VOP_UNLOCK(vp); 1602 *fd = indx; 1603 fd_affix(p, fp, indx); 1604 return 0; 1605 } 1606 1607 int 1608 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1609 { 1610 struct pathbuf *pb; 1611 int error, oflags; 1612 1613 oflags = FFLAGS(open_flags); 1614 if ((oflags & (FREAD | FWRITE)) == 0) 1615 return EINVAL; 1616 1617 pb = pathbuf_create(path); 1618 if (pb == NULL) 1619 return ENOMEM; 1620 1621 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1622 pathbuf_destroy(pb); 1623 1624 return error; 1625 } 1626 1627 /* 1628 * Check permissions, allocate an open file structure, 1629 * and call the device open routine if any. 1630 */ 1631 static int 1632 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1633 int mode, int *fd) 1634 { 1635 file_t *dfp = NULL; 1636 struct vnode *dvp = NULL; 1637 struct pathbuf *pb; 1638 int error; 1639 1640 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1641 if (path == NULL) { 1642 pb = pathbuf_create("."); 1643 if (pb == NULL) 1644 return ENOMEM; 1645 } else 1646 #endif 1647 { 1648 error = pathbuf_copyin(path, &pb); 1649 if (error) 1650 return error; 1651 } 1652 1653 if (fdat != AT_FDCWD) { 1654 /* fd_getvnode() will use the descriptor for us */ 1655 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1656 goto out; 1657 1658 dvp = dfp->f_vnode; 1659 } 1660 1661 error = do_open(l, dvp, pb, flags, mode, fd); 1662 1663 if (dfp != NULL) 1664 fd_putfile(fdat); 1665 out: 1666 pathbuf_destroy(pb); 1667 return error; 1668 } 1669 1670 int 1671 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1672 { 1673 /* { 1674 syscallarg(const char *) path; 1675 syscallarg(int) flags; 1676 syscallarg(int) mode; 1677 } */ 1678 int error; 1679 int fd; 1680 1681 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1682 SCARG(uap, flags), SCARG(uap, mode), &fd); 1683 1684 if (error == 0) 1685 *retval = fd; 1686 1687 return error; 1688 } 1689 1690 int 1691 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1692 { 1693 /* { 1694 syscallarg(int) fd; 1695 syscallarg(const char *) path; 1696 syscallarg(int) oflags; 1697 syscallarg(int) mode; 1698 } */ 1699 int error; 1700 int fd; 1701 1702 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1703 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1704 1705 if (error == 0) 1706 *retval = fd; 1707 1708 return error; 1709 } 1710 1711 static void 1712 vfs__fhfree(fhandle_t *fhp) 1713 { 1714 size_t fhsize; 1715 1716 fhsize = FHANDLE_SIZE(fhp); 1717 kmem_free(fhp, fhsize); 1718 } 1719 1720 /* 1721 * vfs_composefh: compose a filehandle. 1722 */ 1723 1724 int 1725 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1726 { 1727 struct mount *mp; 1728 struct fid *fidp; 1729 int error; 1730 size_t needfhsize; 1731 size_t fidsize; 1732 1733 mp = vp->v_mount; 1734 fidp = NULL; 1735 if (*fh_size < FHANDLE_SIZE_MIN) { 1736 fidsize = 0; 1737 } else { 1738 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1739 if (fhp != NULL) { 1740 memset(fhp, 0, *fh_size); 1741 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1742 fidp = &fhp->fh_fid; 1743 } 1744 } 1745 error = VFS_VPTOFH(vp, fidp, &fidsize); 1746 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1747 if (error == 0 && *fh_size < needfhsize) { 1748 error = E2BIG; 1749 } 1750 *fh_size = needfhsize; 1751 return error; 1752 } 1753 1754 int 1755 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1756 { 1757 struct mount *mp; 1758 fhandle_t *fhp; 1759 size_t fhsize; 1760 size_t fidsize; 1761 int error; 1762 1763 mp = vp->v_mount; 1764 fidsize = 0; 1765 error = VFS_VPTOFH(vp, NULL, &fidsize); 1766 KASSERT(error != 0); 1767 if (error != E2BIG) { 1768 goto out; 1769 } 1770 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1771 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1772 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1773 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1774 if (error == 0) { 1775 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1776 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1777 *fhpp = fhp; 1778 } else { 1779 kmem_free(fhp, fhsize); 1780 } 1781 out: 1782 return error; 1783 } 1784 1785 void 1786 vfs_composefh_free(fhandle_t *fhp) 1787 { 1788 1789 vfs__fhfree(fhp); 1790 } 1791 1792 /* 1793 * vfs_fhtovp: lookup a vnode by a filehandle. 1794 */ 1795 1796 int 1797 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1798 { 1799 struct mount *mp; 1800 int error; 1801 1802 *vpp = NULL; 1803 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1804 if (mp == NULL) { 1805 error = ESTALE; 1806 goto out; 1807 } 1808 if (mp->mnt_op->vfs_fhtovp == NULL) { 1809 error = EOPNOTSUPP; 1810 goto out; 1811 } 1812 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1813 out: 1814 return error; 1815 } 1816 1817 /* 1818 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1819 * the needed size. 1820 */ 1821 1822 int 1823 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1824 { 1825 fhandle_t *fhp; 1826 int error; 1827 1828 if (fhsize > FHANDLE_SIZE_MAX) { 1829 return EINVAL; 1830 } 1831 if (fhsize < FHANDLE_SIZE_MIN) { 1832 return EINVAL; 1833 } 1834 again: 1835 fhp = kmem_alloc(fhsize, KM_SLEEP); 1836 error = copyin(ufhp, fhp, fhsize); 1837 if (error == 0) { 1838 /* XXX this check shouldn't be here */ 1839 if (FHANDLE_SIZE(fhp) == fhsize) { 1840 *fhpp = fhp; 1841 return 0; 1842 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1843 /* 1844 * a kludge for nfsv2 padded handles. 1845 */ 1846 size_t sz; 1847 1848 sz = FHANDLE_SIZE(fhp); 1849 kmem_free(fhp, fhsize); 1850 fhsize = sz; 1851 goto again; 1852 } else { 1853 /* 1854 * userland told us wrong size. 1855 */ 1856 error = EINVAL; 1857 } 1858 } 1859 kmem_free(fhp, fhsize); 1860 return error; 1861 } 1862 1863 void 1864 vfs_copyinfh_free(fhandle_t *fhp) 1865 { 1866 1867 vfs__fhfree(fhp); 1868 } 1869 1870 /* 1871 * Get file handle system call 1872 */ 1873 int 1874 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1875 { 1876 /* { 1877 syscallarg(char *) fname; 1878 syscallarg(fhandle_t *) fhp; 1879 syscallarg(size_t *) fh_size; 1880 } */ 1881 struct vnode *vp; 1882 fhandle_t *fh; 1883 int error; 1884 struct pathbuf *pb; 1885 struct nameidata nd; 1886 size_t sz; 1887 size_t usz; 1888 1889 /* 1890 * Must be super user 1891 */ 1892 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1893 0, NULL, NULL, NULL); 1894 if (error) 1895 return (error); 1896 1897 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1898 if (error) { 1899 return error; 1900 } 1901 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1902 error = namei(&nd); 1903 if (error) { 1904 pathbuf_destroy(pb); 1905 return error; 1906 } 1907 vp = nd.ni_vp; 1908 pathbuf_destroy(pb); 1909 1910 error = vfs_composefh_alloc(vp, &fh); 1911 vput(vp); 1912 if (error != 0) { 1913 return error; 1914 } 1915 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1916 if (error != 0) { 1917 goto out; 1918 } 1919 sz = FHANDLE_SIZE(fh); 1920 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1921 if (error != 0) { 1922 goto out; 1923 } 1924 if (usz >= sz) { 1925 error = copyout(fh, SCARG(uap, fhp), sz); 1926 } else { 1927 error = E2BIG; 1928 } 1929 out: 1930 vfs_composefh_free(fh); 1931 return (error); 1932 } 1933 1934 /* 1935 * Open a file given a file handle. 1936 * 1937 * Check permissions, allocate an open file structure, 1938 * and call the device open routine if any. 1939 */ 1940 1941 int 1942 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1943 register_t *retval) 1944 { 1945 file_t *fp; 1946 struct vnode *vp = NULL; 1947 kauth_cred_t cred = l->l_cred; 1948 file_t *nfp; 1949 int indx, error; 1950 struct vattr va; 1951 fhandle_t *fh; 1952 int flags; 1953 proc_t *p; 1954 1955 p = curproc; 1956 1957 /* 1958 * Must be super user 1959 */ 1960 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1961 0, NULL, NULL, NULL))) 1962 return (error); 1963 1964 if (oflags & O_SEARCH) { 1965 oflags &= ~(int)O_SEARCH; 1966 } 1967 1968 flags = FFLAGS(oflags); 1969 if ((flags & (FREAD | FWRITE)) == 0) 1970 return (EINVAL); 1971 if ((flags & O_CREAT)) 1972 return (EINVAL); 1973 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1974 return (error); 1975 fp = nfp; 1976 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1977 if (error != 0) { 1978 goto bad; 1979 } 1980 error = vfs_fhtovp(fh, &vp); 1981 vfs_copyinfh_free(fh); 1982 if (error != 0) { 1983 goto bad; 1984 } 1985 1986 /* Now do an effective vn_open */ 1987 1988 if (vp->v_type == VSOCK) { 1989 error = EOPNOTSUPP; 1990 goto bad; 1991 } 1992 error = vn_openchk(vp, cred, flags); 1993 if (error != 0) 1994 goto bad; 1995 if (flags & O_TRUNC) { 1996 VOP_UNLOCK(vp); /* XXX */ 1997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1998 vattr_null(&va); 1999 va.va_size = 0; 2000 error = VOP_SETATTR(vp, &va, cred); 2001 if (error) 2002 goto bad; 2003 } 2004 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2005 goto bad; 2006 if (flags & FWRITE) { 2007 mutex_enter(vp->v_interlock); 2008 vp->v_writecount++; 2009 mutex_exit(vp->v_interlock); 2010 } 2011 2012 /* done with modified vn_open, now finish what sys_open does. */ 2013 if ((error = open_setfp(l, fp, vp, indx, flags))) 2014 return error; 2015 2016 VOP_UNLOCK(vp); 2017 *retval = indx; 2018 fd_affix(p, fp, indx); 2019 return (0); 2020 2021 bad: 2022 fd_abort(p, fp, indx); 2023 if (vp != NULL) 2024 vput(vp); 2025 return (error); 2026 } 2027 2028 int 2029 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2030 { 2031 /* { 2032 syscallarg(const void *) fhp; 2033 syscallarg(size_t) fh_size; 2034 syscallarg(int) flags; 2035 } */ 2036 2037 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2038 SCARG(uap, flags), retval); 2039 } 2040 2041 int 2042 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2043 { 2044 int error; 2045 fhandle_t *fh; 2046 struct vnode *vp; 2047 2048 /* 2049 * Must be super user 2050 */ 2051 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2052 0, NULL, NULL, NULL))) 2053 return (error); 2054 2055 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2056 if (error != 0) 2057 return error; 2058 2059 error = vfs_fhtovp(fh, &vp); 2060 vfs_copyinfh_free(fh); 2061 if (error != 0) 2062 return error; 2063 2064 error = vn_stat(vp, sb); 2065 vput(vp); 2066 return error; 2067 } 2068 2069 2070 /* ARGSUSED */ 2071 int 2072 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const void *) fhp; 2076 syscallarg(size_t) fh_size; 2077 syscallarg(struct stat *) sb; 2078 } */ 2079 struct stat sb; 2080 int error; 2081 2082 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2083 if (error) 2084 return error; 2085 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2086 } 2087 2088 int 2089 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2090 int flags) 2091 { 2092 fhandle_t *fh; 2093 struct mount *mp; 2094 struct vnode *vp; 2095 int error; 2096 2097 /* 2098 * Must be super user 2099 */ 2100 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2101 0, NULL, NULL, NULL))) 2102 return error; 2103 2104 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2105 if (error != 0) 2106 return error; 2107 2108 error = vfs_fhtovp(fh, &vp); 2109 vfs_copyinfh_free(fh); 2110 if (error != 0) 2111 return error; 2112 2113 mp = vp->v_mount; 2114 error = dostatvfs(mp, sb, l, flags, 1); 2115 vput(vp); 2116 return error; 2117 } 2118 2119 /* ARGSUSED */ 2120 int 2121 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2122 { 2123 /* { 2124 syscallarg(const void *) fhp; 2125 syscallarg(size_t) fh_size; 2126 syscallarg(struct statvfs *) buf; 2127 syscallarg(int) flags; 2128 } */ 2129 struct statvfs *sb = STATVFSBUF_GET(); 2130 int error; 2131 2132 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2133 SCARG(uap, flags)); 2134 if (error == 0) 2135 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2136 STATVFSBUF_PUT(sb); 2137 return error; 2138 } 2139 2140 /* 2141 * Create a special file. 2142 */ 2143 /* ARGSUSED */ 2144 int 2145 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2146 register_t *retval) 2147 { 2148 /* { 2149 syscallarg(const char *) path; 2150 syscallarg(mode_t) mode; 2151 syscallarg(dev_t) dev; 2152 } */ 2153 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2154 SCARG(uap, dev), retval, UIO_USERSPACE); 2155 } 2156 2157 int 2158 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2159 register_t *retval) 2160 { 2161 /* { 2162 syscallarg(int) fd; 2163 syscallarg(const char *) path; 2164 syscallarg(mode_t) mode; 2165 syscallarg(int) pad; 2166 syscallarg(dev_t) dev; 2167 } */ 2168 2169 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2170 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2171 } 2172 2173 int 2174 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2175 register_t *retval, enum uio_seg seg) 2176 { 2177 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2178 } 2179 2180 int 2181 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2182 dev_t dev, register_t *retval, enum uio_seg seg) 2183 { 2184 struct proc *p = l->l_proc; 2185 struct vnode *vp; 2186 struct vattr vattr; 2187 int error, optype; 2188 struct pathbuf *pb; 2189 struct nameidata nd; 2190 const char *pathstring; 2191 2192 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2193 0, NULL, NULL, NULL)) != 0) 2194 return (error); 2195 2196 optype = VOP_MKNOD_DESCOFFSET; 2197 2198 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2199 if (error) { 2200 return error; 2201 } 2202 pathstring = pathbuf_stringcopy_get(pb); 2203 if (pathstring == NULL) { 2204 pathbuf_destroy(pb); 2205 return ENOMEM; 2206 } 2207 2208 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2209 2210 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2211 goto out; 2212 vp = nd.ni_vp; 2213 2214 if (vp != NULL) 2215 error = EEXIST; 2216 else { 2217 vattr_null(&vattr); 2218 /* We will read cwdi->cwdi_cmask unlocked. */ 2219 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2220 vattr.va_rdev = dev; 2221 2222 switch (mode & S_IFMT) { 2223 case S_IFMT: /* used by badsect to flag bad sectors */ 2224 vattr.va_type = VBAD; 2225 break; 2226 case S_IFCHR: 2227 vattr.va_type = VCHR; 2228 break; 2229 case S_IFBLK: 2230 vattr.va_type = VBLK; 2231 break; 2232 case S_IFWHT: 2233 optype = VOP_WHITEOUT_DESCOFFSET; 2234 break; 2235 case S_IFREG: 2236 #if NVERIEXEC > 0 2237 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2238 O_CREAT); 2239 #endif /* NVERIEXEC > 0 */ 2240 vattr.va_type = VREG; 2241 vattr.va_rdev = VNOVAL; 2242 optype = VOP_CREATE_DESCOFFSET; 2243 break; 2244 default: 2245 error = EINVAL; 2246 break; 2247 } 2248 } 2249 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2250 && vattr.va_rdev == VNOVAL) 2251 error = EINVAL; 2252 if (!error) { 2253 switch (optype) { 2254 case VOP_WHITEOUT_DESCOFFSET: 2255 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2256 if (error) 2257 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2258 vput(nd.ni_dvp); 2259 break; 2260 2261 case VOP_MKNOD_DESCOFFSET: 2262 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2263 &nd.ni_cnd, &vattr); 2264 if (error == 0) 2265 vrele(nd.ni_vp); 2266 vput(nd.ni_dvp); 2267 break; 2268 2269 case VOP_CREATE_DESCOFFSET: 2270 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2271 &nd.ni_cnd, &vattr); 2272 if (error == 0) 2273 vrele(nd.ni_vp); 2274 vput(nd.ni_dvp); 2275 break; 2276 } 2277 } else { 2278 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2279 if (nd.ni_dvp == vp) 2280 vrele(nd.ni_dvp); 2281 else 2282 vput(nd.ni_dvp); 2283 if (vp) 2284 vrele(vp); 2285 } 2286 out: 2287 pathbuf_stringcopy_put(pb, pathstring); 2288 pathbuf_destroy(pb); 2289 return (error); 2290 } 2291 2292 /* 2293 * Create a named pipe. 2294 */ 2295 /* ARGSUSED */ 2296 int 2297 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2298 { 2299 /* { 2300 syscallarg(const char *) path; 2301 syscallarg(int) mode; 2302 } */ 2303 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2304 } 2305 2306 int 2307 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2308 register_t *retval) 2309 { 2310 /* { 2311 syscallarg(int) fd; 2312 syscallarg(const char *) path; 2313 syscallarg(int) mode; 2314 } */ 2315 2316 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2317 SCARG(uap, mode)); 2318 } 2319 2320 static int 2321 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2322 { 2323 struct proc *p = l->l_proc; 2324 struct vattr vattr; 2325 int error; 2326 struct pathbuf *pb; 2327 struct nameidata nd; 2328 2329 error = pathbuf_copyin(path, &pb); 2330 if (error) { 2331 return error; 2332 } 2333 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2334 2335 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2336 pathbuf_destroy(pb); 2337 return error; 2338 } 2339 if (nd.ni_vp != NULL) { 2340 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2341 if (nd.ni_dvp == nd.ni_vp) 2342 vrele(nd.ni_dvp); 2343 else 2344 vput(nd.ni_dvp); 2345 vrele(nd.ni_vp); 2346 pathbuf_destroy(pb); 2347 return (EEXIST); 2348 } 2349 vattr_null(&vattr); 2350 vattr.va_type = VFIFO; 2351 /* We will read cwdi->cwdi_cmask unlocked. */ 2352 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2353 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2354 if (error == 0) 2355 vrele(nd.ni_vp); 2356 vput(nd.ni_dvp); 2357 pathbuf_destroy(pb); 2358 return (error); 2359 } 2360 2361 /* 2362 * Make a hard file link. 2363 */ 2364 /* ARGSUSED */ 2365 int 2366 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2367 const char *link, int follow, register_t *retval) 2368 { 2369 struct vnode *vp; 2370 struct pathbuf *linkpb; 2371 struct nameidata nd; 2372 namei_simple_flags_t ns_flags; 2373 int error; 2374 2375 if (follow & AT_SYMLINK_FOLLOW) 2376 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2377 else 2378 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2379 2380 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2381 if (error != 0) 2382 return (error); 2383 error = pathbuf_copyin(link, &linkpb); 2384 if (error) { 2385 goto out1; 2386 } 2387 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2388 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2389 goto out2; 2390 if (nd.ni_vp) { 2391 error = EEXIST; 2392 goto abortop; 2393 } 2394 /* Prevent hard links on directories. */ 2395 if (vp->v_type == VDIR) { 2396 error = EPERM; 2397 goto abortop; 2398 } 2399 /* Prevent cross-mount operation. */ 2400 if (nd.ni_dvp->v_mount != vp->v_mount) { 2401 error = EXDEV; 2402 goto abortop; 2403 } 2404 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2405 VOP_UNLOCK(nd.ni_dvp); 2406 vrele(nd.ni_dvp); 2407 out2: 2408 pathbuf_destroy(linkpb); 2409 out1: 2410 vrele(vp); 2411 return (error); 2412 abortop: 2413 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2414 if (nd.ni_dvp == nd.ni_vp) 2415 vrele(nd.ni_dvp); 2416 else 2417 vput(nd.ni_dvp); 2418 if (nd.ni_vp != NULL) 2419 vrele(nd.ni_vp); 2420 goto out2; 2421 } 2422 2423 int 2424 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2425 { 2426 /* { 2427 syscallarg(const char *) path; 2428 syscallarg(const char *) link; 2429 } */ 2430 const char *path = SCARG(uap, path); 2431 const char *link = SCARG(uap, link); 2432 2433 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2434 AT_SYMLINK_FOLLOW, retval); 2435 } 2436 2437 int 2438 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2439 register_t *retval) 2440 { 2441 /* { 2442 syscallarg(int) fd1; 2443 syscallarg(const char *) name1; 2444 syscallarg(int) fd2; 2445 syscallarg(const char *) name2; 2446 syscallarg(int) flags; 2447 } */ 2448 int fd1 = SCARG(uap, fd1); 2449 const char *name1 = SCARG(uap, name1); 2450 int fd2 = SCARG(uap, fd2); 2451 const char *name2 = SCARG(uap, name2); 2452 int follow; 2453 2454 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2455 2456 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2457 } 2458 2459 2460 int 2461 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2462 { 2463 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2464 } 2465 2466 static int 2467 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2468 const char *link, enum uio_seg seg) 2469 { 2470 struct proc *p = curproc; 2471 struct vattr vattr; 2472 char *path; 2473 int error; 2474 size_t len; 2475 struct pathbuf *linkpb; 2476 struct nameidata nd; 2477 2478 KASSERT(l != NULL || fdat == AT_FDCWD); 2479 2480 path = PNBUF_GET(); 2481 if (seg == UIO_USERSPACE) { 2482 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2483 goto out1; 2484 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2485 goto out1; 2486 } else { 2487 len = strlen(patharg) + 1; 2488 KASSERT(len <= MAXPATHLEN); 2489 memcpy(path, patharg, len); 2490 linkpb = pathbuf_create(link); 2491 if (linkpb == NULL) { 2492 error = ENOMEM; 2493 goto out1; 2494 } 2495 } 2496 ktrkuser("symlink-target", path, len - 1); 2497 2498 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2499 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2500 goto out2; 2501 if (nd.ni_vp) { 2502 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2503 if (nd.ni_dvp == nd.ni_vp) 2504 vrele(nd.ni_dvp); 2505 else 2506 vput(nd.ni_dvp); 2507 vrele(nd.ni_vp); 2508 error = EEXIST; 2509 goto out2; 2510 } 2511 vattr_null(&vattr); 2512 vattr.va_type = VLNK; 2513 /* We will read cwdi->cwdi_cmask unlocked. */ 2514 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2515 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2516 if (error == 0) 2517 vrele(nd.ni_vp); 2518 vput(nd.ni_dvp); 2519 out2: 2520 pathbuf_destroy(linkpb); 2521 out1: 2522 PNBUF_PUT(path); 2523 return (error); 2524 } 2525 2526 /* 2527 * Make a symbolic link. 2528 */ 2529 /* ARGSUSED */ 2530 int 2531 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2532 { 2533 /* { 2534 syscallarg(const char *) path; 2535 syscallarg(const char *) link; 2536 } */ 2537 2538 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2539 UIO_USERSPACE); 2540 } 2541 2542 int 2543 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2544 register_t *retval) 2545 { 2546 /* { 2547 syscallarg(const char *) path1; 2548 syscallarg(int) fd; 2549 syscallarg(const char *) path2; 2550 } */ 2551 2552 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2553 SCARG(uap, path2), UIO_USERSPACE); 2554 } 2555 2556 /* 2557 * Delete a whiteout from the filesystem. 2558 */ 2559 /* ARGSUSED */ 2560 int 2561 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2562 { 2563 /* { 2564 syscallarg(const char *) path; 2565 } */ 2566 int error; 2567 struct pathbuf *pb; 2568 struct nameidata nd; 2569 2570 error = pathbuf_copyin(SCARG(uap, path), &pb); 2571 if (error) { 2572 return error; 2573 } 2574 2575 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2576 error = namei(&nd); 2577 if (error) { 2578 pathbuf_destroy(pb); 2579 return (error); 2580 } 2581 2582 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2583 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2584 if (nd.ni_dvp == nd.ni_vp) 2585 vrele(nd.ni_dvp); 2586 else 2587 vput(nd.ni_dvp); 2588 if (nd.ni_vp) 2589 vrele(nd.ni_vp); 2590 pathbuf_destroy(pb); 2591 return (EEXIST); 2592 } 2593 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2594 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2595 vput(nd.ni_dvp); 2596 pathbuf_destroy(pb); 2597 return (error); 2598 } 2599 2600 /* 2601 * Delete a name from the filesystem. 2602 */ 2603 /* ARGSUSED */ 2604 int 2605 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2606 { 2607 /* { 2608 syscallarg(const char *) path; 2609 } */ 2610 2611 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2612 } 2613 2614 int 2615 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2616 register_t *retval) 2617 { 2618 /* { 2619 syscallarg(int) fd; 2620 syscallarg(const char *) path; 2621 syscallarg(int) flag; 2622 } */ 2623 2624 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2625 SCARG(uap, flag), UIO_USERSPACE); 2626 } 2627 2628 int 2629 do_sys_unlink(const char *arg, enum uio_seg seg) 2630 { 2631 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2632 } 2633 2634 static int 2635 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2636 enum uio_seg seg) 2637 { 2638 struct vnode *vp; 2639 int error; 2640 struct pathbuf *pb; 2641 struct nameidata nd; 2642 const char *pathstring; 2643 2644 KASSERT(l != NULL || fdat == AT_FDCWD); 2645 2646 error = pathbuf_maybe_copyin(arg, seg, &pb); 2647 if (error) { 2648 return error; 2649 } 2650 pathstring = pathbuf_stringcopy_get(pb); 2651 if (pathstring == NULL) { 2652 pathbuf_destroy(pb); 2653 return ENOMEM; 2654 } 2655 2656 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2657 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2658 goto out; 2659 vp = nd.ni_vp; 2660 2661 /* 2662 * The root of a mounted filesystem cannot be deleted. 2663 */ 2664 if ((vp->v_vflag & VV_ROOT) != 0) { 2665 error = EBUSY; 2666 goto abort; 2667 } 2668 2669 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2670 error = EBUSY; 2671 goto abort; 2672 } 2673 2674 /* 2675 * No rmdir "." please. 2676 */ 2677 if (nd.ni_dvp == vp) { 2678 error = EINVAL; 2679 goto abort; 2680 } 2681 2682 /* 2683 * AT_REMOVEDIR is required to remove a directory 2684 */ 2685 if (vp->v_type == VDIR) { 2686 if (!(flags & AT_REMOVEDIR)) { 2687 error = EPERM; 2688 goto abort; 2689 } else { 2690 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2691 vput(nd.ni_dvp); 2692 goto out; 2693 } 2694 } 2695 2696 /* 2697 * Starting here we only deal with non directories. 2698 */ 2699 if (flags & AT_REMOVEDIR) { 2700 error = ENOTDIR; 2701 goto abort; 2702 } 2703 2704 #if NVERIEXEC > 0 2705 /* Handle remove requests for veriexec entries. */ 2706 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2707 goto abort; 2708 } 2709 #endif /* NVERIEXEC > 0 */ 2710 2711 #ifdef FILEASSOC 2712 (void)fileassoc_file_delete(vp); 2713 #endif /* FILEASSOC */ 2714 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2715 vput(nd.ni_dvp); 2716 goto out; 2717 2718 abort: 2719 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2720 if (nd.ni_dvp == vp) 2721 vrele(nd.ni_dvp); 2722 else 2723 vput(nd.ni_dvp); 2724 vput(vp); 2725 2726 out: 2727 pathbuf_stringcopy_put(pb, pathstring); 2728 pathbuf_destroy(pb); 2729 return (error); 2730 } 2731 2732 /* 2733 * Reposition read/write file offset. 2734 */ 2735 int 2736 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2737 { 2738 /* { 2739 syscallarg(int) fd; 2740 syscallarg(int) pad; 2741 syscallarg(off_t) offset; 2742 syscallarg(int) whence; 2743 } */ 2744 kauth_cred_t cred = l->l_cred; 2745 file_t *fp; 2746 struct vnode *vp; 2747 struct vattr vattr; 2748 off_t newoff; 2749 int error, fd; 2750 2751 fd = SCARG(uap, fd); 2752 2753 if ((fp = fd_getfile(fd)) == NULL) 2754 return (EBADF); 2755 2756 vp = fp->f_vnode; 2757 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2758 error = ESPIPE; 2759 goto out; 2760 } 2761 2762 vn_lock(vp, LK_SHARED | LK_RETRY); 2763 2764 switch (SCARG(uap, whence)) { 2765 case SEEK_CUR: 2766 newoff = fp->f_offset + SCARG(uap, offset); 2767 break; 2768 case SEEK_END: 2769 error = VOP_GETATTR(vp, &vattr, cred); 2770 if (error) { 2771 VOP_UNLOCK(vp); 2772 goto out; 2773 } 2774 newoff = SCARG(uap, offset) + vattr.va_size; 2775 break; 2776 case SEEK_SET: 2777 newoff = SCARG(uap, offset); 2778 break; 2779 default: 2780 error = EINVAL; 2781 VOP_UNLOCK(vp); 2782 goto out; 2783 } 2784 VOP_UNLOCK(vp); 2785 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2786 *(off_t *)retval = fp->f_offset = newoff; 2787 } 2788 out: 2789 fd_putfile(fd); 2790 return (error); 2791 } 2792 2793 /* 2794 * Positional read system call. 2795 */ 2796 int 2797 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2798 { 2799 /* { 2800 syscallarg(int) fd; 2801 syscallarg(void *) buf; 2802 syscallarg(size_t) nbyte; 2803 syscallarg(off_t) offset; 2804 } */ 2805 file_t *fp; 2806 struct vnode *vp; 2807 off_t offset; 2808 int error, fd = SCARG(uap, fd); 2809 2810 if ((fp = fd_getfile(fd)) == NULL) 2811 return (EBADF); 2812 2813 if ((fp->f_flag & FREAD) == 0) { 2814 fd_putfile(fd); 2815 return (EBADF); 2816 } 2817 2818 vp = fp->f_vnode; 2819 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2820 error = ESPIPE; 2821 goto out; 2822 } 2823 2824 offset = SCARG(uap, offset); 2825 2826 /* 2827 * XXX This works because no file systems actually 2828 * XXX take any action on the seek operation. 2829 */ 2830 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2831 goto out; 2832 2833 /* dofileread() will unuse the descriptor for us */ 2834 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2835 &offset, 0, retval)); 2836 2837 out: 2838 fd_putfile(fd); 2839 return (error); 2840 } 2841 2842 /* 2843 * Positional scatter read system call. 2844 */ 2845 int 2846 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2847 { 2848 /* { 2849 syscallarg(int) fd; 2850 syscallarg(const struct iovec *) iovp; 2851 syscallarg(int) iovcnt; 2852 syscallarg(off_t) offset; 2853 } */ 2854 off_t offset = SCARG(uap, offset); 2855 2856 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2857 SCARG(uap, iovcnt), &offset, 0, retval); 2858 } 2859 2860 /* 2861 * Positional write system call. 2862 */ 2863 int 2864 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2865 { 2866 /* { 2867 syscallarg(int) fd; 2868 syscallarg(const void *) buf; 2869 syscallarg(size_t) nbyte; 2870 syscallarg(off_t) offset; 2871 } */ 2872 file_t *fp; 2873 struct vnode *vp; 2874 off_t offset; 2875 int error, fd = SCARG(uap, fd); 2876 2877 if ((fp = fd_getfile(fd)) == NULL) 2878 return (EBADF); 2879 2880 if ((fp->f_flag & FWRITE) == 0) { 2881 fd_putfile(fd); 2882 return (EBADF); 2883 } 2884 2885 vp = fp->f_vnode; 2886 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2887 error = ESPIPE; 2888 goto out; 2889 } 2890 2891 offset = SCARG(uap, offset); 2892 2893 /* 2894 * XXX This works because no file systems actually 2895 * XXX take any action on the seek operation. 2896 */ 2897 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2898 goto out; 2899 2900 /* dofilewrite() will unuse the descriptor for us */ 2901 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2902 &offset, 0, retval)); 2903 2904 out: 2905 fd_putfile(fd); 2906 return (error); 2907 } 2908 2909 /* 2910 * Positional gather write system call. 2911 */ 2912 int 2913 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2914 { 2915 /* { 2916 syscallarg(int) fd; 2917 syscallarg(const struct iovec *) iovp; 2918 syscallarg(int) iovcnt; 2919 syscallarg(off_t) offset; 2920 } */ 2921 off_t offset = SCARG(uap, offset); 2922 2923 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2924 SCARG(uap, iovcnt), &offset, 0, retval); 2925 } 2926 2927 /* 2928 * Check access permissions. 2929 */ 2930 int 2931 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2932 { 2933 /* { 2934 syscallarg(const char *) path; 2935 syscallarg(int) flags; 2936 } */ 2937 2938 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2939 SCARG(uap, flags), 0); 2940 } 2941 2942 int 2943 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2944 int mode, int flags) 2945 { 2946 kauth_cred_t cred; 2947 struct vnode *vp; 2948 int error, nd_flag, vmode; 2949 struct pathbuf *pb; 2950 struct nameidata nd; 2951 2952 CTASSERT(F_OK == 0); 2953 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2954 /* nonsense mode */ 2955 return EINVAL; 2956 } 2957 2958 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2959 if (flags & AT_SYMLINK_NOFOLLOW) 2960 nd_flag &= ~FOLLOW; 2961 2962 error = pathbuf_copyin(path, &pb); 2963 if (error) 2964 return error; 2965 2966 NDINIT(&nd, LOOKUP, nd_flag, pb); 2967 2968 /* Override default credentials */ 2969 cred = kauth_cred_dup(l->l_cred); 2970 if (!(flags & AT_EACCESS)) { 2971 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2972 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2973 } 2974 nd.ni_cnd.cn_cred = cred; 2975 2976 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2977 pathbuf_destroy(pb); 2978 goto out; 2979 } 2980 vp = nd.ni_vp; 2981 pathbuf_destroy(pb); 2982 2983 /* Flags == 0 means only check for existence. */ 2984 if (mode) { 2985 vmode = 0; 2986 if (mode & R_OK) 2987 vmode |= VREAD; 2988 if (mode & W_OK) 2989 vmode |= VWRITE; 2990 if (mode & X_OK) 2991 vmode |= VEXEC; 2992 2993 error = VOP_ACCESS(vp, vmode, cred); 2994 if (!error && (vmode & VWRITE)) 2995 error = vn_writechk(vp); 2996 } 2997 vput(vp); 2998 out: 2999 kauth_cred_free(cred); 3000 return (error); 3001 } 3002 3003 int 3004 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3005 register_t *retval) 3006 { 3007 /* { 3008 syscallarg(int) fd; 3009 syscallarg(const char *) path; 3010 syscallarg(int) amode; 3011 syscallarg(int) flag; 3012 } */ 3013 3014 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3015 SCARG(uap, amode), SCARG(uap, flag)); 3016 } 3017 3018 /* 3019 * Common code for all sys_stat functions, including compat versions. 3020 */ 3021 int 3022 do_sys_stat(const char *userpath, unsigned int nd_flag, 3023 struct stat *sb) 3024 { 3025 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3026 } 3027 3028 int 3029 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3030 unsigned int nd_flag, struct stat *sb) 3031 { 3032 int error; 3033 struct pathbuf *pb; 3034 struct nameidata nd; 3035 3036 KASSERT(l != NULL || fdat == AT_FDCWD); 3037 3038 error = pathbuf_copyin(userpath, &pb); 3039 if (error) { 3040 return error; 3041 } 3042 3043 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3044 3045 error = fd_nameiat(l, fdat, &nd); 3046 if (error != 0) { 3047 pathbuf_destroy(pb); 3048 return error; 3049 } 3050 error = vn_stat(nd.ni_vp, sb); 3051 vput(nd.ni_vp); 3052 pathbuf_destroy(pb); 3053 return error; 3054 } 3055 3056 /* 3057 * Get file status; this version follows links. 3058 */ 3059 /* ARGSUSED */ 3060 int 3061 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3062 { 3063 /* { 3064 syscallarg(const char *) path; 3065 syscallarg(struct stat *) ub; 3066 } */ 3067 struct stat sb; 3068 int error; 3069 3070 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3071 if (error) 3072 return error; 3073 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3074 } 3075 3076 /* 3077 * Get file status; this version does not follow links. 3078 */ 3079 /* ARGSUSED */ 3080 int 3081 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3082 { 3083 /* { 3084 syscallarg(const char *) path; 3085 syscallarg(struct stat *) ub; 3086 } */ 3087 struct stat sb; 3088 int error; 3089 3090 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3091 if (error) 3092 return error; 3093 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3094 } 3095 3096 int 3097 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3098 register_t *retval) 3099 { 3100 /* { 3101 syscallarg(int) fd; 3102 syscallarg(const char *) path; 3103 syscallarg(struct stat *) buf; 3104 syscallarg(int) flag; 3105 } */ 3106 unsigned int nd_flag; 3107 struct stat sb; 3108 int error; 3109 3110 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3111 nd_flag = NOFOLLOW; 3112 else 3113 nd_flag = FOLLOW; 3114 3115 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3116 &sb); 3117 if (error) 3118 return error; 3119 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3120 } 3121 3122 /* 3123 * Get configurable pathname variables. 3124 */ 3125 /* ARGSUSED */ 3126 int 3127 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3128 { 3129 /* { 3130 syscallarg(const char *) path; 3131 syscallarg(int) name; 3132 } */ 3133 int error; 3134 struct pathbuf *pb; 3135 struct nameidata nd; 3136 3137 error = pathbuf_copyin(SCARG(uap, path), &pb); 3138 if (error) { 3139 return error; 3140 } 3141 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3142 if ((error = namei(&nd)) != 0) { 3143 pathbuf_destroy(pb); 3144 return (error); 3145 } 3146 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3147 vput(nd.ni_vp); 3148 pathbuf_destroy(pb); 3149 return (error); 3150 } 3151 3152 /* 3153 * Return target name of a symbolic link. 3154 */ 3155 /* ARGSUSED */ 3156 int 3157 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3158 register_t *retval) 3159 { 3160 /* { 3161 syscallarg(const char *) path; 3162 syscallarg(char *) buf; 3163 syscallarg(size_t) count; 3164 } */ 3165 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3166 SCARG(uap, buf), SCARG(uap, count), retval); 3167 } 3168 3169 static int 3170 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3171 size_t count, register_t *retval) 3172 { 3173 struct vnode *vp; 3174 struct iovec aiov; 3175 struct uio auio; 3176 int error; 3177 struct pathbuf *pb; 3178 struct nameidata nd; 3179 3180 error = pathbuf_copyin(path, &pb); 3181 if (error) { 3182 return error; 3183 } 3184 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3185 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3186 pathbuf_destroy(pb); 3187 return error; 3188 } 3189 vp = nd.ni_vp; 3190 pathbuf_destroy(pb); 3191 if (vp->v_type != VLNK) 3192 error = EINVAL; 3193 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3194 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3195 aiov.iov_base = buf; 3196 aiov.iov_len = count; 3197 auio.uio_iov = &aiov; 3198 auio.uio_iovcnt = 1; 3199 auio.uio_offset = 0; 3200 auio.uio_rw = UIO_READ; 3201 KASSERT(l == curlwp); 3202 auio.uio_vmspace = l->l_proc->p_vmspace; 3203 auio.uio_resid = count; 3204 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3205 *retval = count - auio.uio_resid; 3206 } 3207 vput(vp); 3208 return (error); 3209 } 3210 3211 int 3212 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3213 register_t *retval) 3214 { 3215 /* { 3216 syscallarg(int) fd; 3217 syscallarg(const char *) path; 3218 syscallarg(char *) buf; 3219 syscallarg(size_t) bufsize; 3220 } */ 3221 3222 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3223 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3224 } 3225 3226 /* 3227 * Change flags of a file given a path name. 3228 */ 3229 /* ARGSUSED */ 3230 int 3231 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3232 { 3233 /* { 3234 syscallarg(const char *) path; 3235 syscallarg(u_long) flags; 3236 } */ 3237 struct vnode *vp; 3238 int error; 3239 3240 error = namei_simple_user(SCARG(uap, path), 3241 NSM_FOLLOW_TRYEMULROOT, &vp); 3242 if (error != 0) 3243 return (error); 3244 error = change_flags(vp, SCARG(uap, flags), l); 3245 vput(vp); 3246 return (error); 3247 } 3248 3249 /* 3250 * Change flags of a file given a file descriptor. 3251 */ 3252 /* ARGSUSED */ 3253 int 3254 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3255 { 3256 /* { 3257 syscallarg(int) fd; 3258 syscallarg(u_long) flags; 3259 } */ 3260 struct vnode *vp; 3261 file_t *fp; 3262 int error; 3263 3264 /* fd_getvnode() will use the descriptor for us */ 3265 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3266 return (error); 3267 vp = fp->f_vnode; 3268 error = change_flags(vp, SCARG(uap, flags), l); 3269 VOP_UNLOCK(vp); 3270 fd_putfile(SCARG(uap, fd)); 3271 return (error); 3272 } 3273 3274 /* 3275 * Change flags of a file given a path name; this version does 3276 * not follow links. 3277 */ 3278 int 3279 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3280 { 3281 /* { 3282 syscallarg(const char *) path; 3283 syscallarg(u_long) flags; 3284 } */ 3285 struct vnode *vp; 3286 int error; 3287 3288 error = namei_simple_user(SCARG(uap, path), 3289 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3290 if (error != 0) 3291 return (error); 3292 error = change_flags(vp, SCARG(uap, flags), l); 3293 vput(vp); 3294 return (error); 3295 } 3296 3297 /* 3298 * Common routine to change flags of a file. 3299 */ 3300 int 3301 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3302 { 3303 struct vattr vattr; 3304 int error; 3305 3306 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3307 3308 vattr_null(&vattr); 3309 vattr.va_flags = flags; 3310 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3311 3312 return (error); 3313 } 3314 3315 /* 3316 * Change mode of a file given path name; this version follows links. 3317 */ 3318 /* ARGSUSED */ 3319 int 3320 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3321 { 3322 /* { 3323 syscallarg(const char *) path; 3324 syscallarg(int) mode; 3325 } */ 3326 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3327 SCARG(uap, mode), 0); 3328 } 3329 3330 int 3331 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3332 { 3333 int error; 3334 struct vnode *vp; 3335 namei_simple_flags_t ns_flag; 3336 3337 if (flags & AT_SYMLINK_NOFOLLOW) 3338 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3339 else 3340 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3341 3342 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3343 if (error != 0) 3344 return error; 3345 3346 error = change_mode(vp, mode, l); 3347 3348 vrele(vp); 3349 3350 return (error); 3351 } 3352 3353 /* 3354 * Change mode of a file given a file descriptor. 3355 */ 3356 /* ARGSUSED */ 3357 int 3358 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3359 { 3360 /* { 3361 syscallarg(int) fd; 3362 syscallarg(int) mode; 3363 } */ 3364 file_t *fp; 3365 int error; 3366 3367 /* fd_getvnode() will use the descriptor for us */ 3368 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3369 return (error); 3370 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3371 fd_putfile(SCARG(uap, fd)); 3372 return (error); 3373 } 3374 3375 int 3376 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3377 register_t *retval) 3378 { 3379 /* { 3380 syscallarg(int) fd; 3381 syscallarg(const char *) path; 3382 syscallarg(int) mode; 3383 syscallarg(int) flag; 3384 } */ 3385 3386 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3387 SCARG(uap, mode), SCARG(uap, flag)); 3388 } 3389 3390 /* 3391 * Change mode of a file given path name; this version does not follow links. 3392 */ 3393 /* ARGSUSED */ 3394 int 3395 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3396 { 3397 /* { 3398 syscallarg(const char *) path; 3399 syscallarg(int) mode; 3400 } */ 3401 int error; 3402 struct vnode *vp; 3403 3404 error = namei_simple_user(SCARG(uap, path), 3405 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3406 if (error != 0) 3407 return (error); 3408 3409 error = change_mode(vp, SCARG(uap, mode), l); 3410 3411 vrele(vp); 3412 return (error); 3413 } 3414 3415 /* 3416 * Common routine to set mode given a vnode. 3417 */ 3418 static int 3419 change_mode(struct vnode *vp, int mode, struct lwp *l) 3420 { 3421 struct vattr vattr; 3422 int error; 3423 3424 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3425 vattr_null(&vattr); 3426 vattr.va_mode = mode & ALLPERMS; 3427 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3428 VOP_UNLOCK(vp); 3429 return (error); 3430 } 3431 3432 /* 3433 * Set ownership given a path name; this version follows links. 3434 */ 3435 /* ARGSUSED */ 3436 int 3437 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3438 { 3439 /* { 3440 syscallarg(const char *) path; 3441 syscallarg(uid_t) uid; 3442 syscallarg(gid_t) gid; 3443 } */ 3444 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3445 SCARG(uap, gid), 0); 3446 } 3447 3448 int 3449 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3450 gid_t gid, int flags) 3451 { 3452 int error; 3453 struct vnode *vp; 3454 namei_simple_flags_t ns_flag; 3455 3456 if (flags & AT_SYMLINK_NOFOLLOW) 3457 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3458 else 3459 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3460 3461 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3462 if (error != 0) 3463 return error; 3464 3465 error = change_owner(vp, uid, gid, l, 0); 3466 3467 vrele(vp); 3468 3469 return (error); 3470 } 3471 3472 /* 3473 * Set ownership given a path name; this version follows links. 3474 * Provides POSIX semantics. 3475 */ 3476 /* ARGSUSED */ 3477 int 3478 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3479 { 3480 /* { 3481 syscallarg(const char *) path; 3482 syscallarg(uid_t) uid; 3483 syscallarg(gid_t) gid; 3484 } */ 3485 int error; 3486 struct vnode *vp; 3487 3488 error = namei_simple_user(SCARG(uap, path), 3489 NSM_FOLLOW_TRYEMULROOT, &vp); 3490 if (error != 0) 3491 return (error); 3492 3493 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3494 3495 vrele(vp); 3496 return (error); 3497 } 3498 3499 /* 3500 * Set ownership given a file descriptor. 3501 */ 3502 /* ARGSUSED */ 3503 int 3504 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3505 { 3506 /* { 3507 syscallarg(int) fd; 3508 syscallarg(uid_t) uid; 3509 syscallarg(gid_t) gid; 3510 } */ 3511 int error; 3512 file_t *fp; 3513 3514 /* fd_getvnode() will use the descriptor for us */ 3515 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3516 return (error); 3517 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3518 l, 0); 3519 fd_putfile(SCARG(uap, fd)); 3520 return (error); 3521 } 3522 3523 int 3524 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3525 register_t *retval) 3526 { 3527 /* { 3528 syscallarg(int) fd; 3529 syscallarg(const char *) path; 3530 syscallarg(uid_t) owner; 3531 syscallarg(gid_t) group; 3532 syscallarg(int) flag; 3533 } */ 3534 3535 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3536 SCARG(uap, owner), SCARG(uap, group), 3537 SCARG(uap, flag)); 3538 } 3539 3540 /* 3541 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3542 */ 3543 /* ARGSUSED */ 3544 int 3545 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3546 { 3547 /* { 3548 syscallarg(int) fd; 3549 syscallarg(uid_t) uid; 3550 syscallarg(gid_t) gid; 3551 } */ 3552 int error; 3553 file_t *fp; 3554 3555 /* fd_getvnode() will use the descriptor for us */ 3556 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3557 return (error); 3558 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3559 l, 1); 3560 fd_putfile(SCARG(uap, fd)); 3561 return (error); 3562 } 3563 3564 /* 3565 * Set ownership given a path name; this version does not follow links. 3566 */ 3567 /* ARGSUSED */ 3568 int 3569 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3570 { 3571 /* { 3572 syscallarg(const char *) path; 3573 syscallarg(uid_t) uid; 3574 syscallarg(gid_t) gid; 3575 } */ 3576 int error; 3577 struct vnode *vp; 3578 3579 error = namei_simple_user(SCARG(uap, path), 3580 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3581 if (error != 0) 3582 return (error); 3583 3584 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3585 3586 vrele(vp); 3587 return (error); 3588 } 3589 3590 /* 3591 * Set ownership given a path name; this version does not follow links. 3592 * Provides POSIX/XPG semantics. 3593 */ 3594 /* ARGSUSED */ 3595 int 3596 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3597 { 3598 /* { 3599 syscallarg(const char *) path; 3600 syscallarg(uid_t) uid; 3601 syscallarg(gid_t) gid; 3602 } */ 3603 int error; 3604 struct vnode *vp; 3605 3606 error = namei_simple_user(SCARG(uap, path), 3607 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3608 if (error != 0) 3609 return (error); 3610 3611 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3612 3613 vrele(vp); 3614 return (error); 3615 } 3616 3617 /* 3618 * Common routine to set ownership given a vnode. 3619 */ 3620 static int 3621 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3622 int posix_semantics) 3623 { 3624 struct vattr vattr; 3625 mode_t newmode; 3626 int error; 3627 3628 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3629 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3630 goto out; 3631 3632 #define CHANGED(x) ((int)(x) != -1) 3633 newmode = vattr.va_mode; 3634 if (posix_semantics) { 3635 /* 3636 * POSIX/XPG semantics: if the caller is not the super-user, 3637 * clear set-user-id and set-group-id bits. Both POSIX and 3638 * the XPG consider the behaviour for calls by the super-user 3639 * implementation-defined; we leave the set-user-id and set- 3640 * group-id settings intact in that case. 3641 */ 3642 if (vattr.va_mode & S_ISUID) { 3643 if (kauth_authorize_vnode(l->l_cred, 3644 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3645 newmode &= ~S_ISUID; 3646 } 3647 if (vattr.va_mode & S_ISGID) { 3648 if (kauth_authorize_vnode(l->l_cred, 3649 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3650 newmode &= ~S_ISGID; 3651 } 3652 } else { 3653 /* 3654 * NetBSD semantics: when changing owner and/or group, 3655 * clear the respective bit(s). 3656 */ 3657 if (CHANGED(uid)) 3658 newmode &= ~S_ISUID; 3659 if (CHANGED(gid)) 3660 newmode &= ~S_ISGID; 3661 } 3662 /* Update va_mode iff altered. */ 3663 if (vattr.va_mode == newmode) 3664 newmode = VNOVAL; 3665 3666 vattr_null(&vattr); 3667 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3668 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3669 vattr.va_mode = newmode; 3670 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3671 #undef CHANGED 3672 3673 out: 3674 VOP_UNLOCK(vp); 3675 return (error); 3676 } 3677 3678 /* 3679 * Set the access and modification times given a path name; this 3680 * version follows links. 3681 */ 3682 /* ARGSUSED */ 3683 int 3684 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3685 register_t *retval) 3686 { 3687 /* { 3688 syscallarg(const char *) path; 3689 syscallarg(const struct timeval *) tptr; 3690 } */ 3691 3692 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3693 SCARG(uap, tptr), UIO_USERSPACE); 3694 } 3695 3696 /* 3697 * Set the access and modification times given a file descriptor. 3698 */ 3699 /* ARGSUSED */ 3700 int 3701 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3702 register_t *retval) 3703 { 3704 /* { 3705 syscallarg(int) fd; 3706 syscallarg(const struct timeval *) tptr; 3707 } */ 3708 int error; 3709 file_t *fp; 3710 3711 /* fd_getvnode() will use the descriptor for us */ 3712 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3713 return (error); 3714 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3715 UIO_USERSPACE); 3716 fd_putfile(SCARG(uap, fd)); 3717 return (error); 3718 } 3719 3720 int 3721 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3722 register_t *retval) 3723 { 3724 /* { 3725 syscallarg(int) fd; 3726 syscallarg(const struct timespec *) tptr; 3727 } */ 3728 int error; 3729 file_t *fp; 3730 3731 /* fd_getvnode() will use the descriptor for us */ 3732 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3733 return (error); 3734 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3735 SCARG(uap, tptr), UIO_USERSPACE); 3736 fd_putfile(SCARG(uap, fd)); 3737 return (error); 3738 } 3739 3740 /* 3741 * Set the access and modification times given a path name; this 3742 * version does not follow links. 3743 */ 3744 int 3745 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3746 register_t *retval) 3747 { 3748 /* { 3749 syscallarg(const char *) path; 3750 syscallarg(const struct timeval *) tptr; 3751 } */ 3752 3753 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3754 SCARG(uap, tptr), UIO_USERSPACE); 3755 } 3756 3757 int 3758 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3759 register_t *retval) 3760 { 3761 /* { 3762 syscallarg(int) fd; 3763 syscallarg(const char *) path; 3764 syscallarg(const struct timespec *) tptr; 3765 syscallarg(int) flag; 3766 } */ 3767 int follow; 3768 const struct timespec *tptr; 3769 int error; 3770 3771 tptr = SCARG(uap, tptr); 3772 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3773 3774 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3775 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3776 3777 return error; 3778 } 3779 3780 /* 3781 * Common routine to set access and modification times given a vnode. 3782 */ 3783 int 3784 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3785 const struct timespec *tptr, enum uio_seg seg) 3786 { 3787 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3788 } 3789 3790 int 3791 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3792 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3793 { 3794 struct vattr vattr; 3795 int error, dorele = 0; 3796 namei_simple_flags_t sflags; 3797 bool vanull, setbirthtime; 3798 struct timespec ts[2]; 3799 3800 KASSERT(l != NULL || fdat == AT_FDCWD); 3801 3802 /* 3803 * I have checked all callers and they pass either FOLLOW, 3804 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3805 * is 0. More to the point, they don't pass anything else. 3806 * Let's keep it that way at least until the namei interfaces 3807 * are fully sanitized. 3808 */ 3809 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3810 sflags = (flag == FOLLOW) ? 3811 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3812 3813 if (tptr == NULL) { 3814 vanull = true; 3815 nanotime(&ts[0]); 3816 ts[1] = ts[0]; 3817 } else { 3818 vanull = false; 3819 if (seg != UIO_SYSSPACE) { 3820 error = copyin(tptr, ts, sizeof (ts)); 3821 if (error != 0) 3822 return error; 3823 } else { 3824 ts[0] = tptr[0]; 3825 ts[1] = tptr[1]; 3826 } 3827 } 3828 3829 if (ts[0].tv_nsec == UTIME_NOW) { 3830 nanotime(&ts[0]); 3831 if (ts[1].tv_nsec == UTIME_NOW) { 3832 vanull = true; 3833 ts[1] = ts[0]; 3834 } 3835 } else if (ts[1].tv_nsec == UTIME_NOW) 3836 nanotime(&ts[1]); 3837 3838 if (vp == NULL) { 3839 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3840 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3841 if (error != 0) 3842 return error; 3843 dorele = 1; 3844 } 3845 3846 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3847 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3848 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3849 vattr_null(&vattr); 3850 3851 if (ts[0].tv_nsec != UTIME_OMIT) 3852 vattr.va_atime = ts[0]; 3853 3854 if (ts[1].tv_nsec != UTIME_OMIT) { 3855 vattr.va_mtime = ts[1]; 3856 if (setbirthtime) 3857 vattr.va_birthtime = ts[1]; 3858 } 3859 3860 if (vanull) 3861 vattr.va_vaflags |= VA_UTIMES_NULL; 3862 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3863 VOP_UNLOCK(vp); 3864 3865 if (dorele != 0) 3866 vrele(vp); 3867 3868 return error; 3869 } 3870 3871 int 3872 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3873 const struct timeval *tptr, enum uio_seg seg) 3874 { 3875 struct timespec ts[2]; 3876 struct timespec *tsptr = NULL; 3877 int error; 3878 3879 if (tptr != NULL) { 3880 struct timeval tv[2]; 3881 3882 if (seg != UIO_SYSSPACE) { 3883 error = copyin(tptr, tv, sizeof (tv)); 3884 if (error != 0) 3885 return error; 3886 tptr = tv; 3887 } 3888 3889 if ((tv[0].tv_usec == UTIME_NOW) || 3890 (tv[0].tv_usec == UTIME_OMIT)) 3891 ts[0].tv_nsec = tv[0].tv_usec; 3892 else 3893 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3894 3895 if ((tv[1].tv_usec == UTIME_NOW) || 3896 (tv[1].tv_usec == UTIME_OMIT)) 3897 ts[1].tv_nsec = tv[1].tv_usec; 3898 else 3899 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3900 3901 tsptr = &ts[0]; 3902 } 3903 3904 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3905 } 3906 3907 /* 3908 * Truncate a file given its path name. 3909 */ 3910 /* ARGSUSED */ 3911 int 3912 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3913 { 3914 /* { 3915 syscallarg(const char *) path; 3916 syscallarg(int) pad; 3917 syscallarg(off_t) length; 3918 } */ 3919 struct vnode *vp; 3920 struct vattr vattr; 3921 int error; 3922 3923 if (SCARG(uap, length) < 0) 3924 return EINVAL; 3925 3926 error = namei_simple_user(SCARG(uap, path), 3927 NSM_FOLLOW_TRYEMULROOT, &vp); 3928 if (error != 0) 3929 return (error); 3930 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3931 if (vp->v_type == VDIR) 3932 error = EISDIR; 3933 else if ((error = vn_writechk(vp)) == 0 && 3934 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3935 vattr_null(&vattr); 3936 vattr.va_size = SCARG(uap, length); 3937 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3938 } 3939 vput(vp); 3940 return (error); 3941 } 3942 3943 /* 3944 * Truncate a file given a file descriptor. 3945 */ 3946 /* ARGSUSED */ 3947 int 3948 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3949 { 3950 /* { 3951 syscallarg(int) fd; 3952 syscallarg(int) pad; 3953 syscallarg(off_t) length; 3954 } */ 3955 struct vattr vattr; 3956 struct vnode *vp; 3957 file_t *fp; 3958 int error; 3959 3960 if (SCARG(uap, length) < 0) 3961 return EINVAL; 3962 3963 /* fd_getvnode() will use the descriptor for us */ 3964 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3965 return (error); 3966 if ((fp->f_flag & FWRITE) == 0) { 3967 error = EINVAL; 3968 goto out; 3969 } 3970 vp = fp->f_vnode; 3971 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3972 if (vp->v_type == VDIR) 3973 error = EISDIR; 3974 else if ((error = vn_writechk(vp)) == 0) { 3975 vattr_null(&vattr); 3976 vattr.va_size = SCARG(uap, length); 3977 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3978 } 3979 VOP_UNLOCK(vp); 3980 out: 3981 fd_putfile(SCARG(uap, fd)); 3982 return (error); 3983 } 3984 3985 /* 3986 * Sync an open file. 3987 */ 3988 /* ARGSUSED */ 3989 int 3990 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3991 { 3992 /* { 3993 syscallarg(int) fd; 3994 } */ 3995 struct vnode *vp; 3996 file_t *fp; 3997 int error; 3998 3999 /* fd_getvnode() will use the descriptor for us */ 4000 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4001 return (error); 4002 vp = fp->f_vnode; 4003 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4004 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4005 VOP_UNLOCK(vp); 4006 fd_putfile(SCARG(uap, fd)); 4007 return (error); 4008 } 4009 4010 /* 4011 * Sync a range of file data. API modeled after that found in AIX. 4012 * 4013 * FDATASYNC indicates that we need only save enough metadata to be able 4014 * to re-read the written data. Note we duplicate AIX's requirement that 4015 * the file be open for writing. 4016 */ 4017 /* ARGSUSED */ 4018 int 4019 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4020 { 4021 /* { 4022 syscallarg(int) fd; 4023 syscallarg(int) flags; 4024 syscallarg(off_t) start; 4025 syscallarg(off_t) length; 4026 } */ 4027 struct vnode *vp; 4028 file_t *fp; 4029 int flags, nflags; 4030 off_t s, e, len; 4031 int error; 4032 4033 /* fd_getvnode() will use the descriptor for us */ 4034 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4035 return (error); 4036 4037 if ((fp->f_flag & FWRITE) == 0) { 4038 error = EBADF; 4039 goto out; 4040 } 4041 4042 flags = SCARG(uap, flags); 4043 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4044 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4045 error = EINVAL; 4046 goto out; 4047 } 4048 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4049 if (flags & FDATASYNC) 4050 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4051 else 4052 nflags = FSYNC_WAIT; 4053 if (flags & FDISKSYNC) 4054 nflags |= FSYNC_CACHE; 4055 4056 len = SCARG(uap, length); 4057 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4058 if (len) { 4059 s = SCARG(uap, start); 4060 e = s + len; 4061 if (e < s) { 4062 error = EINVAL; 4063 goto out; 4064 } 4065 } else { 4066 e = 0; 4067 s = 0; 4068 } 4069 4070 vp = fp->f_vnode; 4071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4072 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4073 VOP_UNLOCK(vp); 4074 out: 4075 fd_putfile(SCARG(uap, fd)); 4076 return (error); 4077 } 4078 4079 /* 4080 * Sync the data of an open file. 4081 */ 4082 /* ARGSUSED */ 4083 int 4084 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4085 { 4086 /* { 4087 syscallarg(int) fd; 4088 } */ 4089 struct vnode *vp; 4090 file_t *fp; 4091 int error; 4092 4093 /* fd_getvnode() will use the descriptor for us */ 4094 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4095 return (error); 4096 if ((fp->f_flag & FWRITE) == 0) { 4097 fd_putfile(SCARG(uap, fd)); 4098 return (EBADF); 4099 } 4100 vp = fp->f_vnode; 4101 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4102 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4103 VOP_UNLOCK(vp); 4104 fd_putfile(SCARG(uap, fd)); 4105 return (error); 4106 } 4107 4108 /* 4109 * Rename files, (standard) BSD semantics frontend. 4110 */ 4111 /* ARGSUSED */ 4112 int 4113 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4114 { 4115 /* { 4116 syscallarg(const char *) from; 4117 syscallarg(const char *) to; 4118 } */ 4119 4120 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4121 SCARG(uap, to), UIO_USERSPACE, 0)); 4122 } 4123 4124 int 4125 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4126 register_t *retval) 4127 { 4128 /* { 4129 syscallarg(int) fromfd; 4130 syscallarg(const char *) from; 4131 syscallarg(int) tofd; 4132 syscallarg(const char *) to; 4133 } */ 4134 4135 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4136 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4137 } 4138 4139 /* 4140 * Rename files, POSIX semantics frontend. 4141 */ 4142 /* ARGSUSED */ 4143 int 4144 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4145 { 4146 /* { 4147 syscallarg(const char *) from; 4148 syscallarg(const char *) to; 4149 } */ 4150 4151 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4152 SCARG(uap, to), UIO_USERSPACE, 1)); 4153 } 4154 4155 /* 4156 * Rename files. Source and destination must either both be directories, 4157 * or both not be directories. If target is a directory, it must be empty. 4158 * If `from' and `to' refer to the same object, the value of the `retain' 4159 * argument is used to determine whether `from' will be 4160 * 4161 * (retain == 0) deleted unless `from' and `to' refer to the same 4162 * object in the file system's name space (BSD). 4163 * (retain == 1) always retained (POSIX). 4164 * 4165 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4166 */ 4167 int 4168 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4169 { 4170 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4171 } 4172 4173 static int 4174 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4175 const char *to, enum uio_seg seg, int retain) 4176 { 4177 struct pathbuf *fpb, *tpb; 4178 struct nameidata fnd, tnd; 4179 struct vnode *fdvp, *fvp; 4180 struct vnode *tdvp, *tvp; 4181 struct mount *mp, *tmp; 4182 int error; 4183 4184 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4185 4186 error = pathbuf_maybe_copyin(from, seg, &fpb); 4187 if (error) 4188 goto out0; 4189 KASSERT(fpb != NULL); 4190 4191 error = pathbuf_maybe_copyin(to, seg, &tpb); 4192 if (error) 4193 goto out1; 4194 KASSERT(tpb != NULL); 4195 4196 /* 4197 * Lookup from. 4198 * 4199 * XXX LOCKPARENT is wrong because we don't actually want it 4200 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4201 * insane, so for the time being we need to leave it like this. 4202 */ 4203 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4204 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4205 goto out2; 4206 4207 /* 4208 * Pull out the important results of the lookup, fdvp and fvp. 4209 * Of course, fvp is bogus because we're about to unlock fdvp. 4210 */ 4211 fdvp = fnd.ni_dvp; 4212 fvp = fnd.ni_vp; 4213 KASSERT(fdvp != NULL); 4214 KASSERT(fvp != NULL); 4215 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4216 4217 /* 4218 * Make sure neither fdvp nor fvp is locked. 4219 */ 4220 if (fdvp != fvp) 4221 VOP_UNLOCK(fdvp); 4222 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4223 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4224 4225 /* 4226 * Reject renaming `.' and `..'. Can't do this until after 4227 * namei because we need namei's parsing to find the final 4228 * component name. (namei should just leave us with the final 4229 * component name and not look it up itself, but anyway...) 4230 * 4231 * This was here before because we used to relookup from 4232 * instead of to and relookup requires the caller to check 4233 * this, but now file systems may depend on this check, so we 4234 * must retain it until the file systems are all rototilled. 4235 */ 4236 if (((fnd.ni_cnd.cn_namelen == 1) && 4237 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4238 ((fnd.ni_cnd.cn_namelen == 2) && 4239 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4240 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4241 error = EINVAL; /* XXX EISDIR? */ 4242 goto abort0; 4243 } 4244 4245 /* 4246 * Lookup to. 4247 * 4248 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4249 * fvp here to decide whether to add CREATEDIR is a load of 4250 * bollocks because fvp might be the wrong node by now, since 4251 * fdvp is unlocked. 4252 * 4253 * XXX Why not pass CREATEDIR always? 4254 */ 4255 NDINIT(&tnd, RENAME, 4256 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4257 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4258 tpb); 4259 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4260 goto abort0; 4261 4262 /* 4263 * Pull out the important results of the lookup, tdvp and tvp. 4264 * Of course, tvp is bogus because we're about to unlock tdvp. 4265 */ 4266 tdvp = tnd.ni_dvp; 4267 tvp = tnd.ni_vp; 4268 KASSERT(tdvp != NULL); 4269 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4270 4271 /* 4272 * Make sure neither tdvp nor tvp is locked. 4273 */ 4274 if (tdvp != tvp) 4275 VOP_UNLOCK(tdvp); 4276 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4277 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4278 4279 /* 4280 * Reject renaming onto `.' or `..'. relookup is unhappy with 4281 * these, which is why we must do this here. Once upon a time 4282 * we relooked up from instead of to, and consequently didn't 4283 * need this check, but now that we relookup to instead of 4284 * from, we need this; and we shall need it forever forward 4285 * until the VOP_RENAME protocol changes, because file systems 4286 * will no doubt begin to depend on this check. 4287 */ 4288 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4289 error = EISDIR; 4290 goto abort1; 4291 } 4292 if ((tnd.ni_cnd.cn_namelen == 2) && 4293 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4294 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4295 error = EINVAL; 4296 goto abort1; 4297 } 4298 4299 /* 4300 * Get the mount point. If the file system has been unmounted, 4301 * which it may be because we're not holding any vnode locks, 4302 * then v_mount will be NULL. We're not really supposed to 4303 * read v_mount without holding the vnode lock, but since we 4304 * have fdvp referenced, if fdvp->v_mount changes then at worst 4305 * it will be set to NULL, not changed to another mount point. 4306 * And, of course, since it is up to the file system to 4307 * determine the real lock order, we can't lock both fdvp and 4308 * tdvp at the same time. 4309 */ 4310 mp = fdvp->v_mount; 4311 if (mp == NULL) { 4312 error = ENOENT; 4313 goto abort1; 4314 } 4315 4316 /* 4317 * Make sure the mount points match. Again, although we don't 4318 * hold any vnode locks, the v_mount fields may change -- but 4319 * at worst they will change to NULL, so this will never become 4320 * a cross-device rename, because we hold vnode references. 4321 * 4322 * XXX Because nothing is locked and the compiler may reorder 4323 * things here, unmounting the file system at an inopportune 4324 * moment may cause rename to fail with EXDEV when it really 4325 * should fail with ENOENT. 4326 */ 4327 tmp = tdvp->v_mount; 4328 if (tmp == NULL) { 4329 error = ENOENT; 4330 goto abort1; 4331 } 4332 4333 if (mp != tmp) { 4334 error = EXDEV; 4335 goto abort1; 4336 } 4337 4338 /* 4339 * Take the vfs rename lock to avoid cross-directory screw cases. 4340 * Nothing is locked currently, so taking this lock is safe. 4341 */ 4342 error = VFS_RENAMELOCK_ENTER(mp); 4343 if (error) 4344 goto abort1; 4345 4346 /* 4347 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4348 * and nothing is locked except for the vfs rename lock. 4349 * 4350 * The next step is a little rain dance to conform to the 4351 * insane lock protocol, even though it does nothing to ward 4352 * off race conditions. 4353 * 4354 * We need tdvp and tvp to be locked. However, because we have 4355 * unlocked tdvp in order to hold no locks while we take the 4356 * vfs rename lock, tvp may be wrong here, and we can't safely 4357 * lock it even if the sensible file systems will just unlock 4358 * it straight away. Consequently, we must lock tdvp and then 4359 * relookup tvp to get it locked. 4360 * 4361 * Finally, because the VOP_RENAME protocol is brain-damaged 4362 * and various file systems insanely depend on the semantics of 4363 * this brain damage, the lookup of to must be the last lookup 4364 * before VOP_RENAME. 4365 */ 4366 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4367 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4368 if (error) 4369 goto abort2; 4370 4371 /* 4372 * Drop the old tvp and pick up the new one -- which might be 4373 * the same, but that doesn't matter to us. After this, tdvp 4374 * and tvp should both be locked. 4375 */ 4376 if (tvp != NULL) 4377 vrele(tvp); 4378 tvp = tnd.ni_vp; 4379 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4380 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4381 4382 /* 4383 * The old do_sys_rename had various consistency checks here 4384 * involving fvp and tvp. fvp is bogus already here, and tvp 4385 * will become bogus soon in any sensible file system, so the 4386 * only purpose in putting these checks here is to give lip 4387 * service to these screw cases and to acknowledge that they 4388 * exist, not actually to handle them, but here you go 4389 * anyway... 4390 */ 4391 4392 /* 4393 * Acknowledge that directories and non-directories aren't 4394 * suposed to mix. 4395 */ 4396 if (tvp != NULL) { 4397 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4398 error = ENOTDIR; 4399 goto abort3; 4400 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4401 error = EISDIR; 4402 goto abort3; 4403 } 4404 } 4405 4406 /* 4407 * Acknowledge some random screw case, among the dozens that 4408 * might arise. 4409 */ 4410 if (fvp == tdvp) { 4411 error = EINVAL; 4412 goto abort3; 4413 } 4414 4415 /* 4416 * Acknowledge that POSIX has a wacky screw case. 4417 * 4418 * XXX Eventually the retain flag needs to be passed on to 4419 * VOP_RENAME. 4420 */ 4421 if (fvp == tvp) { 4422 if (retain) { 4423 error = 0; 4424 goto abort3; 4425 } else if ((fdvp == tdvp) && 4426 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4427 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4428 fnd.ni_cnd.cn_namelen))) { 4429 error = 0; 4430 goto abort3; 4431 } 4432 } 4433 4434 /* 4435 * Make sure veriexec can screw us up. (But a race can screw 4436 * up veriexec, of course -- remember, fvp and (soon) tvp are 4437 * bogus.) 4438 */ 4439 #if NVERIEXEC > 0 4440 { 4441 char *f1, *f2; 4442 size_t f1_len; 4443 size_t f2_len; 4444 4445 f1_len = fnd.ni_cnd.cn_namelen + 1; 4446 f1 = kmem_alloc(f1_len, KM_SLEEP); 4447 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4448 4449 f2_len = tnd.ni_cnd.cn_namelen + 1; 4450 f2 = kmem_alloc(f2_len, KM_SLEEP); 4451 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4452 4453 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4454 4455 kmem_free(f1, f1_len); 4456 kmem_free(f2, f2_len); 4457 4458 if (error) 4459 goto abort3; 4460 } 4461 #endif /* NVERIEXEC > 0 */ 4462 4463 /* 4464 * All ready. Incant the rename vop. 4465 */ 4466 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4467 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4468 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4469 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4470 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4471 4472 /* 4473 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4474 * tdvp and tvp. But we can't assert any of that. 4475 */ 4476 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4477 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4478 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4479 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4480 4481 /* 4482 * So all we have left to do is to drop the rename lock and 4483 * destroy the pathbufs. 4484 */ 4485 VFS_RENAMELOCK_EXIT(mp); 4486 goto out2; 4487 4488 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4489 VOP_UNLOCK(tvp); 4490 abort2: VOP_UNLOCK(tdvp); 4491 VFS_RENAMELOCK_EXIT(mp); 4492 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4493 vrele(tdvp); 4494 if (tvp != NULL) 4495 vrele(tvp); 4496 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4497 vrele(fdvp); 4498 vrele(fvp); 4499 out2: pathbuf_destroy(tpb); 4500 out1: pathbuf_destroy(fpb); 4501 out0: return error; 4502 } 4503 4504 /* 4505 * Make a directory file. 4506 */ 4507 /* ARGSUSED */ 4508 int 4509 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4510 { 4511 /* { 4512 syscallarg(const char *) path; 4513 syscallarg(int) mode; 4514 } */ 4515 4516 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4517 SCARG(uap, mode), UIO_USERSPACE); 4518 } 4519 4520 int 4521 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4522 register_t *retval) 4523 { 4524 /* { 4525 syscallarg(int) fd; 4526 syscallarg(const char *) path; 4527 syscallarg(int) mode; 4528 } */ 4529 4530 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4531 SCARG(uap, mode), UIO_USERSPACE); 4532 } 4533 4534 4535 int 4536 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4537 { 4538 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4539 } 4540 4541 static int 4542 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4543 enum uio_seg seg) 4544 { 4545 struct proc *p = curlwp->l_proc; 4546 struct vnode *vp; 4547 struct vattr vattr; 4548 int error; 4549 struct pathbuf *pb; 4550 struct nameidata nd; 4551 4552 KASSERT(l != NULL || fdat == AT_FDCWD); 4553 4554 /* XXX bollocks, should pass in a pathbuf */ 4555 error = pathbuf_maybe_copyin(path, seg, &pb); 4556 if (error) { 4557 return error; 4558 } 4559 4560 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4561 4562 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4563 pathbuf_destroy(pb); 4564 return (error); 4565 } 4566 vp = nd.ni_vp; 4567 if (vp != NULL) { 4568 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4569 if (nd.ni_dvp == vp) 4570 vrele(nd.ni_dvp); 4571 else 4572 vput(nd.ni_dvp); 4573 vrele(vp); 4574 pathbuf_destroy(pb); 4575 return (EEXIST); 4576 } 4577 vattr_null(&vattr); 4578 vattr.va_type = VDIR; 4579 /* We will read cwdi->cwdi_cmask unlocked. */ 4580 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4581 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4582 if (!error) 4583 vrele(nd.ni_vp); 4584 vput(nd.ni_dvp); 4585 pathbuf_destroy(pb); 4586 return (error); 4587 } 4588 4589 /* 4590 * Remove a directory file. 4591 */ 4592 /* ARGSUSED */ 4593 int 4594 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4595 { 4596 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4597 AT_REMOVEDIR, UIO_USERSPACE); 4598 } 4599 4600 /* 4601 * Read a block of directory entries in a file system independent format. 4602 */ 4603 int 4604 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4605 { 4606 /* { 4607 syscallarg(int) fd; 4608 syscallarg(char *) buf; 4609 syscallarg(size_t) count; 4610 } */ 4611 file_t *fp; 4612 int error, done; 4613 4614 /* fd_getvnode() will use the descriptor for us */ 4615 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4616 return (error); 4617 if ((fp->f_flag & FREAD) == 0) { 4618 error = EBADF; 4619 goto out; 4620 } 4621 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4622 SCARG(uap, count), &done, l, 0, 0); 4623 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4624 *retval = done; 4625 out: 4626 fd_putfile(SCARG(uap, fd)); 4627 return (error); 4628 } 4629 4630 /* 4631 * Set the mode mask for creation of filesystem nodes. 4632 */ 4633 int 4634 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4635 { 4636 /* { 4637 syscallarg(mode_t) newmask; 4638 } */ 4639 struct proc *p = l->l_proc; 4640 struct cwdinfo *cwdi; 4641 4642 /* 4643 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4644 * important is that we serialize changes to the mask. The 4645 * rw_exit() will issue a write memory barrier on our behalf, 4646 * and force the changes out to other CPUs (as it must use an 4647 * atomic operation, draining the local CPU's store buffers). 4648 */ 4649 cwdi = p->p_cwdi; 4650 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4651 *retval = cwdi->cwdi_cmask; 4652 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4653 rw_exit(&cwdi->cwdi_lock); 4654 4655 return (0); 4656 } 4657 4658 int 4659 dorevoke(struct vnode *vp, kauth_cred_t cred) 4660 { 4661 struct vattr vattr; 4662 int error, fs_decision; 4663 4664 vn_lock(vp, LK_SHARED | LK_RETRY); 4665 error = VOP_GETATTR(vp, &vattr, cred); 4666 VOP_UNLOCK(vp); 4667 if (error != 0) 4668 return error; 4669 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4670 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4671 fs_decision); 4672 if (!error) 4673 VOP_REVOKE(vp, REVOKEALL); 4674 return (error); 4675 } 4676 4677 /* 4678 * Void all references to file by ripping underlying filesystem 4679 * away from vnode. 4680 */ 4681 /* ARGSUSED */ 4682 int 4683 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4684 { 4685 /* { 4686 syscallarg(const char *) path; 4687 } */ 4688 struct vnode *vp; 4689 int error; 4690 4691 error = namei_simple_user(SCARG(uap, path), 4692 NSM_FOLLOW_TRYEMULROOT, &vp); 4693 if (error != 0) 4694 return (error); 4695 error = dorevoke(vp, l->l_cred); 4696 vrele(vp); 4697 return (error); 4698 } 4699 4700 /* 4701 * Allocate backing store for a file, filling a hole without having to 4702 * explicitly write anything out. 4703 */ 4704 /* ARGSUSED */ 4705 int 4706 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4707 register_t *retval) 4708 { 4709 /* { 4710 syscallarg(int) fd; 4711 syscallarg(off_t) pos; 4712 syscallarg(off_t) len; 4713 } */ 4714 int fd; 4715 off_t pos, len; 4716 struct file *fp; 4717 struct vnode *vp; 4718 int error; 4719 4720 fd = SCARG(uap, fd); 4721 pos = SCARG(uap, pos); 4722 len = SCARG(uap, len); 4723 4724 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4725 *retval = EINVAL; 4726 return 0; 4727 } 4728 4729 error = fd_getvnode(fd, &fp); 4730 if (error) { 4731 *retval = error; 4732 return 0; 4733 } 4734 if ((fp->f_flag & FWRITE) == 0) { 4735 error = EBADF; 4736 goto fail; 4737 } 4738 vp = fp->f_vnode; 4739 4740 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4741 if (vp->v_type == VDIR) { 4742 error = EISDIR; 4743 } else { 4744 error = VOP_FALLOCATE(vp, pos, len); 4745 } 4746 VOP_UNLOCK(vp); 4747 4748 fail: 4749 fd_putfile(fd); 4750 *retval = error; 4751 return 0; 4752 } 4753 4754 /* 4755 * Deallocate backing store for a file, creating a hole. Also used for 4756 * invoking TRIM on disks. 4757 */ 4758 /* ARGSUSED */ 4759 int 4760 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4761 register_t *retval) 4762 { 4763 /* { 4764 syscallarg(int) fd; 4765 syscallarg(off_t) pos; 4766 syscallarg(off_t) len; 4767 } */ 4768 int fd; 4769 off_t pos, len; 4770 struct file *fp; 4771 struct vnode *vp; 4772 int error; 4773 4774 fd = SCARG(uap, fd); 4775 pos = SCARG(uap, pos); 4776 len = SCARG(uap, len); 4777 4778 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4779 return EINVAL; 4780 } 4781 4782 error = fd_getvnode(fd, &fp); 4783 if (error) { 4784 return error; 4785 } 4786 if ((fp->f_flag & FWRITE) == 0) { 4787 error = EBADF; 4788 goto fail; 4789 } 4790 vp = fp->f_vnode; 4791 4792 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4793 if (vp->v_type == VDIR) { 4794 error = EISDIR; 4795 } else { 4796 error = VOP_FDISCARD(vp, pos, len); 4797 } 4798 VOP_UNLOCK(vp); 4799 4800 fail: 4801 fd_putfile(fd); 4802 return error; 4803 } 4804