1 /* $NetBSD: vfs_syscalls.c,v 1.270 2006/09/13 10:07:42 elad Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.270 2006/09/13 10:07:42 elad Exp $"); 41 42 #include "opt_compat_netbsd.h" 43 #include "opt_compat_43.h" 44 #include "opt_fileassoc.h" 45 #include "opt_ktrace.h" 46 #include "fss.h" 47 #include "veriexec.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/namei.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/file.h> 55 #include <sys/stat.h> 56 #include <sys/vnode.h> 57 #include <sys/mount.h> 58 #include <sys/proc.h> 59 #include <sys/uio.h> 60 #include <sys/malloc.h> 61 #include <sys/kmem.h> 62 #include <sys/dirent.h> 63 #include <sys/sysctl.h> 64 #include <sys/sa.h> 65 #include <sys/syscallargs.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 #ifdef FILEASSOC 70 #include <sys/fileassoc.h> 71 #endif /* FILEASSOC */ 72 #if NVERIEXEC > 0 73 #include <sys/verified_exec.h> 74 #endif /* NVERIEXEC > 0 */ 75 #include <sys/kauth.h> 76 77 #include <miscfs/genfs/genfs.h> 78 #include <miscfs/syncfs/syncfs.h> 79 80 #ifdef COMPAT_30 81 #include "opt_nfsserver.h" 82 #include <nfs/rpcv2.h> 83 #endif 84 #include <nfs/nfsproto.h> 85 #ifdef COMPAT_30 86 #include <nfs/nfs.h> 87 #include <nfs/nfs_var.h> 88 #endif 89 90 #if NFSS > 0 91 #include <dev/fssvar.h> 92 #endif 93 94 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 95 96 static int change_dir(struct nameidata *, struct lwp *); 97 static int change_flags(struct vnode *, u_long, struct lwp *); 98 static int change_mode(struct vnode *, int, struct lwp *l); 99 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 100 static int change_utimes(struct vnode *vp, const struct timeval *, 101 struct lwp *l); 102 static int rename_files(const char *, const char *, struct lwp *, int); 103 104 void checkdirs(struct vnode *); 105 106 int dovfsusermount = 0; 107 108 /* 109 * Virtual File System System Calls 110 */ 111 112 /* 113 * Mount a file system. 114 */ 115 116 #if defined(COMPAT_09) || defined(COMPAT_43) 117 /* 118 * This table is used to maintain compatibility with 4.3BSD 119 * and NetBSD 0.9 mount syscalls. Note, the order is important! 120 * 121 * Do not modify this table. It should only contain filesystems 122 * supported by NetBSD 0.9 and 4.3BSD. 123 */ 124 const char * const mountcompatnames[] = { 125 NULL, /* 0 = MOUNT_NONE */ 126 MOUNT_FFS, /* 1 = MOUNT_UFS */ 127 MOUNT_NFS, /* 2 */ 128 MOUNT_MFS, /* 3 */ 129 MOUNT_MSDOS, /* 4 */ 130 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 131 MOUNT_FDESC, /* 6 */ 132 MOUNT_KERNFS, /* 7 */ 133 NULL, /* 8 = MOUNT_DEVFS */ 134 MOUNT_AFS, /* 9 */ 135 }; 136 const int nmountcompatnames = sizeof(mountcompatnames) / 137 sizeof(mountcompatnames[0]); 138 #endif /* COMPAT_09 || COMPAT_43 */ 139 140 /* ARGSUSED */ 141 int 142 sys_mount(struct lwp *l, void *v, register_t *retval) 143 { 144 struct sys_mount_args /* { 145 syscallarg(const char *) type; 146 syscallarg(const char *) path; 147 syscallarg(int) flags; 148 syscallarg(void *) data; 149 } */ *uap = v; 150 struct vnode *vp; 151 struct mount *mp; 152 int error, flag = 0; 153 char fstypename[MFSNAMELEN]; 154 struct vattr va; 155 struct nameidata nd; 156 struct vfsops *vfs; 157 158 /* 159 * if MNT_GETARGS is specified, it should be only flag. 160 */ 161 162 if ((SCARG(uap, flags) & MNT_GETARGS) != 0 && 163 (SCARG(uap, flags) & ~MNT_GETARGS) != 0) { 164 return EINVAL; 165 } 166 167 if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 && 168 (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 169 &l->l_acflag))) 170 return (error); 171 /* 172 * Get vnode to be covered 173 */ 174 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, 175 SCARG(uap, path), l); 176 if ((error = namei(&nd)) != 0) 177 return (error); 178 vp = nd.ni_vp; 179 /* 180 * A lookup in VFS_MOUNT might result in an attempt to 181 * lock this vnode again, so make the lock recursive. 182 */ 183 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE); 184 if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) { 185 if ((vp->v_flag & VROOT) == 0) { 186 vput(vp); 187 return (EINVAL); 188 } 189 mp = vp->v_mount; 190 flag = mp->mnt_flag; 191 vfs = mp->mnt_op; 192 /* 193 * We only allow the filesystem to be reloaded if it 194 * is currently mounted read-only. 195 */ 196 if ((SCARG(uap, flags) & MNT_RELOAD) && 197 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 198 vput(vp); 199 return (EOPNOTSUPP); /* Needs translation */ 200 } 201 /* 202 * In "highly secure" mode, don't let the caller do anything 203 * but downgrade a filesystem from read-write to read-only. 204 * (see also below; MNT_UPDATE or MNT_GETARGS is required.) 205 */ 206 if (securelevel >= 2 && 207 SCARG(uap, flags) != MNT_GETARGS && 208 SCARG(uap, flags) != 209 (mp->mnt_flag | MNT_RDONLY | 210 MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) { 211 vput(vp); 212 return (EPERM); 213 } 214 mp->mnt_flag |= SCARG(uap, flags) & 215 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS); 216 /* 217 * Only root, or the user that did the original mount is 218 * permitted to update it. 219 */ 220 if ((mp->mnt_flag & MNT_GETARGS) == 0 && 221 mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred) && 222 (error = kauth_authorize_generic(l->l_cred, 223 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) { 224 vput(vp); 225 return (error); 226 } 227 /* 228 * Do not allow NFS export by non-root users. For non-root 229 * users, silently enforce MNT_NOSUID and MNT_NODEV, and 230 * MNT_NOEXEC if mount point is already MNT_NOEXEC. 231 */ 232 if (kauth_cred_geteuid(l->l_cred) != 0) { 233 if (SCARG(uap, flags) & MNT_EXPORTED) { 234 vput(vp); 235 return (EPERM); 236 } 237 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; 238 if (flag & MNT_NOEXEC) 239 SCARG(uap, flags) |= MNT_NOEXEC; 240 } 241 if (vfs_busy(mp, LK_NOWAIT, 0)) { 242 vput(vp); 243 return (EPERM); 244 } 245 goto update; 246 } else { 247 if (securelevel >= 2) { 248 vput(vp); 249 return (EPERM); 250 } 251 } 252 /* 253 * If the user is not root, ensure that they own the directory 254 * onto which we are attempting to mount. 255 */ 256 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 || 257 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 258 (error = kauth_authorize_generic(l->l_cred, 259 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)) { 260 vput(vp); 261 return (error); 262 } 263 /* 264 * Do not allow NFS export by non-root users. For non-root users, 265 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the 266 * mount point is already MNT_NOEXEC. 267 */ 268 if (kauth_cred_geteuid(l->l_cred) != 0) { 269 if (SCARG(uap, flags) & MNT_EXPORTED) { 270 vput(vp); 271 return (EPERM); 272 } 273 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; 274 if (vp->v_mount->mnt_flag & MNT_NOEXEC) 275 SCARG(uap, flags) |= MNT_NOEXEC; 276 } 277 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) { 278 vput(vp); 279 return (error); 280 } 281 if (vp->v_type != VDIR) { 282 vput(vp); 283 return (ENOTDIR); 284 } 285 error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL); 286 if (error) { 287 #if defined(COMPAT_09) || defined(COMPAT_43) 288 /* 289 * Historically, filesystem types were identified by numbers. 290 * If we get an integer for the filesystem type instead of a 291 * string, we check to see if it matches one of the historic 292 * filesystem types. 293 */ 294 u_long fsindex = (u_long)SCARG(uap, type); 295 if (fsindex >= nmountcompatnames || 296 mountcompatnames[fsindex] == NULL) { 297 vput(vp); 298 return (ENODEV); 299 } 300 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN); 301 #else 302 vput(vp); 303 return (error); 304 #endif 305 } 306 #ifdef COMPAT_10 307 /* Accept `ufs' as an alias for `ffs'. */ 308 if (!strncmp(fstypename, "ufs", MFSNAMELEN)) 309 strncpy(fstypename, "ffs", MFSNAMELEN); 310 #endif 311 if ((vfs = vfs_getopsbyname(fstypename)) == NULL) { 312 vput(vp); 313 return (ENODEV); 314 } 315 if (vp->v_mountedhere != NULL) { 316 vput(vp); 317 return (EBUSY); 318 } 319 320 /* 321 * Allocate and initialize the file system. 322 */ 323 mp = (struct mount *)malloc((u_long)sizeof(struct mount), 324 M_MOUNT, M_WAITOK); 325 memset((char *)mp, 0, (u_long)sizeof(struct mount)); 326 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 327 simple_lock_init(&mp->mnt_slock); 328 (void)vfs_busy(mp, LK_NOWAIT, 0); 329 mp->mnt_op = vfs; 330 vfs->vfs_refcount++; 331 mp->mnt_vnodecovered = vp; 332 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 333 mp->mnt_unmounter = NULL; 334 mp->mnt_leaf = mp; 335 336 /* 337 * The underlying file system may refuse the mount for 338 * various reasons. Allow the user to force it to happen. 339 */ 340 mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE; 341 update: 342 if ((SCARG(uap, flags) & MNT_GETARGS) == 0) { 343 /* 344 * Set the mount level flags. 345 */ 346 if (SCARG(uap, flags) & MNT_RDONLY) 347 mp->mnt_flag |= MNT_RDONLY; 348 else if (mp->mnt_flag & MNT_RDONLY) 349 mp->mnt_iflag |= IMNT_WANTRDWR; 350 mp->mnt_flag &= 351 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 352 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 353 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP); 354 mp->mnt_flag |= SCARG(uap, flags) & 355 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 356 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 357 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 358 MNT_IGNORE); 359 } 360 /* 361 * Mount the filesystem. 362 */ 363 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l); 364 if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) { 365 #if defined(COMPAT_30) && defined(NFSSERVER) 366 if (mp->mnt_flag & MNT_UPDATE && error != 0) { 367 int error2; 368 369 /* Update failed; let's try and see if it was an 370 * export request. */ 371 error2 = nfs_update_exports_30(mp, SCARG(uap, path), 372 SCARG(uap, data), l); 373 374 /* Only update error code if the export request was 375 * understood but some problem occurred while 376 * processing it. */ 377 if (error2 != EJUSTRETURN) 378 error = error2; 379 } 380 #endif 381 if (mp->mnt_iflag & IMNT_WANTRDWR) 382 mp->mnt_flag &= ~MNT_RDONLY; 383 if (error) 384 mp->mnt_flag = flag; 385 mp->mnt_flag &=~ 386 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS); 387 mp->mnt_iflag &=~ IMNT_WANTRDWR; 388 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 389 if (mp->mnt_syncer == NULL) 390 error = vfs_allocate_syncvnode(mp); 391 } else { 392 if (mp->mnt_syncer != NULL) 393 vfs_deallocate_syncvnode(mp); 394 } 395 vfs_unbusy(mp); 396 VOP_UNLOCK(vp, 0); 397 vrele(vp); 398 return (error); 399 } 400 /* 401 * Put the new filesystem on the mount list after root. 402 */ 403 cache_purge(vp); 404 if (!error) { 405 mp->mnt_flag &=~ 406 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS); 407 mp->mnt_iflag &=~ IMNT_WANTRDWR; 408 vp->v_mountedhere = mp; 409 simple_lock(&mountlist_slock); 410 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 411 simple_unlock(&mountlist_slock); 412 checkdirs(vp); 413 VOP_UNLOCK(vp, 0); 414 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 415 error = vfs_allocate_syncvnode(mp); 416 vfs_unbusy(mp); 417 (void) VFS_STATVFS(mp, &mp->mnt_stat, l); 418 if ((error = VFS_START(mp, 0, l))) 419 vrele(vp); 420 } else { 421 vp->v_mountedhere = (struct mount *)0; 422 vfs->vfs_refcount--; 423 vfs_unbusy(mp); 424 free(mp, M_MOUNT); 425 vput(vp); 426 } 427 return (error); 428 } 429 430 /* 431 * Scan all active processes to see if any of them have a current 432 * or root directory onto which the new filesystem has just been 433 * mounted. If so, replace them with the new mount point. 434 */ 435 void 436 checkdirs(struct vnode *olddp) 437 { 438 struct cwdinfo *cwdi; 439 struct vnode *newdp; 440 struct proc *p; 441 442 if (olddp->v_usecount == 1) 443 return; 444 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 445 panic("mount: lost mount"); 446 proclist_lock_read(); 447 PROCLIST_FOREACH(p, &allproc) { 448 cwdi = p->p_cwdi; 449 if (!cwdi) 450 continue; 451 if (cwdi->cwdi_cdir == olddp) { 452 vrele(cwdi->cwdi_cdir); 453 VREF(newdp); 454 cwdi->cwdi_cdir = newdp; 455 } 456 if (cwdi->cwdi_rdir == olddp) { 457 vrele(cwdi->cwdi_rdir); 458 VREF(newdp); 459 cwdi->cwdi_rdir = newdp; 460 } 461 } 462 proclist_unlock_read(); 463 if (rootvnode == olddp) { 464 vrele(rootvnode); 465 VREF(newdp); 466 rootvnode = newdp; 467 } 468 vput(newdp); 469 } 470 471 /* 472 * Unmount a file system. 473 * 474 * Note: unmount takes a path to the vnode mounted on as argument, 475 * not special file (as before). 476 */ 477 /* ARGSUSED */ 478 int 479 sys_unmount(struct lwp *l, void *v, register_t *retval) 480 { 481 struct sys_unmount_args /* { 482 syscallarg(const char *) path; 483 syscallarg(int) flags; 484 } */ *uap = v; 485 struct vnode *vp; 486 struct mount *mp; 487 int error; 488 struct nameidata nd; 489 490 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 491 SCARG(uap, path), l); 492 if ((error = namei(&nd)) != 0) 493 return (error); 494 vp = nd.ni_vp; 495 mp = vp->v_mount; 496 497 /* 498 * Only root, or the user that did the original mount is 499 * permitted to unmount this filesystem. 500 */ 501 if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred)) && 502 (error = kauth_authorize_generic(l->l_cred, 503 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) { 504 vput(vp); 505 return (error); 506 } 507 508 /* 509 * Don't allow unmounting the root file system. 510 */ 511 if (mp->mnt_flag & MNT_ROOTFS) { 512 vput(vp); 513 return (EINVAL); 514 } 515 516 /* 517 * Must be the root of the filesystem 518 */ 519 if ((vp->v_flag & VROOT) == 0) { 520 vput(vp); 521 return (EINVAL); 522 } 523 vput(vp); 524 525 /* 526 * XXX Freeze syncer. Must do this before locking the 527 * mount point. See dounmount() for details. 528 */ 529 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL); 530 531 if (vfs_busy(mp, 0, 0)) { 532 lockmgr(&syncer_lock, LK_RELEASE, NULL); 533 return (EBUSY); 534 } 535 536 return (dounmount(mp, SCARG(uap, flags), l)); 537 } 538 539 /* 540 * Do the actual file system unmount. File system is assumed to have been 541 * marked busy by the caller. 542 */ 543 int 544 dounmount(struct mount *mp, int flags, struct lwp *l) 545 { 546 struct vnode *coveredvp; 547 int error; 548 int async; 549 int used_syncer; 550 551 #if NVERIEXEC > 0 552 if (!doing_shutdown) { 553 if (veriexec_strict >= VERIEXEC_LOCKDOWN) { 554 printf("Veriexec: Lockdown mode, preventing unmount of" 555 " \"%s\". (uid=%u)\n", mp->mnt_stat.f_mntonname, 556 kauth_cred_getuid(l->l_cred)); 557 return (EPERM); 558 } 559 560 if (veriexec_strict == VERIEXEC_IPS) { 561 struct veriexec_table_entry *vte; 562 563 /* Check if we have fingerprints on mount. */ 564 vte = fileassoc_tabledata_lookup(mp, veriexec_hook); 565 if ((vte != NULL) && (vte->vte_count > 0)) { 566 printf("Veriexec: IPS mode, preventing unmount" 567 " of \"%s\" with monitored files. " 568 "(uid=%u)\n", mp->mnt_stat.f_mntonname, 569 kauth_cred_getuid(l->l_cred)); 570 return (EPERM); 571 } 572 } 573 } 574 #endif /* NVERIEXEC > 0 */ 575 576 #ifdef FILEASSOC 577 (void)fileassoc_table_delete(mp); 578 #endif /* FILEASSOC */ 579 580 simple_lock(&mountlist_slock); 581 vfs_unbusy(mp); 582 used_syncer = (mp->mnt_syncer != NULL); 583 584 /* 585 * XXX Syncer must be frozen when we get here. This should really 586 * be done on a per-mountpoint basis, but especially the softdep 587 * code possibly called from the syncer doesn't exactly work on a 588 * per-mountpoint basis, so the softdep code would become a maze 589 * of vfs_busy() calls. 590 * 591 * The caller of dounmount() must acquire syncer_lock because 592 * the syncer itself acquires locks in syncer_lock -> vfs_busy 593 * order, and we must preserve that order to avoid deadlock. 594 * 595 * So, if the file system did not use the syncer, now is 596 * the time to release the syncer_lock. 597 */ 598 if (used_syncer == 0) 599 lockmgr(&syncer_lock, LK_RELEASE, NULL); 600 601 mp->mnt_iflag |= IMNT_UNMOUNT; 602 mp->mnt_unmounter = l; 603 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock); 604 vn_start_write(NULL, &mp, V_WAIT); 605 606 async = mp->mnt_flag & MNT_ASYNC; 607 mp->mnt_flag &= ~MNT_ASYNC; 608 cache_purgevfs(mp); /* remove cache entries for this file sys */ 609 if (mp->mnt_syncer != NULL) 610 vfs_deallocate_syncvnode(mp); 611 error = 0; 612 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 613 #if NFSS > 0 614 error = fss_umount_hook(mp, (flags & MNT_FORCE)); 615 #endif 616 if (error == 0) 617 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l); 618 } 619 if (error == 0 || (flags & MNT_FORCE)) 620 error = VFS_UNMOUNT(mp, flags, l); 621 vn_finished_write(mp, 0); 622 simple_lock(&mountlist_slock); 623 if (error) { 624 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 625 (void) vfs_allocate_syncvnode(mp); 626 mp->mnt_iflag &= ~IMNT_UNMOUNT; 627 mp->mnt_unmounter = NULL; 628 mp->mnt_flag |= async; 629 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, 630 &mountlist_slock); 631 if (used_syncer) 632 lockmgr(&syncer_lock, LK_RELEASE, NULL); 633 simple_lock(&mp->mnt_slock); 634 while (mp->mnt_wcnt > 0) { 635 wakeup(mp); 636 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", 637 0, &mp->mnt_slock); 638 } 639 simple_unlock(&mp->mnt_slock); 640 return (error); 641 } 642 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 643 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 644 coveredvp->v_mountedhere = NULL; 645 vrele(coveredvp); 646 } 647 mp->mnt_op->vfs_refcount--; 648 if (LIST_FIRST(&mp->mnt_vnodelist) != NULL) 649 panic("unmount: dangling vnode"); 650 mp->mnt_iflag |= IMNT_GONE; 651 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock); 652 if (used_syncer) 653 lockmgr(&syncer_lock, LK_RELEASE, NULL); 654 simple_lock(&mp->mnt_slock); 655 while (mp->mnt_wcnt > 0) { 656 wakeup(mp); 657 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock); 658 } 659 simple_unlock(&mp->mnt_slock); 660 vfs_hooks_unmount(mp); 661 free(mp, M_MOUNT); 662 return (0); 663 } 664 665 /* 666 * Sync each mounted filesystem. 667 */ 668 #ifdef DEBUG 669 int syncprt = 0; 670 struct ctldebug debug0 = { "syncprt", &syncprt }; 671 #endif 672 673 /* ARGSUSED */ 674 int 675 sys_sync(struct lwp *l, void *v, register_t *retval) 676 { 677 struct mount *mp, *nmp; 678 int asyncflag; 679 680 if (l == NULL) 681 l = &lwp0; 682 683 simple_lock(&mountlist_slock); 684 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 685 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 686 nmp = mp->mnt_list.cqe_prev; 687 continue; 688 } 689 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 690 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 691 asyncflag = mp->mnt_flag & MNT_ASYNC; 692 mp->mnt_flag &= ~MNT_ASYNC; 693 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l); 694 if (asyncflag) 695 mp->mnt_flag |= MNT_ASYNC; 696 vn_finished_write(mp, 0); 697 } 698 simple_lock(&mountlist_slock); 699 nmp = mp->mnt_list.cqe_prev; 700 vfs_unbusy(mp); 701 702 } 703 simple_unlock(&mountlist_slock); 704 #ifdef DEBUG 705 if (syncprt) 706 vfs_bufstats(); 707 #endif /* DEBUG */ 708 return (0); 709 } 710 711 /* 712 * Change filesystem quotas. 713 */ 714 /* ARGSUSED */ 715 int 716 sys_quotactl(struct lwp *l, void *v, register_t *retval) 717 { 718 struct sys_quotactl_args /* { 719 syscallarg(const char *) path; 720 syscallarg(int) cmd; 721 syscallarg(int) uid; 722 syscallarg(caddr_t) arg; 723 } */ *uap = v; 724 struct mount *mp; 725 int error; 726 struct nameidata nd; 727 728 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 729 if ((error = namei(&nd)) != 0) 730 return (error); 731 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH); 732 vrele(nd.ni_vp); 733 if (error) 734 return (error); 735 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 736 SCARG(uap, arg), l); 737 vn_finished_write(mp, 0); 738 return (error); 739 } 740 741 int 742 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 743 int root) 744 { 745 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 746 int error = 0; 747 748 /* 749 * If MNT_NOWAIT or MNT_LAZY is specified, do not 750 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 751 * overrides MNT_NOWAIT. 752 */ 753 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 754 (flags != MNT_WAIT && flags != 0)) { 755 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 756 goto done; 757 } 758 759 /* Get the filesystem stats now */ 760 memset(sp, 0, sizeof(*sp)); 761 if ((error = VFS_STATVFS(mp, sp, l)) != 0) { 762 return error; 763 } 764 765 if (cwdi->cwdi_rdir == NULL) 766 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 767 done: 768 if (cwdi->cwdi_rdir != NULL) { 769 size_t len; 770 char *bp; 771 char *path = PNBUF_GET(); 772 if (!path) 773 return ENOMEM; 774 775 bp = path + MAXPATHLEN; 776 *--bp = '\0'; 777 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 778 MAXPATHLEN / 2, 0, l); 779 if (error) { 780 PNBUF_PUT(path); 781 return error; 782 } 783 len = strlen(bp); 784 /* 785 * for mount points that are below our root, we can see 786 * them, so we fix up the pathname and return them. The 787 * rest we cannot see, so we don't allow viewing the 788 * data. 789 */ 790 if (strncmp(bp, sp->f_mntonname, len) == 0) { 791 strlcpy(sp->f_mntonname, &sp->f_mntonname[len], 792 sizeof(sp->f_mntonname)); 793 if (sp->f_mntonname[0] == '\0') 794 (void)strlcpy(sp->f_mntonname, "/", 795 sizeof(sp->f_mntonname)); 796 } else { 797 if (root) 798 (void)strlcpy(sp->f_mntonname, "/", 799 sizeof(sp->f_mntonname)); 800 else 801 error = EPERM; 802 } 803 PNBUF_PUT(path); 804 } 805 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 806 return error; 807 } 808 809 /* 810 * Get filesystem statistics. 811 */ 812 /* ARGSUSED */ 813 int 814 sys_statvfs1(struct lwp *l, void *v, register_t *retval) 815 { 816 struct sys_statvfs1_args /* { 817 syscallarg(const char *) path; 818 syscallarg(struct statvfs *) buf; 819 syscallarg(int) flags; 820 } */ *uap = v; 821 struct mount *mp; 822 struct statvfs *sb; 823 int error; 824 struct nameidata nd; 825 826 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 827 if ((error = namei(&nd)) != 0) 828 return error; 829 mp = nd.ni_vp->v_mount; 830 vrele(nd.ni_vp); 831 sb = STATVFSBUF_GET(); 832 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1); 833 if (error == 0) { 834 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 835 } 836 STATVFSBUF_PUT(sb); 837 return error; 838 } 839 840 /* 841 * Get filesystem statistics. 842 */ 843 /* ARGSUSED */ 844 int 845 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval) 846 { 847 struct sys_fstatvfs1_args /* { 848 syscallarg(int) fd; 849 syscallarg(struct statvfs *) buf; 850 syscallarg(int) flags; 851 } */ *uap = v; 852 struct proc *p = l->l_proc; 853 struct file *fp; 854 struct mount *mp; 855 struct statvfs *sb; 856 int error; 857 858 /* getvnode() will use the descriptor for us */ 859 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 860 return (error); 861 mp = ((struct vnode *)fp->f_data)->v_mount; 862 sb = STATVFSBUF_GET(); 863 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0) 864 goto out; 865 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 866 out: 867 FILE_UNUSE(fp, l); 868 STATVFSBUF_PUT(sb); 869 return error; 870 } 871 872 873 /* 874 * Get statistics on all filesystems. 875 */ 876 int 877 sys_getvfsstat(struct lwp *l, void *v, register_t *retval) 878 { 879 struct sys_getvfsstat_args /* { 880 syscallarg(struct statvfs *) buf; 881 syscallarg(size_t) bufsize; 882 syscallarg(int) flags; 883 } */ *uap = v; 884 int root = 0; 885 struct proc *p = l->l_proc; 886 struct mount *mp, *nmp; 887 struct statvfs *sb; 888 struct statvfs *sfsp; 889 size_t count, maxcount; 890 int error = 0; 891 892 sb = STATVFSBUF_GET(); 893 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs); 894 sfsp = SCARG(uap, buf); 895 simple_lock(&mountlist_slock); 896 count = 0; 897 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 898 mp = nmp) { 899 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 900 nmp = CIRCLEQ_NEXT(mp, mnt_list); 901 continue; 902 } 903 if (sfsp && count < maxcount) { 904 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0); 905 if (error) { 906 simple_lock(&mountlist_slock); 907 nmp = CIRCLEQ_NEXT(mp, mnt_list); 908 vfs_unbusy(mp); 909 continue; 910 } 911 error = copyout(sb, sfsp, sizeof(*sfsp)); 912 if (error) { 913 vfs_unbusy(mp); 914 goto out; 915 } 916 sfsp++; 917 root |= strcmp(sb->f_mntonname, "/") == 0; 918 } 919 count++; 920 simple_lock(&mountlist_slock); 921 nmp = CIRCLEQ_NEXT(mp, mnt_list); 922 vfs_unbusy(mp); 923 } 924 simple_unlock(&mountlist_slock); 925 if (root == 0 && p->p_cwdi->cwdi_rdir) { 926 /* 927 * fake a root entry 928 */ 929 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l, 930 SCARG(uap, flags), 1)) != 0) 931 goto out; 932 if (sfsp) 933 error = copyout(sb, sfsp, sizeof(*sfsp)); 934 count++; 935 } 936 if (sfsp && count > maxcount) 937 *retval = maxcount; 938 else 939 *retval = count; 940 out: 941 STATVFSBUF_PUT(sb); 942 return error; 943 } 944 945 /* 946 * Change current working directory to a given file descriptor. 947 */ 948 /* ARGSUSED */ 949 int 950 sys_fchdir(struct lwp *l, void *v, register_t *retval) 951 { 952 struct sys_fchdir_args /* { 953 syscallarg(int) fd; 954 } */ *uap = v; 955 struct proc *p = l->l_proc; 956 struct filedesc *fdp = p->p_fd; 957 struct cwdinfo *cwdi = p->p_cwdi; 958 struct vnode *vp, *tdp; 959 struct mount *mp; 960 struct file *fp; 961 int error; 962 963 /* getvnode() will use the descriptor for us */ 964 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 965 return (error); 966 vp = (struct vnode *)fp->f_data; 967 968 VREF(vp); 969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 970 if (vp->v_type != VDIR) 971 error = ENOTDIR; 972 else 973 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 974 while (!error && (mp = vp->v_mountedhere) != NULL) { 975 if (vfs_busy(mp, 0, 0)) 976 continue; 977 error = VFS_ROOT(mp, &tdp); 978 vfs_unbusy(mp); 979 if (error) 980 break; 981 vput(vp); 982 vp = tdp; 983 } 984 if (error) { 985 vput(vp); 986 goto out; 987 } 988 VOP_UNLOCK(vp, 0); 989 990 /* 991 * Disallow changing to a directory not under the process's 992 * current root directory (if there is one). 993 */ 994 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 995 vrele(vp); 996 error = EPERM; /* operation not permitted */ 997 goto out; 998 } 999 1000 vrele(cwdi->cwdi_cdir); 1001 cwdi->cwdi_cdir = vp; 1002 out: 1003 FILE_UNUSE(fp, l); 1004 return (error); 1005 } 1006 1007 /* 1008 * Change this process's notion of the root directory to a given file 1009 * descriptor. 1010 */ 1011 int 1012 sys_fchroot(struct lwp *l, void *v, register_t *retval) 1013 { 1014 struct sys_fchroot_args *uap = v; 1015 struct proc *p = l->l_proc; 1016 struct filedesc *fdp = p->p_fd; 1017 struct cwdinfo *cwdi = p->p_cwdi; 1018 struct vnode *vp; 1019 struct file *fp; 1020 int error; 1021 1022 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1023 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1024 return error; 1025 /* getvnode() will use the descriptor for us */ 1026 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 1027 return error; 1028 vp = (struct vnode *) fp->f_data; 1029 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1030 if (vp->v_type != VDIR) 1031 error = ENOTDIR; 1032 else 1033 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 1034 VOP_UNLOCK(vp, 0); 1035 if (error) 1036 goto out; 1037 VREF(vp); 1038 1039 /* 1040 * Prevent escaping from chroot by putting the root under 1041 * the working directory. Silently chdir to / if we aren't 1042 * already there. 1043 */ 1044 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1045 /* 1046 * XXX would be more failsafe to change directory to a 1047 * deadfs node here instead 1048 */ 1049 vrele(cwdi->cwdi_cdir); 1050 VREF(vp); 1051 cwdi->cwdi_cdir = vp; 1052 } 1053 1054 if (cwdi->cwdi_rdir != NULL) 1055 vrele(cwdi->cwdi_rdir); 1056 cwdi->cwdi_rdir = vp; 1057 out: 1058 FILE_UNUSE(fp, l); 1059 return (error); 1060 } 1061 1062 /* 1063 * Change current working directory (``.''). 1064 */ 1065 /* ARGSUSED */ 1066 int 1067 sys_chdir(struct lwp *l, void *v, register_t *retval) 1068 { 1069 struct sys_chdir_args /* { 1070 syscallarg(const char *) path; 1071 } */ *uap = v; 1072 struct proc *p = l->l_proc; 1073 struct cwdinfo *cwdi = p->p_cwdi; 1074 int error; 1075 struct nameidata nd; 1076 1077 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 1078 SCARG(uap, path), l); 1079 if ((error = change_dir(&nd, l)) != 0) 1080 return (error); 1081 vrele(cwdi->cwdi_cdir); 1082 cwdi->cwdi_cdir = nd.ni_vp; 1083 return (0); 1084 } 1085 1086 /* 1087 * Change notion of root (``/'') directory. 1088 */ 1089 /* ARGSUSED */ 1090 int 1091 sys_chroot(struct lwp *l, void *v, register_t *retval) 1092 { 1093 struct sys_chroot_args /* { 1094 syscallarg(const char *) path; 1095 } */ *uap = v; 1096 struct proc *p = l->l_proc; 1097 struct cwdinfo *cwdi = p->p_cwdi; 1098 struct vnode *vp; 1099 int error; 1100 struct nameidata nd; 1101 1102 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1103 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1104 return (error); 1105 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 1106 SCARG(uap, path), l); 1107 if ((error = change_dir(&nd, l)) != 0) 1108 return (error); 1109 if (cwdi->cwdi_rdir != NULL) 1110 vrele(cwdi->cwdi_rdir); 1111 vp = nd.ni_vp; 1112 cwdi->cwdi_rdir = vp; 1113 1114 /* 1115 * Prevent escaping from chroot by putting the root under 1116 * the working directory. Silently chdir to / if we aren't 1117 * already there. 1118 */ 1119 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1120 /* 1121 * XXX would be more failsafe to change directory to a 1122 * deadfs node here instead 1123 */ 1124 vrele(cwdi->cwdi_cdir); 1125 VREF(vp); 1126 cwdi->cwdi_cdir = vp; 1127 } 1128 1129 return (0); 1130 } 1131 1132 /* 1133 * Common routine for chroot and chdir. 1134 */ 1135 static int 1136 change_dir(struct nameidata *ndp, struct lwp *l) 1137 { 1138 struct vnode *vp; 1139 int error; 1140 1141 if ((error = namei(ndp)) != 0) 1142 return (error); 1143 vp = ndp->ni_vp; 1144 if (vp->v_type != VDIR) 1145 error = ENOTDIR; 1146 else 1147 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l); 1148 1149 if (error) 1150 vput(vp); 1151 else 1152 VOP_UNLOCK(vp, 0); 1153 return (error); 1154 } 1155 1156 /* 1157 * Check permissions, allocate an open file structure, 1158 * and call the device open routine if any. 1159 */ 1160 int 1161 sys_open(struct lwp *l, void *v, register_t *retval) 1162 { 1163 struct sys_open_args /* { 1164 syscallarg(const char *) path; 1165 syscallarg(int) flags; 1166 syscallarg(int) mode; 1167 } */ *uap = v; 1168 struct proc *p = l->l_proc; 1169 struct cwdinfo *cwdi = p->p_cwdi; 1170 struct filedesc *fdp = p->p_fd; 1171 struct file *fp; 1172 struct vnode *vp; 1173 int flags, cmode; 1174 int type, indx, error; 1175 struct flock lf; 1176 struct nameidata nd; 1177 1178 flags = FFLAGS(SCARG(uap, flags)); 1179 if ((flags & (FREAD | FWRITE)) == 0) 1180 return (EINVAL); 1181 /* falloc() will use the file descriptor for us */ 1182 if ((error = falloc(l, &fp, &indx)) != 0) 1183 return (error); 1184 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1185 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 1186 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1187 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1188 FILE_UNUSE(fp, l); 1189 fdp->fd_ofiles[indx] = NULL; 1190 ffree(fp); 1191 if ((error == EDUPFD || error == EMOVEFD) && 1192 l->l_dupfd >= 0 && /* XXX from fdopen */ 1193 (error = 1194 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) { 1195 *retval = indx; 1196 return (0); 1197 } 1198 if (error == ERESTART) 1199 error = EINTR; 1200 fdremove(fdp, indx); 1201 return (error); 1202 } 1203 l->l_dupfd = 0; 1204 vp = nd.ni_vp; 1205 fp->f_flag = flags & FMASK; 1206 fp->f_type = DTYPE_VNODE; 1207 fp->f_ops = &vnops; 1208 fp->f_data = vp; 1209 if (flags & (O_EXLOCK | O_SHLOCK)) { 1210 lf.l_whence = SEEK_SET; 1211 lf.l_start = 0; 1212 lf.l_len = 0; 1213 if (flags & O_EXLOCK) 1214 lf.l_type = F_WRLCK; 1215 else 1216 lf.l_type = F_RDLCK; 1217 type = F_FLOCK; 1218 if ((flags & FNONBLOCK) == 0) 1219 type |= F_WAIT; 1220 VOP_UNLOCK(vp, 0); 1221 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1222 if (error) { 1223 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1224 FILE_UNUSE(fp, l); 1225 ffree(fp); 1226 fdremove(fdp, indx); 1227 return (error); 1228 } 1229 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1230 fp->f_flag |= FHASLOCK; 1231 } 1232 VOP_UNLOCK(vp, 0); 1233 *retval = indx; 1234 FILE_SET_MATURE(fp); 1235 FILE_UNUSE(fp, l); 1236 return (0); 1237 } 1238 1239 static void 1240 vfs__fhfree(fhandle_t *fhp) 1241 { 1242 size_t fhsize; 1243 1244 if (fhp == NULL) { 1245 return; 1246 } 1247 fhsize = FHANDLE_SIZE(fhp); 1248 kmem_free(fhp, fhsize); 1249 } 1250 1251 /* 1252 * vfs_composefh: compose a filehandle. 1253 */ 1254 1255 int 1256 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1257 { 1258 struct mount *mp; 1259 struct fid *fidp; 1260 int error; 1261 size_t needfhsize; 1262 size_t fidsize; 1263 1264 mp = vp->v_mount; 1265 if (mp->mnt_op->vfs_vptofh == NULL) { 1266 return EOPNOTSUPP; 1267 } 1268 fidp = NULL; 1269 if (*fh_size < FHANDLE_SIZE_MIN) { 1270 fidsize = 0; 1271 } else { 1272 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1273 if (fhp != NULL) { 1274 memset(fhp, 0, *fh_size); 1275 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1276 fidp = &fhp->fh_fid; 1277 } 1278 } 1279 error = VFS_VPTOFH(vp, fidp, &fidsize); 1280 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1281 if (error == 0 && *fh_size < needfhsize) { 1282 error = E2BIG; 1283 } 1284 *fh_size = needfhsize; 1285 return error; 1286 } 1287 1288 int 1289 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1290 { 1291 struct mount *mp; 1292 fhandle_t *fhp; 1293 size_t fhsize; 1294 size_t fidsize; 1295 int error; 1296 1297 *fhpp = NULL; 1298 mp = vp->v_mount; 1299 if (mp->mnt_op->vfs_vptofh == NULL) { 1300 error = EOPNOTSUPP; 1301 goto out; 1302 } 1303 fidsize = 0; 1304 error = VFS_VPTOFH(vp, NULL, &fidsize); 1305 KASSERT(error != 0); 1306 if (error != E2BIG) { 1307 goto out; 1308 } 1309 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1310 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1311 if (fhp == NULL) { 1312 error = ENOMEM; 1313 goto out; 1314 } 1315 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1316 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1317 if (error == 0) { 1318 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1319 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1320 *fhpp = fhp; 1321 } else { 1322 kmem_free(fhp, fhsize); 1323 } 1324 out: 1325 return error; 1326 } 1327 1328 void 1329 vfs_composefh_free(fhandle_t *fhp) 1330 { 1331 1332 vfs__fhfree(fhp); 1333 } 1334 1335 /* 1336 * vfs_fhtovp: lookup a vnode by a filehandle. 1337 */ 1338 1339 int 1340 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1341 { 1342 struct mount *mp; 1343 int error; 1344 1345 *vpp = NULL; 1346 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1347 if (mp == NULL) { 1348 error = ESTALE; 1349 goto out; 1350 } 1351 if (mp->mnt_op->vfs_fhtovp == NULL) { 1352 error = EOPNOTSUPP; 1353 goto out; 1354 } 1355 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1356 out: 1357 return error; 1358 } 1359 1360 /* 1361 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1362 * the needed size. 1363 */ 1364 1365 int 1366 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1367 { 1368 fhandle_t *fhp; 1369 int error; 1370 1371 *fhpp = NULL; 1372 if (fhsize > FHANDLE_SIZE_MAX) { 1373 return EINVAL; 1374 } 1375 if (fhsize < FHANDLE_SIZE_MIN) { 1376 return EINVAL; 1377 } 1378 again: 1379 fhp = kmem_alloc(fhsize, KM_SLEEP); 1380 if (fhp == NULL) { 1381 return ENOMEM; 1382 } 1383 error = copyin(ufhp, fhp, fhsize); 1384 if (error == 0) { 1385 /* XXX this check shouldn't be here */ 1386 if (FHANDLE_SIZE(fhp) == fhsize) { 1387 *fhpp = fhp; 1388 return 0; 1389 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1390 /* 1391 * a kludge for nfsv2 padded handles. 1392 */ 1393 size_t sz; 1394 1395 sz = FHANDLE_SIZE(fhp); 1396 kmem_free(fhp, fhsize); 1397 fhsize = sz; 1398 goto again; 1399 } else { 1400 /* 1401 * userland told us wrong size. 1402 */ 1403 error = EINVAL; 1404 } 1405 } 1406 kmem_free(fhp, fhsize); 1407 return error; 1408 } 1409 1410 void 1411 vfs_copyinfh_free(fhandle_t *fhp) 1412 { 1413 1414 vfs__fhfree(fhp); 1415 } 1416 1417 /* 1418 * Get file handle system call 1419 */ 1420 int 1421 sys___getfh30(struct lwp *l, void *v, register_t *retval) 1422 { 1423 struct sys___getfh30_args /* { 1424 syscallarg(char *) fname; 1425 syscallarg(fhandle_t *) fhp; 1426 syscallarg(size_t *) fh_size; 1427 } */ *uap = v; 1428 struct vnode *vp; 1429 fhandle_t *fh; 1430 int error; 1431 struct nameidata nd; 1432 size_t sz; 1433 size_t usz; 1434 1435 /* 1436 * Must be super user 1437 */ 1438 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1439 0, NULL, NULL, NULL); 1440 if (error) 1441 return (error); 1442 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 1443 SCARG(uap, fname), l); 1444 error = namei(&nd); 1445 if (error) 1446 return (error); 1447 vp = nd.ni_vp; 1448 error = vfs_composefh_alloc(vp, &fh); 1449 vput(vp); 1450 if (error != 0) { 1451 goto out; 1452 } 1453 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1454 if (error != 0) { 1455 goto out; 1456 } 1457 sz = FHANDLE_SIZE(fh); 1458 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1459 if (error != 0) { 1460 goto out; 1461 } 1462 if (usz >= sz) { 1463 error = copyout(fh, SCARG(uap, fhp), sz); 1464 } else { 1465 error = E2BIG; 1466 } 1467 out: 1468 vfs_composefh_free(fh); 1469 return (error); 1470 } 1471 1472 /* 1473 * Open a file given a file handle. 1474 * 1475 * Check permissions, allocate an open file structure, 1476 * and call the device open routine if any. 1477 */ 1478 1479 int 1480 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1481 register_t *retval) 1482 { 1483 struct filedesc *fdp = l->l_proc->p_fd; 1484 struct file *fp; 1485 struct vnode *vp = NULL; 1486 struct mount *mp; 1487 kauth_cred_t cred = l->l_cred; 1488 struct file *nfp; 1489 int type, indx, error=0; 1490 struct flock lf; 1491 struct vattr va; 1492 fhandle_t *fh; 1493 int flags; 1494 1495 /* 1496 * Must be super user 1497 */ 1498 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1499 0, NULL, NULL, NULL))) 1500 return (error); 1501 1502 flags = FFLAGS(oflags); 1503 if ((flags & (FREAD | FWRITE)) == 0) 1504 return (EINVAL); 1505 if ((flags & O_CREAT)) 1506 return (EINVAL); 1507 /* falloc() will use the file descriptor for us */ 1508 if ((error = falloc(l, &nfp, &indx)) != 0) 1509 return (error); 1510 fp = nfp; 1511 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1512 if (error != 0) { 1513 goto bad; 1514 } 1515 error = vfs_fhtovp(fh, &vp); 1516 if (error != 0) { 1517 goto bad; 1518 } 1519 1520 /* Now do an effective vn_open */ 1521 1522 if (vp->v_type == VSOCK) { 1523 error = EOPNOTSUPP; 1524 goto bad; 1525 } 1526 if (flags & FREAD) { 1527 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0) 1528 goto bad; 1529 } 1530 if (flags & (FWRITE | O_TRUNC)) { 1531 if (vp->v_type == VDIR) { 1532 error = EISDIR; 1533 goto bad; 1534 } 1535 if ((error = vn_writechk(vp)) != 0 || 1536 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0) 1537 goto bad; 1538 } 1539 if (flags & O_TRUNC) { 1540 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 1541 goto bad; 1542 VOP_UNLOCK(vp, 0); /* XXX */ 1543 VOP_LEASE(vp, l, cred, LEASE_WRITE); 1544 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1545 VATTR_NULL(&va); 1546 va.va_size = 0; 1547 error = VOP_SETATTR(vp, &va, cred, l); 1548 vn_finished_write(mp, 0); 1549 if (error) 1550 goto bad; 1551 } 1552 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0) 1553 goto bad; 1554 if (vp->v_type == VREG && 1555 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) { 1556 error = EIO; 1557 goto bad; 1558 } 1559 if (flags & FWRITE) 1560 vp->v_writecount++; 1561 1562 /* done with modified vn_open, now finish what sys_open does. */ 1563 1564 fp->f_flag = flags & FMASK; 1565 fp->f_type = DTYPE_VNODE; 1566 fp->f_ops = &vnops; 1567 fp->f_data = vp; 1568 if (flags & (O_EXLOCK | O_SHLOCK)) { 1569 lf.l_whence = SEEK_SET; 1570 lf.l_start = 0; 1571 lf.l_len = 0; 1572 if (flags & O_EXLOCK) 1573 lf.l_type = F_WRLCK; 1574 else 1575 lf.l_type = F_RDLCK; 1576 type = F_FLOCK; 1577 if ((flags & FNONBLOCK) == 0) 1578 type |= F_WAIT; 1579 VOP_UNLOCK(vp, 0); 1580 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1581 if (error) { 1582 (void) vn_close(vp, fp->f_flag, fp->f_cred, l); 1583 FILE_UNUSE(fp, l); 1584 ffree(fp); 1585 fdremove(fdp, indx); 1586 return (error); 1587 } 1588 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1589 fp->f_flag |= FHASLOCK; 1590 } 1591 VOP_UNLOCK(vp, 0); 1592 *retval = indx; 1593 FILE_SET_MATURE(fp); 1594 FILE_UNUSE(fp, l); 1595 vfs_copyinfh_free(fh); 1596 return (0); 1597 1598 bad: 1599 FILE_UNUSE(fp, l); 1600 ffree(fp); 1601 fdremove(fdp, indx); 1602 if (vp != NULL) 1603 vput(vp); 1604 vfs_copyinfh_free(fh); 1605 return (error); 1606 } 1607 1608 int 1609 sys___fhopen40(struct lwp *l, void *v, register_t *retval) 1610 { 1611 struct sys___fhopen40_args /* { 1612 syscallarg(const void *) fhp; 1613 syscallarg(size_t) fh_size; 1614 syscallarg(int) flags; 1615 } */ *uap = v; 1616 1617 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1618 SCARG(uap, flags), retval); 1619 } 1620 1621 int 1622 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp, 1623 register_t *retval) 1624 { 1625 struct stat sb; 1626 int error; 1627 fhandle_t *fh; 1628 struct vnode *vp; 1629 1630 /* 1631 * Must be super user 1632 */ 1633 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1634 0, NULL, NULL, NULL))) 1635 return (error); 1636 1637 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1638 if (error != 0) { 1639 goto bad; 1640 } 1641 error = vfs_fhtovp(fh, &vp); 1642 if (error != 0) { 1643 goto bad; 1644 } 1645 error = vn_stat(vp, &sb, l); 1646 vput(vp); 1647 if (error) { 1648 goto bad; 1649 } 1650 error = copyout(&sb, sbp, sizeof(sb)); 1651 bad: 1652 vfs_copyinfh_free(fh); 1653 return error; 1654 } 1655 1656 1657 /* ARGSUSED */ 1658 int 1659 sys___fhstat40(struct lwp *l, void *v, register_t *retval) 1660 { 1661 struct sys___fhstat40_args /* { 1662 syscallarg(const void *) fhp; 1663 syscallarg(size_t) fh_size; 1664 syscallarg(struct stat *) sb; 1665 } */ *uap = v; 1666 1667 return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb), 1668 retval); 1669 } 1670 1671 int 1672 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf, 1673 int flags, register_t *retval) 1674 { 1675 struct statvfs *sb = NULL; 1676 fhandle_t *fh; 1677 struct mount *mp; 1678 struct vnode *vp; 1679 int error; 1680 1681 /* 1682 * Must be super user 1683 */ 1684 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1685 0, NULL, NULL, NULL))) 1686 return error; 1687 1688 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1689 if (error != 0) { 1690 goto out; 1691 } 1692 error = vfs_fhtovp(fh, &vp); 1693 if (error != 0) { 1694 goto out; 1695 } 1696 mp = vp->v_mount; 1697 sb = STATVFSBUF_GET(); 1698 if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) { 1699 vput(vp); 1700 goto out; 1701 } 1702 vput(vp); 1703 error = copyout(sb, buf, sizeof(*sb)); 1704 out: 1705 if (sb != NULL) { 1706 STATVFSBUF_PUT(sb); 1707 } 1708 vfs_copyinfh_free(fh); 1709 return error; 1710 } 1711 1712 /* ARGSUSED */ 1713 int 1714 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval) 1715 { 1716 struct sys___fhstatvfs140_args /* { 1717 syscallarg(const void *) fhp; 1718 syscallarg(size_t) fh_size; 1719 syscallarg(struct statvfs *) buf; 1720 syscallarg(int) flags; 1721 } */ *uap = v; 1722 1723 return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1724 SCARG(uap, buf), SCARG(uap, flags), retval); 1725 } 1726 1727 /* 1728 * Create a special file. 1729 */ 1730 /* ARGSUSED */ 1731 int 1732 sys_mknod(struct lwp *l, void *v, register_t *retval) 1733 { 1734 struct sys_mknod_args /* { 1735 syscallarg(const char *) path; 1736 syscallarg(int) mode; 1737 syscallarg(int) dev; 1738 } */ *uap = v; 1739 struct proc *p = l->l_proc; 1740 struct vnode *vp; 1741 struct mount *mp; 1742 struct vattr vattr; 1743 int error; 1744 int whiteout = 0; 1745 struct nameidata nd; 1746 1747 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1748 0, NULL, NULL, NULL)) != 0) 1749 return (error); 1750 restart: 1751 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l); 1752 if ((error = namei(&nd)) != 0) 1753 return (error); 1754 vp = nd.ni_vp; 1755 if (vp != NULL) 1756 error = EEXIST; 1757 else { 1758 VATTR_NULL(&vattr); 1759 vattr.va_mode = 1760 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1761 vattr.va_rdev = SCARG(uap, dev); 1762 whiteout = 0; 1763 1764 switch (SCARG(uap, mode) & S_IFMT) { 1765 case S_IFMT: /* used by badsect to flag bad sectors */ 1766 vattr.va_type = VBAD; 1767 break; 1768 case S_IFCHR: 1769 vattr.va_type = VCHR; 1770 break; 1771 case S_IFBLK: 1772 vattr.va_type = VBLK; 1773 break; 1774 case S_IFWHT: 1775 whiteout = 1; 1776 break; 1777 default: 1778 error = EINVAL; 1779 break; 1780 } 1781 } 1782 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1783 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1784 if (nd.ni_dvp == vp) 1785 vrele(nd.ni_dvp); 1786 else 1787 vput(nd.ni_dvp); 1788 if (vp) 1789 vrele(vp); 1790 if ((error = vn_start_write(NULL, &mp, 1791 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 1792 return (error); 1793 goto restart; 1794 } 1795 if (!error) { 1796 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1797 if (whiteout) { 1798 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1799 if (error) 1800 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1801 vput(nd.ni_dvp); 1802 } else { 1803 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1804 &nd.ni_cnd, &vattr); 1805 if (error == 0) 1806 vput(nd.ni_vp); 1807 } 1808 } else { 1809 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1810 if (nd.ni_dvp == vp) 1811 vrele(nd.ni_dvp); 1812 else 1813 vput(nd.ni_dvp); 1814 if (vp) 1815 vrele(vp); 1816 } 1817 vn_finished_write(mp, 0); 1818 return (error); 1819 } 1820 1821 /* 1822 * Create a named pipe. 1823 */ 1824 /* ARGSUSED */ 1825 int 1826 sys_mkfifo(struct lwp *l, void *v, register_t *retval) 1827 { 1828 struct sys_mkfifo_args /* { 1829 syscallarg(const char *) path; 1830 syscallarg(int) mode; 1831 } */ *uap = v; 1832 struct proc *p = l->l_proc; 1833 struct mount *mp; 1834 struct vattr vattr; 1835 int error; 1836 struct nameidata nd; 1837 1838 restart: 1839 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l); 1840 if ((error = namei(&nd)) != 0) 1841 return (error); 1842 if (nd.ni_vp != NULL) { 1843 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1844 if (nd.ni_dvp == nd.ni_vp) 1845 vrele(nd.ni_dvp); 1846 else 1847 vput(nd.ni_dvp); 1848 vrele(nd.ni_vp); 1849 return (EEXIST); 1850 } 1851 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1852 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1853 if (nd.ni_dvp == nd.ni_vp) 1854 vrele(nd.ni_dvp); 1855 else 1856 vput(nd.ni_dvp); 1857 if (nd.ni_vp) 1858 vrele(nd.ni_vp); 1859 if ((error = vn_start_write(NULL, &mp, 1860 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 1861 return (error); 1862 goto restart; 1863 } 1864 VATTR_NULL(&vattr); 1865 vattr.va_type = VFIFO; 1866 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1867 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1868 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1869 if (error == 0) 1870 vput(nd.ni_vp); 1871 vn_finished_write(mp, 0); 1872 return (error); 1873 } 1874 1875 /* 1876 * Make a hard file link. 1877 */ 1878 /* ARGSUSED */ 1879 int 1880 sys_link(struct lwp *l, void *v, register_t *retval) 1881 { 1882 struct sys_link_args /* { 1883 syscallarg(const char *) path; 1884 syscallarg(const char *) link; 1885 } */ *uap = v; 1886 struct vnode *vp; 1887 struct mount *mp; 1888 struct nameidata nd; 1889 int error; 1890 1891 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 1892 if ((error = namei(&nd)) != 0) 1893 return (error); 1894 vp = nd.ni_vp; 1895 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 1896 vrele(vp); 1897 return (error); 1898 } 1899 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l); 1900 if ((error = namei(&nd)) != 0) 1901 goto out; 1902 if (nd.ni_vp) { 1903 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1904 if (nd.ni_dvp == nd.ni_vp) 1905 vrele(nd.ni_dvp); 1906 else 1907 vput(nd.ni_dvp); 1908 vrele(nd.ni_vp); 1909 error = EEXIST; 1910 goto out; 1911 } 1912 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1913 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 1914 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1915 out: 1916 vrele(vp); 1917 vn_finished_write(mp, 0); 1918 return (error); 1919 } 1920 1921 /* 1922 * Make a symbolic link. 1923 */ 1924 /* ARGSUSED */ 1925 int 1926 sys_symlink(struct lwp *l, void *v, register_t *retval) 1927 { 1928 struct sys_symlink_args /* { 1929 syscallarg(const char *) path; 1930 syscallarg(const char *) link; 1931 } */ *uap = v; 1932 struct proc *p = l->l_proc; 1933 struct mount *mp; 1934 struct vattr vattr; 1935 char *path; 1936 int error; 1937 struct nameidata nd; 1938 1939 path = PNBUF_GET(); 1940 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 1941 if (error) 1942 goto out; 1943 restart: 1944 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l); 1945 if ((error = namei(&nd)) != 0) 1946 goto out; 1947 if (nd.ni_vp) { 1948 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1949 if (nd.ni_dvp == nd.ni_vp) 1950 vrele(nd.ni_dvp); 1951 else 1952 vput(nd.ni_dvp); 1953 vrele(nd.ni_vp); 1954 error = EEXIST; 1955 goto out; 1956 } 1957 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1958 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1959 if (nd.ni_dvp == nd.ni_vp) 1960 vrele(nd.ni_dvp); 1961 else 1962 vput(nd.ni_dvp); 1963 if ((error = vn_start_write(NULL, &mp, 1964 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 1965 return (error); 1966 goto restart; 1967 } 1968 VATTR_NULL(&vattr); 1969 vattr.va_type = VLNK; 1970 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 1971 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 1972 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 1973 if (error == 0) 1974 vput(nd.ni_vp); 1975 vn_finished_write(mp, 0); 1976 out: 1977 PNBUF_PUT(path); 1978 return (error); 1979 } 1980 1981 /* 1982 * Delete a whiteout from the filesystem. 1983 */ 1984 /* ARGSUSED */ 1985 int 1986 sys_undelete(struct lwp *l, void *v, register_t *retval) 1987 { 1988 struct sys_undelete_args /* { 1989 syscallarg(const char *) path; 1990 } */ *uap = v; 1991 int error; 1992 struct mount *mp; 1993 struct nameidata nd; 1994 1995 restart: 1996 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, 1997 SCARG(uap, path), l); 1998 error = namei(&nd); 1999 if (error) 2000 return (error); 2001 2002 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2003 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2004 if (nd.ni_dvp == nd.ni_vp) 2005 vrele(nd.ni_dvp); 2006 else 2007 vput(nd.ni_dvp); 2008 if (nd.ni_vp) 2009 vrele(nd.ni_vp); 2010 return (EEXIST); 2011 } 2012 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2013 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2014 if (nd.ni_dvp == nd.ni_vp) 2015 vrele(nd.ni_dvp); 2016 else 2017 vput(nd.ni_dvp); 2018 if ((error = vn_start_write(NULL, &mp, 2019 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 2020 return (error); 2021 goto restart; 2022 } 2023 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 2024 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2025 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2026 vput(nd.ni_dvp); 2027 vn_finished_write(mp, 0); 2028 return (error); 2029 } 2030 2031 /* 2032 * Delete a name from the filesystem. 2033 */ 2034 /* ARGSUSED */ 2035 int 2036 sys_unlink(struct lwp *l, void *v, register_t *retval) 2037 { 2038 struct sys_unlink_args /* { 2039 syscallarg(const char *) path; 2040 } */ *uap = v; 2041 struct mount *mp; 2042 struct vnode *vp; 2043 int error; 2044 struct nameidata nd; 2045 2046 restart: 2047 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, 2048 SCARG(uap, path), l); 2049 if ((error = namei(&nd)) != 0) 2050 return (error); 2051 vp = nd.ni_vp; 2052 2053 /* 2054 * The root of a mounted filesystem cannot be deleted. 2055 */ 2056 if (vp->v_flag & VROOT) { 2057 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2058 if (nd.ni_dvp == vp) 2059 vrele(nd.ni_dvp); 2060 else 2061 vput(nd.ni_dvp); 2062 vput(vp); 2063 error = EBUSY; 2064 goto out; 2065 } 2066 2067 #if NVERIEXEC > 0 2068 /* Handle remove requests for veriexec entries. */ 2069 if ((error = veriexec_removechk(vp, nd.ni_dirp, l)) != 0) { 2070 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2071 if (nd.ni_dvp == vp) 2072 vrele(nd.ni_dvp); 2073 else 2074 vput(nd.ni_dvp); 2075 vput(vp); 2076 goto out; 2077 } 2078 #endif /* NVERIEXEC > 0 */ 2079 2080 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2081 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2082 if (nd.ni_dvp == vp) 2083 vrele(nd.ni_dvp); 2084 else 2085 vput(nd.ni_dvp); 2086 vput(vp); 2087 if ((error = vn_start_write(NULL, &mp, 2088 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 2089 return (error); 2090 goto restart; 2091 } 2092 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 2093 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2094 #ifdef FILEASSOC 2095 (void)fileassoc_file_delete(nd.ni_vp); 2096 #endif /* FILEASSOC */ 2097 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2098 vn_finished_write(mp, 0); 2099 out: 2100 return (error); 2101 } 2102 2103 /* 2104 * Reposition read/write file offset. 2105 */ 2106 int 2107 sys_lseek(struct lwp *l, void *v, register_t *retval) 2108 { 2109 struct sys_lseek_args /* { 2110 syscallarg(int) fd; 2111 syscallarg(int) pad; 2112 syscallarg(off_t) offset; 2113 syscallarg(int) whence; 2114 } */ *uap = v; 2115 struct proc *p = l->l_proc; 2116 kauth_cred_t cred = l->l_cred; 2117 struct filedesc *fdp = p->p_fd; 2118 struct file *fp; 2119 struct vnode *vp; 2120 struct vattr vattr; 2121 off_t newoff; 2122 int error; 2123 2124 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 2125 return (EBADF); 2126 2127 FILE_USE(fp); 2128 2129 vp = (struct vnode *)fp->f_data; 2130 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2131 error = ESPIPE; 2132 goto out; 2133 } 2134 2135 switch (SCARG(uap, whence)) { 2136 case SEEK_CUR: 2137 newoff = fp->f_offset + SCARG(uap, offset); 2138 break; 2139 case SEEK_END: 2140 error = VOP_GETATTR(vp, &vattr, cred, l); 2141 if (error) 2142 goto out; 2143 newoff = SCARG(uap, offset) + vattr.va_size; 2144 break; 2145 case SEEK_SET: 2146 newoff = SCARG(uap, offset); 2147 break; 2148 default: 2149 error = EINVAL; 2150 goto out; 2151 } 2152 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0) 2153 goto out; 2154 2155 *(off_t *)retval = fp->f_offset = newoff; 2156 out: 2157 FILE_UNUSE(fp, l); 2158 return (error); 2159 } 2160 2161 /* 2162 * Positional read system call. 2163 */ 2164 int 2165 sys_pread(struct lwp *l, void *v, register_t *retval) 2166 { 2167 struct sys_pread_args /* { 2168 syscallarg(int) fd; 2169 syscallarg(void *) buf; 2170 syscallarg(size_t) nbyte; 2171 syscallarg(off_t) offset; 2172 } */ *uap = v; 2173 struct proc *p = l->l_proc; 2174 struct filedesc *fdp = p->p_fd; 2175 struct file *fp; 2176 struct vnode *vp; 2177 off_t offset; 2178 int error, fd = SCARG(uap, fd); 2179 2180 if ((fp = fd_getfile(fdp, fd)) == NULL) 2181 return (EBADF); 2182 2183 if ((fp->f_flag & FREAD) == 0) { 2184 simple_unlock(&fp->f_slock); 2185 return (EBADF); 2186 } 2187 2188 FILE_USE(fp); 2189 2190 vp = (struct vnode *)fp->f_data; 2191 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2192 error = ESPIPE; 2193 goto out; 2194 } 2195 2196 offset = SCARG(uap, offset); 2197 2198 /* 2199 * XXX This works because no file systems actually 2200 * XXX take any action on the seek operation. 2201 */ 2202 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2203 goto out; 2204 2205 /* dofileread() will unuse the descriptor for us */ 2206 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2207 &offset, 0, retval)); 2208 2209 out: 2210 FILE_UNUSE(fp, l); 2211 return (error); 2212 } 2213 2214 /* 2215 * Positional scatter read system call. 2216 */ 2217 int 2218 sys_preadv(struct lwp *l, void *v, register_t *retval) 2219 { 2220 struct sys_preadv_args /* { 2221 syscallarg(int) fd; 2222 syscallarg(const struct iovec *) iovp; 2223 syscallarg(int) iovcnt; 2224 syscallarg(off_t) offset; 2225 } */ *uap = v; 2226 struct proc *p = l->l_proc; 2227 struct filedesc *fdp = p->p_fd; 2228 struct file *fp; 2229 struct vnode *vp; 2230 off_t offset; 2231 int error, fd = SCARG(uap, fd); 2232 2233 if ((fp = fd_getfile(fdp, fd)) == NULL) 2234 return (EBADF); 2235 2236 if ((fp->f_flag & FREAD) == 0) { 2237 simple_unlock(&fp->f_slock); 2238 return (EBADF); 2239 } 2240 2241 FILE_USE(fp); 2242 2243 vp = (struct vnode *)fp->f_data; 2244 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2245 error = ESPIPE; 2246 goto out; 2247 } 2248 2249 offset = SCARG(uap, offset); 2250 2251 /* 2252 * XXX This works because no file systems actually 2253 * XXX take any action on the seek operation. 2254 */ 2255 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2256 goto out; 2257 2258 /* dofilereadv() will unuse the descriptor for us */ 2259 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 2260 &offset, 0, retval)); 2261 2262 out: 2263 FILE_UNUSE(fp, l); 2264 return (error); 2265 } 2266 2267 /* 2268 * Positional write system call. 2269 */ 2270 int 2271 sys_pwrite(struct lwp *l, void *v, register_t *retval) 2272 { 2273 struct sys_pwrite_args /* { 2274 syscallarg(int) fd; 2275 syscallarg(const void *) buf; 2276 syscallarg(size_t) nbyte; 2277 syscallarg(off_t) offset; 2278 } */ *uap = v; 2279 struct proc *p = l->l_proc; 2280 struct filedesc *fdp = p->p_fd; 2281 struct file *fp; 2282 struct vnode *vp; 2283 off_t offset; 2284 int error, fd = SCARG(uap, fd); 2285 2286 if ((fp = fd_getfile(fdp, fd)) == NULL) 2287 return (EBADF); 2288 2289 if ((fp->f_flag & FWRITE) == 0) { 2290 simple_unlock(&fp->f_slock); 2291 return (EBADF); 2292 } 2293 2294 FILE_USE(fp); 2295 2296 vp = (struct vnode *)fp->f_data; 2297 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2298 error = ESPIPE; 2299 goto out; 2300 } 2301 2302 offset = SCARG(uap, offset); 2303 2304 /* 2305 * XXX This works because no file systems actually 2306 * XXX take any action on the seek operation. 2307 */ 2308 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2309 goto out; 2310 2311 /* dofilewrite() will unuse the descriptor for us */ 2312 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2313 &offset, 0, retval)); 2314 2315 out: 2316 FILE_UNUSE(fp, l); 2317 return (error); 2318 } 2319 2320 /* 2321 * Positional gather write system call. 2322 */ 2323 int 2324 sys_pwritev(struct lwp *l, void *v, register_t *retval) 2325 { 2326 struct sys_pwritev_args /* { 2327 syscallarg(int) fd; 2328 syscallarg(const struct iovec *) iovp; 2329 syscallarg(int) iovcnt; 2330 syscallarg(off_t) offset; 2331 } */ *uap = v; 2332 struct proc *p = l->l_proc; 2333 struct filedesc *fdp = p->p_fd; 2334 struct file *fp; 2335 struct vnode *vp; 2336 off_t offset; 2337 int error, fd = SCARG(uap, fd); 2338 2339 if ((fp = fd_getfile(fdp, fd)) == NULL) 2340 return (EBADF); 2341 2342 if ((fp->f_flag & FWRITE) == 0) { 2343 simple_unlock(&fp->f_slock); 2344 return (EBADF); 2345 } 2346 2347 FILE_USE(fp); 2348 2349 vp = (struct vnode *)fp->f_data; 2350 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2351 error = ESPIPE; 2352 goto out; 2353 } 2354 2355 offset = SCARG(uap, offset); 2356 2357 /* 2358 * XXX This works because no file systems actually 2359 * XXX take any action on the seek operation. 2360 */ 2361 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2362 goto out; 2363 2364 /* dofilewritev() will unuse the descriptor for us */ 2365 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 2366 &offset, 0, retval)); 2367 2368 out: 2369 FILE_UNUSE(fp, l); 2370 return (error); 2371 } 2372 2373 /* 2374 * Check access permissions. 2375 */ 2376 int 2377 sys_access(struct lwp *l, void *v, register_t *retval) 2378 { 2379 struct sys_access_args /* { 2380 syscallarg(const char *) path; 2381 syscallarg(int) flags; 2382 } */ *uap = v; 2383 kauth_cred_t cred; 2384 struct vnode *vp; 2385 int error, flags; 2386 struct nameidata nd; 2387 2388 cred = kauth_cred_dup(l->l_cred); 2389 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2390 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2391 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 2392 SCARG(uap, path), l); 2393 /* Override default credentials */ 2394 nd.ni_cnd.cn_cred = cred; 2395 if ((error = namei(&nd)) != 0) 2396 goto out; 2397 vp = nd.ni_vp; 2398 2399 /* Flags == 0 means only check for existence. */ 2400 if (SCARG(uap, flags)) { 2401 flags = 0; 2402 if (SCARG(uap, flags) & R_OK) 2403 flags |= VREAD; 2404 if (SCARG(uap, flags) & W_OK) 2405 flags |= VWRITE; 2406 if (SCARG(uap, flags) & X_OK) 2407 flags |= VEXEC; 2408 2409 error = VOP_ACCESS(vp, flags, cred, l); 2410 if (!error && (flags & VWRITE)) 2411 error = vn_writechk(vp); 2412 } 2413 vput(vp); 2414 out: 2415 kauth_cred_free(cred); 2416 return (error); 2417 } 2418 2419 /* 2420 * Get file status; this version follows links. 2421 */ 2422 /* ARGSUSED */ 2423 int 2424 sys___stat30(struct lwp *l, void *v, register_t *retval) 2425 { 2426 struct sys___stat30_args /* { 2427 syscallarg(const char *) path; 2428 syscallarg(struct stat *) ub; 2429 } */ *uap = v; 2430 struct stat sb; 2431 int error; 2432 struct nameidata nd; 2433 2434 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 2435 SCARG(uap, path), l); 2436 if ((error = namei(&nd)) != 0) 2437 return (error); 2438 error = vn_stat(nd.ni_vp, &sb, l); 2439 vput(nd.ni_vp); 2440 if (error) 2441 return (error); 2442 error = copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2443 return (error); 2444 } 2445 2446 /* 2447 * Get file status; this version does not follow links. 2448 */ 2449 /* ARGSUSED */ 2450 int 2451 sys___lstat30(struct lwp *l, void *v, register_t *retval) 2452 { 2453 struct sys___lstat30_args /* { 2454 syscallarg(const char *) path; 2455 syscallarg(struct stat *) ub; 2456 } */ *uap = v; 2457 struct stat sb; 2458 int error; 2459 struct nameidata nd; 2460 2461 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, 2462 SCARG(uap, path), l); 2463 if ((error = namei(&nd)) != 0) 2464 return (error); 2465 error = vn_stat(nd.ni_vp, &sb, l); 2466 vput(nd.ni_vp); 2467 if (error) 2468 return (error); 2469 error = copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2470 return (error); 2471 } 2472 2473 /* 2474 * Get configurable pathname variables. 2475 */ 2476 /* ARGSUSED */ 2477 int 2478 sys_pathconf(struct lwp *l, void *v, register_t *retval) 2479 { 2480 struct sys_pathconf_args /* { 2481 syscallarg(const char *) path; 2482 syscallarg(int) name; 2483 } */ *uap = v; 2484 int error; 2485 struct nameidata nd; 2486 2487 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, 2488 SCARG(uap, path), l); 2489 if ((error = namei(&nd)) != 0) 2490 return (error); 2491 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2492 vput(nd.ni_vp); 2493 return (error); 2494 } 2495 2496 /* 2497 * Return target name of a symbolic link. 2498 */ 2499 /* ARGSUSED */ 2500 int 2501 sys_readlink(struct lwp *l, void *v, register_t *retval) 2502 { 2503 struct sys_readlink_args /* { 2504 syscallarg(const char *) path; 2505 syscallarg(char *) buf; 2506 syscallarg(size_t) count; 2507 } */ *uap = v; 2508 struct vnode *vp; 2509 struct iovec aiov; 2510 struct uio auio; 2511 int error; 2512 struct nameidata nd; 2513 2514 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, 2515 SCARG(uap, path), l); 2516 if ((error = namei(&nd)) != 0) 2517 return (error); 2518 vp = nd.ni_vp; 2519 if (vp->v_type != VLNK) 2520 error = EINVAL; 2521 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2522 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) { 2523 aiov.iov_base = SCARG(uap, buf); 2524 aiov.iov_len = SCARG(uap, count); 2525 auio.uio_iov = &aiov; 2526 auio.uio_iovcnt = 1; 2527 auio.uio_offset = 0; 2528 auio.uio_rw = UIO_READ; 2529 KASSERT(l == curlwp); 2530 auio.uio_vmspace = l->l_proc->p_vmspace; 2531 auio.uio_resid = SCARG(uap, count); 2532 error = VOP_READLINK(vp, &auio, l->l_cred); 2533 } 2534 vput(vp); 2535 *retval = SCARG(uap, count) - auio.uio_resid; 2536 return (error); 2537 } 2538 2539 /* 2540 * Change flags of a file given a path name. 2541 */ 2542 /* ARGSUSED */ 2543 int 2544 sys_chflags(struct lwp *l, void *v, register_t *retval) 2545 { 2546 struct sys_chflags_args /* { 2547 syscallarg(const char *) path; 2548 syscallarg(u_long) flags; 2549 } */ *uap = v; 2550 struct vnode *vp; 2551 int error; 2552 struct nameidata nd; 2553 2554 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2555 if ((error = namei(&nd)) != 0) 2556 return (error); 2557 vp = nd.ni_vp; 2558 error = change_flags(vp, SCARG(uap, flags), l); 2559 vput(vp); 2560 return (error); 2561 } 2562 2563 /* 2564 * Change flags of a file given a file descriptor. 2565 */ 2566 /* ARGSUSED */ 2567 int 2568 sys_fchflags(struct lwp *l, void *v, register_t *retval) 2569 { 2570 struct sys_fchflags_args /* { 2571 syscallarg(int) fd; 2572 syscallarg(u_long) flags; 2573 } */ *uap = v; 2574 struct proc *p = l->l_proc; 2575 struct vnode *vp; 2576 struct file *fp; 2577 int error; 2578 2579 /* getvnode() will use the descriptor for us */ 2580 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2581 return (error); 2582 vp = (struct vnode *)fp->f_data; 2583 error = change_flags(vp, SCARG(uap, flags), l); 2584 VOP_UNLOCK(vp, 0); 2585 FILE_UNUSE(fp, l); 2586 return (error); 2587 } 2588 2589 /* 2590 * Change flags of a file given a path name; this version does 2591 * not follow links. 2592 */ 2593 int 2594 sys_lchflags(struct lwp *l, void *v, register_t *retval) 2595 { 2596 struct sys_lchflags_args /* { 2597 syscallarg(const char *) path; 2598 syscallarg(u_long) flags; 2599 } */ *uap = v; 2600 struct vnode *vp; 2601 int error; 2602 struct nameidata nd; 2603 2604 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2605 if ((error = namei(&nd)) != 0) 2606 return (error); 2607 vp = nd.ni_vp; 2608 error = change_flags(vp, SCARG(uap, flags), l); 2609 vput(vp); 2610 return (error); 2611 } 2612 2613 /* 2614 * Common routine to change flags of a file. 2615 */ 2616 int 2617 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2618 { 2619 struct mount *mp; 2620 struct vattr vattr; 2621 int error; 2622 2623 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2624 return (error); 2625 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2626 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2627 /* 2628 * Non-superusers cannot change the flags on devices, even if they 2629 * own them. 2630 */ 2631 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2632 &l->l_acflag) != 0) { 2633 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 2634 goto out; 2635 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2636 error = EINVAL; 2637 goto out; 2638 } 2639 } 2640 VATTR_NULL(&vattr); 2641 vattr.va_flags = flags; 2642 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2643 out: 2644 vn_finished_write(mp, 0); 2645 return (error); 2646 } 2647 2648 /* 2649 * Change mode of a file given path name; this version follows links. 2650 */ 2651 /* ARGSUSED */ 2652 int 2653 sys_chmod(struct lwp *l, void *v, register_t *retval) 2654 { 2655 struct sys_chmod_args /* { 2656 syscallarg(const char *) path; 2657 syscallarg(int) mode; 2658 } */ *uap = v; 2659 int error; 2660 struct nameidata nd; 2661 2662 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2663 if ((error = namei(&nd)) != 0) 2664 return (error); 2665 2666 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2667 2668 vrele(nd.ni_vp); 2669 return (error); 2670 } 2671 2672 /* 2673 * Change mode of a file given a file descriptor. 2674 */ 2675 /* ARGSUSED */ 2676 int 2677 sys_fchmod(struct lwp *l, void *v, register_t *retval) 2678 { 2679 struct sys_fchmod_args /* { 2680 syscallarg(int) fd; 2681 syscallarg(int) mode; 2682 } */ *uap = v; 2683 struct proc *p = l->l_proc; 2684 struct file *fp; 2685 int error; 2686 2687 /* getvnode() will use the descriptor for us */ 2688 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2689 return (error); 2690 2691 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l); 2692 FILE_UNUSE(fp, l); 2693 return (error); 2694 } 2695 2696 /* 2697 * Change mode of a file given path name; this version does not follow links. 2698 */ 2699 /* ARGSUSED */ 2700 int 2701 sys_lchmod(struct lwp *l, void *v, register_t *retval) 2702 { 2703 struct sys_lchmod_args /* { 2704 syscallarg(const char *) path; 2705 syscallarg(int) mode; 2706 } */ *uap = v; 2707 int error; 2708 struct nameidata nd; 2709 2710 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2711 if ((error = namei(&nd)) != 0) 2712 return (error); 2713 2714 error = change_mode(nd.ni_vp, SCARG(uap, mode), l); 2715 2716 vrele(nd.ni_vp); 2717 return (error); 2718 } 2719 2720 /* 2721 * Common routine to set mode given a vnode. 2722 */ 2723 static int 2724 change_mode(struct vnode *vp, int mode, struct lwp *l) 2725 { 2726 struct mount *mp; 2727 struct vattr vattr; 2728 int error; 2729 2730 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2731 return (error); 2732 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2733 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2734 VATTR_NULL(&vattr); 2735 vattr.va_mode = mode & ALLPERMS; 2736 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2737 VOP_UNLOCK(vp, 0); 2738 vn_finished_write(mp, 0); 2739 return (error); 2740 } 2741 2742 /* 2743 * Set ownership given a path name; this version follows links. 2744 */ 2745 /* ARGSUSED */ 2746 int 2747 sys_chown(struct lwp *l, void *v, register_t *retval) 2748 { 2749 struct sys_chown_args /* { 2750 syscallarg(const char *) path; 2751 syscallarg(uid_t) uid; 2752 syscallarg(gid_t) gid; 2753 } */ *uap = v; 2754 int error; 2755 struct nameidata nd; 2756 2757 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2758 if ((error = namei(&nd)) != 0) 2759 return (error); 2760 2761 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2762 2763 vrele(nd.ni_vp); 2764 return (error); 2765 } 2766 2767 /* 2768 * Set ownership given a path name; this version follows links. 2769 * Provides POSIX semantics. 2770 */ 2771 /* ARGSUSED */ 2772 int 2773 sys___posix_chown(struct lwp *l, void *v, register_t *retval) 2774 { 2775 struct sys_chown_args /* { 2776 syscallarg(const char *) path; 2777 syscallarg(uid_t) uid; 2778 syscallarg(gid_t) gid; 2779 } */ *uap = v; 2780 int error; 2781 struct nameidata nd; 2782 2783 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2784 if ((error = namei(&nd)) != 0) 2785 return (error); 2786 2787 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2788 2789 vrele(nd.ni_vp); 2790 return (error); 2791 } 2792 2793 /* 2794 * Set ownership given a file descriptor. 2795 */ 2796 /* ARGSUSED */ 2797 int 2798 sys_fchown(struct lwp *l, void *v, register_t *retval) 2799 { 2800 struct sys_fchown_args /* { 2801 syscallarg(int) fd; 2802 syscallarg(uid_t) uid; 2803 syscallarg(gid_t) gid; 2804 } */ *uap = v; 2805 struct proc *p = l->l_proc; 2806 int error; 2807 struct file *fp; 2808 2809 /* getvnode() will use the descriptor for us */ 2810 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2811 return (error); 2812 2813 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2814 SCARG(uap, gid), l, 0); 2815 FILE_UNUSE(fp, l); 2816 return (error); 2817 } 2818 2819 /* 2820 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2821 */ 2822 /* ARGSUSED */ 2823 int 2824 sys___posix_fchown(struct lwp *l, void *v, register_t *retval) 2825 { 2826 struct sys_fchown_args /* { 2827 syscallarg(int) fd; 2828 syscallarg(uid_t) uid; 2829 syscallarg(gid_t) gid; 2830 } */ *uap = v; 2831 struct proc *p = l->l_proc; 2832 int error; 2833 struct file *fp; 2834 2835 /* getvnode() will use the descriptor for us */ 2836 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2837 return (error); 2838 2839 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid), 2840 SCARG(uap, gid), l, 1); 2841 FILE_UNUSE(fp, l); 2842 return (error); 2843 } 2844 2845 /* 2846 * Set ownership given a path name; this version does not follow links. 2847 */ 2848 /* ARGSUSED */ 2849 int 2850 sys_lchown(struct lwp *l, void *v, register_t *retval) 2851 { 2852 struct sys_lchown_args /* { 2853 syscallarg(const char *) path; 2854 syscallarg(uid_t) uid; 2855 syscallarg(gid_t) gid; 2856 } */ *uap = v; 2857 int error; 2858 struct nameidata nd; 2859 2860 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2861 if ((error = namei(&nd)) != 0) 2862 return (error); 2863 2864 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2865 2866 vrele(nd.ni_vp); 2867 return (error); 2868 } 2869 2870 /* 2871 * Set ownership given a path name; this version does not follow links. 2872 * Provides POSIX/XPG semantics. 2873 */ 2874 /* ARGSUSED */ 2875 int 2876 sys___posix_lchown(struct lwp *l, void *v, register_t *retval) 2877 { 2878 struct sys_lchown_args /* { 2879 syscallarg(const char *) path; 2880 syscallarg(uid_t) uid; 2881 syscallarg(gid_t) gid; 2882 } */ *uap = v; 2883 int error; 2884 struct nameidata nd; 2885 2886 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2887 if ((error = namei(&nd)) != 0) 2888 return (error); 2889 2890 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2891 2892 vrele(nd.ni_vp); 2893 return (error); 2894 } 2895 2896 /* 2897 * Common routine to set ownership given a vnode. 2898 */ 2899 static int 2900 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2901 int posix_semantics) 2902 { 2903 struct mount *mp; 2904 struct vattr vattr; 2905 mode_t newmode; 2906 int error; 2907 2908 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2909 return (error); 2910 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 2911 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2912 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 2913 goto out; 2914 2915 #define CHANGED(x) ((int)(x) != -1) 2916 newmode = vattr.va_mode; 2917 if (posix_semantics) { 2918 /* 2919 * POSIX/XPG semantics: if the caller is not the super-user, 2920 * clear set-user-id and set-group-id bits. Both POSIX and 2921 * the XPG consider the behaviour for calls by the super-user 2922 * implementation-defined; we leave the set-user-id and set- 2923 * group-id settings intact in that case. 2924 */ 2925 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2926 NULL) != 0) 2927 newmode &= ~(S_ISUID | S_ISGID); 2928 } else { 2929 /* 2930 * NetBSD semantics: when changing owner and/or group, 2931 * clear the respective bit(s). 2932 */ 2933 if (CHANGED(uid)) 2934 newmode &= ~S_ISUID; 2935 if (CHANGED(gid)) 2936 newmode &= ~S_ISGID; 2937 } 2938 /* Update va_mode iff altered. */ 2939 if (vattr.va_mode == newmode) 2940 newmode = VNOVAL; 2941 2942 VATTR_NULL(&vattr); 2943 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2944 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2945 vattr.va_mode = newmode; 2946 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 2947 #undef CHANGED 2948 2949 out: 2950 VOP_UNLOCK(vp, 0); 2951 vn_finished_write(mp, 0); 2952 return (error); 2953 } 2954 2955 /* 2956 * Set the access and modification times given a path name; this 2957 * version follows links. 2958 */ 2959 /* ARGSUSED */ 2960 int 2961 sys_utimes(struct lwp *l, void *v, register_t *retval) 2962 { 2963 struct sys_utimes_args /* { 2964 syscallarg(const char *) path; 2965 syscallarg(const struct timeval *) tptr; 2966 } */ *uap = v; 2967 int error; 2968 struct nameidata nd; 2969 2970 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 2971 if ((error = namei(&nd)) != 0) 2972 return (error); 2973 2974 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l); 2975 2976 vrele(nd.ni_vp); 2977 return (error); 2978 } 2979 2980 /* 2981 * Set the access and modification times given a file descriptor. 2982 */ 2983 /* ARGSUSED */ 2984 int 2985 sys_futimes(struct lwp *l, void *v, register_t *retval) 2986 { 2987 struct sys_futimes_args /* { 2988 syscallarg(int) fd; 2989 syscallarg(const struct timeval *) tptr; 2990 } */ *uap = v; 2991 struct proc *p = l->l_proc; 2992 int error; 2993 struct file *fp; 2994 2995 /* getvnode() will use the descriptor for us */ 2996 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 2997 return (error); 2998 2999 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l); 3000 FILE_UNUSE(fp, l); 3001 return (error); 3002 } 3003 3004 /* 3005 * Set the access and modification times given a path name; this 3006 * version does not follow links. 3007 */ 3008 /* ARGSUSED */ 3009 int 3010 sys_lutimes(struct lwp *l, void *v, register_t *retval) 3011 { 3012 struct sys_lutimes_args /* { 3013 syscallarg(const char *) path; 3014 syscallarg(const struct timeval *) tptr; 3015 } */ *uap = v; 3016 int error; 3017 struct nameidata nd; 3018 3019 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 3020 if ((error = namei(&nd)) != 0) 3021 return (error); 3022 3023 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l); 3024 3025 vrele(nd.ni_vp); 3026 return (error); 3027 } 3028 3029 /* 3030 * Common routine to set access and modification times given a vnode. 3031 */ 3032 static int 3033 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l) 3034 { 3035 struct mount *mp; 3036 struct vattr vattr; 3037 int error; 3038 3039 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3040 return (error); 3041 VATTR_NULL(&vattr); 3042 if (tptr == NULL) { 3043 nanotime(&vattr.va_atime); 3044 vattr.va_mtime = vattr.va_atime; 3045 vattr.va_vaflags |= VA_UTIMES_NULL; 3046 } else { 3047 struct timeval tv[2]; 3048 3049 error = copyin(tptr, tv, sizeof(tv)); 3050 if (error) 3051 goto out; 3052 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime); 3053 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime); 3054 } 3055 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3056 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3057 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 3058 VOP_UNLOCK(vp, 0); 3059 out: 3060 vn_finished_write(mp, 0); 3061 return (error); 3062 } 3063 3064 /* 3065 * Truncate a file given its path name. 3066 */ 3067 /* ARGSUSED */ 3068 int 3069 sys_truncate(struct lwp *l, void *v, register_t *retval) 3070 { 3071 struct sys_truncate_args /* { 3072 syscallarg(const char *) path; 3073 syscallarg(int) pad; 3074 syscallarg(off_t) length; 3075 } */ *uap = v; 3076 struct vnode *vp; 3077 struct mount *mp; 3078 struct vattr vattr; 3079 int error; 3080 struct nameidata nd; 3081 3082 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 3083 if ((error = namei(&nd)) != 0) 3084 return (error); 3085 vp = nd.ni_vp; 3086 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3087 vrele(vp); 3088 return (error); 3089 } 3090 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3091 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3092 if (vp->v_type == VDIR) 3093 error = EISDIR; 3094 else if ((error = vn_writechk(vp)) == 0 && 3095 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) { 3096 VATTR_NULL(&vattr); 3097 vattr.va_size = SCARG(uap, length); 3098 error = VOP_SETATTR(vp, &vattr, l->l_cred, l); 3099 } 3100 vput(vp); 3101 vn_finished_write(mp, 0); 3102 return (error); 3103 } 3104 3105 /* 3106 * Truncate a file given a file descriptor. 3107 */ 3108 /* ARGSUSED */ 3109 int 3110 sys_ftruncate(struct lwp *l, void *v, register_t *retval) 3111 { 3112 struct sys_ftruncate_args /* { 3113 syscallarg(int) fd; 3114 syscallarg(int) pad; 3115 syscallarg(off_t) length; 3116 } */ *uap = v; 3117 struct proc *p = l->l_proc; 3118 struct mount *mp; 3119 struct vattr vattr; 3120 struct vnode *vp; 3121 struct file *fp; 3122 int error; 3123 3124 /* getvnode() will use the descriptor for us */ 3125 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3126 return (error); 3127 if ((fp->f_flag & FWRITE) == 0) { 3128 error = EINVAL; 3129 goto out; 3130 } 3131 vp = (struct vnode *)fp->f_data; 3132 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3133 FILE_UNUSE(fp, l); 3134 return (error); 3135 } 3136 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3137 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3138 if (vp->v_type == VDIR) 3139 error = EISDIR; 3140 else if ((error = vn_writechk(vp)) == 0) { 3141 VATTR_NULL(&vattr); 3142 vattr.va_size = SCARG(uap, length); 3143 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l); 3144 } 3145 VOP_UNLOCK(vp, 0); 3146 vn_finished_write(mp, 0); 3147 out: 3148 FILE_UNUSE(fp, l); 3149 return (error); 3150 } 3151 3152 /* 3153 * Sync an open file. 3154 */ 3155 /* ARGSUSED */ 3156 int 3157 sys_fsync(struct lwp *l, void *v, register_t *retval) 3158 { 3159 struct sys_fsync_args /* { 3160 syscallarg(int) fd; 3161 } */ *uap = v; 3162 struct proc *p = l->l_proc; 3163 struct vnode *vp; 3164 struct mount *mp; 3165 struct file *fp; 3166 int error; 3167 3168 /* getvnode() will use the descriptor for us */ 3169 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3170 return (error); 3171 vp = (struct vnode *)fp->f_data; 3172 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3173 FILE_UNUSE(fp, l); 3174 return (error); 3175 } 3176 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3177 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l); 3178 if (error == 0 && bioops.io_fsync != NULL && 3179 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3180 (*bioops.io_fsync)(vp, 0); 3181 VOP_UNLOCK(vp, 0); 3182 vn_finished_write(mp, 0); 3183 FILE_UNUSE(fp, l); 3184 return (error); 3185 } 3186 3187 /* 3188 * Sync a range of file data. API modeled after that found in AIX. 3189 * 3190 * FDATASYNC indicates that we need only save enough metadata to be able 3191 * to re-read the written data. Note we duplicate AIX's requirement that 3192 * the file be open for writing. 3193 */ 3194 /* ARGSUSED */ 3195 int 3196 sys_fsync_range(struct lwp *l, void *v, register_t *retval) 3197 { 3198 struct sys_fsync_range_args /* { 3199 syscallarg(int) fd; 3200 syscallarg(int) flags; 3201 syscallarg(off_t) start; 3202 syscallarg(off_t) length; 3203 } */ *uap = v; 3204 struct proc *p = l->l_proc; 3205 struct vnode *vp; 3206 struct file *fp; 3207 int flags, nflags; 3208 off_t s, e, len; 3209 int error; 3210 3211 /* getvnode() will use the descriptor for us */ 3212 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3213 return (error); 3214 3215 if ((fp->f_flag & FWRITE) == 0) { 3216 FILE_UNUSE(fp, l); 3217 return (EBADF); 3218 } 3219 3220 flags = SCARG(uap, flags); 3221 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3222 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3223 return (EINVAL); 3224 } 3225 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3226 if (flags & FDATASYNC) 3227 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3228 else 3229 nflags = FSYNC_WAIT; 3230 if (flags & FDISKSYNC) 3231 nflags |= FSYNC_CACHE; 3232 3233 len = SCARG(uap, length); 3234 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3235 if (len) { 3236 s = SCARG(uap, start); 3237 e = s + len; 3238 if (e < s) { 3239 FILE_UNUSE(fp, l); 3240 return (EINVAL); 3241 } 3242 } else { 3243 e = 0; 3244 s = 0; 3245 } 3246 3247 vp = (struct vnode *)fp->f_data; 3248 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3249 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l); 3250 3251 if (error == 0 && bioops.io_fsync != NULL && 3252 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) 3253 (*bioops.io_fsync)(vp, nflags); 3254 3255 VOP_UNLOCK(vp, 0); 3256 FILE_UNUSE(fp, l); 3257 return (error); 3258 } 3259 3260 /* 3261 * Sync the data of an open file. 3262 */ 3263 /* ARGSUSED */ 3264 int 3265 sys_fdatasync(struct lwp *l, void *v, register_t *retval) 3266 { 3267 struct sys_fdatasync_args /* { 3268 syscallarg(int) fd; 3269 } */ *uap = v; 3270 struct proc *p = l->l_proc; 3271 struct vnode *vp; 3272 struct file *fp; 3273 int error; 3274 3275 /* getvnode() will use the descriptor for us */ 3276 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3277 return (error); 3278 if ((fp->f_flag & FWRITE) == 0) { 3279 FILE_UNUSE(fp, l); 3280 return (EBADF); 3281 } 3282 vp = (struct vnode *)fp->f_data; 3283 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3284 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l); 3285 VOP_UNLOCK(vp, 0); 3286 FILE_UNUSE(fp, l); 3287 return (error); 3288 } 3289 3290 /* 3291 * Rename files, (standard) BSD semantics frontend. 3292 */ 3293 /* ARGSUSED */ 3294 int 3295 sys_rename(struct lwp *l, void *v, register_t *retval) 3296 { 3297 struct sys_rename_args /* { 3298 syscallarg(const char *) from; 3299 syscallarg(const char *) to; 3300 } */ *uap = v; 3301 3302 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0)); 3303 } 3304 3305 /* 3306 * Rename files, POSIX semantics frontend. 3307 */ 3308 /* ARGSUSED */ 3309 int 3310 sys___posix_rename(struct lwp *l, void *v, register_t *retval) 3311 { 3312 struct sys___posix_rename_args /* { 3313 syscallarg(const char *) from; 3314 syscallarg(const char *) to; 3315 } */ *uap = v; 3316 3317 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1)); 3318 } 3319 3320 /* 3321 * Rename files. Source and destination must either both be directories, 3322 * or both not be directories. If target is a directory, it must be empty. 3323 * If `from' and `to' refer to the same object, the value of the `retain' 3324 * argument is used to determine whether `from' will be 3325 * 3326 * (retain == 0) deleted unless `from' and `to' refer to the same 3327 * object in the file system's name space (BSD). 3328 * (retain == 1) always retained (POSIX). 3329 */ 3330 static int 3331 rename_files(const char *from, const char *to, struct lwp *l, int retain) 3332 { 3333 struct mount *mp = NULL; 3334 struct vnode *tvp, *fvp, *tdvp; 3335 struct nameidata fromnd, tond; 3336 struct proc *p; 3337 int error; 3338 3339 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, 3340 from, l); 3341 if ((error = namei(&fromnd)) != 0) 3342 return (error); 3343 fvp = fromnd.ni_vp; 3344 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH); 3345 if (error != 0) { 3346 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3347 vrele(fromnd.ni_dvp); 3348 vrele(fvp); 3349 if (fromnd.ni_startdir) 3350 vrele(fromnd.ni_startdir); 3351 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3352 return (error); 3353 } 3354 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | 3355 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l); 3356 if ((error = namei(&tond)) != 0) { 3357 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3358 vrele(fromnd.ni_dvp); 3359 vrele(fvp); 3360 goto out1; 3361 } 3362 tdvp = tond.ni_dvp; 3363 tvp = tond.ni_vp; 3364 3365 if (tvp != NULL) { 3366 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3367 error = ENOTDIR; 3368 goto out; 3369 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3370 error = EISDIR; 3371 goto out; 3372 } 3373 } 3374 3375 if (fvp == tdvp) 3376 error = EINVAL; 3377 3378 /* 3379 * Source and destination refer to the same object. 3380 */ 3381 if (fvp == tvp) { 3382 if (retain) 3383 error = -1; 3384 else if (fromnd.ni_dvp == tdvp && 3385 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3386 !memcmp(fromnd.ni_cnd.cn_nameptr, 3387 tond.ni_cnd.cn_nameptr, 3388 fromnd.ni_cnd.cn_namelen)) 3389 error = -1; 3390 } 3391 3392 #if NVERIEXEC > 0 3393 if (!error) 3394 error = veriexec_renamechk(fvp, fromnd.ni_dirp, tvp, 3395 tond.ni_dirp, l); 3396 #endif /* NVERIEXEC > 0 */ 3397 3398 out: 3399 p = l->l_proc; 3400 if (!error) { 3401 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE); 3402 if (fromnd.ni_dvp != tdvp) 3403 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3404 if (tvp) { 3405 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE); 3406 } 3407 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3408 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3409 } else { 3410 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3411 if (tdvp == tvp) 3412 vrele(tdvp); 3413 else 3414 vput(tdvp); 3415 if (tvp) 3416 vput(tvp); 3417 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3418 vrele(fromnd.ni_dvp); 3419 vrele(fvp); 3420 } 3421 vrele(tond.ni_startdir); 3422 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3423 out1: 3424 vn_finished_write(mp, 0); 3425 if (fromnd.ni_startdir) 3426 vrele(fromnd.ni_startdir); 3427 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3428 return (error == -1 ? 0 : error); 3429 } 3430 3431 /* 3432 * Make a directory file. 3433 */ 3434 /* ARGSUSED */ 3435 int 3436 sys_mkdir(struct lwp *l, void *v, register_t *retval) 3437 { 3438 struct sys_mkdir_args /* { 3439 syscallarg(const char *) path; 3440 syscallarg(int) mode; 3441 } */ *uap = v; 3442 struct proc *p = l->l_proc; 3443 struct mount *mp; 3444 struct vnode *vp; 3445 struct vattr vattr; 3446 int error; 3447 struct nameidata nd; 3448 3449 restart: 3450 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE, 3451 SCARG(uap, path), l); 3452 if ((error = namei(&nd)) != 0) 3453 return (error); 3454 vp = nd.ni_vp; 3455 if (vp != NULL) { 3456 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3457 if (nd.ni_dvp == vp) 3458 vrele(nd.ni_dvp); 3459 else 3460 vput(nd.ni_dvp); 3461 vrele(vp); 3462 return (EEXIST); 3463 } 3464 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3465 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3466 if (nd.ni_dvp == vp) 3467 vrele(nd.ni_dvp); 3468 else 3469 vput(nd.ni_dvp); 3470 if ((error = vn_start_write(NULL, &mp, 3471 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 3472 return (error); 3473 goto restart; 3474 } 3475 VATTR_NULL(&vattr); 3476 vattr.va_type = VDIR; 3477 vattr.va_mode = 3478 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3479 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3480 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3481 if (!error) 3482 vput(nd.ni_vp); 3483 vn_finished_write(mp, 0); 3484 return (error); 3485 } 3486 3487 /* 3488 * Remove a directory file. 3489 */ 3490 /* ARGSUSED */ 3491 int 3492 sys_rmdir(struct lwp *l, void *v, register_t *retval) 3493 { 3494 struct sys_rmdir_args /* { 3495 syscallarg(const char *) path; 3496 } */ *uap = v; 3497 struct mount *mp; 3498 struct vnode *vp; 3499 int error; 3500 struct nameidata nd; 3501 3502 restart: 3503 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, 3504 SCARG(uap, path), l); 3505 if ((error = namei(&nd)) != 0) 3506 return (error); 3507 vp = nd.ni_vp; 3508 if (vp->v_type != VDIR) { 3509 error = ENOTDIR; 3510 goto out; 3511 } 3512 /* 3513 * No rmdir "." please. 3514 */ 3515 if (nd.ni_dvp == vp) { 3516 error = EINVAL; 3517 goto out; 3518 } 3519 /* 3520 * The root of a mounted filesystem cannot be deleted. 3521 */ 3522 if (vp->v_flag & VROOT) { 3523 error = EBUSY; 3524 goto out; 3525 } 3526 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3527 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3528 if (nd.ni_dvp == vp) 3529 vrele(nd.ni_dvp); 3530 else 3531 vput(nd.ni_dvp); 3532 vput(vp); 3533 if ((error = vn_start_write(NULL, &mp, 3534 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0) 3535 return (error); 3536 goto restart; 3537 } 3538 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE); 3539 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE); 3540 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3541 vn_finished_write(mp, 0); 3542 return (error); 3543 3544 out: 3545 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3546 if (nd.ni_dvp == vp) 3547 vrele(nd.ni_dvp); 3548 else 3549 vput(nd.ni_dvp); 3550 vput(vp); 3551 return (error); 3552 } 3553 3554 /* 3555 * Read a block of directory entries in a file system independent format. 3556 */ 3557 int 3558 sys___getdents30(struct lwp *l, void *v, register_t *retval) 3559 { 3560 struct sys___getdents30_args /* { 3561 syscallarg(int) fd; 3562 syscallarg(char *) buf; 3563 syscallarg(size_t) count; 3564 } */ *uap = v; 3565 struct proc *p = l->l_proc; 3566 struct file *fp; 3567 int error, done; 3568 3569 /* getvnode() will use the descriptor for us */ 3570 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 3571 return (error); 3572 if ((fp->f_flag & FREAD) == 0) { 3573 error = EBADF; 3574 goto out; 3575 } 3576 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3577 SCARG(uap, count), &done, l, 0, 0); 3578 #ifdef KTRACE 3579 if (!error && KTRPOINT(p, KTR_GENIO)) { 3580 struct iovec iov; 3581 iov.iov_base = SCARG(uap, buf); 3582 iov.iov_len = done; 3583 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0); 3584 } 3585 #endif 3586 *retval = done; 3587 out: 3588 FILE_UNUSE(fp, l); 3589 return (error); 3590 } 3591 3592 /* 3593 * Set the mode mask for creation of filesystem nodes. 3594 */ 3595 int 3596 sys_umask(struct lwp *l, void *v, register_t *retval) 3597 { 3598 struct sys_umask_args /* { 3599 syscallarg(mode_t) newmask; 3600 } */ *uap = v; 3601 struct proc *p = l->l_proc; 3602 struct cwdinfo *cwdi; 3603 3604 cwdi = p->p_cwdi; 3605 *retval = cwdi->cwdi_cmask; 3606 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3607 return (0); 3608 } 3609 3610 /* 3611 * Void all references to file by ripping underlying filesystem 3612 * away from vnode. 3613 */ 3614 /* ARGSUSED */ 3615 int 3616 sys_revoke(struct lwp *l, void *v, register_t *retval) 3617 { 3618 struct sys_revoke_args /* { 3619 syscallarg(const char *) path; 3620 } */ *uap = v; 3621 struct mount *mp; 3622 struct vnode *vp; 3623 struct vattr vattr; 3624 int error; 3625 struct nameidata nd; 3626 3627 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l); 3628 if ((error = namei(&nd)) != 0) 3629 return (error); 3630 vp = nd.ni_vp; 3631 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0) 3632 goto out; 3633 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid && 3634 (error = kauth_authorize_generic(l->l_cred, 3635 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) 3636 goto out; 3637 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3638 goto out; 3639 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER))) 3640 VOP_REVOKE(vp, REVOKEALL); 3641 vn_finished_write(mp, 0); 3642 out: 3643 vrele(vp); 3644 return (error); 3645 } 3646 3647 /* 3648 * Convert a user file descriptor to a kernel file entry. 3649 */ 3650 int 3651 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 3652 { 3653 struct vnode *vp; 3654 struct file *fp; 3655 3656 if ((fp = fd_getfile(fdp, fd)) == NULL) 3657 return (EBADF); 3658 3659 FILE_USE(fp); 3660 3661 if (fp->f_type != DTYPE_VNODE) { 3662 FILE_UNUSE(fp, NULL); 3663 return (EINVAL); 3664 } 3665 3666 vp = (struct vnode *)fp->f_data; 3667 if (vp->v_type == VBAD) { 3668 FILE_UNUSE(fp, NULL); 3669 return (EBADF); 3670 } 3671 3672 *fpp = fp; 3673 return (0); 3674 } 3675