xref: /netbsd-src/sys/ufs/ufs/ufs_vnops.c (revision 6a3c4a6f4f5cf056149f6ce491c210b119b56ded)
1 /*	$NetBSD: ufs_vnops.c,v 1.262 2022/03/27 16:24:59 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1989, 1993, 1995
34  *	The Regents of the University of California.  All rights reserved.
35  * (c) UNIX System Laboratories, Inc.
36  * All or some portions of this file are derived from material licensed
37  * to the University of California by American Telephone and Telegraph
38  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39  * the permission of UNIX System Laboratories, Inc.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)ufs_vnops.c	8.28 (Berkeley) 7/31/95
66  */
67 
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.262 2022/03/27 16:24:59 christos Exp $");
70 
71 #if defined(_KERNEL_OPT)
72 #include "opt_ffs.h"
73 #include "opt_quota.h"
74 #include "opt_uvmhist.h"
75 #endif
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/namei.h>
80 #include <sys/resourcevar.h>
81 #include <sys/kernel.h>
82 #include <sys/file.h>
83 #include <sys/stat.h>
84 #include <sys/buf.h>
85 #include <sys/proc.h>
86 #include <sys/mount.h>
87 #include <sys/vnode.h>
88 #include <sys/fstrans.h>
89 #include <sys/kmem.h>
90 #include <sys/malloc.h>
91 #include <sys/dirent.h>
92 #include <sys/lockf.h>
93 #include <sys/kauth.h>
94 #include <sys/wapbl.h>
95 
96 #include <miscfs/specfs/specdev.h>
97 #include <miscfs/fifofs/fifo.h>
98 #include <miscfs/genfs/genfs.h>
99 
100 #include <ufs/ufs/acl.h>
101 #include <ufs/ufs/inode.h>
102 #include <ufs/ufs/dir.h>
103 #include <ufs/ufs/ufsmount.h>
104 #include <ufs/ufs/ufs_bswap.h>
105 #include <ufs/ufs/ufs_extern.h>
106 #include <ufs/ufs/ufs_wapbl.h>
107 #ifdef UFS_DIRHASH
108 #include <ufs/ufs/dirhash.h>
109 #endif
110 #include <ufs/ext2fs/ext2fs_extern.h>
111 #include <ufs/ext2fs/ext2fs_dir.h>
112 #include <ufs/ffs/ffs_extern.h>
113 #include <ufs/lfs/lfs_extern.h>
114 #include <ufs/lfs/lfs.h>
115 
116 #ifdef UVMHIST
117 #include <uvm/uvm.h>
118 #endif
119 #include <uvm/uvm_extern.h>
120 #include <uvm/uvm_stat.h>
121 
122 __CTASSERT(EXT2FS_MAXNAMLEN == FFS_MAXNAMLEN);
123 __CTASSERT(LFS_MAXNAMLEN == FFS_MAXNAMLEN);
124 
125 static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct lwp *);
126 static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t,
127     struct lwp *);
128 static int ufs_makeinode(struct vattr *, struct vnode *,
129     const struct ufs_lookup_results *, struct vnode **, struct componentname *);
130 
131 /*
132  * A virgin directory (no blushing please).
133  */
134 static const struct dirtemplate mastertemplate = {
135 	0,	12,			DT_DIR,	1,	".",
136 	0,	UFS_DIRBLKSIZ - 12,	DT_DIR,	2,	".."
137 };
138 
139 /*
140  * Create a regular file
141  */
142 int
ufs_create(void * v)143 ufs_create(void *v)
144 {
145 	struct vop_create_v3_args /* {
146 		struct vnode		*a_dvp;
147 		struct vnode		**a_vpp;
148 		struct componentname	*a_cnp;
149 		struct vattr		*a_vap;
150 	} */ *ap = v;
151 	int	error;
152 	struct vnode *dvp = ap->a_dvp;
153 	struct ufs_lookup_results *ulr;
154 
155 	/* XXX should handle this material another way */
156 	ulr = &VTOI(dvp)->i_crap;
157 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
158 
159 	/*
160 	 * UFS_WAPBL_BEGIN(dvp->v_mount) performed by successful
161 	 * ufs_makeinode
162 	 */
163 	error = ufs_makeinode(ap->a_vap, dvp, ulr, ap->a_vpp, ap->a_cnp);
164 	if (error) {
165 		return (error);
166 	}
167 	UFS_WAPBL_END(dvp->v_mount);
168 	VOP_UNLOCK(*ap->a_vpp);
169 	return (0);
170 }
171 
172 /*
173  * Mknod vnode call
174  */
175 /* ARGSUSED */
176 int
ufs_mknod(void * v)177 ufs_mknod(void *v)
178 {
179 	struct vop_mknod_v3_args /* {
180 		struct vnode		*a_dvp;
181 		struct vnode		**a_vpp;
182 		struct componentname	*a_cnp;
183 		struct vattr		*a_vap;
184 	} */ *ap = v;
185 	struct vattr	*vap;
186 	struct vnode	**vpp;
187 	struct inode	*ip;
188 	int		error;
189 	struct ufs_lookup_results *ulr;
190 
191 	vap = ap->a_vap;
192 	vpp = ap->a_vpp;
193 
194 	/* XXX should handle this material another way */
195 	ulr = &VTOI(ap->a_dvp)->i_crap;
196 	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
197 
198 	/*
199 	 * UFS_WAPBL_BEGIN(dvp->v_mount) performed by successful
200 	 * ufs_makeinode
201 	 */
202 	if ((error = ufs_makeinode(vap, ap->a_dvp, ulr, vpp, ap->a_cnp)) != 0)
203 		goto out;
204 	ip = VTOI(*vpp);
205 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
206 	UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0);
207 	UFS_WAPBL_END(ap->a_dvp->v_mount);
208 	VOP_UNLOCK(*vpp);
209 out:
210 	if (error != 0) {
211 		*vpp = NULL;
212 		return (error);
213 	}
214 	return (0);
215 }
216 
217 /*
218  * Open called.
219  *
220  * Nothing to do.
221  */
222 /* ARGSUSED */
223 int
ufs_open(void * v)224 ufs_open(void *v)
225 {
226 	struct vop_open_args /* {
227 		struct vnode	*a_vp;
228 		int		a_mode;
229 		kauth_cred_t	a_cred;
230 	} */ *ap = v;
231 
232 	/*
233 	 * Files marked append-only must be opened for appending.
234 	 */
235 	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
236 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
237 		return (EPERM);
238 	return (0);
239 }
240 
241 /*
242  * Close called.
243  *
244  * Update the times on the inode.
245  */
246 /* ARGSUSED */
247 int
ufs_close(void * v)248 ufs_close(void *v)
249 {
250 	struct vop_close_args /* {
251 		struct vnode	*a_vp;
252 		int		a_fflag;
253 		kauth_cred_t	a_cred;
254 	} */ *ap = v;
255 	struct vnode	*vp;
256 
257 	vp = ap->a_vp;
258 	if (vrefcnt(vp) > 1)
259 		UFS_ITIMES(vp, NULL, NULL, NULL);
260 	return (0);
261 }
262 
263 static int
ufs_check_possible(struct vnode * vp,struct inode * ip,accmode_t accmode,kauth_cred_t cred)264 ufs_check_possible(struct vnode *vp, struct inode *ip, accmode_t accmode,
265     kauth_cred_t cred)
266 {
267 #if defined(QUOTA) || defined(QUOTA2)
268 	int error;
269 #endif
270 
271 	/*
272 	 * Disallow write attempts on read-only file systems;
273 	 * unless the file is a socket, fifo, or a block or
274 	 * character device resident on the file system.
275 	 */
276 	if (accmode & VMODIFY_PERMS) {
277 		switch (vp->v_type) {
278 		case VDIR:
279 		case VLNK:
280 		case VREG:
281 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
282 				return EROFS;
283 #if defined(QUOTA) || defined(QUOTA2)
284 			error = chkdq(ip, 0, cred, 0);
285 			if (error != 0)
286 				return error;
287 #endif
288 			break;
289 		case VBAD:
290 		case VBLK:
291 		case VCHR:
292 		case VSOCK:
293 		case VFIFO:
294 		case VNON:
295 		default:
296 			break;
297 		}
298 	}
299 
300 	/* If it is a snapshot, nobody gets access to it. */
301 	if ((ip->i_flags & SF_SNAPSHOT))
302 		return EPERM;
303 	/*
304 	 * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
305 	 * permits the owner of the file to remove the IMMUTABLE flag.
306 	 */
307 	if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
308 	    (ip->i_flags & IMMUTABLE))
309 		return EPERM;
310 
311 	return 0;
312 }
313 
314 static int
ufs_check_permitted(struct vnode * vp,struct inode * ip,struct acl * acl,accmode_t accmode,kauth_cred_t cred,int (* func)(struct vnode *,kauth_cred_t,uid_t,gid_t,mode_t,struct acl *,accmode_t))315 ufs_check_permitted(struct vnode *vp, struct inode *ip,
316     struct acl *acl, accmode_t accmode, kauth_cred_t cred,
317     int (*func)(struct vnode *, kauth_cred_t, uid_t, gid_t, mode_t,
318     struct acl *, accmode_t))
319 {
320 
321 	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(accmode,
322 	    vp->v_type, ip->i_mode & ALLPERMS), vp, NULL, (*func)(vp, cred,
323 	    ip->i_uid, ip->i_gid, ip->i_mode & ALLPERMS, acl, accmode));
324 }
325 
326 int
ufs_accessx(void * v)327 ufs_accessx(void *v)
328 {
329 	struct vop_accessx_args /* {
330 		struct vnode *a_vp;
331 		accmode_t a_accmode;
332 		kauth_cred_t a_cred;
333 	} */ *ap = v;
334 	struct vnode *vp = ap->a_vp;
335 	struct inode *ip = VTOI(vp);
336 	accmode_t accmode = ap->a_accmode;
337 	int error;
338 #ifdef UFS_ACL
339 	struct acl *acl;
340 	acl_type_t type;
341 #endif
342 
343 	error = ufs_check_possible(vp, ip, accmode, ap->a_cred);
344 	if (error)
345 		return error;
346 
347 #ifdef UFS_ACL
348 	if ((vp->v_mount->mnt_flag & (MNT_POSIX1EACLS | MNT_NFS4ACLS)) != 0) {
349 		if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
350 			type = ACL_TYPE_NFS4;
351 		else
352 			type = ACL_TYPE_ACCESS;
353 
354 		acl = acl_alloc(KM_SLEEP);
355 		if (type == ACL_TYPE_NFS4)
356 			error = ufs_getacl_nfs4_internal(vp, acl, curlwp);
357 		else
358 			error = VOP_GETACL(vp, type, acl, ap->a_cred);
359 		if (!error) {
360 			if (type == ACL_TYPE_NFS4) {
361 				error = ufs_check_permitted(vp,
362 				    ip, acl, accmode, ap->a_cred,
363 				    genfs_can_access_acl_nfs4);
364 			} else {
365 				error = vfs_unixify_accmode(&accmode);
366 				if (error == 0)
367 					error = ufs_check_permitted(vp,
368 					    ip, acl, accmode, ap->a_cred,
369 					    genfs_can_access_acl_posix1e);
370 			}
371 			acl_free(acl);
372 			return error;
373 		}
374 		if (error != EOPNOTSUPP)
375 			printf("%s: Error retrieving ACL: %d\n",
376 			    __func__, error);
377 		/*
378 		 * XXX: Fall back until debugged.  Should
379 		 * eventually possibly log an error, and return
380 		 * EPERM for safety.
381 		 */
382 		acl_free(acl);
383 	}
384 #endif /* !UFS_ACL */
385 	error = vfs_unixify_accmode(&accmode);
386 	if (error)
387 		return error;
388 	return ufs_check_permitted(vp, ip,
389 	    NULL, accmode, ap->a_cred, genfs_can_access);
390 }
391 
392 /* ARGSUSED */
393 int
ufs_getattr(void * v)394 ufs_getattr(void *v)
395 {
396 	struct vop_getattr_args /* {
397 		struct vnode	*a_vp;
398 		struct vattr	*a_vap;
399 		kauth_cred_t	a_cred;
400 	} */ *ap = v;
401 	struct vnode	*vp;
402 	struct inode	*ip;
403 	struct vattr	*vap;
404 
405 	vp = ap->a_vp;
406 	ip = VTOI(vp);
407 	vap = ap->a_vap;
408 	UFS_ITIMES(vp, NULL, NULL, NULL);
409 
410 	/*
411 	 * Copy from inode table
412 	 */
413 	vap->va_fsid = ip->i_dev;
414 	vap->va_fileid = ip->i_number;
415 	vap->va_mode = ip->i_mode & ALLPERMS;
416 	vap->va_nlink = ip->i_nlink;
417 	vap->va_uid = ip->i_uid;
418 	vap->va_gid = ip->i_gid;
419 	vap->va_size = vp->v_size;
420 	if (ip->i_ump->um_fstype == UFS1) {
421 		switch (vp->v_type) {
422 		    case VBLK:
423 		    case VCHR:
424 			vap->va_rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
425 			    UFS_MPNEEDSWAP(ip->i_ump));
426 			break;
427 		    default:
428 			vap->va_rdev = NODEV;
429 			break;
430 		}
431 		vap->va_atime.tv_sec = ip->i_ffs1_atime;
432 		vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
433 		vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
434 		vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
435 		vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
436 		vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
437 		vap->va_birthtime.tv_sec = 0;
438 		vap->va_birthtime.tv_nsec = 0;
439 		vap->va_bytes = dbtob((u_quad_t)ip->i_ffs1_blocks);
440 	} else {
441 		switch (vp->v_type) {
442 		    case VBLK:
443 		    case VCHR:
444 			vap->va_rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
445 			    UFS_MPNEEDSWAP(ip->i_ump));
446 			break;
447 		    default:
448 			vap->va_rdev = NODEV;
449 			break;
450 		}
451 		vap->va_atime.tv_sec = ip->i_ffs2_atime;
452 		vap->va_atime.tv_nsec = ip->i_ffs2_atimensec;
453 		vap->va_mtime.tv_sec = ip->i_ffs2_mtime;
454 		vap->va_mtime.tv_nsec = ip->i_ffs2_mtimensec;
455 		vap->va_ctime.tv_sec = ip->i_ffs2_ctime;
456 		vap->va_ctime.tv_nsec = ip->i_ffs2_ctimensec;
457 		vap->va_birthtime.tv_sec = ip->i_ffs2_birthtime;
458 		vap->va_birthtime.tv_nsec = ip->i_ffs2_birthnsec;
459 		vap->va_bytes = dbtob(ip->i_ffs2_blocks);
460 	}
461 	vap->va_gen = ip->i_gen;
462 	vap->va_flags = ip->i_flags;
463 
464 	/* this doesn't belong here */
465 	if (vp->v_type == VBLK)
466 		vap->va_blocksize = BLKDEV_IOSIZE;
467 	else if (vp->v_type == VCHR)
468 		vap->va_blocksize = MAXBSIZE;
469 	else
470 		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
471 	vap->va_type = vp->v_type;
472 	vap->va_filerev = ip->i_modrev;
473 	return (0);
474 }
475 
476 /*
477  * Set attribute vnode op. called from several syscalls
478  */
479 int
ufs_setattr(void * v)480 ufs_setattr(void *v)
481 {
482 	struct vop_setattr_args /* {
483 		struct vnode	*a_vp;
484 		struct vattr	*a_vap;
485 		kauth_cred_t	a_cred;
486 	} */ *ap = v;
487 	struct vattr	*vap;
488 	struct vnode	*vp;
489 	struct inode	*ip;
490 	kauth_cred_t	cred;
491 	struct lwp	*l;
492 	int		error;
493 	kauth_action_t	action;
494 	bool		changing_sysflags;
495 
496 	vap = ap->a_vap;
497 	vp = ap->a_vp;
498 	ip = VTOI(vp);
499 	cred = ap->a_cred;
500 	l = curlwp;
501 	action = KAUTH_VNODE_WRITE_FLAGS;
502 	changing_sysflags = false;
503 
504 	/*
505 	 * Check for unsettable attributes.
506 	 */
507 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
508 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
509 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
510 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
511 		return (EINVAL);
512 	}
513 
514 	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);
515 
516 	if (vap->va_flags != VNOVAL) {
517 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
518 			error = EROFS;
519 			goto out;
520 		}
521 
522 		/* Snapshot flag cannot be set or cleared */
523 		if ((vap->va_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) !=
524 		    (ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))) {
525 			error = EPERM;
526 			goto out;
527 		}
528 
529 		if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) {
530 			action |= KAUTH_VNODE_HAS_SYSFLAGS;
531 		}
532 
533 		if ((vap->va_flags & SF_SETTABLE) !=
534 		    (ip->i_flags & SF_SETTABLE)) {
535 			action |= KAUTH_VNODE_WRITE_SYSFLAGS;
536 			changing_sysflags = true;
537 		}
538 
539 		error = kauth_authorize_vnode(cred, action, vp, NULL,
540 		    genfs_can_chflags(vp, cred, ip->i_uid, changing_sysflags));
541 		if (error)
542 			goto out;
543 
544 		if (changing_sysflags) {
545 			error = UFS_WAPBL_BEGIN(vp->v_mount);
546 			if (error)
547 				goto out;
548 			ip->i_flags = vap->va_flags;
549 			DIP_ASSIGN(ip, flags, ip->i_flags);
550 		} else {
551 			error = UFS_WAPBL_BEGIN(vp->v_mount);
552 			if (error)
553 				goto out;
554 			ip->i_flags &= SF_SETTABLE;
555 			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
556 			DIP_ASSIGN(ip, flags, ip->i_flags);
557 		}
558 		ip->i_flag |= IN_CHANGE;
559 		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
560 		UFS_WAPBL_END(vp->v_mount);
561 		if (vap->va_flags & (IMMUTABLE | APPEND)) {
562 			error = 0;
563 			goto out;
564 		}
565 	}
566 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
567 		error = EPERM;
568 		goto out;
569 	}
570 	/*
571 	 * Go through the fields and update iff not VNOVAL.
572 	 */
573 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
574 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
575 			error = EROFS;
576 			goto out;
577 		}
578 		error = UFS_WAPBL_BEGIN(vp->v_mount);
579 		if (error)
580 			goto out;
581 		error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
582 		UFS_WAPBL_END(vp->v_mount);
583 		if (error)
584 			goto out;
585 	}
586 	if (vap->va_size != VNOVAL) {
587 		/*
588 		 * Disallow write attempts on read-only file systems;
589 		 * unless the file is a socket, fifo, or a block or
590 		 * character device resident on the file system.
591 		 */
592 		switch (vp->v_type) {
593 		case VDIR:
594 			error = EISDIR;
595 			goto out;
596 		case VCHR:
597 		case VBLK:
598 		case VFIFO:
599 			break;
600 		case VREG:
601 			if (vp->v_mount->mnt_flag & MNT_RDONLY) {
602 				error = EROFS;
603 				goto out;
604 			}
605 			if ((ip->i_flags & SF_SNAPSHOT) != 0) {
606 				error = EPERM;
607 				goto out;
608 			}
609 			error = ufs_truncate_retry(vp, 0, vap->va_size, cred);
610 			if (error)
611 				goto out;
612 			break;
613 		default:
614 			error = EOPNOTSUPP;
615 			goto out;
616 		}
617 	}
618 	ip = VTOI(vp);
619 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
620 	    vap->va_birthtime.tv_sec != VNOVAL) {
621 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
622 			error = EROFS;
623 			goto out;
624 		}
625 		if ((ip->i_flags & SF_SNAPSHOT) != 0) {
626 			error = EPERM;
627 			goto out;
628 		}
629 		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
630 		    NULL, genfs_can_chtimes(vp, cred, ip->i_uid,
631 		    vap->va_vaflags));
632 		if (error)
633 			goto out;
634 		error = UFS_WAPBL_BEGIN(vp->v_mount);
635 		if (error)
636 			goto out;
637 		if (vap->va_atime.tv_sec != VNOVAL)
638 			if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
639 				ip->i_flag |= IN_ACCESS;
640 		if (vap->va_mtime.tv_sec != VNOVAL) {
641 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
642 			if (vp->v_mount->mnt_flag & MNT_RELATIME)
643 				ip->i_flag |= IN_ACCESS;
644 		}
645 		if (vap->va_birthtime.tv_sec != VNOVAL &&
646 		    ip->i_ump->um_fstype == UFS2) {
647 			ip->i_ffs2_birthtime = vap->va_birthtime.tv_sec;
648 			ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec;
649 		}
650 		error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
651 		UFS_WAPBL_END(vp->v_mount);
652 		if (error)
653 			goto out;
654 	}
655 	error = 0;
656 	if (vap->va_mode != (mode_t)VNOVAL) {
657 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
658 			error = EROFS;
659 			goto out;
660 		}
661 		if ((ip->i_flags & SF_SNAPSHOT) != 0 &&
662 		    (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP |
663 		     S_IXOTH | S_IWOTH))) {
664 			error = EPERM;
665 			goto out;
666 		}
667 		error = UFS_WAPBL_BEGIN(vp->v_mount);
668 		if (error)
669 			goto out;
670 		error = ufs_chmod(vp, (int)vap->va_mode, cred, l);
671 		UFS_WAPBL_END(vp->v_mount);
672 	}
673 out:
674 	cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid, !HAS_ACLS(ip));
675 	return (error);
676 }
677 
#ifdef UFS_ACL
/*
 * Bring a file's NFSv4 ACL in line with a new mode: read the current
 * ACL, sync it from mode and file_owner_id, and write it back.
 * Returns 0 or the error from reading or writing the ACL.  The scratch
 * ACL is released on every path.
 */
static int
ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
    int file_owner_id, kauth_cred_t cred, struct lwp *l)
{
	struct acl *aclp = acl_alloc(KM_SLEEP);
	int error;

	/*
	 * We don't have to handle EOPNOTSUPP here, as the filesystem claims
	 * it supports ACLs.
	 */
	error = ufs_getacl_nfs4_internal(vp, aclp, l);
	if (error == 0) {
		acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
		error = ufs_setacl_nfs4_internal(vp, aclp, l, false);
	}

	acl_free(aclp);
	return error;
}
#endif /* UFS_ACL */
703 
704 /*
705  * Change the mode on a file.
706  * Inode must be locked before calling.
707  */
708 static int
ufs_chmod(struct vnode * vp,int mode,kauth_cred_t cred,struct lwp * l)709 ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
710 {
711 	struct inode	*ip;
712 	int		error;
713 
714 	UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);
715 
716 	ip = VTOI(vp);
717 
718 #ifdef UFS_ACL
719 	/*
720 	 * To modify the permissions on a file, must possess VADMIN
721 	 * for that file.
722 	 */
723 	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred)) != 0)
724 		return error;
725 #endif
726 
727 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
728 	    NULL, genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid, mode));
729 	if (error)
730 		return (error);
731 
732 #ifdef UFS_ACL
733 	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) {
734 		error = ufs_update_nfs4_acl_after_mode_change(vp, mode,
735 		    ip->i_uid, cred, l);
736 		if (error)
737 			return error;
738 	}
739 #endif
740 	ip->i_mode &= ~ALLPERMS;
741 	ip->i_mode |= (mode & ALLPERMS);
742 	ip->i_flag |= IN_CHANGE;
743 	DIP_ASSIGN(ip, mode, ip->i_mode);
744 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
745 	cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid, !HAS_ACLS(ip));
746 	return (0);
747 }
748 
749 /*
750  * Perform chown operation on inode ip;
751  * inode must be locked prior to call.
752  */
753 static int
ufs_chown(struct vnode * vp,uid_t uid,gid_t gid,kauth_cred_t cred,struct lwp * l)754 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
755     	struct lwp *l)
756 {
757 	struct inode	*ip;
758 	int		error = 0;
759 #if defined(QUOTA) || defined(QUOTA2)
760 	uid_t		ouid;
761 	gid_t		ogid;
762 	int64_t		change;
763 #endif
764 	ip = VTOI(vp);
765 	error = 0;
766 
767 	if (uid == (uid_t)VNOVAL)
768 		uid = ip->i_uid;
769 	if (gid == (gid_t)VNOVAL)
770 		gid = ip->i_gid;
771 
772 #ifdef UFS_ACL
773 	/*
774 	 * To modify the ownership of a file, must possess VADMIN for that
775 	 * file.
776 	 */
777 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred)) != 0)
778 		return error;
779 #endif
780 
781 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
782 	    NULL, genfs_can_chown(vp, cred, ip->i_uid, ip->i_gid, uid, gid));
783 	if (error)
784 		return (error);
785 
786 #if defined(QUOTA) || defined(QUOTA2)
787 	ogid = ip->i_gid;
788 	ouid = ip->i_uid;
789 	change = DIP(ip, blocks);
790 	(void) chkdq(ip, -change, cred, 0);
791 	(void) chkiq(ip, -1, cred, 0);
792 #endif
793 	ip->i_gid = gid;
794 	DIP_ASSIGN(ip, gid, gid);
795 	ip->i_uid = uid;
796 	DIP_ASSIGN(ip, uid, uid);
797 #if defined(QUOTA) || defined(QUOTA2)
798 	if ((error = chkdq(ip, change, cred, 0)) == 0) {
799 		if ((error = chkiq(ip, 1, cred, 0)) == 0)
800 			goto good;
801 		else
802 			(void) chkdq(ip, -change, cred, FORCE);
803 	}
804 	ip->i_gid = ogid;
805 	DIP_ASSIGN(ip, gid, ogid);
806 	ip->i_uid = ouid;
807 	DIP_ASSIGN(ip, uid, ouid);
808 	(void) chkdq(ip, change, cred, FORCE);
809 	(void) chkiq(ip, 1, cred, FORCE);
810 	return (error);
811  good:
812 #endif /* QUOTA || QUOTA2 */
813 	ip->i_flag |= IN_CHANGE;
814 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
815 	cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid, !HAS_ACLS(ip));
816 	return (0);
817 }
818 
819 int
ufs_remove(void * v)820 ufs_remove(void *v)
821 {
822 	struct vop_remove_v3_args /* {
823 		struct vnode		*a_dvp;
824 		struct vnode		*a_vp;
825 		struct componentname	*a_cnp;
826 		nlink_t 		 ctx_vp_new_nlink;
827 	} */ *ap = v;
828 	struct vnode	*vp, *dvp;
829 	struct inode	*ip;
830 	struct mount	*mp;
831 	int		error;
832 	struct ufs_lookup_results *ulr;
833 
834 	vp = ap->a_vp;
835 	dvp = ap->a_dvp;
836 	ip = VTOI(vp);
837 	mp = dvp->v_mount;
838 	KASSERT(mp == vp->v_mount); /* XXX Not stable without lock.  */
839 
840 #ifdef UFS_ACL
841 #ifdef notyet
842 	/* We don't do this because if the filesystem is mounted without ACLs
843 	 * this goes through vfs_unixify_accmode() and we get EPERM.
844 	 */
845 	error = VOP_ACCESSX(vp, VDELETE, ap->a_cnp->cn_cred);
846 	if (error)
847 		goto err;
848 #endif
849 #endif
850 
851 	/* XXX should handle this material another way */
852 	ulr = &VTOI(dvp)->i_crap;
853 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
854 
855 	if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) ||
856 	    (VTOI(dvp)->i_flags & APPEND))
857 		error = EPERM;
858 	else {
859 		error = UFS_WAPBL_BEGIN(mp);
860 		if (error == 0) {
861 			error = ufs_dirremove(dvp, ulr,
862 					      ip, ap->a_cnp->cn_flags, 0);
863 			UFS_WAPBL_END(mp);
864 			if (error == 0) {
865 				ap->ctx_vp_new_nlink = ip->i_nlink;
866 			}
867 		}
868 	}
869 #ifdef notyet
870 err:
871 #endif
872 	if (dvp == vp)
873 		vrele(vp);
874 	else
875 		vput(vp);
876 	return (error);
877 }
878 
879 /*
880  * ufs_link: create hard link.
881  */
882 int
ufs_link(void * v)883 ufs_link(void *v)
884 {
885 	struct vop_link_v2_args /* {
886 		struct vnode *a_dvp;
887 		struct vnode *a_vp;
888 		struct componentname *a_cnp;
889 	} */ *ap = v;
890 	struct vnode *dvp = ap->a_dvp;
891 	struct vnode *vp = ap->a_vp;
892 	struct componentname *cnp = ap->a_cnp;
893 	struct mount *mp = dvp->v_mount;
894 	struct inode *ip;
895 	struct direct *newdir;
896 	int error, abrt = 1;
897 	struct ufs_lookup_results *ulr;
898 
899 	KASSERT(dvp != vp);
900 	KASSERT(vp->v_type != VDIR);
901 	KASSERT(mp == vp->v_mount); /* XXX Not stable without lock.  */
902 
903 	/* XXX should handle this material another way */
904 	ulr = &VTOI(dvp)->i_crap;
905 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
906 
907 	error = vn_lock(vp, LK_EXCLUSIVE);
908 	if (error)
909 		goto out2;
910 
911 	ip = VTOI(vp);
912 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
913 		error = EMLINK;
914 		goto out1;
915 	}
916 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
917 		error = EPERM;
918 		goto out1;
919 	}
920 
921 	error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_ADD_LINK, vp,
922 	    dvp, 0);
923 	if (error)
924 		goto out1;
925 
926 	error = UFS_WAPBL_BEGIN(mp);
927 	if (error)
928 		goto out1;
929 
930 	ip->i_nlink++;
931 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
932 	ip->i_flag |= IN_CHANGE;
933 	abrt = 0;
934 	error = UFS_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
935 	if (!error) {
936 		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
937 		ufs_makedirentry(ip, cnp, newdir);
938 		error = ufs_direnter(dvp, ulr, vp, newdir, cnp, NULL);
939 		pool_cache_put(ufs_direct_cache, newdir);
940 	}
941 	if (error) {
942 		ip->i_nlink--;
943 		DIP_ASSIGN(ip, nlink, ip->i_nlink);
944 		ip->i_flag |= IN_CHANGE;
945 		UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
946 	}
947 	UFS_WAPBL_END(mp);
948  out1:
949 	VOP_UNLOCK(vp);
950  out2:
951 	if (abrt)
952 		VOP_ABORTOP(dvp, cnp);
953 	return (error);
954 }
955 
956 /*
957  * whiteout vnode call
958  */
959 int
ufs_whiteout(void * v)960 ufs_whiteout(void *v)
961 {
962 	struct vop_whiteout_args /* {
963 		struct vnode		*a_dvp;
964 		struct componentname	*a_cnp;
965 		int			a_flags;
966 	} */ *ap = v;
967 	struct vnode		*dvp = ap->a_dvp;
968 	struct componentname	*cnp = ap->a_cnp;
969 	struct direct		*newdir;
970 	int			error;
971 	struct ufsmount		*ump = VFSTOUFS(dvp->v_mount);
972 	struct ufs_lookup_results *ulr;
973 
974 	/* XXX should handle this material another way */
975 	ulr = &VTOI(dvp)->i_crap;
976 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
977 
978 	error = 0;
979 	switch (ap->a_flags) {
980 	case LOOKUP:
981 		/* 4.4 format directories support whiteout operations */
982 		if (ump->um_maxsymlinklen > 0)
983 			return (0);
984 		return (EOPNOTSUPP);
985 
986 	case CREATE:
987 		/* create a new directory whiteout */
988 		error = UFS_WAPBL_BEGIN(dvp->v_mount);
989 		if (error)
990 			break;
991 
992 		KASSERTMSG((ump->um_maxsymlinklen > 0),
993 		    "ufs_whiteout: old format filesystem");
994 
995 		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
996 		newdir->d_ino = UFS_WINO;
997 		newdir->d_namlen = cnp->cn_namelen;
998 		memcpy(newdir->d_name, cnp->cn_nameptr,
999 		    (size_t)cnp->cn_namelen);
1000 
1001 		/* NUL terminate and zero out padding */
1002 		memset(&newdir->d_name[cnp->cn_namelen], 0,
1003 		    UFS_NAMEPAD(cnp->cn_namelen));
1004 
1005 		newdir->d_type = DT_WHT;
1006 		error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL);
1007 		pool_cache_put(ufs_direct_cache, newdir);
1008 		break;
1009 
1010 	case DELETE:
1011 		/* remove an existing directory whiteout */
1012 		error = UFS_WAPBL_BEGIN(dvp->v_mount);
1013 		if (error)
1014 			break;
1015 
1016 		KASSERTMSG((ump->um_maxsymlinklen > 0),
1017 		    "ufs_whiteout: old format filesystem");
1018 
1019 		cnp->cn_flags &= ~DOWHITEOUT;
1020 		error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0);
1021 		break;
1022 	default:
1023 		panic("ufs_whiteout: unknown op");
1024 		/* NOTREACHED */
1025 	}
1026 	UFS_WAPBL_END(dvp->v_mount);
1027 	return (error);
1028 }
1029 
1030 #ifdef UFS_ACL
1031 static int
ufs_do_posix1e_acl_inheritance_dir(struct vnode * dvp,struct vnode * tvp,mode_t dmode,kauth_cred_t cred,struct lwp * l)1032 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
1033     mode_t dmode, kauth_cred_t cred, struct lwp *l)
1034 {
1035 	int error;
1036 	struct inode *ip = VTOI(tvp);
1037 	struct acl *dacl, *acl;
1038 
1039 	acl = acl_alloc(KM_SLEEP);
1040 	dacl = acl_alloc(KM_SLEEP);
1041 
1042 	/*
1043 	 * Retrieve default ACL from parent, if any.
1044 	 */
1045 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred);
1046 	switch (error) {
1047 	case 0:
1048 		/*
1049 		 * Retrieved a default ACL, so merge mode and ACL if
1050 		 * necessary.  If the ACL is empty, fall through to
1051 		 * the "not defined or available" case.
1052 		 */
1053 		if (acl->acl_cnt != 0) {
1054 			dmode = acl_posix1e_newfilemode(dmode, acl);
1055 			ip->i_mode = dmode;
1056 			DIP_ASSIGN(ip, mode, dmode);
1057 			*dacl = *acl;
1058 			ufs_sync_acl_from_inode(ip, acl);
1059 			break;
1060 		}
1061 		/* FALLTHROUGH */
1062 
1063 	case EOPNOTSUPP:
1064 		/*
1065 		 * Just use the mode as-is.
1066 		 */
1067 		ip->i_mode = dmode;
1068 		DIP_ASSIGN(ip, mode, dmode);
1069 		error = 0;
1070 		goto out;
1071 
1072 	default:
1073 		goto out;
1074 	}
1075 
1076 	/*
1077 	 * XXX: If we abort now, will Soft Updates notify the extattr
1078 	 * code that the EAs for the file need to be released?
1079 	 */
1080 	UFS_WAPBL_END(tvp->v_mount);
1081 	error = ufs_setacl_posix1e(tvp, ACL_TYPE_ACCESS, acl, cred, l);
1082 	if (error == 0)
1083 		error = ufs_setacl_posix1e(tvp, ACL_TYPE_DEFAULT, dacl, cred,
1084 		    l);
1085 	UFS_WAPBL_BEGIN(tvp->v_mount);
1086 	switch (error) {
1087 	case 0:
1088 		break;
1089 
1090 	case EOPNOTSUPP:
1091 		/*
1092 		 * XXX: This should not happen, as EOPNOTSUPP above
1093 		 * was supposed to free acl.
1094 		 */
1095 		printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
1096 		/*
1097 		panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
1098 		 */
1099 		break;
1100 
1101 	default:
1102 		goto out;
1103 	}
1104 
1105 out:
1106 	acl_free(acl);
1107 	acl_free(dacl);
1108 
1109 	return (error);
1110 }
1111 
1112 static int
ufs_do_posix1e_acl_inheritance_file(struct vnode * dvp,struct vnode * tvp,mode_t mode,kauth_cred_t cred,struct lwp * l)1113 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
1114     mode_t mode, kauth_cred_t cred, struct lwp *l)
1115 {
1116 	int error;
1117 	struct inode *ip = VTOI(tvp);
1118 	struct acl *acl;
1119 
1120 	acl = acl_alloc(KM_SLEEP);
1121 
1122 	/*
1123 	 * Retrieve default ACL for parent, if any.
1124 	 */
1125 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred);
1126 	switch (error) {
1127 	case 0:
1128 		/*
1129 		 * Retrieved a default ACL, so merge mode and ACL if
1130 		 * necessary.
1131 		 */
1132 		if (acl->acl_cnt != 0) {
1133 			/*
1134 			 * Two possible ways for default ACL to not
1135 			 * be present.  First, the EA can be
1136 			 * undefined, or second, the default ACL can
1137 			 * be blank.  If it's blank, fall through to
1138 			 * the it's not defined case.
1139 			 */
1140 			mode = acl_posix1e_newfilemode(mode, acl);
1141 			ip->i_mode = mode;
1142 			DIP_ASSIGN(ip, mode, mode);
1143 			ufs_sync_acl_from_inode(ip, acl);
1144 			break;
1145 		}
1146 		/* FALLTHROUGH */
1147 
1148 	case EOPNOTSUPP:
1149 		/*
1150 		 * Just use the mode as-is.
1151 		 */
1152 		ip->i_mode = mode;
1153 		DIP_ASSIGN(ip, mode, mode);
1154 		error = 0;
1155 		goto out;
1156 
1157 	default:
1158 		goto out;
1159 	}
1160 
1161 	UFS_WAPBL_END(tvp->v_mount);
1162 	/*
1163 	 * XXX: If we abort now, will Soft Updates notify the extattr
1164 	 * code that the EAs for the file need to be released?
1165 	 */
1166 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred);
1167 	UFS_WAPBL_BEGIN(tvp->v_mount);
1168 	switch (error) {
1169 	case 0:
1170 		break;
1171 
1172 	case EOPNOTSUPP:
1173 		/*
1174 		 * XXX: This should not happen, as EOPNOTSUPP above was
1175 		 * supposed to free acl.
1176 		 */
1177 		printf("%s: VOP_GETACL() but no VOP_SETACL()\n", __func__);
1178 		/* panic("%s: VOP_GETACL() but no VOP_SETACL()", __func__); */
1179 		break;
1180 
1181 	default:
1182 		goto out;
1183 	}
1184 
1185 out:
1186 	acl_free(acl);
1187 
1188 	return (error);
1189 }
1190 
1191 static int
ufs_do_nfs4_acl_inheritance(struct vnode * dvp,struct vnode * tvp,mode_t child_mode,kauth_cred_t cred,struct lwp * l)1192 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
1193     mode_t child_mode, kauth_cred_t cred, struct lwp *l)
1194 {
1195 	int error;
1196 	struct acl *parent_aclp, *child_aclp;
1197 
1198 	parent_aclp = acl_alloc(KM_SLEEP);
1199 	child_aclp = acl_alloc(KM_SLEEP);
1200 
1201 	error = ufs_getacl_nfs4_internal(dvp, parent_aclp, l);
1202 	if (error)
1203 		goto out;
1204 	acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
1205 	    child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
1206 	error = ufs_setacl_nfs4_internal(tvp, child_aclp, l, false);
1207 	if (error)
1208 		goto out;
1209 out:
1210 	acl_free(parent_aclp);
1211 	acl_free(child_aclp);
1212 
1213 	return (error);
1214 }
1215 #endif
1216 
1217 int
ufs_mkdir(void * v)1218 ufs_mkdir(void *v)
1219 {
1220 	struct vop_mkdir_v3_args /* {
1221 		struct vnode		*a_dvp;
1222 		struct vnode		**a_vpp;
1223 		struct componentname	*a_cnp;
1224 		struct vattr		*a_vap;
1225 	} */ *ap = v;
1226 	struct vnode		*dvp = ap->a_dvp, *tvp;
1227 	struct vattr		*vap = ap->a_vap;
1228 	struct componentname	*cnp = ap->a_cnp;
1229 	struct inode		*ip, *dp = VTOI(dvp);
1230 	struct buf		*bp;
1231 	struct dirtemplate	dirtemplate;
1232 	struct direct		*newdir;
1233 	int			error;
1234 	struct ufsmount		*ump = dp->i_ump;
1235 	int			dirblksiz = ump->um_dirblksiz;
1236 	struct ufs_lookup_results *ulr;
1237 
1238 	/* XXX should handle this material another way */
1239 	ulr = &dp->i_crap;
1240 	UFS_CHECK_CRAPCOUNTER(dp);
1241 
1242 	KASSERT(vap->va_type == VDIR);
1243 
1244 	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
1245 		error = EMLINK;
1246 		goto out;
1247 	}
1248 	/*
1249 	 * Must simulate part of ufs_makeinode here to acquire the inode,
1250 	 * but not have it entered in the parent directory. The entry is
1251 	 * made later after writing "." and ".." entries.
1252 	 */
1253 	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL,
1254 	    ap->a_vpp);
1255 	if (error)
1256 		goto out;
1257 	error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
1258 	if (error) {
1259 		vrele(*ap->a_vpp);
1260 		*ap->a_vpp = NULL;
1261 		goto out;
1262 	}
1263 	error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
1264 	if (error) {
1265 		vput(*ap->a_vpp);
1266 		goto out;
1267 	}
1268 
1269 	tvp = *ap->a_vpp;
1270 	ip = VTOI(tvp);
1271 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1272 	ip->i_nlink = 2;
1273 	DIP_ASSIGN(ip, nlink, 2);
1274 	if (cnp->cn_flags & ISWHITEOUT) {
1275 		ip->i_flags |= UF_OPAQUE;
1276 		DIP_ASSIGN(ip, flags, ip->i_flags);
1277 	}
1278 
1279 	/*
1280 	 * Bump link count in parent directory to reflect work done below.
1281 	 * Should be done before reference is created so cleanup is
1282 	 * possible if we crash.
1283 	 */
1284 	dp->i_nlink++;
1285 	DIP_ASSIGN(dp, nlink, dp->i_nlink);
1286 	dp->i_flag |= IN_CHANGE;
1287 	if ((error = UFS_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
1288 		goto bad;
1289 
1290 #ifdef UFS_ACL
1291 	mode_t dmode = (vap->va_mode & 0777) | IFDIR;
1292 	struct lwp *l = curlwp;
1293 	if (dvp->v_mount->mnt_flag & MNT_POSIX1EACLS) {
1294 
1295 		error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
1296 		    cnp->cn_cred, l);
1297 		if (error)
1298 			goto bad;
1299 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
1300 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
1301 		    cnp->cn_cred, l);
1302 		if (error)
1303 			goto bad;
1304 	}
1305 #endif /* !UFS_ACL */
1306 
1307 	/*
1308 	 * Initialize directory with "." and ".." from static template.
1309 	 */
1310 	dirtemplate = mastertemplate;
1311 	dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
1312 	dirtemplate.dot_ino = ufs_rw32(ip->i_number, UFS_MPNEEDSWAP(ump));
1313 	dirtemplate.dotdot_ino = ufs_rw32(dp->i_number, UFS_MPNEEDSWAP(ump));
1314 	dirtemplate.dot_reclen = ufs_rw16(dirtemplate.dot_reclen,
1315 	    UFS_MPNEEDSWAP(ump));
1316 	dirtemplate.dotdot_reclen = ufs_rw16(dirtemplate.dotdot_reclen,
1317 	    UFS_MPNEEDSWAP(ump));
1318 	if (ump->um_maxsymlinklen <= 0) {
1319 #if BYTE_ORDER == LITTLE_ENDIAN
1320 		if (UFS_MPNEEDSWAP(ump) == 0)
1321 #else
1322 		if (UFS_MPNEEDSWAP(ump) != 0)
1323 #endif
1324 		{
1325 			dirtemplate.dot_type = dirtemplate.dot_namlen;
1326 			dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
1327 			dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
1328 		} else
1329 			dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
1330 	}
1331 	if ((error = UFS_BALLOC(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
1332 	    B_CLRBUF, &bp)) != 0)
1333 		goto bad;
1334 	ip->i_size = dirblksiz;
1335 	DIP_ASSIGN(ip, size, dirblksiz);
1336 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1337 	uvm_vnp_setsize(tvp, ip->i_size);
1338 	memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
1339 
1340 	/*
1341 	 * Directory set up, now install its entry in the parent directory.
1342 	 * We must write out the buffer containing the new directory body
1343 	 * before entering the new name in the parent.
1344 	 */
1345 	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
1346 		goto bad;
1347 	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
1348 		goto bad;
1349 	}
1350 	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
1351 	ufs_makedirentry(ip, cnp, newdir);
1352 	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
1353 	pool_cache_put(ufs_direct_cache, newdir);
1354  bad:
1355 	if (error == 0) {
1356 		VOP_UNLOCK(tvp);
1357 		UFS_WAPBL_END(dvp->v_mount);
1358 	} else {
1359 		dp->i_nlink--;
1360 		DIP_ASSIGN(dp, nlink, dp->i_nlink);
1361 		dp->i_flag |= IN_CHANGE;
1362 		UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
1363 		/*
1364 		 * No need to do an explicit UFS_TRUNCATE here, vrele will
1365 		 * do this for us because we set the link count to 0.
1366 		 */
1367 		ip->i_nlink = 0;
1368 		DIP_ASSIGN(ip, nlink, 0);
1369 		ip->i_flag |= IN_CHANGE;
1370 		UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
1371 		UFS_WAPBL_END(dvp->v_mount);
1372 		vput(tvp);
1373 	}
1374  out:
1375 	return (error);
1376 }
1377 
1378 int
ufs_rmdir(void * v)1379 ufs_rmdir(void *v)
1380 {
1381 	struct vop_rmdir_v2_args /* {
1382 		struct vnode		*a_dvp;
1383 		struct vnode		*a_vp;
1384 		struct componentname	*a_cnp;
1385 	} */ *ap = v;
1386 	struct vnode		*vp, *dvp;
1387 	struct componentname	*cnp;
1388 	struct inode		*ip, *dp;
1389 	int			error;
1390 	struct ufs_lookup_results *ulr;
1391 
1392 	vp = ap->a_vp;
1393 	dvp = ap->a_dvp;
1394 	cnp = ap->a_cnp;
1395 	ip = VTOI(vp);
1396 	dp = VTOI(dvp);
1397 
1398 #ifdef UFS_ACL
1399 #ifdef notyet
1400 	/* We don't do this because if the filesystem is mounted without ACLs
1401 	 * this goes through vfs_unixify_accmode() and we get EPERM.
1402 	 */
1403 	error = VOP_ACCESSX(vp, VDELETE, cnp->cn_cred);
1404 	if (error)
1405 		goto err;
1406 #endif
1407 #endif
1408 
1409 	/* XXX should handle this material another way */
1410 	ulr = &dp->i_crap;
1411 	UFS_CHECK_CRAPCOUNTER(dp);
1412 
1413 	/*
1414 	 * No rmdir "." or of mounted directories please.
1415 	 */
1416 	if (dp == ip || vp->v_mountedhere != NULL) {
1417 		error = EINVAL;
1418 		goto err;
1419 	}
1420 
1421 	/*
1422 	 * Do not remove a directory that is in the process of being renamed.
1423 	 * Verify that the directory is empty (and valid). (Rmdir ".." won't
1424 	 * be valid since ".." will contain a reference to the current
1425 	 * directory and thus be non-empty.)
1426 	 */
1427 	error = 0;
1428 	if (ip->i_nlink != 2 ||
1429 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1430 		error = ENOTEMPTY;
1431 		goto out;
1432 	}
1433 	if ((dp->i_flags & APPEND) ||
1434 		(ip->i_flags & (IMMUTABLE | APPEND))) {
1435 		error = EPERM;
1436 		goto out;
1437 	}
1438 	error = UFS_WAPBL_BEGIN(dvp->v_mount);
1439 	if (error)
1440 		goto out;
1441 	/*
1442 	 * Delete reference to directory before purging
1443 	 * inode.  If we crash in between, the directory
1444 	 * will be reattached to lost+found,
1445 	 */
1446 	error = ufs_dirremove(dvp, ulr, ip, cnp->cn_flags, 1);
1447 	if (error) {
1448 		UFS_WAPBL_END(dvp->v_mount);
1449 		goto out;
1450 	}
1451 	cache_purge(dvp);
1452 	/*
1453 	 * Truncate inode.  The only stuff left in the directory is "." and
1454 	 * "..".  The "." reference is inconsequential since we're quashing
1455 	 * it.
1456 	 */
1457 	dp->i_nlink--;
1458 	DIP_ASSIGN(dp, nlink, dp->i_nlink);
1459 	dp->i_flag |= IN_CHANGE;
1460 	UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
1461 	ip->i_nlink--;
1462 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
1463 	ip->i_flag |= IN_CHANGE;
1464 	(void) UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
1465 	cache_purge(vp);
1466 	/*
1467 	 * Unlock the log while we still have reference to unlinked
1468 	 * directory vp so that it will not get locked for recycling
1469 	 */
1470 	UFS_WAPBL_END(dvp->v_mount);
1471 #ifdef UFS_DIRHASH
1472 	if (ip->i_dirhash != NULL)
1473 		ufsdirhash_free(ip);
1474 #endif
1475  out:
1476 	vput(vp);
1477 	return error;
1478  err:
1479 	if (dp == ip)
1480 		vrele(vp);
1481 	else
1482 		vput(vp);
1483 	return error;
1484 }
1485 
1486 /*
1487  * symlink -- make a symbolic link
1488  */
1489 int
ufs_symlink(void * v)1490 ufs_symlink(void *v)
1491 {
1492 	struct vop_symlink_v3_args /* {
1493 		struct vnode		*a_dvp;
1494 		struct vnode		**a_vpp;
1495 		struct componentname	*a_cnp;
1496 		struct vattr		*a_vap;
1497 		char			*a_target;
1498 	} */ *ap = v;
1499 	struct vnode	*vp, **vpp;
1500 	struct inode	*ip;
1501 	int		len, error;
1502 	struct ufs_lookup_results *ulr;
1503 
1504 	vpp = ap->a_vpp;
1505 
1506 	/* XXX should handle this material another way */
1507 	ulr = &VTOI(ap->a_dvp)->i_crap;
1508 	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
1509 
1510 	/*
1511 	 * UFS_WAPBL_BEGIN(dvp->v_mount) performed by successful
1512 	 * ufs_makeinode
1513 	 */
1514 	KASSERT(ap->a_vap->va_type == VLNK);
1515 	error = ufs_makeinode(ap->a_vap, ap->a_dvp, ulr, vpp, ap->a_cnp);
1516 	if (error)
1517 		goto out;
1518 	vp = *vpp;
1519 	len = strlen(ap->a_target);
1520 	ip = VTOI(vp);
1521 	/*
1522 	 * This test is off by one. um_maxsymlinklen contains the
1523 	 * number of bytes available, and we aren't storing a \0, so
1524 	 * the test should properly be <=. However, it cannot be
1525 	 * changed as this would break compatibility with existing fs
1526 	 * images -- see the way ufs_readlink() works.
1527 	 */
1528 	if (len < ip->i_ump->um_maxsymlinklen) {
1529 		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
1530 		ip->i_size = len;
1531 		DIP_ASSIGN(ip, size, len);
1532 		uvm_vnp_setsize(vp, ip->i_size);
1533 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
1534 		if (vp->v_mount->mnt_flag & MNT_RELATIME)
1535 			ip->i_flag |= IN_ACCESS;
1536 		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
1537 	} else
1538 		error = ufs_bufio(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1539 		    IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
1540 		    NULL);
1541 	UFS_WAPBL_END(ap->a_dvp->v_mount);
1542 	VOP_UNLOCK(vp);
1543 	if (error)
1544 		vrele(vp);
1545 out:
1546 	return (error);
1547 }
1548 
1549 /*
1550  * Vnode op for reading directories.
1551  *
1552  * This routine handles converting from the on-disk directory format
1553  * "struct direct" to the in-memory format "struct dirent" as well as
1554  * byte swapping the entries if necessary.
1555  */
1556 int
ufs_readdir(void * v)1557 ufs_readdir(void *v)
1558 {
1559 	struct vop_readdir_args /* {
1560 		struct vnode	*a_vp;
1561 		struct uio	*a_uio;
1562 		kauth_cred_t	a_cred;
1563 		int		*a_eofflag;
1564 		off_t		**a_cookies;
1565 		int		*a_ncookies;
1566 	} */ *ap = v;
1567 
1568 	/* vnode and fs */
1569 	struct vnode	*vp = ap->a_vp;
1570 	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
1571 	int nswap = UFS_MPNEEDSWAP(ump);
1572 #if BYTE_ORDER == LITTLE_ENDIAN
1573 	int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0;
1574 #else
1575 	int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0;
1576 #endif
1577 	/* caller's buffer */
1578 	struct uio	*calleruio = ap->a_uio;
1579 	off_t		startoffset, endoffset;
1580 	size_t		callerbytes;
1581 	off_t		curoffset;
1582 	/* dirent production buffer */
1583 	char		*direntbuf;
1584 	size_t		direntbufmax;
1585 	struct dirent	*dirent, *stopdirent;
1586 	/* output cookies array */
1587 	off_t		*cookies;
1588 	size_t		numcookies, maxcookies;
1589 	/* disk buffer */
1590 	off_t		physstart, physend;
1591 	size_t		skipstart, dropend;
1592 	char		*rawbuf;
1593 	size_t		rawbufmax, rawbytes;
1594 	struct uio	rawuio;
1595 	struct iovec	rawiov;
1596 	struct direct	*rawdp, *stoprawdp;
1597 	/* general */
1598 	int		error;
1599 
1600 	KASSERT(VOP_ISLOCKED(vp));
1601 
1602 	/*
1603 	 * Figure out where the user wants us to read and how much.
1604 	 *
1605 	 * XXX: there should probably be an upper bound on callerbytes
1606 	 * to avoid silliness trying to do large kernel allocations.
1607 	 */
1608 	callerbytes = calleruio->uio_resid;
1609 	startoffset = calleruio->uio_offset;
1610 	endoffset = startoffset + callerbytes;
1611 
1612 	if (callerbytes < _DIRENT_MINSIZE(dirent)) {
1613 		/* no room for even one struct dirent */
1614 		return EINVAL;
1615 	}
1616 
1617 	/*
1618 	 * Now figure out where to actually start reading. Round the
1619 	 * start down to a block boundary: we need to start at the
1620 	 * beginning of a block in order to read the directory
1621 	 * correctly.
1622 	 *
1623 	 * We also want to always read a whole number of blocks so
1624 	 * that the copying code below doesn't have to worry about
1625 	 * partial entries. (It used to try at one point, and was a
1626 	 * horrible mess.)
1627 	 *
1628 	 * Furthermore, since blocks have to be scanned from the
1629 	 * beginning, if we go partially into another block now we'll
1630 	 * just have to rescan it on the next readdir call, which
1631 	 * doesn't really serve any useful purpose.
1632 	 *
1633 	 * So, round down the end as well. It's ok to underpopulate
1634 	 * the transfer buffer, as long as we send back at least one
1635 	 * dirent so as to avoid giving a bogus EOF indication.
1636 	 *
1637 	 * Note that because dirents are larger than ffs struct
1638 	 * directs, despite the rounding down we may not be able to
1639 	 * send all the entries in the blocks we read and may have to
1640 	 * rescan some of them on the next call anyway. Alternatively
1641 	 * if there's empty space on disk we might have actually been
1642 	 * able to fit the next block in, and so forth. None of this
1643 	 * actually matters that much in practice.
1644 	 *
1645 	 * XXX: what does ffs do if a directory block becomes
1646 	 * completely empty, and what happens if all the blocks we
1647 	 * read are completely empty even though we aren't at EOF? As
1648 	 * of this writing I (dholland) can't remember the details.
1649 	 */
1650 	physstart = rounddown2(startoffset, ump->um_dirblksiz);
1651 	physend = rounddown2(endoffset, ump->um_dirblksiz);
1652 
1653 	if (physstart >= physend) {
1654 		/* Need at least one block */
1655 		return EINVAL;
1656 	}
1657 
1658 	/*
1659 	 * skipstart is the number of bytes we need to read in
1660 	 * (because we need to start at the beginning of a block) but
1661 	 * not transfer to the user.
1662 	 *
1663 	 * dropend is the number of bytes to ignore at the end of the
1664 	 * user's buffer.
1665 	 */
1666 	skipstart = startoffset - physstart;
1667 	dropend = endoffset - physend;
1668 
1669 	/*
1670 	 * Make a transfer buffer.
1671 	 *
1672 	 * Note: rawbufmax = physend - physstart. Proof:
1673 	 *
1674 	 * physend - physstart = physend - physstart
1675 	 *   = physend - physstart + startoffset - startoffset
1676 	 *   = physend + (startoffset - physstart) - startoffset
1677 	 *   = physend + skipstart - startoffset
1678 	 *   = physend + skipstart - startoffset + endoffset - endoffset
1679 	 *   = skipstart - startoffset + endoffset - (endoffset - physend)
1680 	 *   = skipstart - startoffset + endoffset - dropend
1681 	 *   = skipstart - startoffset + (startoffset + callerbytes) - dropend
1682 	 *   = skipstart + callerbytes - dropend
1683 	 *   = rawbufmax
1684 	 * Qed.
1685 	 *
1686 	 * XXX: this should just use physend - physstart.
1687 	 *
1688 	 * XXX: this should be rewritten to read the directs straight
1689 	 * out of bufferio buffers instead of copying twice. This would
1690 	 * also let us adapt better to the user's buffer size.
1691 	 */
1692 
1693 	/* Base buffer space for CALLERBYTES of new data */
1694 	rawbufmax = callerbytes + skipstart;
1695 	if (rawbufmax < callerbytes)
1696 		return EINVAL;
1697 	rawbufmax -= dropend;
1698 
1699 	if (rawbufmax < _DIRENT_MINSIZE(rawdp)) {
1700 		/* no room for even one struct direct */
1701 		return EINVAL;
1702 	}
1703 
1704 	/* read it */
1705 	rawbuf = kmem_alloc(rawbufmax, KM_SLEEP);
1706 	rawiov.iov_base = rawbuf;
1707 	rawiov.iov_len = rawbufmax;
1708 	rawuio.uio_iov = &rawiov;
1709 	rawuio.uio_iovcnt = 1;
1710 	rawuio.uio_offset = physstart;
1711 	rawuio.uio_resid = rawbufmax;
1712 	UIO_SETUP_SYSSPACE(&rawuio);
1713 	rawuio.uio_rw = UIO_READ;
1714 	error = UFS_BUFRD(vp, &rawuio, 0, ap->a_cred);
1715 	if (error != 0) {
1716 		kmem_free(rawbuf, rawbufmax);
1717 		return error;
1718 	}
1719 	rawbytes = rawbufmax - rawuio.uio_resid;
1720 
1721 	/* the raw entries to iterate over */
1722 	rawdp = (struct direct *)(void *)rawbuf;
1723 	stoprawdp = (struct direct *)(void *)&rawbuf[rawbytes];
1724 
1725 	/* allocate space to produce dirents into */
1726 	direntbufmax = callerbytes;
1727 	direntbuf = kmem_alloc(direntbufmax, KM_SLEEP);
1728 
1729 	/* the dirents to iterate over */
1730 	dirent = (struct dirent *)(void *)direntbuf;
1731 	stopdirent = (struct dirent *)(void *)&direntbuf[direntbufmax];
1732 
1733 	/* the output "cookies" (seek positions of directory entries) */
1734 	if (ap->a_cookies) {
1735 		numcookies = 0;
1736 		maxcookies = rawbytes / _DIRENT_RECLEN(rawdp, 1);
1737 		cookies = malloc(maxcookies * sizeof(*cookies),
1738 		    M_TEMP, M_WAITOK);
1739 	} else {
1740 		/* XXX: GCC */
1741 		maxcookies = 0;
1742 		cookies = NULL;
1743 	}
1744 
1745 	/* now produce the dirents */
1746 	curoffset = calleruio->uio_offset;
1747 	while (rawdp < stoprawdp) {
1748 		rawdp->d_reclen = ufs_rw16(rawdp->d_reclen, nswap);
1749 		if (skipstart > 0) {
1750 			/* drain skipstart */
1751 			if (rawdp->d_reclen <= skipstart) {
1752 				skipstart -= rawdp->d_reclen;
1753 				rawdp = _DIRENT_NEXT(rawdp);
1754 				continue;
1755 			}
1756 			/* caller's start position wasn't on an entry */
1757 			error = EINVAL;
1758 			goto out;
1759 		}
1760 		if (rawdp->d_reclen == 0) {
1761 			struct dirent *save = dirent;
1762 			dirent->d_reclen = _DIRENT_MINSIZE(dirent);
1763 			dirent = _DIRENT_NEXT(dirent);
1764 			save->d_reclen = 0;
1765 			rawdp = stoprawdp;
1766 			break;
1767 		}
1768 
1769 		/* copy the header */
1770 		if (needswap) {
1771 			dirent->d_type = rawdp->d_namlen;
1772 			dirent->d_namlen = rawdp->d_type;
1773 		} else {
1774 			dirent->d_type = rawdp->d_type;
1775 			dirent->d_namlen = rawdp->d_namlen;
1776 		}
1777 		dirent->d_reclen = _DIRENT_RECLEN(dirent, dirent->d_namlen);
1778 
1779 		/* stop if there isn't room for the name AND another header */
1780 		if ((char *)(void *)dirent + dirent->d_reclen +
1781 		    _DIRENT_MINSIZE(dirent) > (char *)(void *)stopdirent)
1782 			break;
1783 
1784 		/* copy the name (and inode (XXX: why after the test?)) */
1785 		dirent->d_fileno = ufs_rw32(rawdp->d_ino, nswap);
1786 		(void)memcpy(dirent->d_name, rawdp->d_name, dirent->d_namlen);
1787 		memset(&dirent->d_name[dirent->d_namlen], 0,
1788 		    dirent->d_reclen - _DIRENT_NAMEOFF(dirent)
1789 		    - dirent->d_namlen);
1790 
1791 		/* onward */
1792 		curoffset += rawdp->d_reclen;
1793 		if (ap->a_cookies) {
1794 			KASSERT(numcookies < maxcookies);
1795 			cookies[numcookies++] = curoffset;
1796 		}
1797 		dirent = _DIRENT_NEXT(dirent);
1798 		rawdp = _DIRENT_NEXT(rawdp);
1799 	}
1800 
1801 	/* transfer the dirents to the caller's buffer */
1802 	callerbytes = ((char *)(void *)dirent - direntbuf);
1803 	error = uiomove(direntbuf, callerbytes, calleruio);
1804 
1805 out:
1806 	calleruio->uio_offset = curoffset;
1807 	if (ap->a_cookies) {
1808 		if (error) {
1809 			free(cookies, M_TEMP);
1810 			*ap->a_cookies = NULL;
1811 			*ap->a_ncookies = 0;
1812 		} else {
1813 			*ap->a_cookies = cookies;
1814 			*ap->a_ncookies = numcookies;
1815 		}
1816 	}
1817 	kmem_free(direntbuf, direntbufmax);
1818 	kmem_free(rawbuf, rawbufmax);
1819 	*ap->a_eofflag = VTOI(vp)->i_size <= calleruio->uio_offset;
1820 	return error;
1821 }
1822 
1823 /*
1824  * Return target name of a symbolic link
1825  */
1826 int
ufs_readlink(void * v)1827 ufs_readlink(void *v)
1828 {
1829 	struct vop_readlink_args /* {
1830 		struct vnode	*a_vp;
1831 		struct uio	*a_uio;
1832 		kauth_cred_t	a_cred;
1833 	} */ *ap = v;
1834 	struct vnode	*vp = ap->a_vp;
1835 	struct inode	*ip = VTOI(vp);
1836 	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
1837 	int		isize;
1838 
1839 	/*
1840 	 * The test against um_maxsymlinklen is off by one; it should
1841 	 * theoretically be <=, not <. However, it cannot be changed
1842 	 * as that would break compatibility with existing fs images.
1843 	 */
1844 
1845 	isize = ip->i_size;
1846 	if (isize < ump->um_maxsymlinklen ||
1847 	    (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) {
1848 		uiomove((char *)SHORTLINK(ip), isize, ap->a_uio);
1849 		return (0);
1850 	}
1851 	return (UFS_BUFRD(vp, ap->a_uio, 0, ap->a_cred));
1852 }
1853 
1854 /*
1855  * Calculate the logical to physical mapping if not done already,
1856  * then call the device strategy routine.
1857  */
1858 int
ufs_strategy(void * v)1859 ufs_strategy(void *v)
1860 {
1861 	struct vop_strategy_args /* {
1862 		struct vnode *a_vp;
1863 		struct buf *a_bp;
1864 	} */ *ap = v;
1865 	struct buf	*bp;
1866 	struct vnode	*vp;
1867 	struct inode	*ip;
1868 	struct mount	*mp;
1869 	int		error;
1870 
1871 	bp = ap->a_bp;
1872 	vp = ap->a_vp;
1873 	ip = VTOI(vp);
1874 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1875 		panic("ufs_strategy: spec");
1876 	KASSERT(fstrans_held(vp->v_mount));
1877 	KASSERT(bp->b_bcount != 0);
1878 	if (bp->b_blkno == bp->b_lblkno) {
1879 		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
1880 				 NULL);
1881 		if (error) {
1882 			bp->b_error = error;
1883 			biodone(bp);
1884 			return (error);
1885 		}
1886 		if (bp->b_blkno == -1) /* no valid data */
1887 			clrbuf(bp);
1888 	}
1889 	if (bp->b_blkno < 0) { /* block is not on disk */
1890 		biodone(bp);
1891 		return (0);
1892 	}
1893 	vp = ip->i_devvp;
1894 
1895 	error = VOP_STRATEGY(vp, bp);
1896 	if (error)
1897 		return error;
1898 
1899 	if (!BUF_ISREAD(bp))
1900 		return 0;
1901 
1902 	mp = wapbl_vptomp(vp);
1903 	if (mp == NULL || mp->mnt_wapbl_replay == NULL ||
1904 	    !WAPBL_REPLAY_ISOPEN(mp) ||
1905 	    !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount))
1906 		return 0;
1907 
1908 	error = biowait(bp);
1909 	if (error)
1910 		return error;
1911 
1912 	error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount);
1913 	if (error) {
1914 		mutex_enter(&bufcache_lock);
1915 		SET(bp->b_cflags, BC_INVAL);
1916 		mutex_exit(&bufcache_lock);
1917 	}
1918 	return error;
1919 }
1920 
1921 /*
1922  * Print out the contents of an inode.
1923  */
1924 int
ufs_print(void * v)1925 ufs_print(void *v)
1926 {
1927 	struct vop_print_args /* {
1928 		struct vnode	*a_vp;
1929 	} */ *ap = v;
1930 	struct vnode	*vp;
1931 	struct inode	*ip;
1932 
1933 	vp = ap->a_vp;
1934 	ip = VTOI(vp);
1935 	printf("tag VT_UFS, ino %llu, on dev %llu, %llu",
1936 	    (unsigned long long)ip->i_number,
1937 	    (unsigned long long)major(ip->i_dev),
1938 	    (unsigned long long)minor(ip->i_dev));
1939 	printf(" flags 0x%x, nlink %d\n",
1940 	    ip->i_flag, ip->i_nlink);
1941 	printf("\tmode 0%o, owner %d, group %d, size %qd",
1942 	    ip->i_mode, ip->i_uid, ip->i_gid,
1943 	    (long long)ip->i_size);
1944 	if (vp->v_type == VFIFO)
1945 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1946 	printf("\n");
1947 	return (0);
1948 }
1949 
1950 /*
1951  * Read wrapper for special devices.
1952  */
1953 int
ufsspec_read(void * v)1954 ufsspec_read(void *v)
1955 {
1956 	struct vop_read_args /* {
1957 		struct vnode	*a_vp;
1958 		struct uio	*a_uio;
1959 		int		a_ioflag;
1960 		kauth_cred_t	a_cred;
1961 	} */ *ap = v;
1962 
1963 	/*
1964 	 * Set access flag.
1965 	 */
1966 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
1967 		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1968 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
1969 }
1970 
1971 /*
1972  * Write wrapper for special devices.
1973  */
1974 int
ufsspec_write(void * v)1975 ufsspec_write(void *v)
1976 {
1977 	struct vop_write_args /* {
1978 		struct vnode	*a_vp;
1979 		struct uio	*a_uio;
1980 		int		a_ioflag;
1981 		kauth_cred_t	a_cred;
1982 	} */ *ap = v;
1983 
1984 	/*
1985 	 * Set update and change flags.
1986 	 */
1987 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
1988 		VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
1989 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
1990 }
1991 
1992 /*
1993  * Close wrapper for special devices.
1994  *
1995  * Update the times on the inode then do device close.
1996  */
1997 int
ufsspec_close(void * v)1998 ufsspec_close(void *v)
1999 {
2000 	struct vop_close_args /* {
2001 		struct vnode	*a_vp;
2002 		int		a_fflag;
2003 		kauth_cred_t	a_cred;
2004 	} */ *ap = v;
2005 	struct vnode	*vp;
2006 
2007 	vp = ap->a_vp;
2008 	if (vrefcnt(vp) > 1)
2009 		UFS_ITIMES(vp, NULL, NULL, NULL);
2010 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
2011 }
2012 
2013 /*
2014  * Read wrapper for fifo's
2015  */
2016 int
ufsfifo_read(void * v)2017 ufsfifo_read(void *v)
2018 {
2019 	struct vop_read_args /* {
2020 		struct vnode	*a_vp;
2021 		struct uio	*a_uio;
2022 		int		a_ioflag;
2023 		kauth_cred_t	a_cred;
2024 	} */ *ap = v;
2025 
2026 	/*
2027 	 * Set access flag.
2028 	 */
2029 	VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2030 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
2031 }
2032 
2033 /*
2034  * Write wrapper for fifo's.
2035  */
2036 int
ufsfifo_write(void * v)2037 ufsfifo_write(void *v)
2038 {
2039 	struct vop_write_args /* {
2040 		struct vnode	*a_vp;
2041 		struct uio	*a_uio;
2042 		int		a_ioflag;
2043 		kauth_cred_t	a_cred;
2044 	} */ *ap = v;
2045 
2046 	/*
2047 	 * Set update and change flags.
2048 	 */
2049 	VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
2050 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
2051 }
2052 
2053 /*
2054  * Close wrapper for fifo's.
2055  *
2056  * Update the times on the inode then do device close.
2057  */
2058 int
ufsfifo_close(void * v)2059 ufsfifo_close(void *v)
2060 {
2061 	struct vop_close_args /* {
2062 		struct vnode	*a_vp;
2063 		int		a_fflag;
2064 		kauth_cred_t	a_cred;
2065 	} */ *ap = v;
2066 	struct vnode	*vp;
2067 
2068 	vp = ap->a_vp;
2069 	if (vrefcnt(ap->a_vp) > 1)
2070 		UFS_ITIMES(vp, NULL, NULL, NULL);
2071 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
2072 }
2073 
2074 /*
2075  * Return POSIX pathconf information applicable to ufs filesystems.
2076  */
2077 int
ufs_pathconf(void * v)2078 ufs_pathconf(void *v)
2079 {
2080 	struct vop_pathconf_args /* {
2081 		struct vnode	*a_vp;
2082 		int		a_name;
2083 		register_t	*a_retval;
2084 	} */ *ap = v;
2085 
2086 	switch (ap->a_name) {
2087 	case _PC_LINK_MAX:
2088 		*ap->a_retval = LINK_MAX;
2089 		return (0);
2090 	case _PC_NAME_MAX:
2091 		*ap->a_retval = FFS_MAXNAMLEN;
2092 		return (0);
2093 	case _PC_PATH_MAX:
2094 		*ap->a_retval = PATH_MAX;
2095 		return (0);
2096 	case _PC_PIPE_BUF:
2097 		*ap->a_retval = PIPE_BUF;
2098 		return (0);
2099 	case _PC_CHOWN_RESTRICTED:
2100 		*ap->a_retval = 1;
2101 		return (0);
2102 	case _PC_NO_TRUNC:
2103 		*ap->a_retval = 1;
2104 		return (0);
2105 #ifdef UFS_ACL
2106 	case _PC_ACL_EXTENDED:
2107 		if (ap->a_vp->v_mount->mnt_flag & MNT_POSIX1EACLS)
2108 			*ap->a_retval = 1;
2109 		else
2110 			*ap->a_retval = 0;
2111 		return 0;
2112 	case _PC_ACL_NFS4:
2113 		if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
2114 			*ap->a_retval = 1;
2115 		else
2116 			*ap->a_retval = 0;
2117 		return 0;
2118 #endif
2119 	case _PC_ACL_PATH_MAX:
2120 #ifdef UFS_ACL
2121 		if (ap->a_vp->v_mount->mnt_flag & (MNT_POSIX1EACLS | MNT_NFS4ACLS))
2122 			*ap->a_retval = ACL_MAX_ENTRIES;
2123 		else
2124 			*ap->a_retval = 3;
2125 #else
2126 		*ap->a_retval = 3;
2127 #endif
2128 		return 0;
2129 	case _PC_SYNC_IO:
2130 		*ap->a_retval = 1;
2131 		return (0);
2132 	case _PC_FILESIZEBITS:
2133 		*ap->a_retval = 42;
2134 		return (0);
2135 	case _PC_SYMLINK_MAX:
2136 		*ap->a_retval = MAXPATHLEN;
2137 		return (0);
2138 	case _PC_2_SYMLINKS:
2139 		*ap->a_retval = 1;
2140 		return (0);
2141 	default:
2142 		return (EINVAL);
2143 	}
2144 	/* NOTREACHED */
2145 }
2146 
2147 /*
2148  * Advisory record locking support
2149  */
2150 int
ufs_advlock(void * v)2151 ufs_advlock(void *v)
2152 {
2153 	struct vop_advlock_args /* {
2154 		struct vnode	*a_vp;
2155 		void *		a_id;
2156 		int		a_op;
2157 		struct flock	*a_fl;
2158 		int		a_flags;
2159 	} */ *ap = v;
2160 	struct inode *ip;
2161 
2162 	ip = VTOI(ap->a_vp);
2163 	return lf_advlock(ap, &ip->i_lockf, ip->i_size);
2164 }
2165 
2166 /*
2167  * Initialize the vnode associated with a new inode, handle aliased
2168  * vnodes.
2169  */
2170 void
ufs_vinit(struct mount * mntp,int (** specops)(void *),int (** fifoops)(void *),struct vnode ** vpp)2171 ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *),
2172 	struct vnode **vpp)
2173 {
2174 	struct timeval	tv;
2175 	struct inode	*ip;
2176 	struct vnode	*vp;
2177 	dev_t		rdev;
2178 	struct ufsmount	*ump;
2179 
2180 	vp = *vpp;
2181 	ip = VTOI(vp);
2182 	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2183 	case VCHR:
2184 	case VBLK:
2185 		vp->v_op = specops;
2186 		ump = ip->i_ump;
2187 		if (ump->um_fstype == UFS1)
2188 			rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
2189 			    UFS_MPNEEDSWAP(ump));
2190 		else
2191 			rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
2192 			    UFS_MPNEEDSWAP(ump));
2193 		spec_node_init(vp, rdev);
2194 		break;
2195 	case VFIFO:
2196 		vp->v_op = fifoops;
2197 		break;
2198 	case VNON:
2199 	case VBAD:
2200 	case VSOCK:
2201 	case VLNK:
2202 	case VDIR:
2203 	case VREG:
2204 		break;
2205 	}
2206 	if (ip->i_number == UFS_ROOTINO)
2207                 vp->v_vflag |= VV_ROOT;
2208 	/*
2209 	 * Initialize modrev times
2210 	 */
2211 	getmicrouptime(&tv);
2212 	ip->i_modrev = (uint64_t)(uint)tv.tv_sec << 32
2213 			| tv.tv_usec * 4294u;
2214 	*vpp = vp;
2215 }
2216 
2217 /*
2218  * Allocate a new inode.
2219  */
2220 static int
ufs_makeinode(struct vattr * vap,struct vnode * dvp,const struct ufs_lookup_results * ulr,struct vnode ** vpp,struct componentname * cnp)2221 ufs_makeinode(struct vattr *vap, struct vnode *dvp,
2222 	const struct ufs_lookup_results *ulr,
2223 	struct vnode **vpp, struct componentname *cnp)
2224 {
2225 	struct inode	*ip;
2226 	struct direct	*newdir;
2227 	struct vnode	*tvp;
2228 	int		error;
2229 
2230 	UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount);
2231 
2232 	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL, &tvp);
2233 	if (error)
2234 		return error;
2235 	error = vn_lock(tvp, LK_EXCLUSIVE);
2236 	if (error) {
2237 		vrele(tvp);
2238 		return error;
2239 	}
2240 	*vpp = tvp;
2241 	ip = VTOI(tvp);
2242 	error = UFS_WAPBL_BEGIN(dvp->v_mount);
2243 	if (error) {
2244 		vput(tvp);
2245 		return (error);
2246 	}
2247 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2248 	ip->i_nlink = 1;
2249 	DIP_ASSIGN(ip, nlink, 1);
2250 
2251 	/* Authorize setting SGID if needed. */
2252 	if (ip->i_mode & ISGID) {
2253 		error = kauth_authorize_vnode(cnp->cn_cred,
2254 		    KAUTH_VNODE_WRITE_SECURITY,
2255 		    tvp, NULL, genfs_can_chmod(tvp, cnp->cn_cred, ip->i_uid,
2256 		    ip->i_gid, MAKEIMODE(vap->va_type, vap->va_mode)));
2257 		if (error) {
2258 			ip->i_mode &= ~ISGID;
2259 			DIP_ASSIGN(ip, mode, ip->i_mode);
2260 		}
2261 	}
2262 
2263 	if (cnp->cn_flags & ISWHITEOUT) {
2264 		ip->i_flags |= UF_OPAQUE;
2265 		DIP_ASSIGN(ip, flags, ip->i_flags);
2266 	}
2267 
2268 	/*
2269 	 * Make sure inode goes to disk before directory entry.
2270 	 */
2271 	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
2272 		goto bad;
2273 #ifdef UFS_ACL
2274 	struct lwp *l = curlwp;
2275 	if (dvp->v_mount->mnt_flag & MNT_POSIX1EACLS) {
2276 		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp,
2277 		    ip->i_mode, cnp->cn_cred, l);
2278 		if (error)
2279 			goto bad;
2280 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
2281 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, ip->i_mode,
2282 		    cnp->cn_cred, l);
2283 		if (error)
2284 			goto bad;
2285 	}
2286 #endif /* !UFS_ACL */
2287 	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
2288 	ufs_makedirentry(ip, cnp, newdir);
2289 	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, NULL);
2290 	pool_cache_put(ufs_direct_cache, newdir);
2291 	if (error)
2292 		goto bad;
2293 	*vpp = tvp;
2294 	cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
2295 	return (0);
2296 
2297  bad:
2298 	/*
2299 	 * Write error occurred trying to update the inode
2300 	 * or the directory so must deallocate the inode.
2301 	 */
2302 	ip->i_nlink = 0;
2303 	DIP_ASSIGN(ip, nlink, 0);
2304 	ip->i_flag |= IN_CHANGE;
2305 	UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0);
2306 	UFS_WAPBL_END(dvp->v_mount);
2307 	vput(tvp);
2308 	return (error);
2309 }
2310 
2311 /*
2312  * Allocate len bytes at offset off.
2313  */
2314 int
ufs_gop_alloc(struct vnode * vp,off_t off,off_t len,int flags,kauth_cred_t cred)2315 ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
2316     kauth_cred_t cred)
2317 {
2318         struct inode *ip = VTOI(vp);
2319         int error, delta, bshift, bsize;
2320         UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist);
2321 
2322         error = 0;
2323         bshift = vp->v_mount->mnt_fs_bshift;
2324         bsize = 1 << bshift;
2325 
2326         delta = off & (bsize - 1);
2327         off -= delta;
2328         len += delta;
2329 
2330         while (len > 0) {
2331                 bsize = MIN(bsize, len);
2332 
2333                 error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL);
2334                 if (error) {
2335                         goto out;
2336                 }
2337 
2338                 /*
2339                  * increase file size now, UFS_BALLOC() requires that
2340                  * EOF be up-to-date before each call.
2341                  */
2342 
2343                 if (ip->i_size < off + bsize) {
2344                         UVMHIST_LOG(ubchist, "vp %#jx old 0x%jx new 0x%x",
2345                             (uintptr_t)vp, ip->i_size, off + bsize, 0);
2346                         ip->i_size = off + bsize;
2347 			DIP_ASSIGN(ip, size, ip->i_size);
2348                 }
2349 
2350                 off += bsize;
2351                 len -= bsize;
2352         }
2353 
2354 out:
2355 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
2356 	return error;
2357 }
2358 
2359 void
ufs_gop_markupdate(struct vnode * vp,int flags)2360 ufs_gop_markupdate(struct vnode *vp, int flags)
2361 {
2362 	u_int32_t mask = 0;
2363 
2364 	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
2365 		mask = IN_ACCESS;
2366 	}
2367 	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
2368 		if (vp->v_type == VREG) {
2369 			mask |= IN_CHANGE | IN_UPDATE;
2370 		} else {
2371 			mask |= IN_MODIFY;
2372 		}
2373 	}
2374 	if (mask) {
2375 		struct inode *ip = VTOI(vp);
2376 
2377 		ip->i_flag |= mask;
2378 	}
2379 }
2380 
2381 int
ufs_bufio(enum uio_rw rw,struct vnode * vp,void * buf,size_t len,off_t off,int ioflg,kauth_cred_t cred,size_t * aresid,struct lwp * l)2382 ufs_bufio(enum uio_rw rw, struct vnode *vp, void *buf, size_t len, off_t off,
2383     int ioflg, kauth_cred_t cred, size_t *aresid, struct lwp *l)
2384 {
2385 	struct iovec iov;
2386 	struct uio uio;
2387 	int error;
2388 
2389 	KASSERT(ISSET(ioflg, IO_NODELOCKED));
2390 	KASSERT(VOP_ISLOCKED(vp));
2391 	KASSERT(rw != UIO_WRITE || VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2392 	KASSERT(rw != UIO_WRITE || vp->v_mount->mnt_wapbl == NULL ||
2393 	    ISSET(ioflg, IO_JOURNALLOCKED));
2394 
2395 	iov.iov_base = buf;
2396 	iov.iov_len = len;
2397 	uio.uio_iov = &iov;
2398 	uio.uio_iovcnt = 1;
2399 	uio.uio_resid = len;
2400 	uio.uio_offset = off;
2401 	uio.uio_rw = rw;
2402 	UIO_SETUP_SYSSPACE(&uio);
2403 
2404 	switch (rw) {
2405 	case UIO_READ:
2406 		error = UFS_BUFRD(vp, &uio, ioflg, cred);
2407 		break;
2408 	case UIO_WRITE:
2409 		error = UFS_BUFWR(vp, &uio, ioflg, cred);
2410 		break;
2411 	default:
2412 		panic("invalid uio rw: %d", (int)rw);
2413 	}
2414 
2415 	if (aresid)
2416 		*aresid = uio.uio_resid;
2417 	else if (uio.uio_resid && error == 0)
2418 		error = EIO;
2419 
2420 	KASSERT(VOP_ISLOCKED(vp));
2421 	KASSERT(rw != UIO_WRITE || VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
2422 	return error;
2423 }
2424