xref: /netbsd-src/sys/ufs/ufs/ufs_vnops.c (revision e39ef1d61eee3ccba837ee281f1e098c864487aa)
1 /*	$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1989, 1993, 1995
34  *	The Regents of the University of California.  All rights reserved.
35  * (c) UNIX System Laboratories, Inc.
36  * All or some portions of this file are derived from material licensed
37  * to the University of California by American Telephone and Telegraph
38  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39  * the permission of UNIX System Laboratories, Inc.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)ufs_vnops.c	8.28 (Berkeley) 7/31/95
66  */
67 
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $");
70 
71 #if defined(_KERNEL_OPT)
72 #include "opt_ffs.h"
73 #include "opt_quota.h"
74 #endif
75 
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/namei.h>
79 #include <sys/resourcevar.h>
80 #include <sys/kernel.h>
81 #include <sys/file.h>
82 #include <sys/stat.h>
83 #include <sys/buf.h>
84 #include <sys/proc.h>
85 #include <sys/mount.h>
86 #include <sys/vnode.h>
87 #include <sys/malloc.h>
88 #include <sys/dirent.h>
89 #include <sys/lockf.h>
90 #include <sys/kauth.h>
91 #include <sys/wapbl.h>
92 #include <sys/fstrans.h>
93 
94 #include <miscfs/specfs/specdev.h>
95 #include <miscfs/fifofs/fifo.h>
96 #include <miscfs/genfs/genfs.h>
97 
98 #include <ufs/ufs/inode.h>
99 #include <ufs/ufs/dir.h>
100 #include <ufs/ufs/ufsmount.h>
101 #include <ufs/ufs/ufs_bswap.h>
102 #include <ufs/ufs/ufs_extern.h>
103 #include <ufs/ufs/ufs_wapbl.h>
104 #ifdef UFS_DIRHASH
105 #include <ufs/ufs/dirhash.h>
106 #endif
107 #include <ufs/ext2fs/ext2fs_extern.h>
108 #include <ufs/ext2fs/ext2fs_dir.h>
109 #include <ufs/ffs/ffs_extern.h>
110 #include <ufs/lfs/lfs_extern.h>
111 #include <ufs/lfs/lfs.h>
112 
113 #include <uvm/uvm.h>
114 
115 __CTASSERT(EXT2FS_MAXNAMLEN == FFS_MAXNAMLEN);
116 __CTASSERT(LFS_MAXNAMLEN == FFS_MAXNAMLEN);
117 
118 static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct lwp *);
119 static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t,
120     struct lwp *);
121 
122 /*
123  * A virgin directory (no blushing please).
124  */
125 static const struct dirtemplate mastertemplate = {
126 	0,	12,		DT_DIR,	1,	".",
127 	0,	DIRBLKSIZ - 12,	DT_DIR,	2,	".."
128 };
129 
130 /*
131  * Create a regular file
132  */
133 int
134 ufs_create(void *v)
135 {
136 	struct vop_create_args /* {
137 		struct vnode		*a_dvp;
138 		struct vnode		**a_vpp;
139 		struct componentname	*a_cnp;
140 		struct vattr		*a_vap;
141 	} */ *ap = v;
142 	int	error;
143 	struct vnode *dvp = ap->a_dvp;
144 	struct ufs_lookup_results *ulr;
145 
146 	/* XXX should handle this material another way */
147 	ulr = &VTOI(dvp)->i_crap;
148 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
149 
150 	/*
151 	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
152 	 * ufs_makeinode
153 	 */
154 	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
155 	error =
156 	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
157 			  dvp, ulr, ap->a_vpp, ap->a_cnp);
158 	if (error) {
159 		fstrans_done(dvp->v_mount);
160 		return (error);
161 	}
162 	UFS_WAPBL_END1(dvp->v_mount, dvp);
163 	fstrans_done(dvp->v_mount);
164 	VN_KNOTE(dvp, NOTE_WRITE);
165 	return (0);
166 }
167 
168 /*
169  * Mknod vnode call
170  */
171 /* ARGSUSED */
172 int
173 ufs_mknod(void *v)
174 {
175 	struct vop_mknod_args /* {
176 		struct vnode		*a_dvp;
177 		struct vnode		**a_vpp;
178 		struct componentname	*a_cnp;
179 		struct vattr		*a_vap;
180 	} */ *ap = v;
181 	struct vattr	*vap;
182 	struct vnode	**vpp;
183 	struct inode	*ip;
184 	int		error;
185 	struct mount	*mp;
186 	ino_t		ino;
187 	struct ufs_lookup_results *ulr;
188 
189 	vap = ap->a_vap;
190 	vpp = ap->a_vpp;
191 
192 	/* XXX should handle this material another way */
193 	ulr = &VTOI(ap->a_dvp)->i_crap;
194 	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
195 
196 	/*
197 	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
198 	 * ufs_makeinode
199 	 */
200 	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
201 	if ((error =
202 	    ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
203 	    ap->a_dvp, ulr, vpp, ap->a_cnp)) != 0)
204 		goto out;
205 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
206 	ip = VTOI(*vpp);
207 	mp  = (*vpp)->v_mount;
208 	ino = ip->i_number;
209 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
210 	if (vap->va_rdev != VNOVAL) {
211 		struct ufsmount *ump = ip->i_ump;
212 		/*
213 		 * Want to be able to use this to make badblock
214 		 * inodes, so don't truncate the dev number.
215 		 */
216 		if (ump->um_fstype == UFS1)
217 			ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev,
218 			    UFS_MPNEEDSWAP(ump));
219 		else
220 			ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev,
221 			    UFS_MPNEEDSWAP(ump));
222 	}
223 	UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0);
224 	UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
225 	/*
226 	 * Remove inode so that it will be reloaded by VFS_VGET and
227 	 * checked to see if it is an alias of an existing entry in
228 	 * the inode cache.
229 	 */
230 	(*vpp)->v_type = VNON;
231 	VOP_UNLOCK(*vpp);
232 	vgone(*vpp);
233 	error = VFS_VGET(mp, ino, vpp);
234 out:
235 	fstrans_done(ap->a_dvp->v_mount);
236 	if (error != 0) {
237 		*vpp = NULL;
238 		return (error);
239 	}
240 	return (0);
241 }
242 
243 /*
244  * Open called.
245  *
246  * Nothing to do.
247  */
248 /* ARGSUSED */
249 int
250 ufs_open(void *v)
251 {
252 	struct vop_open_args /* {
253 		struct vnode	*a_vp;
254 		int		a_mode;
255 		kauth_cred_t	a_cred;
256 	} */ *ap = v;
257 
258 	/*
259 	 * Files marked append-only must be opened for appending.
260 	 */
261 	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
262 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
263 		return (EPERM);
264 	return (0);
265 }
266 
267 /*
268  * Close called.
269  *
270  * Update the times on the inode.
271  */
272 /* ARGSUSED */
273 int
274 ufs_close(void *v)
275 {
276 	struct vop_close_args /* {
277 		struct vnode	*a_vp;
278 		int		a_fflag;
279 		kauth_cred_t	a_cred;
280 	} */ *ap = v;
281 	struct vnode	*vp;
282 	struct inode	*ip;
283 
284 	vp = ap->a_vp;
285 	ip = VTOI(vp);
286 	fstrans_start(vp->v_mount, FSTRANS_SHARED);
287 	if (vp->v_usecount > 1)
288 		UFS_ITIMES(vp, NULL, NULL, NULL);
289 	fstrans_done(vp->v_mount);
290 	return (0);
291 }
292 
293 static int
294 ufs_check_possible(struct vnode *vp, struct inode *ip, mode_t mode,
295     kauth_cred_t cred)
296 {
297 #if defined(QUOTA) || defined(QUOTA2)
298 	int error;
299 #endif
300 
301 	/*
302 	 * Disallow write attempts on read-only file systems;
303 	 * unless the file is a socket, fifo, or a block or
304 	 * character device resident on the file system.
305 	 */
306 	if (mode & VWRITE) {
307 		switch (vp->v_type) {
308 		case VDIR:
309 		case VLNK:
310 		case VREG:
311 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
312 				return (EROFS);
313 #if defined(QUOTA) || defined(QUOTA2)
314 			fstrans_start(vp->v_mount, FSTRANS_SHARED);
315 			error = chkdq(ip, 0, cred, 0);
316 			fstrans_done(vp->v_mount);
317 			if (error != 0)
318 				return error;
319 #endif
320 			break;
321 		case VBAD:
322 		case VBLK:
323 		case VCHR:
324 		case VSOCK:
325 		case VFIFO:
326 		case VNON:
327 		default:
328 			break;
329 		}
330 	}
331 
332 	/* If it is a snapshot, nobody gets access to it. */
333 	if ((ip->i_flags & SF_SNAPSHOT))
334 		return (EPERM);
335 	/* If immutable bit set, nobody gets to write it. */
336 	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
337 		return (EPERM);
338 
339 	return 0;
340 }
341 
342 static int
343 ufs_check_permitted(struct vnode *vp, struct inode *ip, mode_t mode,
344     kauth_cred_t cred)
345 {
346 
347 	return genfs_can_access(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid,
348 	    ip->i_gid, mode, cred);
349 }
350 
351 int
352 ufs_access(void *v)
353 {
354 	struct vop_access_args /* {
355 		struct vnode	*a_vp;
356 		int		a_mode;
357 		kauth_cred_t	a_cred;
358 	} */ *ap = v;
359 	struct vnode	*vp;
360 	struct inode	*ip;
361 	mode_t		mode;
362 	int		error;
363 
364 	vp = ap->a_vp;
365 	ip = VTOI(vp);
366 	mode = ap->a_mode;
367 
368 	error = ufs_check_possible(vp, ip, mode, ap->a_cred);
369 	if (error)
370 		return error;
371 
372 	error = ufs_check_permitted(vp, ip, mode, ap->a_cred);
373 
374 	return error;
375 }
376 
377 /* ARGSUSED */
378 int
379 ufs_getattr(void *v)
380 {
381 	struct vop_getattr_args /* {
382 		struct vnode	*a_vp;
383 		struct vattr	*a_vap;
384 		kauth_cred_t	a_cred;
385 	} */ *ap = v;
386 	struct vnode	*vp;
387 	struct inode	*ip;
388 	struct vattr	*vap;
389 
390 	vp = ap->a_vp;
391 	ip = VTOI(vp);
392 	vap = ap->a_vap;
393 	fstrans_start(vp->v_mount, FSTRANS_SHARED);
394 	UFS_ITIMES(vp, NULL, NULL, NULL);
395 
396 	/*
397 	 * Copy from inode table
398 	 */
399 	vap->va_fsid = ip->i_dev;
400 	vap->va_fileid = ip->i_number;
401 	vap->va_mode = ip->i_mode & ALLPERMS;
402 	vap->va_nlink = ip->i_nlink;
403 	vap->va_uid = ip->i_uid;
404 	vap->va_gid = ip->i_gid;
405 	vap->va_size = vp->v_size;
406 	if (ip->i_ump->um_fstype == UFS1) {
407 		vap->va_rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
408 		    UFS_MPNEEDSWAP(ip->i_ump));
409 		vap->va_atime.tv_sec = ip->i_ffs1_atime;
410 		vap->va_atime.tv_nsec = ip->i_ffs1_atimensec;
411 		vap->va_mtime.tv_sec = ip->i_ffs1_mtime;
412 		vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec;
413 		vap->va_ctime.tv_sec = ip->i_ffs1_ctime;
414 		vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec;
415 		vap->va_birthtime.tv_sec = 0;
416 		vap->va_birthtime.tv_nsec = 0;
417 		vap->va_bytes = dbtob((u_quad_t)ip->i_ffs1_blocks);
418 	} else {
419 		vap->va_rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
420 		    UFS_MPNEEDSWAP(ip->i_ump));
421 		vap->va_atime.tv_sec = ip->i_ffs2_atime;
422 		vap->va_atime.tv_nsec = ip->i_ffs2_atimensec;
423 		vap->va_mtime.tv_sec = ip->i_ffs2_mtime;
424 		vap->va_mtime.tv_nsec = ip->i_ffs2_mtimensec;
425 		vap->va_ctime.tv_sec = ip->i_ffs2_ctime;
426 		vap->va_ctime.tv_nsec = ip->i_ffs2_ctimensec;
427 		vap->va_birthtime.tv_sec = ip->i_ffs2_birthtime;
428 		vap->va_birthtime.tv_nsec = ip->i_ffs2_birthnsec;
429 		vap->va_bytes = dbtob(ip->i_ffs2_blocks);
430 	}
431 	vap->va_gen = ip->i_gen;
432 	vap->va_flags = ip->i_flags;
433 
434 	/* this doesn't belong here */
435 	if (vp->v_type == VBLK)
436 		vap->va_blocksize = BLKDEV_IOSIZE;
437 	else if (vp->v_type == VCHR)
438 		vap->va_blocksize = MAXBSIZE;
439 	else
440 		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
441 	vap->va_type = vp->v_type;
442 	vap->va_filerev = ip->i_modrev;
443 	fstrans_done(vp->v_mount);
444 	return (0);
445 }
446 
447 /*
448  * Set attribute vnode op. called from several syscalls
449  */
450 int
451 ufs_setattr(void *v)
452 {
453 	struct vop_setattr_args /* {
454 		struct vnode	*a_vp;
455 		struct vattr	*a_vap;
456 		kauth_cred_t	a_cred;
457 	} */ *ap = v;
458 	struct vattr	*vap;
459 	struct vnode	*vp;
460 	struct inode	*ip;
461 	kauth_cred_t	cred;
462 	struct lwp	*l;
463 	int		error;
464 
465 	vap = ap->a_vap;
466 	vp = ap->a_vp;
467 	ip = VTOI(vp);
468 	cred = ap->a_cred;
469 	l = curlwp;
470 
471 	/*
472 	 * Check for unsettable attributes.
473 	 */
474 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
475 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
476 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
477 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
478 		return (EINVAL);
479 	}
480 
481 	fstrans_start(vp->v_mount, FSTRANS_SHARED);
482 
483 	if (vap->va_flags != VNOVAL) {
484 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
485 			error = EROFS;
486 			goto out;
487 		}
488 		if (kauth_cred_geteuid(cred) != ip->i_uid &&
489 		    (error = kauth_authorize_generic(cred,
490 		    KAUTH_GENERIC_ISSUSER, NULL)))
491 			goto out;
492 		if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
493 		    NULL) == 0) {
494 			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) &&
495 			    kauth_authorize_system(l->l_cred,
496 			     KAUTH_SYSTEM_CHSYSFLAGS, 0, NULL, NULL, NULL)) {
497 				error = EPERM;
498 				goto out;
499 			}
500 			/* Snapshot flag cannot be set or cleared */
501 			if ((vap->va_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) !=
502 			    (ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))) {
503 				error = EPERM;
504 				goto out;
505 			}
506 			error = UFS_WAPBL_BEGIN(vp->v_mount);
507 			if (error)
508 				goto out;
509 			ip->i_flags = vap->va_flags;
510 			DIP_ASSIGN(ip, flags, ip->i_flags);
511 		} else {
512 			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) ||
513 			    (vap->va_flags & UF_SETTABLE) != vap->va_flags) {
514 				error = EPERM;
515 				goto out;
516 			}
517 			if ((ip->i_flags & SF_SETTABLE) !=
518 			    (vap->va_flags & SF_SETTABLE)) {
519 				error = EPERM;
520 				goto out;
521 			}
522 			error = UFS_WAPBL_BEGIN(vp->v_mount);
523 			if (error)
524 				goto out;
525 			ip->i_flags &= SF_SETTABLE;
526 			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
527 			DIP_ASSIGN(ip, flags, ip->i_flags);
528 		}
529 		ip->i_flag |= IN_CHANGE;
530 		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
531 		UFS_WAPBL_END(vp->v_mount);
532 		if (vap->va_flags & (IMMUTABLE | APPEND)) {
533 			error = 0;
534 			goto out;
535 		}
536 	}
537 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
538 		error = EPERM;
539 		goto out;
540 	}
541 	/*
542 	 * Go through the fields and update iff not VNOVAL.
543 	 */
544 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
545 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
546 			error = EROFS;
547 			goto out;
548 		}
549 		error = UFS_WAPBL_BEGIN(vp->v_mount);
550 		if (error)
551 			goto out;
552 		error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
553 		UFS_WAPBL_END(vp->v_mount);
554 		if (error)
555 			goto out;
556 	}
557 	if (vap->va_size != VNOVAL) {
558 		/*
559 		 * Disallow write attempts on read-only file systems;
560 		 * unless the file is a socket, fifo, or a block or
561 		 * character device resident on the file system.
562 		 */
563 		switch (vp->v_type) {
564 		case VDIR:
565 			error = EISDIR;
566 			goto out;
567 		case VCHR:
568 		case VBLK:
569 		case VFIFO:
570 			break;
571 		case VREG:
572 			if (vp->v_mount->mnt_flag & MNT_RDONLY) {
573 				error = EROFS;
574 				goto out;
575 			}
576 			if ((ip->i_flags & SF_SNAPSHOT) != 0) {
577 				error = EPERM;
578 				goto out;
579 			}
580 			error = UFS_WAPBL_BEGIN(vp->v_mount);
581 			if (error)
582 				goto out;
583 			/*
584 			 * When journaling, only truncate one indirect block
585 			 * at a time.
586 			 */
587 			if (vp->v_mount->mnt_wapbl) {
588 				uint64_t incr = MNINDIR(ip->i_ump) <<
589 				    vp->v_mount->mnt_fs_bshift; /* Power of 2 */
590 				uint64_t base = NDADDR <<
591 				    vp->v_mount->mnt_fs_bshift;
592 				while (!error && ip->i_size > base + incr &&
593 				    ip->i_size > vap->va_size + incr) {
594 					/*
595 					 * round down to next full indirect
596 					 * block boundary.
597 					 */
598 					uint64_t nsize = base +
599 					    ((ip->i_size - base - 1) &
600 					    ~(incr - 1));
601 					error = UFS_TRUNCATE(vp, nsize, 0,
602 					    cred);
603 					if (error == 0) {
604 						UFS_WAPBL_END(vp->v_mount);
605 						error =
606 						   UFS_WAPBL_BEGIN(vp->v_mount);
607 					}
608 				}
609 			}
610 			if (!error)
611 				error = UFS_TRUNCATE(vp, vap->va_size, 0, cred);
612 			UFS_WAPBL_END(vp->v_mount);
613 			if (error)
614 				goto out;
615 			break;
616 		default:
617 			error = EOPNOTSUPP;
618 			goto out;
619 		}
620 	}
621 	ip = VTOI(vp);
622 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
623 	    vap->va_birthtime.tv_sec != VNOVAL) {
624 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
625 			error = EROFS;
626 			goto out;
627 		}
628 		if ((ip->i_flags & SF_SNAPSHOT) != 0) {
629 			error = EPERM;
630 			goto out;
631 		}
632 		error = genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid, cred);
633 		if (error)
634 			goto out;
635 		error = UFS_WAPBL_BEGIN(vp->v_mount);
636 		if (error)
637 			goto out;
638 		if (vap->va_atime.tv_sec != VNOVAL)
639 			if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
640 				ip->i_flag |= IN_ACCESS;
641 		if (vap->va_mtime.tv_sec != VNOVAL) {
642 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
643 			if (vp->v_mount->mnt_flag & MNT_RELATIME)
644 				ip->i_flag |= IN_ACCESS;
645 		}
646 		if (vap->va_birthtime.tv_sec != VNOVAL &&
647 		    ip->i_ump->um_fstype == UFS2) {
648 			ip->i_ffs2_birthtime = vap->va_birthtime.tv_sec;
649 			ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec;
650 		}
651 		error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
652 		UFS_WAPBL_END(vp->v_mount);
653 		if (error)
654 			goto out;
655 	}
656 	error = 0;
657 	if (vap->va_mode != (mode_t)VNOVAL) {
658 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
659 			error = EROFS;
660 			goto out;
661 		}
662 		if ((ip->i_flags & SF_SNAPSHOT) != 0 &&
663 		    (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP |
664 		     S_IXOTH | S_IWOTH))) {
665 			error = EPERM;
666 			goto out;
667 		}
668 		error = UFS_WAPBL_BEGIN(vp->v_mount);
669 		if (error)
670 			goto out;
671 		error = ufs_chmod(vp, (int)vap->va_mode, cred, l);
672 		UFS_WAPBL_END(vp->v_mount);
673 	}
674 	VN_KNOTE(vp, NOTE_ATTRIB);
675 out:
676 	fstrans_done(vp->v_mount);
677 	return (error);
678 }
679 
680 /*
681  * Change the mode on a file.
682  * Inode must be locked before calling.
683  */
684 static int
685 ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l)
686 {
687 	struct inode	*ip;
688 	int		error;
689 
690 	UFS_WAPBL_JLOCK_ASSERT(vp->v_mount);
691 
692 	ip = VTOI(vp);
693 
694 	error = genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid, mode);
695 	if (error)
696 		return (error);
697 
698 	fstrans_start(vp->v_mount, FSTRANS_SHARED);
699 	ip->i_mode &= ~ALLPERMS;
700 	ip->i_mode |= (mode & ALLPERMS);
701 	ip->i_flag |= IN_CHANGE;
702 	DIP_ASSIGN(ip, mode, ip->i_mode);
703 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
704 	fstrans_done(vp->v_mount);
705 	return (0);
706 }
707 
708 /*
709  * Perform chown operation on inode ip;
710  * inode must be locked prior to call.
711  */
712 static int
713 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
714     	struct lwp *l)
715 {
716 	struct inode	*ip;
717 	int		error = 0;
718 #if defined(QUOTA) || defined(QUOTA2)
719 	uid_t		ouid;
720 	gid_t		ogid;
721 	int64_t		change;
722 #endif
723 	ip = VTOI(vp);
724 	error = 0;
725 
726 	if (uid == (uid_t)VNOVAL)
727 		uid = ip->i_uid;
728 	if (gid == (gid_t)VNOVAL)
729 		gid = ip->i_gid;
730 
731 	error = genfs_can_chown(vp, cred, ip->i_uid, ip->i_gid, uid, gid);
732 	if (error)
733 		return (error);
734 
735 	fstrans_start(vp->v_mount, FSTRANS_SHARED);
736 #if defined(QUOTA) || defined(QUOTA2)
737 	ogid = ip->i_gid;
738 	ouid = ip->i_uid;
739 	change = DIP(ip, blocks);
740 	(void) chkdq(ip, -change, cred, 0);
741 	(void) chkiq(ip, -1, cred, 0);
742 #endif
743 	ip->i_gid = gid;
744 	DIP_ASSIGN(ip, gid, gid);
745 	ip->i_uid = uid;
746 	DIP_ASSIGN(ip, uid, uid);
747 #if defined(QUOTA) || defined(QUOTA2)
748 	if ((error = chkdq(ip, change, cred, 0)) == 0) {
749 		if ((error = chkiq(ip, 1, cred, 0)) == 0)
750 			goto good;
751 		else
752 			(void) chkdq(ip, -change, cred, FORCE);
753 	}
754 	ip->i_gid = ogid;
755 	DIP_ASSIGN(ip, gid, ogid);
756 	ip->i_uid = ouid;
757 	DIP_ASSIGN(ip, uid, ouid);
758 	(void) chkdq(ip, change, cred, FORCE);
759 	(void) chkiq(ip, 1, cred, FORCE);
760 	fstrans_done(vp->v_mount);
761 	return (error);
762  good:
763 #endif /* QUOTA || QUOTA2 */
764 	ip->i_flag |= IN_CHANGE;
765 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
766 	fstrans_done(vp->v_mount);
767 	return (0);
768 }
769 
770 int
771 ufs_remove(void *v)
772 {
773 	struct vop_remove_args /* {
774 		struct vnode		*a_dvp;
775 		struct vnode		*a_vp;
776 		struct componentname	*a_cnp;
777 	} */ *ap = v;
778 	struct vnode	*vp, *dvp;
779 	struct inode	*ip;
780 	int		error;
781 	struct ufs_lookup_results *ulr;
782 
783 	vp = ap->a_vp;
784 	dvp = ap->a_dvp;
785 	ip = VTOI(vp);
786 
787 	/* XXX should handle this material another way */
788 	ulr = &VTOI(dvp)->i_crap;
789 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
790 
791 	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
792 	if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) ||
793 	    (VTOI(dvp)->i_flags & APPEND))
794 		error = EPERM;
795 	else {
796 		error = UFS_WAPBL_BEGIN(dvp->v_mount);
797 		if (error == 0) {
798 			error = ufs_dirremove(dvp, ulr,
799 					      ip, ap->a_cnp->cn_flags, 0);
800 			UFS_WAPBL_END(dvp->v_mount);
801 		}
802 	}
803 	VN_KNOTE(vp, NOTE_DELETE);
804 	VN_KNOTE(dvp, NOTE_WRITE);
805 	if (dvp == vp)
806 		vrele(vp);
807 	else
808 		vput(vp);
809 	vput(dvp);
810 	fstrans_done(dvp->v_mount);
811 	return (error);
812 }
813 
814 /*
815  * ufs_link: create hard link.
816  */
817 int
818 ufs_link(void *v)
819 {
820 	struct vop_link_args /* {
821 		struct vnode *a_dvp;
822 		struct vnode *a_vp;
823 		struct componentname *a_cnp;
824 	} */ *ap = v;
825 	struct vnode *dvp = ap->a_dvp;
826 	struct vnode *vp = ap->a_vp;
827 	struct componentname *cnp = ap->a_cnp;
828 	struct inode *ip;
829 	struct direct *newdir;
830 	int error;
831 	struct ufs_lookup_results *ulr;
832 
833 	KASSERT(dvp != vp);
834 	KASSERT(vp->v_type != VDIR);
835 	KASSERT(dvp->v_mount == vp->v_mount);
836 
837 	/* XXX should handle this material another way */
838 	ulr = &VTOI(dvp)->i_crap;
839 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
840 
841 	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
842 	error = vn_lock(vp, LK_EXCLUSIVE);
843 	if (error) {
844 		VOP_ABORTOP(dvp, cnp);
845 		goto out2;
846 	}
847 	ip = VTOI(vp);
848 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
849 		VOP_ABORTOP(dvp, cnp);
850 		error = EMLINK;
851 		goto out1;
852 	}
853 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
854 		VOP_ABORTOP(dvp, cnp);
855 		error = EPERM;
856 		goto out1;
857 	}
858 	error = UFS_WAPBL_BEGIN(vp->v_mount);
859 	if (error) {
860 		VOP_ABORTOP(dvp, cnp);
861 		goto out1;
862 	}
863 	ip->i_nlink++;
864 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
865 	ip->i_flag |= IN_CHANGE;
866 	error = UFS_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
867 	if (!error) {
868 		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
869 		ufs_makedirentry(ip, cnp, newdir);
870 		error = ufs_direnter(dvp, ulr, vp, newdir, cnp, NULL);
871 		pool_cache_put(ufs_direct_cache, newdir);
872 	}
873 	if (error) {
874 		ip->i_nlink--;
875 		DIP_ASSIGN(ip, nlink, ip->i_nlink);
876 		ip->i_flag |= IN_CHANGE;
877 		UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP);
878 	}
879 	UFS_WAPBL_END(vp->v_mount);
880  out1:
881 	VOP_UNLOCK(vp);
882  out2:
883 	VN_KNOTE(vp, NOTE_LINK);
884 	VN_KNOTE(dvp, NOTE_WRITE);
885 	vput(dvp);
886 	fstrans_done(dvp->v_mount);
887 	return (error);
888 }
889 
890 /*
891  * whiteout vnode call
892  */
893 int
894 ufs_whiteout(void *v)
895 {
896 	struct vop_whiteout_args /* {
897 		struct vnode		*a_dvp;
898 		struct componentname	*a_cnp;
899 		int			a_flags;
900 	} */ *ap = v;
901 	struct vnode		*dvp = ap->a_dvp;
902 	struct componentname	*cnp = ap->a_cnp;
903 	struct direct		*newdir;
904 	int			error;
905 	struct ufsmount		*ump = VFSTOUFS(dvp->v_mount);
906 	struct ufs_lookup_results *ulr;
907 
908 	/* XXX should handle this material another way */
909 	ulr = &VTOI(dvp)->i_crap;
910 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
911 
912 	error = 0;
913 	switch (ap->a_flags) {
914 	case LOOKUP:
915 		/* 4.4 format directories support whiteout operations */
916 		if (ump->um_maxsymlinklen > 0)
917 			return (0);
918 		return (EOPNOTSUPP);
919 
920 	case CREATE:
921 		/* create a new directory whiteout */
922 		fstrans_start(dvp->v_mount, FSTRANS_SHARED);
923 		error = UFS_WAPBL_BEGIN(dvp->v_mount);
924 		if (error)
925 			break;
926 #ifdef DIAGNOSTIC
927 		if (ump->um_maxsymlinklen <= 0)
928 			panic("ufs_whiteout: old format filesystem");
929 #endif
930 
931 		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
932 		newdir->d_ino = WINO;
933 		newdir->d_namlen = cnp->cn_namelen;
934 		memcpy(newdir->d_name, cnp->cn_nameptr,
935 		    (size_t)cnp->cn_namelen);
936 		newdir->d_name[cnp->cn_namelen] = '\0';
937 		newdir->d_type = DT_WHT;
938 		error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL);
939 		pool_cache_put(ufs_direct_cache, newdir);
940 		break;
941 
942 	case DELETE:
943 		/* remove an existing directory whiteout */
944 		fstrans_start(dvp->v_mount, FSTRANS_SHARED);
945 		error = UFS_WAPBL_BEGIN(dvp->v_mount);
946 		if (error)
947 			break;
948 #ifdef DIAGNOSTIC
949 		if (ump->um_maxsymlinklen <= 0)
950 			panic("ufs_whiteout: old format filesystem");
951 #endif
952 
953 		cnp->cn_flags &= ~DOWHITEOUT;
954 		error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0);
955 		break;
956 	default:
957 		panic("ufs_whiteout: unknown op");
958 		/* NOTREACHED */
959 	}
960 	UFS_WAPBL_END(dvp->v_mount);
961 	fstrans_done(dvp->v_mount);
962 	return (error);
963 }
964 
965 
966 /*
967  * Rename vnode operation
968  * 	rename("foo", "bar");
969  * is essentially
970  *	unlink("bar");
971  *	link("foo", "bar");
972  *	unlink("foo");
973  * but ``atomically''.  Can't do full commit without saving state in the
974  * inode on disk which isn't feasible at this time.  Best we can do is
975  * always guarantee the target exists.
976  *
977  * Basic algorithm is:
978  *
979  * 1) Bump link count on source while we're linking it to the
980  *    target.  This also ensures the inode won't be deleted out
981  *    from underneath us while we work (it may be truncated by
982  *    a concurrent `trunc' or `open' for creation).
983  * 2) Link source to destination.  If destination already exists,
984  *    delete it first.
985  * 3) Unlink source reference to inode if still around. If a
986  *    directory was moved and the parent of the destination
987  *    is different from the source, patch the ".." entry in the
988  *    directory.
989  */
990 
991 /*
992  * Notes on rename locking:
993  *
994  * We lock parent vnodes before child vnodes. This means in particular
995  * that if A is above B in the directory tree then A must be locked
996  * before B. (This is true regardless of how many steps appear in
997  * between, because an arbitrary number of other processes could lock
998  * parent/child in between and establish a lock cycle and deadlock.)
999  *
1000  * Therefore, if tdvp is above fdvp we must lock tdvp first; if fdvp
1001  * is above tdvp we must lock fdvp first; and if they're
1002  * incommensurate it doesn't matter. (But, we rely on the fact that
1003  * there's a whole-volume rename lock to prevent deadlock among groups
1004  * of renames upon overlapping sets of incommensurate vnodes.)
1005  *
1006  * In addition to establishing lock ordering the parent check also
1007  * serves to rule out cases where someone tries to move a directory
1008  * underneath itself, e.g. rename("a/b", "a/b/c"). If allowed to
1009  * proceed such renames would detach portions of the directory tree
1010  * and make fsck very unhappy.
1011  *
1012  * Note that it is an error for *fvp* to be above tdvp; however,
1013  * *fdvp* can be above tdvp, as in rename("a/b", "a/c/d").
1014  *
1015  * The parent check searches up the tree from tdvp until it either
1016  * finds fdvp or the root of the volume. It also returns the vnode it
1017  * saw immediately before fdvp, if any. Later on (after looking up
1018  * fvp) we will check to see if this *is* fvp and if so fail.
1019  *
1020  * If the parent check finds fdvp, it means fdvp is above tdvp, so we
1021  * lock fdvp first and then tdvp. Otherwise, either tdvp is above fdvp
1022  * or they're incommensurate and we lock tdvp first.
1023  *
1024  * In either case each of the child vnodes has to be looked up and
1025  * locked immediately after its parent. The cases
1026  *
1027  *       fdvp/fvp/[.../]tdvp/tvp
1028  *       tdvp/tvp/[.../]fdvp/fvp
1029  *
1030  * can cause deadlock otherwise. Note that both of these are error
1031  * cases; the first fails the parent check and the second fails
1032  * because tvp isn't empty. The parent check case is handled before
1033  * we start locking; however, the nonempty case requires locking tvp
1034  * to find out safely that it's nonempty.
1035  *
1036  * Therefore the procedure is either
1037  *
1038  *   lock fdvp
1039  *   lookup fvp
1040  *   lock fvp
1041  *   lock tdvp
1042  *   lookup tvp
1043  *   lock tvp
1044  *
1045  * or
1046  *
1047  *   lock tdvp
1048  *   lookup tvp
1049  *   lock tvp
1050  *   lock fdvp
1051  *   lookup fvp
1052  *   lock fvp
1053  *
1054  * This could in principle be simplified by always looking up fvp
1055  * last; because of the parent check we know by the time we start
1056  * locking that fvp cannot be directly above tdvp, so (given the
1057  * whole-volume rename lock and other assumptions) it's safe to lock
1058  * tdvp before fvp. This would allow the following scheme:
1059  *
1060  *   lock fdvp
1061  *   lock tdvp
1062  * or
1063  *   lock tdvp
1064  *   lock fdvp
1065  *
1066  * then
1067  *   lookup tvp
1068  *   lock tvp
1069  *   lookup fvp
1070  *   check if fvp is above tdvp, fail if so
1071  *   lock fvp
1072  *
1073  * which is much, much simpler.
1074  *
1075  * However, current levels of vfs namei/lookup sanity do not permit
1076  * this. It is impossible currently to look up fvp without locking it.
1077  * (It gets locked regardless of whether LOCKLEAF is set; without
1078  * LOCKLEAF it just gets unlocked again, which doesn't help.)
1079  *
1080  * Therefore, because we must look up fvp to know if it's above tdvp,
1081  * which locks fvp, we must, at least in the case where fdvp is above
1082  * tdvp, do that before locking tdvp. The longer scheme does that; the
1083  * simpler scheme is not safe.
1084  *
1085  * Note that for now we aren't doing lookup() but relookup(); however,
1086  * the differences are minor.
1087  *
1088  * On top of all the above, just to make everything more
1089  * exciting, any two of the vnodes might end up being the same.
1090  *
1091  * FROMPARENT == FROMCHILD	mv a/. foo	is an error.
1092  * FROMPARENT == TOPARENT	mv a/b a/c	is ok.
1093  * FROMPARENT == TOCHILD	mv a/b/c a/b	will give ENOTEMPTY.
1094  * FROMCHILD == TOPARENT	mv a/b a/b/c	fails the parent check.
1095  * FROMCHILD == TOCHILD		mv a/b a/b	is ok.
1096  * TOPARENT == TOCHILD		mv foo a/.	is an error.
1097  *
1098  * This introduces more cases in the locking, because each distinct
1099  * vnode must be locked exactly once.
1100  *
1101  * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we assume it
1102  * doesn't matter what order the children are locked in, because the
1103  * per-volume rename lock excludes other renames and no other
1104  * operation locks two files in the same directory at once. (Note: if
1105  * it turns out that link() does, link() is wrong.)
1106  *
1107  * Until such time as we can do lookups without the namei and lookup
1108  * machinery "helpfully" locking the result vnode for us, we can't
1109  * avoid tripping on cases where FROMCHILD == TOCHILD. Currently for
1110  * non-directories we unlock the first one we lock while looking up
1111  * the second, then relock it if necessary. This is more or less
1112  * harmless since not much of interest can happen to the objects in
1113  * that window while we have the containing directory locked; but it's
1114  * not desirable and should be cleaned up when that becomes possible.
1115  * The right way to do it is to check after looking the second one up
1116  * and only lock it if it's different. (Note: for directories we don't
1117  * do this dance because the same directory can't appear more than
1118  * once.)
1119  */
1120 
/*
 * XXX lifted from ufs_lookup.c.
 *
 * Evaluates to nonzero when the mount that vp lives on does NOT have
 * IMNT_DTYPE set in mnt_iflag.  (Presumably this marks the older
 * directory entry layout without a d_type field; confirm against
 * ufs_lookup.c.)
 */
#define	FSFMT(vp)	(!((vp)->v_mount->mnt_iflag & IMNT_DTYPE))
1123 
1124 /*
1125  * Check if either entry referred to by FROM_ULR is within the range
1126  * of entries named by TO_ULR.
1127  */
1128 static int
1129 ulr_overlap(const struct ufs_lookup_results *from_ulr,
1130 	    const struct ufs_lookup_results *to_ulr)
1131 {
1132 	doff_t from_start, from_prevstart;
1133 	doff_t to_start, to_end;
1134 
1135 	/*
1136 	 * FROM is a DELETE result; offset points to the entry to
1137 	 * remove and subtracting count gives the previous entry.
1138 	 */
1139 	from_start = from_ulr->ulr_offset - from_ulr->ulr_count;
1140 	from_prevstart = from_ulr->ulr_offset;
1141 
1142 	/*
1143 	 * TO is a RENAME (thus non-DELETE) result; offset points
1144 	 * to the beginning of a region to write in, and adding
1145 	 * count gives the end of the region.
1146 	 */
1147 	to_start = to_ulr->ulr_offset;
1148 	to_end = to_ulr->ulr_offset + to_ulr->ulr_count;
1149 
1150 	if (from_prevstart >= to_start && from_prevstart < to_end) {
1151 		return 1;
1152 	}
1153 	if (from_start >= to_start && from_start < to_end) {
1154 		return 1;
1155 	}
1156 	return 0;
1157 }
1158 
1159 /*
1160  * Wrapper for relookup that also updates the supplemental results.
1161  */
1162 static int
1163 do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr,
1164 	    struct vnode **vp, struct componentname *cnp)
1165 {
1166 	int error;
1167 
1168 	error = relookup(dvp, vp, cnp, 0);
1169 	if (error) {
1170 		return error;
1171 	}
1172 	/* update the supplemental reasults */
1173 	*ulr = VTOI(dvp)->i_crap;
1174 	UFS_CHECK_CRAPCOUNTER(VTOI(dvp));
1175 	return 0;
1176 }
1177 
1178 /*
1179  * Lock and relookup a sequence of two directories and two children.
1180  *
1181  */
1182 static int
1183 lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1,
1184 		    struct vnode **v1_ret, struct componentname *cn1,
1185 		    int v1_missing_ok,
1186 		    int overlap_error,
1187 		    struct vnode *d2, struct ufs_lookup_results *ulr2,
1188 		    struct vnode **v2_ret, struct componentname *cn2,
1189 		    int v2_missing_ok)
1190 {
1191 	struct vnode *v1, *v2;
1192 	int error;
1193 
1194 	KASSERT(d1 != d2);
1195 
1196 	vn_lock(d1, LK_EXCLUSIVE | LK_RETRY);
1197 	if (VTOI(d1)->i_size == 0) {
1198 		/* d1 has been rmdir'd */
1199 		VOP_UNLOCK(d1);
1200 		return ENOENT;
1201 	}
1202 	error = do_relookup(d1, ulr1, &v1, cn1);
1203 	if (v1_missing_ok) {
1204 		if (error == ENOENT) {
1205 			/*
1206 			 * Note: currently if the name doesn't exist,
1207 			 * relookup succeeds (it intercepts the
1208 			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
1209 			 * to NULL. Therefore, we will never get
1210 			 * ENOENT and this branch is not needed.
1211 			 * However, in a saner future the EJUSTRETURN
1212 			 * garbage will go away, so let's DTRT.
1213 			 */
1214 			v1 = NULL;
1215 			error = 0;
1216 		}
1217 	} else {
1218 		if (error == 0 && v1 == NULL) {
1219 			/* This is what relookup sets if v1 disappeared. */
1220 			error = ENOENT;
1221 		}
1222 	}
1223 	if (error) {
1224 		VOP_UNLOCK(d1);
1225 		return error;
1226 	}
1227 	if (v1 && v1 == d2) {
1228 		VOP_UNLOCK(d1);
1229 		VOP_UNLOCK(v1);
1230 		vrele(v1);
1231 		return overlap_error;
1232 	}
1233 
1234 	/*
1235 	 * The right way to do this is to do lookups without locking
1236 	 * the results, and lock the results afterwards; then at the
1237 	 * end we can avoid trying to lock v2 if v2 == v1.
1238 	 *
1239 	 * However, for the reasons described in the fdvp == tdvp case
1240 	 * in rename below, we can't do that safely. So, in the case
1241 	 * where v1 is not a directory, unlock it and lock it again
1242 	 * afterwards. This is safe in locking order because a
1243 	 * non-directory can't be above anything else in the tree. If
1244 	 * v1 *is* a directory, that's not true, but then because d1
1245 	 * != d2, v1 != v2.
1246 	 */
1247 	if (v1 && v1->v_type != VDIR) {
1248 		VOP_UNLOCK(v1);
1249 	}
1250 	vn_lock(d2, LK_EXCLUSIVE | LK_RETRY);
1251 	if (VTOI(d2)->i_size == 0) {
1252 		/* d2 has been rmdir'd */
1253 		VOP_UNLOCK(d2);
1254 		if (v1 && v1->v_type == VDIR) {
1255 			VOP_UNLOCK(v1);
1256 		}
1257 		VOP_UNLOCK(d1);
1258 		if (v1) {
1259 			vrele(v1);
1260 		}
1261 		return ENOENT;
1262 	}
1263 	error = do_relookup(d2, ulr2, &v2, cn2);
1264 	if (v2_missing_ok) {
1265 		if (error == ENOENT) {
1266 			/* as above */
1267 			v2 = NULL;
1268 			error = 0;
1269 		}
1270 	} else {
1271 		if (error == 0 && v2 == NULL) {
1272 			/* This is what relookup sets if v2 disappeared. */
1273 			error = ENOENT;
1274 		}
1275 	}
1276 	if (error) {
1277 		VOP_UNLOCK(d2);
1278 		if (v1 && v1->v_type == VDIR) {
1279 			VOP_UNLOCK(v1);
1280 		}
1281 		VOP_UNLOCK(d1);
1282 		if (v1) {
1283 			vrele(v1);
1284 		}
1285 		return error;
1286 	}
1287 	if (v1 && v1->v_type != VDIR && v1 != v2) {
1288 		vn_lock(v1, LK_EXCLUSIVE | LK_RETRY);
1289 	}
1290 	*v1_ret = v1;
1291 	*v2_ret = v2;
1292 	return 0;
1293 }
1294 
1295 /*
1296  * Rename vnode operation
1297  * 	rename("foo", "bar");
1298  * is essentially
1299  *	unlink("bar");
1300  *	link("foo", "bar");
1301  *	unlink("foo");
1302  * but ``atomically''.  Can't do full commit without saving state in the
1303  * inode on disk which isn't feasible at this time.  Best we can do is
1304  * always guarantee the target exists.
1305  *
1306  * Basic algorithm is:
1307  *
1308  * 1) Bump link count on source while we're linking it to the
1309  *    target.  This also ensure the inode won't be deleted out
1310  *    from underneath us while we work (it may be truncated by
1311  *    a concurrent `trunc' or `open' for creation).
1312  * 2) Link source to destination.  If destination already exists,
1313  *    delete it first.
1314  * 3) Unlink source reference to inode if still around. If a
1315  *    directory was moved and the parent of the destination
1316  *    is different from the source, patch the ".." entry in the
1317  *    directory.
1318  */
1319 int
1320 ufs_rename(void *v)
1321 {
1322 	struct vop_rename_args  /* {
1323 		struct vnode		*a_fdvp;
1324 		struct vnode		*a_fvp;
1325 		struct componentname	*a_fcnp;
1326 		struct vnode		*a_tdvp;
1327 		struct vnode		*a_tvp;
1328 		struct componentname	*a_tcnp;
1329 	} */ *ap = v;
1330 	struct vnode		*tvp, *tdvp, *fvp, *fdvp;
1331 	struct componentname	*tcnp, *fcnp;
1332 	struct inode		*ip, *txp, *fxp, *tdp, *fdp;
1333 	struct mount		*mp;
1334 	struct direct		*newdir;
1335 	int			doingdirectory, error;
1336 	ino_t			oldparent, newparent;
1337 
1338 	struct ufs_lookup_results from_ulr, to_ulr;
1339 
1340 	tvp = ap->a_tvp;
1341 	tdvp = ap->a_tdvp;
1342 	fvp = ap->a_fvp;
1343 	fdvp = ap->a_fdvp;
1344 	tcnp = ap->a_tcnp;
1345 	fcnp = ap->a_fcnp;
1346 	doingdirectory = error = 0;
1347 	oldparent = newparent = 0;
1348 
1349 	/* save the supplemental lookup results as they currently exist */
1350 	from_ulr = VTOI(fdvp)->i_crap;
1351 	to_ulr = VTOI(tdvp)->i_crap;
1352 	UFS_CHECK_CRAPCOUNTER(VTOI(fdvp));
1353 	UFS_CHECK_CRAPCOUNTER(VTOI(tdvp));
1354 
1355 	/*
1356 	 * Owing to VFS oddities we are currently called with tdvp/tvp
1357 	 * locked and not fdvp/fvp. In a sane world we'd be passed
1358 	 * tdvp and fdvp only, unlocked, and two name strings. Pretend
1359 	 * we have a sane world and unlock tdvp and tvp.
1360 	 */
1361 	VOP_UNLOCK(tdvp);
1362 	if (tvp && tvp != tdvp) {
1363 		VOP_UNLOCK(tvp);
1364 	}
1365 
1366 	/* Also pretend we have a sane world and vrele fvp/tvp. */
1367 	vrele(fvp);
1368 	fvp = NULL;
1369 	if (tvp) {
1370 		vrele(tvp);
1371 		tvp = NULL;
1372 	}
1373 
1374 	/*
1375 	 * Check for cross-device rename.
1376 	 */
1377 	if (fdvp->v_mount != tdvp->v_mount) {
1378 		error = EXDEV;
1379 		goto abort;
1380 	}
1381 
1382 	/*
1383 	 * Reject "." and ".."
1384 	 */
1385 	if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) ||
1386 	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1387 	    (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) {
1388 		error = EINVAL;
1389 		goto abort;
1390 	}
1391 
1392 	/*
1393 	 * Get locks.
1394 	 */
1395 
1396 	/* paranoia */
1397 	fcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
1398 	tcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
1399 
1400 	if (fdvp == tdvp) {
1401 		/* One directory. Lock it and relookup both children. */
1402 		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
1403 
1404 		if (VTOI(fdvp)->i_size == 0) {
1405 			/* directory has been rmdir'd */
1406 			VOP_UNLOCK(fdvp);
1407 			error = ENOENT;
1408 			goto abort;
1409 		}
1410 
1411 		error = do_relookup(fdvp, &from_ulr, &fvp, fcnp);
1412 		if (error == 0 && fvp == NULL) {
1413 			/* relookup may produce this if fvp disappears */
1414 			error = ENOENT;
1415 		}
1416 		if (error) {
1417 			VOP_UNLOCK(fdvp);
1418 			goto abort;
1419 		}
1420 
1421 		/*
1422 		 * The right way to do this is to look up both children
1423 		 * without locking either, and then lock both unless they
1424 		 * turn out to be the same. However, due to deep-seated
1425 		 * VFS-level issues all lookups lock the child regardless
1426 		 * of whether LOCKLEAF is set (if LOCKLEAF is not set,
1427 		 * the child is locked during lookup and then unlocked)
1428 		 * so it is not safe to look up tvp while fvp is locked.
1429 		 *
1430 		 * Unlocking fvp here temporarily is more or less safe,
1431 		 * because with the directory locked there's not much
1432 		 * that can happen to it. However, ideally it wouldn't
1433 		 * be necessary. XXX.
1434 		 */
1435 		VOP_UNLOCK(fvp);
1436 		/* remember fdvp == tdvp so tdvp is locked */
1437 		error = do_relookup(tdvp, &to_ulr, &tvp, tcnp);
1438 		if (error && error != ENOENT) {
1439 			VOP_UNLOCK(fdvp);
1440 			goto abort;
1441 		}
1442 		if (error == ENOENT) {
1443 			/*
1444 			 * Note: currently if the name doesn't exist,
1445 			 * relookup succeeds (it intercepts the
1446 			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
1447 			 * to NULL. Therefore, we will never get
1448 			 * ENOENT and this branch is not needed.
1449 			 * However, in a saner future the EJUSTRETURN
1450 			 * garbage will go away, so let's DTRT.
1451 			 */
1452 			tvp = NULL;
1453 		}
1454 
1455 		/* tvp is locked; lock fvp if necessary */
1456 		if (!tvp || tvp != fvp) {
1457 			vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
1458 		}
1459 	} else {
1460 		int found_fdvp;
1461 		struct vnode *illegal_fvp;
1462 
1463 		/*
1464 		 * The source must not be above the destination. (If
1465 		 * it were, the rename would detach a section of the
1466 		 * tree.)
1467 		 *
1468 		 * Look up the tree from tdvp to see if we find fdvp,
1469 		 * and if so, return the immediate child of fdvp we're
1470 		 * under; that must not turn out to be the same as
1471 		 * fvp.
1472 		 *
1473 		 * The per-volume rename lock guarantees that the
1474 		 * result of this check remains true until we finish
1475 		 * looking up and locking.
1476 		 */
1477 		error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred,
1478 					&found_fdvp, &illegal_fvp);
1479 		if (error) {
1480 			goto abort;
1481 		}
1482 
1483 		/* Must lock in tree order. */
1484 
1485 		if (found_fdvp) {
1486 			/* fdvp -> fvp -> tdvp -> tvp */
1487 			error = lock_vnode_sequence(fdvp, &from_ulr,
1488 						    &fvp, fcnp, 0,
1489 						    EINVAL,
1490 						    tdvp, &to_ulr,
1491 						    &tvp, tcnp, 1);
1492 		} else {
1493 			/* tdvp -> tvp -> fdvp -> fvp */
1494 			error = lock_vnode_sequence(tdvp, &to_ulr,
1495 						    &tvp, tcnp, 1,
1496 						    ENOTEMPTY,
1497 						    fdvp, &from_ulr,
1498 						    &fvp, fcnp, 0);
1499 		}
1500 		if (error) {
1501 			if (illegal_fvp) {
1502 				vrele(illegal_fvp);
1503 			}
1504 			goto abort;
1505 		}
1506 		KASSERT(fvp != NULL);
1507 
1508 		if (illegal_fvp && fvp == illegal_fvp) {
1509 			vrele(illegal_fvp);
1510 			error = EINVAL;
1511 			goto abort_withlocks;
1512 		}
1513 
1514 		if (illegal_fvp) {
1515 			vrele(illegal_fvp);
1516 		}
1517 	}
1518 
1519 	KASSERT(fdvp && VOP_ISLOCKED(fdvp));
1520 	KASSERT(fvp && VOP_ISLOCKED(fvp));
1521 	KASSERT(tdvp && VOP_ISLOCKED(tdvp));
1522 	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp));
1523 
1524 	/* --- everything is now locked --- */
1525 
1526 	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
1527 	    (VTOI(tdvp)->i_flags & APPEND))) {
1528 		error = EPERM;
1529 		goto abort_withlocks;
1530 	}
1531 
1532 	/*
1533 	 * Check if just deleting a link name.
1534 	 */
1535 	if (fvp == tvp) {
1536 		if (fvp->v_type == VDIR) {
1537 			error = EINVAL;
1538 			goto abort_withlocks;
1539 		}
1540 
1541 		/* Release destination completely. Leave fdvp locked. */
1542 		VOP_ABORTOP(tdvp, tcnp);
1543 		if (fdvp != tdvp) {
1544 			VOP_UNLOCK(tdvp);
1545 		}
1546 		VOP_UNLOCK(tvp);
1547 		vrele(tdvp);
1548 		vrele(tvp);
1549 
1550 		/* Delete source. */
1551 		/* XXX: do we really need to relookup again? */
1552 
1553 		/*
1554 		 * fdvp is still locked, but we just unlocked fvp
1555 		 * (because fvp == tvp) so just decref fvp
1556 		 */
1557 		vrele(fvp);
1558 		fcnp->cn_flags &= ~(MODMASK);
1559 		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1560 		fcnp->cn_nameiop = DELETE;
1561 		if ((error = relookup(fdvp, &fvp, fcnp, 0))) {
1562 			vput(fdvp);
1563 			return (error);
1564 		}
1565 		return (VOP_REMOVE(fdvp, fvp, fcnp));
1566 	}
1567 	fdp = VTOI(fdvp);
1568 	ip = VTOI(fvp);
1569 	if ((nlink_t) ip->i_nlink >= LINK_MAX) {
1570 		error = EMLINK;
1571 		goto abort_withlocks;
1572 	}
1573 	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
1574 		(fdp->i_flags & APPEND)) {
1575 		error = EPERM;
1576 		goto abort_withlocks;
1577 	}
1578 	if ((ip->i_mode & IFMT) == IFDIR) {
1579 		/*
1580 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1581 		 */
1582 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1583 		    fdp == ip ||
1584 		    (fcnp->cn_flags & ISDOTDOT) ||
1585 		    (tcnp->cn_flags & ISDOTDOT) ||
1586 		    (ip->i_flag & IN_RENAME)) {
1587 			error = EINVAL;
1588 			goto abort_withlocks;
1589 		}
1590 		ip->i_flag |= IN_RENAME;
1591 		doingdirectory = 1;
1592 	}
1593 	oldparent = fdp->i_number;
1594 	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXXLUKEM/XXX: right place? */
1595 
1596 	/*
1597 	 * Both the directory
1598 	 * and target vnodes are locked.
1599 	 */
1600 	tdp = VTOI(tdvp);
1601 	txp = NULL;
1602 	if (tvp)
1603 		txp = VTOI(tvp);
1604 
1605 	mp = fdvp->v_mount;
1606 	fstrans_start(mp, FSTRANS_SHARED);
1607 
1608 	if (oldparent != tdp->i_number)
1609 		newparent = tdp->i_number;
1610 
1611 	/*
1612 	 * If ".." must be changed (ie the directory gets a new
1613 	 * parent) the user must have write permission in the source
1614 	 * so as to be able to change "..".
1615 	 */
1616 	if (doingdirectory && newparent) {
1617 		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
1618 		if (error)
1619 			goto out;
1620 	}
1621 
1622 	KASSERT(fdvp != tvp);
1623 
1624 	if (newparent) {
1625 		/* Check for the rename("foo/foo", "foo") case. */
1626 		if (fdvp == tvp) {
1627 			error = doingdirectory ? ENOTEMPTY : EISDIR;
1628 			goto out;
1629 		}
1630 	}
1631 
1632 	fxp = VTOI(fvp);
1633 	fdp = VTOI(fdvp);
1634 
1635 	error = UFS_WAPBL_BEGIN(fdvp->v_mount);
1636 	if (error)
1637 		goto out2;
1638 
1639 	/*
1640 	 * 1) Bump link count while we're moving stuff
1641 	 *    around.  If we crash somewhere before
1642 	 *    completing our work, the link count
1643 	 *    may be wrong, but correctable.
1644 	 */
1645 	ip->i_nlink++;
1646 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
1647 	ip->i_flag |= IN_CHANGE;
1648 	if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
1649 		goto bad;
1650 	}
1651 
1652 	/*
1653 	 * 2) If target doesn't exist, link the target
1654 	 *    to the source and unlink the source.
1655 	 *    Otherwise, rewrite the target directory
1656 	 *    entry to reference the source inode and
1657 	 *    expunge the original entry's existence.
1658 	 */
1659 	if (txp == NULL) {
1660 		if (tdp->i_dev != ip->i_dev)
1661 			panic("rename: EXDEV");
1662 		/*
1663 		 * Account for ".." in new directory.
1664 		 * When source and destination have the same
1665 		 * parent we don't fool with the link count.
1666 		 */
1667 		if (doingdirectory && newparent) {
1668 			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
1669 				error = EMLINK;
1670 				goto bad;
1671 			}
1672 			tdp->i_nlink++;
1673 			DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
1674 			tdp->i_flag |= IN_CHANGE;
1675 			if ((error = UFS_UPDATE(tdvp, NULL, NULL,
1676 			    UPDATE_DIROP)) != 0) {
1677 				tdp->i_nlink--;
1678 				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
1679 				tdp->i_flag |= IN_CHANGE;
1680 				goto bad;
1681 			}
1682 		}
1683 		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
1684 		ufs_makedirentry(ip, tcnp, newdir);
1685 		error = ufs_direnter(tdvp, &to_ulr,
1686 				     NULL, newdir, tcnp, NULL);
1687 		pool_cache_put(ufs_direct_cache, newdir);
1688 		if (error != 0) {
1689 			if (doingdirectory && newparent) {
1690 				tdp->i_nlink--;
1691 				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
1692 				tdp->i_flag |= IN_CHANGE;
1693 				(void)UFS_UPDATE(tdvp, NULL, NULL,
1694 						 UPDATE_WAIT | UPDATE_DIROP);
1695 			}
1696 			goto bad;
1697 		}
1698 		VN_KNOTE(tdvp, NOTE_WRITE);
1699 	} else {
1700 		if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
1701 			panic("rename: EXDEV");
1702 		/*
1703 		 * Short circuit rename(foo, foo).
1704 		 */
1705 		if (txp->i_number == ip->i_number)
1706 			panic("rename: same file");
1707 		/*
1708 		 * If the parent directory is "sticky", then the user must
1709 		 * own the parent directory, or the destination of the rename,
1710 		 * otherwise the destination may not be changed (except by
1711 		 * root). This implements append-only directories.
1712 		 */
1713 		if ((tdp->i_mode & S_ISTXT) &&
1714 		    kauth_authorize_generic(tcnp->cn_cred,
1715 		     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
1716 		    kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
1717 		    txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
1718 			error = EPERM;
1719 			goto bad;
1720 		}
1721 		/*
1722 		 * Target must be empty if a directory and have no links
1723 		 * to it. Also, ensure source and target are compatible
1724 		 * (both directories, or both not directories).
1725 		 */
1726 		if ((txp->i_mode & IFMT) == IFDIR) {
1727 			if (txp->i_nlink > 2 ||
1728 			    !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
1729 				error = ENOTEMPTY;
1730 				goto bad;
1731 			}
1732 			if (!doingdirectory) {
1733 				error = ENOTDIR;
1734 				goto bad;
1735 			}
1736 			cache_purge(tdvp);
1737 		} else if (doingdirectory) {
1738 			error = EISDIR;
1739 			goto bad;
1740 		}
1741 		if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset,
1742 		    txp, ip->i_number,
1743 		    IFTODT(ip->i_mode), doingdirectory && newparent ?
1744 		    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
1745 			goto bad;
1746 		if (doingdirectory) {
1747 			/*
1748 			 * Truncate inode. The only stuff left in the directory
1749 			 * is "." and "..". The "." reference is inconsequential
1750 			 * since we are quashing it. We have removed the "."
1751 			 * reference and the reference in the parent directory,
1752 			 * but there may be other hard links.
1753 			 */
1754 			if (!newparent) {
1755 				tdp->i_nlink--;
1756 				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
1757 				tdp->i_flag |= IN_CHANGE;
1758 				UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
1759 			}
1760 			txp->i_nlink--;
1761 			DIP_ASSIGN(txp, nlink, txp->i_nlink);
1762 			txp->i_flag |= IN_CHANGE;
1763 			if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
1764 			    tcnp->cn_cred)))
1765 				goto bad;
1766 		}
1767 		VN_KNOTE(tdvp, NOTE_WRITE);
1768 		VN_KNOTE(tvp, NOTE_DELETE);
1769 	}
1770 
1771 	/*
1772 	 * Handle case where the directory entry we need to remove,
1773 	 * which is/was at from_ulr.ulr_offset, or the one before it,
1774 	 * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count,
1775 	 * may have been moved when the directory insertion above
1776 	 * performed compaction.
1777 	 */
1778 	if (tdp->i_number == fdp->i_number &&
1779 	    ulr_overlap(&from_ulr, &to_ulr)) {
1780 
1781 		struct buf *bp;
1782 		struct direct *ep;
1783 		struct ufsmount *ump = fdp->i_ump;
1784 		doff_t curpos;
1785 		doff_t endsearch;	/* offset to end directory search */
1786 		uint32_t prev_reclen;
1787 		int dirblksiz = ump->um_dirblksiz;
1788 		const int needswap = UFS_MPNEEDSWAP(ump);
1789 		u_long bmask;
1790 		int namlen, entryoffsetinblock;
1791 		char *dirbuf;
1792 
1793 		bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
1794 
1795 		/*
1796 		 * The fcnp entry will be somewhere between the start of
1797 		 * compaction (to_ulr.ulr_offset) and the original location
1798 		 * (from_ulr.ulr_offset).
1799 		 */
1800 		curpos = to_ulr.ulr_offset;
1801 		endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen;
1802 		entryoffsetinblock = 0;
1803 
1804 		/*
1805 		 * Get the directory block containing the start of
1806 		 * compaction.
1807 		 */
1808 		error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf,
1809 		    &bp, false);
1810 		if (error)
1811 			goto bad;
1812 
1813 		/*
1814 		 * Keep existing ulr_count (length of previous record)
1815 		 * for the case where compaction did not include the
1816 		 * previous entry but started at the from-entry.
1817 		 */
1818 		prev_reclen = from_ulr.ulr_count;
1819 
1820 		while (curpos < endsearch) {
1821 			uint32_t reclen;
1822 
1823 			/*
1824 			 * If necessary, get the next directory block.
1825 			 *
1826 			 * dholland 7/13/11 to the best of my understanding
1827 			 * this should never happen; compaction occurs only
1828 			 * within single blocks. I think.
1829 			 */
1830 			if ((curpos & bmask) == 0) {
1831 				if (bp != NULL)
1832 					brelse(bp, 0);
1833 				error = ufs_blkatoff(fdvp, (off_t)curpos,
1834 				    &dirbuf, &bp, false);
1835 				if (error)
1836 					goto bad;
1837 				entryoffsetinblock = 0;
1838 			}
1839 
1840 			KASSERT(bp != NULL);
1841 			ep = (struct direct *)(dirbuf + entryoffsetinblock);
1842 			reclen = ufs_rw16(ep->d_reclen, needswap);
1843 
1844 #if (BYTE_ORDER == LITTLE_ENDIAN)
1845 			if (FSFMT(fdvp) && needswap == 0)
1846 				namlen = ep->d_type;
1847 			else
1848 				namlen = ep->d_namlen;
1849 #else
1850 			if (FSFMT(fdvp) && needswap != 0)
1851 				namlen = ep->d_type;
1852 			else
1853 				namlen = ep->d_namlen;
1854 #endif
1855 			if ((ep->d_ino != 0) &&
1856 			    (ufs_rw32(ep->d_ino, needswap) != WINO) &&
1857 			    (namlen == fcnp->cn_namelen) &&
1858 			    memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
1859 				from_ulr.ulr_reclen = reclen;
1860 				break;
1861 			}
1862 			curpos += reclen;
1863 			entryoffsetinblock += reclen;
1864 			prev_reclen = reclen;
1865 		}
1866 
1867 		from_ulr.ulr_offset = curpos;
1868 		from_ulr.ulr_count = prev_reclen;
1869 
1870 		KASSERT(curpos <= endsearch);
1871 
1872 		/*
1873 		 * If ulr_offset points to start of a directory block,
1874 		 * clear ulr_count so ufs_dirremove() doesn't try to
1875 		 * merge free space over a directory block boundary.
1876 		 */
1877 		if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0)
1878 			from_ulr.ulr_count = 0;
1879 
1880 		brelse(bp, 0);
1881 	}
1882 
1883 	/*
1884 	 * 3) Unlink the source.
1885 	 */
1886 
1887 #if 0
1888 	/*
1889 	 * Ensure that the directory entry still exists and has not
1890 	 * changed while the new name has been entered. If the source is
1891 	 * a file then the entry may have been unlinked or renamed. In
1892 	 * either case there is no further work to be done. If the source
1893 	 * is a directory then it cannot have been rmdir'ed; The IRENAME
1894 	 * flag ensures that it cannot be moved by another rename or removed
1895 	 * by a rmdir.
1896 	 */
1897 #endif
1898 	KASSERT(fxp == ip);
1899 
1900 	/*
1901 	 * If the source is a directory with a new parent, the link
1902 	 * count of the old parent directory must be decremented and
1903 	 * ".." set to point to the new parent.
1904 	 */
1905 	if (doingdirectory && newparent) {
1906 		KASSERT(fdp != NULL);
1907 		ufs_dirrewrite(fxp, mastertemplate.dot_reclen,
1908 			       fdp, newparent, DT_DIR, 0, IN_CHANGE);
1909 		cache_purge(fdvp);
1910 	}
1911 	error = ufs_dirremove(fdvp, &from_ulr,
1912 			      fxp, fcnp->cn_flags, 0);
1913 	fxp->i_flag &= ~IN_RENAME;
1914 
1915 	VN_KNOTE(fvp, NOTE_RENAME);
1916 	goto done;
1917 
1918  out:
1919 	goto out2;
1920 
1921 	/* exit routines from steps 1 & 2 */
1922  bad:
1923 	if (doingdirectory)
1924 		ip->i_flag &= ~IN_RENAME;
1925 	ip->i_nlink--;
1926 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
1927 	ip->i_flag |= IN_CHANGE;
1928 	ip->i_flag &= ~IN_RENAME;
1929 	UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
1930  done:
1931 	UFS_WAPBL_END(fdvp->v_mount);
1932  out2:
1933 	/*
1934 	 * clear IN_RENAME - some exit paths happen too early to go
1935 	 * through the cleanup done in the "bad" case above, so we
1936 	 * always do this mini-cleanup here.
1937 	 */
1938 	ip->i_flag &= ~IN_RENAME;
1939 
1940 	VOP_UNLOCK(fdvp);
1941 	if (tdvp != fdvp) {
1942 		VOP_UNLOCK(tdvp);
1943 	}
1944 	VOP_UNLOCK(fvp);
1945 	if (tvp && tvp != fvp) {
1946 		VOP_UNLOCK(tvp);
1947 	}
1948 
1949 	vrele(fdvp);
1950 	vrele(tdvp);
1951 	vrele(fvp);
1952 	if (tvp) {
1953 		vrele(tvp);
1954 	}
1955 
1956 	fstrans_done(mp);
1957 	return (error);
1958 
1959  abort_withlocks:
1960 	VOP_UNLOCK(fdvp);
1961 	if (tdvp != fdvp) {
1962 		VOP_UNLOCK(tdvp);
1963 	}
1964 	VOP_UNLOCK(fvp);
1965 	if (tvp && tvp != fvp) {
1966 		VOP_UNLOCK(tvp);
1967 	}
1968 
1969  abort:
1970 	VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
1971 	VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
1972 	vrele(tdvp);
1973 	if (tvp) {
1974 		vrele(tvp);
1975 	}
1976 	vrele(fdvp);
1977 	if (fvp) {
1978 		vrele(fvp);
1979 	}
1980 	return (error);
1981 }
1982 
1983 int
1984 ufs_mkdir(void *v)
1985 {
1986 	struct vop_mkdir_args /* {
1987 		struct vnode		*a_dvp;
1988 		struct vnode		**a_vpp;
1989 		struct componentname	*a_cnp;
1990 		struct vattr		*a_vap;
1991 	} */ *ap = v;
1992 	struct vnode		*dvp = ap->a_dvp, *tvp;
1993 	struct vattr		*vap = ap->a_vap;
1994 	struct componentname	*cnp = ap->a_cnp;
1995 	struct inode		*ip, *dp = VTOI(dvp);
1996 	struct buf		*bp;
1997 	struct dirtemplate	dirtemplate;
1998 	struct direct		*newdir;
1999 	int			error, dmode;
2000 	struct ufsmount		*ump = dp->i_ump;
2001 	int			dirblksiz = ump->um_dirblksiz;
2002 	struct ufs_lookup_results *ulr;
2003 
2004 	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
2005 
2006 	/* XXX should handle this material another way */
2007 	ulr = &dp->i_crap;
2008 	UFS_CHECK_CRAPCOUNTER(dp);
2009 
2010 	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
2011 		error = EMLINK;
2012 		goto out;
2013 	}
2014 	dmode = vap->va_mode & ACCESSPERMS;
2015 	dmode |= IFDIR;
2016 	/*
2017 	 * Must simulate part of ufs_makeinode here to acquire the inode,
2018 	 * but not have it entered in the parent directory. The entry is
2019 	 * made later after writing "." and ".." entries.
2020 	 */
2021 	if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
2022 		goto out;
2023 
2024 	tvp = *ap->a_vpp;
2025 	ip = VTOI(tvp);
2026 
2027 	error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
2028 	if (error) {
2029 		UFS_VFREE(tvp, ip->i_number, dmode);
2030 		vput(tvp);
2031 		goto out;
2032 	}
2033 	ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
2034 	DIP_ASSIGN(ip, uid, ip->i_uid);
2035 	ip->i_gid = dp->i_gid;
2036 	DIP_ASSIGN(ip, gid, ip->i_gid);
2037 #if defined(QUOTA) || defined(QUOTA2)
2038 	if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2039 		UFS_VFREE(tvp, ip->i_number, dmode);
2040 		UFS_WAPBL_END(dvp->v_mount);
2041 		fstrans_done(dvp->v_mount);
2042 		vput(tvp);
2043 		vput(dvp);
2044 		return (error);
2045 	}
2046 #endif
2047 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2048 	ip->i_mode = dmode;
2049 	DIP_ASSIGN(ip, mode, dmode);
2050 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
2051 	ip->i_nlink = 2;
2052 	DIP_ASSIGN(ip, nlink, 2);
2053 	if (cnp->cn_flags & ISWHITEOUT) {
2054 		ip->i_flags |= UF_OPAQUE;
2055 		DIP_ASSIGN(ip, flags, ip->i_flags);
2056 	}
2057 
2058 	/*
2059 	 * Bump link count in parent directory to reflect work done below.
2060 	 * Should be done before reference is created so cleanup is
2061 	 * possible if we crash.
2062 	 */
2063 	dp->i_nlink++;
2064 	DIP_ASSIGN(dp, nlink, dp->i_nlink);
2065 	dp->i_flag |= IN_CHANGE;
2066 	if ((error = UFS_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
2067 		goto bad;
2068 
2069 	/*
2070 	 * Initialize directory with "." and ".." from static template.
2071 	 */
2072 	dirtemplate = mastertemplate;
2073 	dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
2074 	dirtemplate.dot_ino = ufs_rw32(ip->i_number, UFS_MPNEEDSWAP(ump));
2075 	dirtemplate.dotdot_ino = ufs_rw32(dp->i_number, UFS_MPNEEDSWAP(ump));
2076 	dirtemplate.dot_reclen = ufs_rw16(dirtemplate.dot_reclen,
2077 	    UFS_MPNEEDSWAP(ump));
2078 	dirtemplate.dotdot_reclen = ufs_rw16(dirtemplate.dotdot_reclen,
2079 	    UFS_MPNEEDSWAP(ump));
2080 	if (ump->um_maxsymlinklen <= 0) {
2081 #if BYTE_ORDER == LITTLE_ENDIAN
2082 		if (UFS_MPNEEDSWAP(ump) == 0)
2083 #else
2084 		if (UFS_MPNEEDSWAP(ump) != 0)
2085 #endif
2086 		{
2087 			dirtemplate.dot_type = dirtemplate.dot_namlen;
2088 			dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
2089 			dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
2090 		} else
2091 			dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
2092 	}
2093 	if ((error = UFS_BALLOC(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
2094 	    B_CLRBUF, &bp)) != 0)
2095 		goto bad;
2096 	ip->i_size = dirblksiz;
2097 	DIP_ASSIGN(ip, size, dirblksiz);
2098 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2099 	uvm_vnp_setsize(tvp, ip->i_size);
2100 	memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);
2101 
2102 	/*
2103 	 * Directory set up, now install its entry in the parent directory.
2104 	 * We must write out the buffer containing the new directory body
2105 	 * before entering the new name in the parent.
2106 	 */
2107 	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
2108 		goto bad;
2109 	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
2110 		goto bad;
2111 	}
2112 	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
2113 	ufs_makedirentry(ip, cnp, newdir);
2114 	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
2115 	pool_cache_put(ufs_direct_cache, newdir);
2116  bad:
2117 	if (error == 0) {
2118 		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
2119 		UFS_WAPBL_END(dvp->v_mount);
2120 	} else {
2121 		dp->i_nlink--;
2122 		DIP_ASSIGN(dp, nlink, dp->i_nlink);
2123 		dp->i_flag |= IN_CHANGE;
2124 		UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
2125 		/*
2126 		 * No need to do an explicit UFS_TRUNCATE here, vrele will
2127 		 * do this for us because we set the link count to 0.
2128 		 */
2129 		ip->i_nlink = 0;
2130 		DIP_ASSIGN(ip, nlink, 0);
2131 		ip->i_flag |= IN_CHANGE;
2132 		/* If IN_ADIROP, account for it */
2133 		UFS_UNMARK_VNODE(tvp);
2134 		UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
2135 		UFS_WAPBL_END(dvp->v_mount);
2136 		vput(tvp);
2137 	}
2138  out:
2139 	fstrans_done(dvp->v_mount);
2140 	vput(dvp);
2141 	return (error);
2142 }
2143 
2144 int
2145 ufs_rmdir(void *v)
2146 {
2147 	struct vop_rmdir_args /* {
2148 		struct vnode		*a_dvp;
2149 		struct vnode		*a_vp;
2150 		struct componentname	*a_cnp;
2151 	} */ *ap = v;
2152 	struct vnode		*vp, *dvp;
2153 	struct componentname	*cnp;
2154 	struct inode		*ip, *dp;
2155 	int			error;
2156 	struct ufs_lookup_results *ulr;
2157 
2158 	vp = ap->a_vp;
2159 	dvp = ap->a_dvp;
2160 	cnp = ap->a_cnp;
2161 	ip = VTOI(vp);
2162 	dp = VTOI(dvp);
2163 
2164 	/* XXX should handle this material another way */
2165 	ulr = &dp->i_crap;
2166 	UFS_CHECK_CRAPCOUNTER(dp);
2167 
2168 	/*
2169 	 * No rmdir "." or of mounted directories please.
2170 	 */
2171 	if (dp == ip || vp->v_mountedhere != NULL) {
2172 		if (dp == ip)
2173 			vrele(dvp);
2174 		else
2175 			vput(dvp);
2176 		vput(vp);
2177 		return (EINVAL);
2178 	}
2179 
2180 	fstrans_start(dvp->v_mount, FSTRANS_SHARED);
2181 
2182 	/*
2183 	 * Do not remove a directory that is in the process of being renamed.
2184 	 * Verify that the directory is empty (and valid). (Rmdir ".." won't
2185 	 * be valid since ".." will contain a reference to the current
2186 	 * directory and thus be non-empty.)
2187 	 */
2188 	error = 0;
2189 	if (ip->i_flag & IN_RENAME) {
2190 		error = EINVAL;
2191 		goto out;
2192 	}
2193 	if (ip->i_nlink != 2 ||
2194 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
2195 		error = ENOTEMPTY;
2196 		goto out;
2197 	}
2198 	if ((dp->i_flags & APPEND) ||
2199 		(ip->i_flags & (IMMUTABLE | APPEND))) {
2200 		error = EPERM;
2201 		goto out;
2202 	}
2203 	error = UFS_WAPBL_BEGIN(dvp->v_mount);
2204 	if (error)
2205 		goto out;
2206 	/*
2207 	 * Delete reference to directory before purging
2208 	 * inode.  If we crash in between, the directory
2209 	 * will be reattached to lost+found,
2210 	 */
2211 	error = ufs_dirremove(dvp, ulr, ip, cnp->cn_flags, 1);
2212 	if (error) {
2213 		UFS_WAPBL_END(dvp->v_mount);
2214 		goto out;
2215 	}
2216 	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
2217 	cache_purge(dvp);
2218 	/*
2219 	 * Truncate inode.  The only stuff left in the directory is "." and
2220 	 * "..".  The "." reference is inconsequential since we're quashing
2221 	 * it.
2222 	 */
2223 	dp->i_nlink--;
2224 	DIP_ASSIGN(dp, nlink, dp->i_nlink);
2225 	dp->i_flag |= IN_CHANGE;
2226 	UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
2227 	ip->i_nlink--;
2228 	DIP_ASSIGN(ip, nlink, ip->i_nlink);
2229 	ip->i_flag |= IN_CHANGE;
2230 	error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
2231 	cache_purge(vp);
2232 	/*
2233 	 * Unlock the log while we still have reference to unlinked
2234 	 * directory vp so that it will not get locked for recycling
2235 	 */
2236 	UFS_WAPBL_END(dvp->v_mount);
2237 #ifdef UFS_DIRHASH
2238 	if (ip->i_dirhash != NULL)
2239 		ufsdirhash_free(ip);
2240 #endif
2241  out:
2242 	VN_KNOTE(vp, NOTE_DELETE);
2243 	vput(vp);
2244 	fstrans_done(dvp->v_mount);
2245 	vput(dvp);
2246 	return (error);
2247 }
2248 
2249 /*
2250  * symlink -- make a symbolic link
2251  */
2252 int
2253 ufs_symlink(void *v)
2254 {
2255 	struct vop_symlink_args /* {
2256 		struct vnode		*a_dvp;
2257 		struct vnode		**a_vpp;
2258 		struct componentname	*a_cnp;
2259 		struct vattr		*a_vap;
2260 		char			*a_target;
2261 	} */ *ap = v;
2262 	struct vnode	*vp, **vpp;
2263 	struct inode	*ip;
2264 	int		len, error;
2265 	struct ufs_lookup_results *ulr;
2266 
2267 	vpp = ap->a_vpp;
2268 
2269 	/* XXX should handle this material another way */
2270 	ulr = &VTOI(ap->a_dvp)->i_crap;
2271 	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));
2272 
2273 	/*
2274 	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
2275 	 * ufs_makeinode
2276 	 */
2277 	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
2278 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, ulr,
2279 			      vpp, ap->a_cnp);
2280 	if (error)
2281 		goto out;
2282 	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
2283 	vp = *vpp;
2284 	len = strlen(ap->a_target);
2285 	ip = VTOI(vp);
2286 	if (len < ip->i_ump->um_maxsymlinklen) {
2287 		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
2288 		ip->i_size = len;
2289 		DIP_ASSIGN(ip, size, len);
2290 		uvm_vnp_setsize(vp, ip->i_size);
2291 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
2292 		if (vp->v_mount->mnt_flag & MNT_RELATIME)
2293 			ip->i_flag |= IN_ACCESS;
2294 		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
2295 	} else
2296 		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
2297 		    UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED,
2298 		    ap->a_cnp->cn_cred, NULL, NULL);
2299 	UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
2300 	if (error)
2301 		vput(vp);
2302 out:
2303 	fstrans_done(ap->a_dvp->v_mount);
2304 	return (error);
2305 }
2306 
2307 /*
2308  * Vnode op for reading directories.
2309  *
2310  * This routine handles converting from the on-disk directory format
2311  * "struct direct" to the in-memory format "struct dirent" as well as
2312  * byte swapping the entries if necessary.
2313  */
2314 int
2315 ufs_readdir(void *v)
2316 {
2317 	struct vop_readdir_args /* {
2318 		struct vnode	*a_vp;
2319 		struct uio	*a_uio;
2320 		kauth_cred_t	a_cred;
2321 		int		*a_eofflag;
2322 		off_t		**a_cookies;
2323 		int		*ncookies;
2324 	} */ *ap = v;
2325 	struct vnode	*vp = ap->a_vp;
2326 	struct direct	*cdp, *ecdp;
2327 	struct dirent	*ndp;
2328 	char		*cdbuf, *ndbuf, *endp;
2329 	struct uio	auio, *uio;
2330 	struct iovec	aiov;
2331 	int		error;
2332 	size_t		count, ccount, rcount;
2333 	off_t		off, *ccp;
2334 	off_t		startoff;
2335 	size_t		skipbytes;
2336 	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
2337 	int nswap = UFS_MPNEEDSWAP(ump);
2338 #if BYTE_ORDER == LITTLE_ENDIAN
2339 	int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0;
2340 #else
2341 	int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0;
2342 #endif
2343 	uio = ap->a_uio;
2344 	count = uio->uio_resid;
2345 	rcount = count - ((uio->uio_offset + count) & (ump->um_dirblksiz - 1));
2346 
2347 	if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp))
2348 		return EINVAL;
2349 
2350 	startoff = uio->uio_offset & ~(ump->um_dirblksiz - 1);
2351 	skipbytes = uio->uio_offset - startoff;
2352 	rcount += skipbytes;
2353 
2354 	auio.uio_iov = &aiov;
2355 	auio.uio_iovcnt = 1;
2356 	auio.uio_offset = startoff;
2357 	auio.uio_resid = rcount;
2358 	UIO_SETUP_SYSSPACE(&auio);
2359 	auio.uio_rw = UIO_READ;
2360 	cdbuf = malloc(rcount, M_TEMP, M_WAITOK);
2361 	aiov.iov_base = cdbuf;
2362 	aiov.iov_len = rcount;
2363 	error = VOP_READ(vp, &auio, 0, ap->a_cred);
2364 	if (error != 0) {
2365 		free(cdbuf, M_TEMP);
2366 		return error;
2367 	}
2368 
2369 	rcount -= auio.uio_resid;
2370 
2371 	cdp = (struct direct *)(void *)cdbuf;
2372 	ecdp = (struct direct *)(void *)&cdbuf[rcount];
2373 
2374 	ndbuf = malloc(count, M_TEMP, M_WAITOK);
2375 	ndp = (struct dirent *)(void *)ndbuf;
2376 	endp = &ndbuf[count];
2377 
2378 	off = uio->uio_offset;
2379 	if (ap->a_cookies) {
2380 		ccount = rcount / _DIRENT_RECLEN(cdp, 1);
2381 		ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp),
2382 		    M_TEMP, M_WAITOK);
2383 	} else {
2384 		/* XXX: GCC */
2385 		ccount = 0;
2386 		ccp = NULL;
2387 	}
2388 
2389 	while (cdp < ecdp) {
2390 		cdp->d_reclen = ufs_rw16(cdp->d_reclen, nswap);
2391 		if (skipbytes > 0) {
2392 			if (cdp->d_reclen <= skipbytes) {
2393 				skipbytes -= cdp->d_reclen;
2394 				cdp = _DIRENT_NEXT(cdp);
2395 				continue;
2396 			}
2397 			/*
2398 			 * invalid cookie.
2399 			 */
2400 			error = EINVAL;
2401 			goto out;
2402 		}
2403 		if (cdp->d_reclen == 0) {
2404 			struct dirent *ondp = ndp;
2405 			ndp->d_reclen = _DIRENT_MINSIZE(ndp);
2406 			ndp = _DIRENT_NEXT(ndp);
2407 			ondp->d_reclen = 0;
2408 			cdp = ecdp;
2409 			break;
2410 		}
2411 		if (needswap) {
2412 			ndp->d_type = cdp->d_namlen;
2413 			ndp->d_namlen = cdp->d_type;
2414 		} else {
2415 			ndp->d_type = cdp->d_type;
2416 			ndp->d_namlen = cdp->d_namlen;
2417 		}
2418 		ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen);
2419 		if ((char *)(void *)ndp + ndp->d_reclen +
2420 		    _DIRENT_MINSIZE(ndp) > endp)
2421 			break;
2422 		ndp->d_fileno = ufs_rw32(cdp->d_ino, nswap);
2423 		(void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen);
2424 		memset(&ndp->d_name[ndp->d_namlen], 0,
2425 		    ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen);
2426 		off += cdp->d_reclen;
2427 		if (ap->a_cookies) {
2428 			KASSERT(ccp - *(ap->a_cookies) < ccount);
2429 			*(ccp++) = off;
2430 		}
2431 		ndp = _DIRENT_NEXT(ndp);
2432 		cdp = _DIRENT_NEXT(cdp);
2433 	}
2434 
2435 	count = ((char *)(void *)ndp - ndbuf);
2436 	error = uiomove(ndbuf, count, uio);
2437 out:
2438 	if (ap->a_cookies) {
2439 		if (error) {
2440 			free(*(ap->a_cookies), M_TEMP);
2441 			*(ap->a_cookies) = NULL;
2442 			*(ap->a_ncookies) = 0;
2443 		} else {
2444 			*ap->a_ncookies = ccp - *(ap->a_cookies);
2445 		}
2446 	}
2447 	uio->uio_offset = off;
2448 	free(ndbuf, M_TEMP);
2449 	free(cdbuf, M_TEMP);
2450 	*ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset;
2451 	return error;
2452 }
2453 
2454 /*
2455  * Return target name of a symbolic link
2456  */
2457 int
2458 ufs_readlink(void *v)
2459 {
2460 	struct vop_readlink_args /* {
2461 		struct vnode	*a_vp;
2462 		struct uio	*a_uio;
2463 		kauth_cred_t	a_cred;
2464 	} */ *ap = v;
2465 	struct vnode	*vp = ap->a_vp;
2466 	struct inode	*ip = VTOI(vp);
2467 	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
2468 	int		isize;
2469 
2470 	isize = ip->i_size;
2471 	if (isize < ump->um_maxsymlinklen ||
2472 	    (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) {
2473 		uiomove((char *)SHORTLINK(ip), isize, ap->a_uio);
2474 		return (0);
2475 	}
2476 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
2477 }
2478 
2479 /*
2480  * Calculate the logical to physical mapping if not done already,
2481  * then call the device strategy routine.
2482  */
2483 int
2484 ufs_strategy(void *v)
2485 {
2486 	struct vop_strategy_args /* {
2487 		struct vnode *a_vp;
2488 		struct buf *a_bp;
2489 	} */ *ap = v;
2490 	struct buf	*bp;
2491 	struct vnode	*vp;
2492 	struct inode	*ip;
2493 	struct mount	*mp;
2494 	int		error;
2495 
2496 	bp = ap->a_bp;
2497 	vp = ap->a_vp;
2498 	ip = VTOI(vp);
2499 	if (vp->v_type == VBLK || vp->v_type == VCHR)
2500 		panic("ufs_strategy: spec");
2501 	KASSERT(bp->b_bcount != 0);
2502 	if (bp->b_blkno == bp->b_lblkno) {
2503 		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
2504 				 NULL);
2505 		if (error) {
2506 			bp->b_error = error;
2507 			biodone(bp);
2508 			return (error);
2509 		}
2510 		if (bp->b_blkno == -1) /* no valid data */
2511 			clrbuf(bp);
2512 	}
2513 	if (bp->b_blkno < 0) { /* block is not on disk */
2514 		biodone(bp);
2515 		return (0);
2516 	}
2517 	vp = ip->i_devvp;
2518 
2519 	error = VOP_STRATEGY(vp, bp);
2520 	if (error)
2521 		return error;
2522 
2523 	if (!BUF_ISREAD(bp))
2524 		return 0;
2525 
2526 	mp = wapbl_vptomp(vp);
2527 	if (mp == NULL || mp->mnt_wapbl_replay == NULL ||
2528 	    !WAPBL_REPLAY_ISOPEN(mp) ||
2529 	    !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount))
2530 		return 0;
2531 
2532 	error = biowait(bp);
2533 	if (error)
2534 		return error;
2535 
2536 	error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount);
2537 	if (error) {
2538 		mutex_enter(&bufcache_lock);
2539 		SET(bp->b_cflags, BC_INVAL);
2540 		mutex_exit(&bufcache_lock);
2541 	}
2542 	return error;
2543 }
2544 
2545 /*
2546  * Print out the contents of an inode.
2547  */
2548 int
2549 ufs_print(void *v)
2550 {
2551 	struct vop_print_args /* {
2552 		struct vnode	*a_vp;
2553 	} */ *ap = v;
2554 	struct vnode	*vp;
2555 	struct inode	*ip;
2556 
2557 	vp = ap->a_vp;
2558 	ip = VTOI(vp);
2559 	printf("tag VT_UFS, ino %llu, on dev %llu, %llu",
2560 	    (unsigned long long)ip->i_number,
2561 	    (unsigned long long)major(ip->i_dev),
2562 	    (unsigned long long)minor(ip->i_dev));
2563 	printf(" flags 0x%x, nlink %d\n",
2564 	    ip->i_flag, ip->i_nlink);
2565 	printf("\tmode 0%o, owner %d, group %d, size %qd",
2566 	    ip->i_mode, ip->i_uid, ip->i_gid,
2567 	    (long long)ip->i_size);
2568 	if (vp->v_type == VFIFO)
2569 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2570 	printf("\n");
2571 	return (0);
2572 }
2573 
2574 /*
2575  * Read wrapper for special devices.
2576  */
2577 int
2578 ufsspec_read(void *v)
2579 {
2580 	struct vop_read_args /* {
2581 		struct vnode	*a_vp;
2582 		struct uio	*a_uio;
2583 		int		a_ioflag;
2584 		kauth_cred_t	a_cred;
2585 	} */ *ap = v;
2586 
2587 	/*
2588 	 * Set access flag.
2589 	 */
2590 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
2591 		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2592 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
2593 }
2594 
2595 /*
2596  * Write wrapper for special devices.
2597  */
2598 int
2599 ufsspec_write(void *v)
2600 {
2601 	struct vop_write_args /* {
2602 		struct vnode	*a_vp;
2603 		struct uio	*a_uio;
2604 		int		a_ioflag;
2605 		kauth_cred_t	a_cred;
2606 	} */ *ap = v;
2607 
2608 	/*
2609 	 * Set update and change flags.
2610 	 */
2611 	if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)
2612 		VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
2613 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
2614 }
2615 
2616 /*
2617  * Close wrapper for special devices.
2618  *
2619  * Update the times on the inode then do device close.
2620  */
2621 int
2622 ufsspec_close(void *v)
2623 {
2624 	struct vop_close_args /* {
2625 		struct vnode	*a_vp;
2626 		int		a_fflag;
2627 		kauth_cred_t	a_cred;
2628 	} */ *ap = v;
2629 	struct vnode	*vp;
2630 	struct inode	*ip;
2631 
2632 	vp = ap->a_vp;
2633 	ip = VTOI(vp);
2634 	if (vp->v_usecount > 1)
2635 		UFS_ITIMES(vp, NULL, NULL, NULL);
2636 	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
2637 }
2638 
2639 /*
2640  * Read wrapper for fifo's
2641  */
2642 int
2643 ufsfifo_read(void *v)
2644 {
2645 	struct vop_read_args /* {
2646 		struct vnode	*a_vp;
2647 		struct uio	*a_uio;
2648 		int		a_ioflag;
2649 		kauth_cred_t	a_cred;
2650 	} */ *ap = v;
2651 
2652 	/*
2653 	 * Set access flag.
2654 	 */
2655 	VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
2656 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
2657 }
2658 
2659 /*
2660  * Write wrapper for fifo's.
2661  */
2662 int
2663 ufsfifo_write(void *v)
2664 {
2665 	struct vop_write_args /* {
2666 		struct vnode	*a_vp;
2667 		struct uio	*a_uio;
2668 		int		a_ioflag;
2669 		kauth_cred_t	a_cred;
2670 	} */ *ap = v;
2671 
2672 	/*
2673 	 * Set update and change flags.
2674 	 */
2675 	VTOI(ap->a_vp)->i_flag |= IN_MODIFY;
2676 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
2677 }
2678 
2679 /*
2680  * Close wrapper for fifo's.
2681  *
2682  * Update the times on the inode then do device close.
2683  */
2684 int
2685 ufsfifo_close(void *v)
2686 {
2687 	struct vop_close_args /* {
2688 		struct vnode	*a_vp;
2689 		int		a_fflag;
2690 		kauth_cred_t	a_cred;
2691 	} */ *ap = v;
2692 	struct vnode	*vp;
2693 	struct inode	*ip;
2694 
2695 	vp = ap->a_vp;
2696 	ip = VTOI(vp);
2697 	if (ap->a_vp->v_usecount > 1)
2698 		UFS_ITIMES(vp, NULL, NULL, NULL);
2699 	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
2700 }
2701 
2702 /*
2703  * Return POSIX pathconf information applicable to ufs filesystems.
2704  */
2705 int
2706 ufs_pathconf(void *v)
2707 {
2708 	struct vop_pathconf_args /* {
2709 		struct vnode	*a_vp;
2710 		int		a_name;
2711 		register_t	*a_retval;
2712 	} */ *ap = v;
2713 
2714 	switch (ap->a_name) {
2715 	case _PC_LINK_MAX:
2716 		*ap->a_retval = LINK_MAX;
2717 		return (0);
2718 	case _PC_NAME_MAX:
2719 		*ap->a_retval = FFS_MAXNAMLEN;
2720 		return (0);
2721 	case _PC_PATH_MAX:
2722 		*ap->a_retval = PATH_MAX;
2723 		return (0);
2724 	case _PC_PIPE_BUF:
2725 		*ap->a_retval = PIPE_BUF;
2726 		return (0);
2727 	case _PC_CHOWN_RESTRICTED:
2728 		*ap->a_retval = 1;
2729 		return (0);
2730 	case _PC_NO_TRUNC:
2731 		*ap->a_retval = 1;
2732 		return (0);
2733 	case _PC_SYNC_IO:
2734 		*ap->a_retval = 1;
2735 		return (0);
2736 	case _PC_FILESIZEBITS:
2737 		*ap->a_retval = 42;
2738 		return (0);
2739 	case _PC_SYMLINK_MAX:
2740 		*ap->a_retval = MAXPATHLEN;
2741 		return (0);
2742 	case _PC_2_SYMLINKS:
2743 		*ap->a_retval = 1;
2744 		return (0);
2745 	default:
2746 		return (EINVAL);
2747 	}
2748 	/* NOTREACHED */
2749 }
2750 
2751 /*
2752  * Advisory record locking support
2753  */
2754 int
2755 ufs_advlock(void *v)
2756 {
2757 	struct vop_advlock_args /* {
2758 		struct vnode	*a_vp;
2759 		void *		a_id;
2760 		int		a_op;
2761 		struct flock	*a_fl;
2762 		int		a_flags;
2763 	} */ *ap = v;
2764 	struct inode *ip;
2765 
2766 	ip = VTOI(ap->a_vp);
2767 	return lf_advlock(ap, &ip->i_lockf, ip->i_size);
2768 }
2769 
2770 /*
2771  * Initialize the vnode associated with a new inode, handle aliased
2772  * vnodes.
2773  */
2774 void
2775 ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *),
2776 	struct vnode **vpp)
2777 {
2778 	struct timeval	tv;
2779 	struct inode	*ip;
2780 	struct vnode	*vp;
2781 	dev_t		rdev;
2782 	struct ufsmount	*ump;
2783 
2784 	vp = *vpp;
2785 	ip = VTOI(vp);
2786 	switch(vp->v_type = IFTOVT(ip->i_mode)) {
2787 	case VCHR:
2788 	case VBLK:
2789 		vp->v_op = specops;
2790 		ump = ip->i_ump;
2791 		if (ump->um_fstype == UFS1)
2792 			rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev,
2793 			    UFS_MPNEEDSWAP(ump));
2794 		else
2795 			rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev,
2796 			    UFS_MPNEEDSWAP(ump));
2797 		spec_node_init(vp, rdev);
2798 		break;
2799 	case VFIFO:
2800 		vp->v_op = fifoops;
2801 		break;
2802 	case VNON:
2803 	case VBAD:
2804 	case VSOCK:
2805 	case VLNK:
2806 	case VDIR:
2807 	case VREG:
2808 		break;
2809 	}
2810 	if (ip->i_number == ROOTINO)
2811                 vp->v_vflag |= VV_ROOT;
2812 	/*
2813 	 * Initialize modrev times
2814 	 */
2815 	getmicrouptime(&tv);
2816 	ip->i_modrev = (uint64_t)(uint)tv.tv_sec << 32
2817 			| tv.tv_usec * 4294u;
2818 	*vpp = vp;
2819 }
2820 
2821 /*
2822  * Allocate a new inode.
2823  */
2824 int
2825 ufs_makeinode(int mode, struct vnode *dvp, const struct ufs_lookup_results *ulr,
2826 	struct vnode **vpp, struct componentname *cnp)
2827 {
2828 	struct inode	*ip, *pdir;
2829 	struct direct	*newdir;
2830 	struct vnode	*tvp;
2831 	int		error, ismember = 0;
2832 
2833 	UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount);
2834 
2835 	pdir = VTOI(dvp);
2836 
2837 	if ((mode & IFMT) == 0)
2838 		mode |= IFREG;
2839 
2840 	if ((error = UFS_VALLOC(dvp, mode, cnp->cn_cred, vpp)) != 0) {
2841 		vput(dvp);
2842 		return (error);
2843 	}
2844 	tvp = *vpp;
2845 	ip = VTOI(tvp);
2846 	ip->i_gid = pdir->i_gid;
2847 	DIP_ASSIGN(ip, gid, ip->i_gid);
2848 	ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
2849 	DIP_ASSIGN(ip, uid, ip->i_uid);
2850 	error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp);
2851 	if (error) {
2852 		/*
2853 		 * Note, we can't VOP_VFREE(tvp) here like we should
2854 		 * because we can't write to the disk.  Instead, we leave
2855 		 * the vnode dangling from the journal.
2856 		 */
2857 		vput(tvp);
2858 		vput(dvp);
2859 		return (error);
2860 	}
2861 #if defined(QUOTA) || defined(QUOTA2)
2862 	if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
2863 		UFS_VFREE(tvp, ip->i_number, mode);
2864 		UFS_WAPBL_END1(dvp->v_mount, dvp);
2865 		vput(tvp);
2866 		vput(dvp);
2867 		return (error);
2868 	}
2869 #endif
2870 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
2871 	ip->i_mode = mode;
2872 	DIP_ASSIGN(ip, mode, mode);
2873 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
2874 	ip->i_nlink = 1;
2875 	DIP_ASSIGN(ip, nlink, 1);
2876 	if ((ip->i_mode & ISGID) && (kauth_cred_ismember_gid(cnp->cn_cred,
2877 	    ip->i_gid, &ismember) != 0 || !ismember) &&
2878 	    kauth_authorize_generic(cnp->cn_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2879 		ip->i_mode &= ~ISGID;
2880 		DIP_ASSIGN(ip, mode, ip->i_mode);
2881 	}
2882 
2883 	if (cnp->cn_flags & ISWHITEOUT) {
2884 		ip->i_flags |= UF_OPAQUE;
2885 		DIP_ASSIGN(ip, flags, ip->i_flags);
2886 	}
2887 
2888 	/*
2889 	 * Make sure inode goes to disk before directory entry.
2890 	 */
2891 	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
2892 		goto bad;
2893 	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
2894 	ufs_makedirentry(ip, cnp, newdir);
2895 	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, NULL);
2896 	pool_cache_put(ufs_direct_cache, newdir);
2897 	if (error)
2898 		goto bad;
2899 	vput(dvp);
2900 	*vpp = tvp;
2901 	return (0);
2902 
2903  bad:
2904 	/*
2905 	 * Write error occurred trying to update the inode
2906 	 * or the directory so must deallocate the inode.
2907 	 */
2908 	ip->i_nlink = 0;
2909 	DIP_ASSIGN(ip, nlink, 0);
2910 	ip->i_flag |= IN_CHANGE;
2911 	/* If IN_ADIROP, account for it */
2912 	UFS_UNMARK_VNODE(tvp);
2913 	UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0);
2914 	tvp->v_type = VNON;		/* explodes later if VBLK */
2915 	UFS_WAPBL_END1(dvp->v_mount, dvp);
2916 	vput(tvp);
2917 	vput(dvp);
2918 	return (error);
2919 }
2920 
2921 /*
2922  * Allocate len bytes at offset off.
2923  */
2924 int
2925 ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
2926     kauth_cred_t cred)
2927 {
2928         struct inode *ip = VTOI(vp);
2929         int error, delta, bshift, bsize;
2930         UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist);
2931 
2932         error = 0;
2933         bshift = vp->v_mount->mnt_fs_bshift;
2934         bsize = 1 << bshift;
2935 
2936         delta = off & (bsize - 1);
2937         off -= delta;
2938         len += delta;
2939 
2940         while (len > 0) {
2941                 bsize = MIN(bsize, len);
2942 
2943                 error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL);
2944                 if (error) {
2945                         goto out;
2946                 }
2947 
2948                 /*
2949                  * increase file size now, UFS_BALLOC() requires that
2950                  * EOF be up-to-date before each call.
2951                  */
2952 
2953                 if (ip->i_size < off + bsize) {
2954                         UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
2955                             vp, ip->i_size, off + bsize, 0);
2956                         ip->i_size = off + bsize;
2957 			DIP_ASSIGN(ip, size, ip->i_size);
2958                 }
2959 
2960                 off += bsize;
2961                 len -= bsize;
2962         }
2963 
2964 out:
2965 	UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
2966 	return error;
2967 }
2968 
2969 void
2970 ufs_gop_markupdate(struct vnode *vp, int flags)
2971 {
2972 	u_int32_t mask = 0;
2973 
2974 	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
2975 		mask = IN_ACCESS;
2976 	}
2977 	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
2978 		if (vp->v_type == VREG) {
2979 			mask |= IN_CHANGE | IN_UPDATE;
2980 		} else {
2981 			mask |= IN_MODIFY;
2982 		}
2983 	}
2984 	if (mask) {
2985 		struct inode *ip = VTOI(vp);
2986 
2987 		ip->i_flag |= mask;
2988 	}
2989 }
2990