/*	$NetBSD: lfs_syscalls.c,v 1.139 2011/06/12 03:36:01 rmind Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2008
 *    The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.139 2011/06/12 03:36:01 rmind Exp $");

#ifndef LFS
# define LFS		/* for prototypes in syscallargs.h */
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/syscallargs.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, void *);
int lfs_fasthashget(dev_t, ino_t, struct vnode **);

pid_t lfs_cleaner_pid = 0;

/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
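
/*
 * Usage sketch (illustrative only, not compiled): a cleaner-like process
 * would normally reach this through the lfs_markv(2) wrapper.  The field
 * values below are placeholders; the real lfs_cleanerd fills its
 * BLOCK_INFO array from segment summary data it has read itself.
 *
 *	fsid_t fsid;			// e.g., from statvfs(2)
 *	BLOCK_INFO bi;
 *
 *	bi.bi_inode	= ino;		// owning inode
 *	bi.bi_lbn	= lbn;		// logical block, or LFS_UNUSED_LBN
 *	bi.bi_daddr	= daddr;	// disk address the cleaner found
 *	bi.bi_segcreate	= segcreate;	// segment create time
 *	bi.bi_version	= version;	// inode version from the ifile
 *	bi.bi_bp	= data;		// the block contents
 *	bi.bi_size	= size;
 *	if (lfs_markv(&fsid, &bi, 1) < 0)
 *		warn("lfs_markv");
 */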
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOUFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOUFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode     = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version   = blkiov15[i].bi_version;
		blkiov[i].bi_bp	       = blkiov15[i].bi_bp;
		blkiov[i].bi_size      = blkiov15[i].bi_size;
	}

	if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode	 = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn	 = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr	 = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version	 = blkiov[i].bi_version;
			blkiov15[i].bi_bp	 = blkiov[i].bi_bp;
			blkiov15[i].bi_size	 = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif

#define	LFS_MARKV_MAX_BLOCKS	(LFS_MAX_BUFS)

int
lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov,
    int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp = NULL;
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

	maxino = (fragstoblks(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks) -
		      fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	/*
	 * This seglock is just to prevent the fact that we might have to sleep
	 * from allowing the possibility that our blocks might become
	 * invalid.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty.  */
	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				/* XXX fix for force write */
				v_daddr = ifp->if_daddr;
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
					   &vp,
					   (blkp->bi_lbn == LFS_UNUSED_LBN
					    ? blkp->bi_bp
					    : NULL));

			if (!error) {
				numrefed++;
			} else {
				DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
				      " failed with %d (ino %d, segment %d)\n",
				      error, blkp->bi_inode,
				      dtosn(fs, blkp->bi_daddr)));
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.	(When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.) But, we
				 * still set v_daddr = LFS_UNUSED_DADDR
				 * so as not to test this over and over
				 * again.
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				}
#ifdef DIAGNOSTIC
				else if (error != ENOENT)
					panic("lfs_markv VFS_VGET FAILED");
#endif
				/* lastino = LFS_UNUSED_INUM; */
				v_daddr = LFS_UNUSED_DADDR;
				vp = NULL;
				ip = NULL;
				continue;
			}
			ip = VTOI(vp);
			ninowritten++;
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * locked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* Can't clean VU_DIROP directories in case of truncation */
		/* XXX - maybe we should mark removed dirs specially? */
		if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
			do_again++;
			continue;
		}

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (ifp->if_daddr == blkp->bi_daddr) {
					mutex_enter(&lfs_lock);
					LFS_SET_UINO(ip, IN_CLEANING);
					mutex_exit(&lfs_lock);
				}
				brelse(bp, 0);
			}
			continue;
		}

		b_daddr = 0;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    dbtofsb(fs, b_daddr) != blkp->bi_daddr)
		{
			if (dtosn(fs, dbtofsb(fs, b_daddr)) ==
			    dtosn(fs, blkp->bi_daddr))
			{
				DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %llx vs %llx\n",
				      (long long)blkp->bi_daddr, (long long)dbtofsb(fs, b_daddr)));
			}
			do_again++;
			continue;
		}

		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = fs->lfs_bsize;
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %lld wrong"
			      " size (%ld != %d), try again\n",
			      blkp->bi_inode, (long long)blkp->bi_lbn,
			      (long) obsize, blkp->bi_size));
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.	 If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = fsbtodb(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != fs->lfs_bsize &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				    " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.	So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lblkno(fs, ninowritten * sizeof (struct ufs1_dinode))
		    > LFS_MARKV_MAX_BLOCKS) {
			DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
			      nblkwritten, ninowritten));
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif
	DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
	      nblkwritten, ninowritten));

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp, false, NULL);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	DLOG((DLOG_CLEAN, "lfs_markv err2\n"));

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd.  too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp, false, NULL);
#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif

	return (error);
}

/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
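
/*
 * Usage sketch (illustrative only, not compiled): the cleaner typically
 * calls lfs_bmapv(2) on a whole segment's worth of BLOCK_INFO entries and
 * then hands only the still-live entries to lfs_markv(2).  old_daddr is a
 * placeholder for the address at which the cleaner found each block.
 *
 *	if (lfs_bmapv(&fsid, bi, nblocks) == 0)
 *		for (i = 0; i < nblocks; i++)
 *			if (bi[i].bi_daddr == old_daddr[i])
 *				;	// block is live: pass it to lfs_markv
 */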
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOUFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_bmapv(l->l_proc, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOUFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode     = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version   = blkiov15[i].bi_version;
		blkiov[i].bi_bp	       = blkiov15[i].bi_bp;
		blkiov[i].bi_size      = blkiov15[i].bi_size;
	}

	if ((error = lfs_bmapv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode	 = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn	 = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr	 = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version	 = blkiov[i].bi_version;
			blkiov15[i].bi_bp	 = blkiov[i].bi_bp;
			blkiov15[i].bi_size	 = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif

int
lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ufsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	lfs_cleaner_pid = p->p_pid;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOUFS(mntp);
	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	cnt = blkcnt;

	fs = VFSTOUFS(mntp)->um_lfs;

	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			/*
			 * A regular call to VFS_VGET could deadlock
			 * here.  Instead, we try an unlocked access.
			 */
			mutex_enter(&ufs_ihash_lock);
			vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
			if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) {
				ip = VTOI(vp);
				mutex_enter(vp->v_interlock);
				mutex_exit(&ufs_ihash_lock);
				if (lfs_vref(vp)) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				numrefed++;
			} else {
				mutex_exit(&ufs_ihash_lock);
				/*
				 * Don't VFS_VGET if we're being unmounted,
				 * since we hold vfs_busy().
				 */
				if (mntp->mnt_iflag & IMNT_UNMOUNT) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				error = VFS_VGET(mntp, blkp->bi_inode, &vp);
				if (error) {
					DLOG((DLOG_CLEAN, "lfs_bmapv: vget ino"
					      " %d failed with %d",
					      blkp->bi_inode, error));
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				} else {
					KASSERT(VOP_ISLOCKED(vp));
					VOP_UNLOCK(vp);
					numrefed++;
				}
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.	Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			/* blkp->bi_daddr = LFS_UNUSED_DADDR; */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			/* XXX ondisk32 */
			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = dbtofsb(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = fs->lfs_bsize;
		}
	}

	/*
	 * Finish the old file, if there was one.  The presence
	 * of a usable vnode in vp is signaled by a valid v_daddr.
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);
#endif

	vfs_unbusy(mntp, false, NULL);

	return 0;
}

/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
sys_lfs_segclean(struct lwp *l, const struct sys_lfs_segclean_args *uap, register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;

	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	KERNEL_LOCK(1, NULL);
	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	KERNEL_UNLOCK_ONE(NULL);
	vfs_unbusy(mntp, false, NULL);
	return error;
}
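
/*
 * Usage sketch (illustrative only, not compiled): after lfs_markv(2) has
 * rewritten all live data, the cleaner reclaims the segment via the
 * lfs_segclean(2) wrapper:
 *
 *	if (lfs_segclean(&fsid, segnum) != 0)
 *		warn("segment %lu not cleanable", segnum);
 *
 * EBUSY means the segment is active or still has live bytes; EALREADY
 * means it was already clean.
 */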

/*
 * Actually mark the segment clean.
 * Must be called with the segment lock held.
 */
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
	extern int lfs_dostats;
	struct buf *bp;
	CLEANERINFO *cip;
	SEGUSE *sup;

	if (dtosn(fs, fs->lfs_curseg) == segnum) {
		return (EBUSY);
	}

	LFS_SEGENTRY(sup, fs, segnum, bp);
	if (sup->su_nbytes) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " %d live bytes\n", segnum, sup->su_nbytes));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (sup->su_flags & SEGUSE_ACTIVE) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " segment is active\n", segnum));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " segment is already clean\n", segnum));
		brelse(bp, 0);
		return (EALREADY);
	}

	fs->lfs_avail += segtod(fs, 1);
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		fs->lfs_avail -= btofsb(fs, LFS_SBPAD);
	if (fs->lfs_version > 1 && segnum == 0 &&
	    fs->lfs_start < btofsb(fs, LFS_LABELPAD))
		fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start;
	mutex_enter(&lfs_lock);
	fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	if (fs->lfs_dmeta < 0)
		fs->lfs_dmeta = 0;
	mutex_exit(&lfs_lock);
	sup->su_flags &= ~SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, segnum, bp);

	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	fs->lfs_nclean = cip->clean;
	cip->bfree = fs->lfs_bfree;
	mutex_enter(&lfs_lock);
	cip->avail = fs->lfs_avail - fs->lfs_ravail - fs->lfs_favail;
	wakeup(&fs->lfs_avail);
	mutex_exit(&lfs_lock);
	(void) LFS_BWRITE_LOG(bp);

	if (lfs_dostats)
		++lfs_stats.segs_reclaimed;

	return (0);
}
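
/*
 * Accounting note, with hypothetical numbers: cleaning an ordinary
 * segment returns all segtod(fs, 1) frags to lfs_avail (for 1 MB
 * segments and 1 KB frags, that would be 1024 frags).  A segment
 * holding a superblock keeps btofsb(fs, LFS_SBPAD) frags unavailable,
 * and on a v2 filesystem segment 0 also loses the label pad area.
 * lfs_bfree, by contrast, grows only by the summary and inode blocks;
 * the data blocks were credited back when they were rewritten
 * elsewhere.
 */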

/*
 * This will block until a segment in file system fsid is written.  A timeout
 * may be specified, which will wake the cleaner automatically when it expires.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 */
int
lfs_segwait(fsid_t *fsidp, struct timeval *tv)
{
	struct mount *mntp;
	void *addr;
	u_long timeout;
	int error;

	KERNEL_LOCK(1, NULL);
	if (fsidp == NULL || (mntp = vfs_getvfs(fsidp)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	/*
	 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
	 * XXX IS THAT WHAT IS INTENDED?
	 */
	timeout = tvtohz(tv);
	error = tsleep(addr, PCATCH | PVFS, "segment", timeout);
	KERNEL_UNLOCK_ONE(NULL);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * sys_lfs_segwait:
 *
 * System call wrapper around lfs_segwait().
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is returned on error.
 */
int
sys___lfs_segwait50(struct lwp *l, const struct sys___lfs_segwait50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */
	struct timeval atv;
	fsid_t fsid;
	int error;

	/* XXX need we be su to segwait? */
	if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
	    NULL)) != 0)
		return (error);
	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
	} else /* NULL or invalid */
		atv.tv_sec = atv.tv_usec = 0;
	return lfs_segwait(&fsid, &atv);
}
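
/*
 * Usage sketch (illustrative only, not compiled): a cleaner main loop
 * might wait for segment writes with a periodic wakeup, e.g.
 *
 *	struct timeval tv = { 300, 0 };	// wake at least every five minutes
 *
 *	for (;;) {
 *		lfs_segwait(&fsid, &tv);
 *		// examine segment usage; clean if space is getting low
 *	}
 */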

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * We lfs_vref the vnode here; it is the caller's responsibility to
 * lfs_vunref it when finished.
 */
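
/*
 * Caller pattern, as a sketch:
 *
 *	error = lfs_fastvget(mntp, ino, daddr, &vp, dinp);
 *	if (error == 0 && vp != NULL) {
 *		...
 *		lfs_vunref(vp);
 *	}
 *
 * EAGAIN means the vnode was locked or being recycled; the cleaner just
 * skips that inode and retries on a later pass.
 */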

int
lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp)
{
	struct vnode *vp;

	mutex_enter(&ufs_ihash_lock);
	if ((vp = ufs_ihashlookup(dev, ino)) != NULL) {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ufs_ihash_lock);
		if (vp->v_iflag & VI_XLOCK) {
			DLOG((DLOG_CLEAN, "lfs_fastvget: ino %d VI_XLOCK\n",
			      ino));
			lfs_stats.clean_vnlocked++;
			mutex_exit(vp->v_interlock);
			return EAGAIN;
		}
		if (lfs_vref(vp)) {
			DLOG((DLOG_CLEAN, "lfs_fastvget: lfs_vref failed"
			      " for ino %d\n", ino));
			lfs_stats.clean_inlocked++;
			return EAGAIN;
		}
	} else {
		mutex_exit(&ufs_ihash_lock);
	}
	*vpp = vp;

	return (0);
}

int
lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp,
	     struct ufs1_dinode *dinp)
{
	struct inode *ip;
	struct ufs1_dinode *dip;
	struct vnode *vp;
	struct ufsmount *ump;
	dev_t dev;
	int error, retries;
	struct buf *bp;
	struct lfs *fs;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_lfs;

	/*
	 * Wait until the filesystem is fully mounted before allowing vget
	 * to complete.	 This prevents possible problems with roll-forward.
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_flags & LFS_NOTYET) {
		mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0,
			&lfs_lock);
	}
	mutex_exit(&lfs_lock);

	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */

	error = lfs_fasthashget(dev, ino, vpp);
	if (error != 0 || *vpp != NULL)
		return (error);

	/*
	 * getnewvnode(9) will call vfs_busy, which will block if the
	 * filesystem is being unmounted; but umount(9) is waiting for
	 * us because we're already holding the fs busy.
	 * XXXMP
	 */
	if (mp->mnt_iflag & IMNT_UNMOUNT) {
		*vpp = NULL;
		return EDEADLK;
	}
	error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, NULL, &vp);
	if (error) {
		*vpp = NULL;
		return (error);
	}

	mutex_enter(&ufs_hashlock);
	error = lfs_fasthashget(dev, ino, vpp);
	if (error != 0 || *vpp != NULL) {
		mutex_exit(&ufs_hashlock);
		ungetnewvnode(vp);
		return (error);
	}

	/* Allocate new vnode/inode. */
	lfs_vcreate(mp, ino, vp);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);
	mutex_exit(&ufs_hashlock);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = fs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		error = copyin(dinp, ip->i_din.ffs1_din, sizeof (struct ufs1_dinode));
		if (error) {
			DLOG((DLOG_CLEAN, "lfs_fastvget: dinode copyin failed"
			      " for ino %d\n", ino));
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			VOP_UNLOCK(vp);
			lfs_vunref(vp);
			*vpp = NULL;
			return (error);
		}
		if (ip->i_number != ino)
			panic("lfs_fastvget: I was fed the wrong inode!");
	} else {
		retries = 0;
	    again:
		error = bread(ump->um_devvp, fsbtodb(fs, daddr), fs->lfs_ibsize,
			      NOCRED, 0, &bp);
		if (error) {
			DLOG((DLOG_CLEAN, "lfs_fastvget: bread failed (%d)\n",
			      error));
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * Iput() will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			VOP_UNLOCK(vp);
			lfs_vunref(vp);
			brelse(bp, 0);
			*vpp = NULL;
			return (error);
		}
		dip = lfs_ifind(ump->um_lfs, ino, bp);
		if (dip == NULL) {
			/* Assume write has not completed yet; try again */
			brelse(bp, BC_INVAL);
			++retries;
			if (retries > LFS_IFIND_RETRIES)
				panic("lfs_fastvget: dinode not found");
			DLOG((DLOG_CLEAN, "lfs_fastvget: dinode not found,"
			      " retrying...\n"));
			goto again;
		}
		*ip->i_din.ffs1_din = *dip;
		brelse(bp, 0);
	}
	lfs_vinit(mp, &vp);

	*vpp = vp;

	KASSERT(VOP_ISLOCKED(vp));
	VOP_UNLOCK(vp);

	return (0);
}

/*
 * Make up a "fake" cleaner buffer, copy the data from userland into it.
 */
struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, void *uaddr)
{
	struct buf *bp;
	int error;

	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);

	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(fs, bp);
		return NULL;
	}
	KDASSERT(bp->b_iodone == lfs_callback);

#if 0
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	mutex_exit(&lfs_lock);
#endif
	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}