/*	$NetBSD: lfs_syscalls.c,v 1.100 2003/12/04 14:57:47 yamt Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.100 2003/12/04 14:57:47 yamt Exp $");

#ifndef LFS
# define LFS		/* for prototypes in syscallargs.h */
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <sys/sa.h>
#include <sys/syscallargs.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t);
int lfs_fasthashget(dev_t, ino_t, struct vnode **);

int debug_cleaner = 0;
int clean_vnlocked = 0;
int clean_inlocked = 0;
int verbose_debug = 0;

pid_t lfs_cleaner_pid = 0;

#define LFS_FORCE_WRITE UNASSIGNED

/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
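
/*
 * Illustrative userland sketch (not part of this file): a cleaner might
 * drive lfs_markv(2) roughly as follows.  The lfs_markv() stub, NBLKS,
 * and the per-block source arrays here are hypothetical; the BLOCK_INFO
 * fields are the ones this file copies in and out:
 *
 *	fsid_t fsid;			   obtained via statvfs(2)
 *	BLOCK_INFO bi[NBLKS];
 *	int i, nblks;
 *
 *	for (i = 0; i < nblks; i++) {
 *		bi[i].bi_inode	   = ino[i];	   owning inode
 *		bi[i].bi_lbn	   = lbn[i];	   logical block number
 *		bi[i].bi_daddr	   = daddr[i];	   where we saw it on disk
 *		bi[i].bi_segcreate = segcreate;	   segment create time
 *		bi[i].bi_version   = vers[i];	   version from the ifile
 *		bi[i].bi_bp	   = data[i];	   userland copy of the block
 *		bi[i].bi_size	   = size[i];
 *	}
 *	if (lfs_markv(&fsid, bi, nblks) < 0 && errno == EAGAIN)
 *		retry these blocks on a later pass;
 */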
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_markv(struct proc *p, void *v, register_t *retval)
{
	struct sys_lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(p, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	free(blkiov, M_SEGMENT);
	return error;
}
#else
int
sys_lfs_markv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_markv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;

	if ((error = suser(l->l_proc->p_ucred, &l->l_proc->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	blkiov15 = malloc(blkcnt * sizeof(BLOCK_INFO_15), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode     = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version   = blkiov15[i].bi_version;
		blkiov[i].bi_bp	       = blkiov15[i].bi_bp;
		blkiov[i].bi_size      = blkiov15[i].bi_size;
	}

	if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode	 = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn	 = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr	 = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version	 = blkiov[i].bi_version;
			blkiov15[i].bi_bp	 = blkiov[i].bi_bp;
			blkiov15[i].bi_size	 = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	free(blkiov, M_SEGMENT);
	free(blkiov15, M_SEGMENT);
	return error;
}
#endif

#define	LFS_MARKV_MAX_BLOCKS	(LFS_MAX_BUFS)

int
lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
#ifdef DEBUG_LFS
	int vputc = 0, iwritten = 0;
#endif
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

	maxino = (fragstoblks(fs, fsbtofrags(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks)) -
		      fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	/*
	 * Take the seglock, so that the blocks we are about to mark dirty
	 * cannot become invalid while we might have to sleep.
	 *
	 * Note also that unless we specify SEGM_CKP, any Ifile blocks
	 * that we might be asked to clean will never get to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty.  */
	error = 0;

#ifdef DEBUG_LFS
	/* Run through and count the inodes */
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp) {
		if (lastino != blkp->bi_inode) {
			lastino = blkp->bi_inode;
			vputc++;
		}
	}
	cnt = blkcnt;
	printf("[%d/",vputc);
	iwritten = 0;
#endif /* DEBUG_LFS */
	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		if (blkp->bi_daddr == LFS_FORCE_WRITE)
			printf("lfs_markv: warning: force-writing ino %d "
			       "lbn %lld\n",
			    blkp->bi_inode, (long long)blkp->bi_lbn);
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
				if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
					iwritten++;
#endif
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				/* XXX fix for force write */
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* Don't force-write the ifile */
			if (blkp->bi_inode == LFS_IFILE_INUM
			    && blkp->bi_daddr == LFS_FORCE_WRITE)
			{
				continue;
			}
			if (v_daddr == LFS_UNUSED_DADDR
			    && blkp->bi_daddr != LFS_FORCE_WRITE)
			{
				continue;
			}

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
					   &vp,
					   (blkp->bi_lbn == LFS_UNUSED_LBN
					    ? blkp->bi_bp
					    : NULL));

			if (!error) {
				numrefed++;
			}
			if (error) {
#ifdef DEBUG_LFS
				printf("lfs_markv: lfs_fastvget failed with %d (ino %d, segment %d)\n",
				       error, blkp->bi_inode,
				       dtosn(fs, blkp->bi_daddr));
#endif /* DEBUG_LFS */
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.	(When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.) But, we
				 * still set v_daddr = LFS_UNUSED_DADDR
				 * so as not to test this over and over
				 * again.
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				}
#ifdef DIAGNOSTIC
				else if (error != ENOENT)
					panic("lfs_markv VFS_VGET FAILED");
#endif
				/* lastino = LFS_UNUSED_INUM; */
				v_daddr = LFS_UNUSED_DADDR;
				vp = NULL;
				ip = NULL;
				continue;
			}
			ip = VTOI(vp);
			ninowritten++;
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * locked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (ifp->if_daddr == blkp->bi_daddr
				   || blkp->bi_daddr == LFS_FORCE_WRITE)
				{
					LFS_SET_UINO(ip, IN_CLEANING);
				}
				brelse(bp);
			}
			continue;
		}

		b_daddr = 0;
		if (blkp->bi_daddr != LFS_FORCE_WRITE) {
			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
			    dbtofsb(fs, b_daddr) != blkp->bi_daddr)
			{
				if (dtosn(fs,dbtofsb(fs, b_daddr))
				   == dtosn(fs,blkp->bi_daddr))
				{
					printf("lfs_markv: wrong da same seg: %llx vs %llx\n",
					       (long long)blkp->bi_daddr, (long long)dbtofsb(fs, b_daddr));
				}
				do_again++;
				continue;
			}
		}

		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = fs->lfs_bsize;
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			printf("lfs_markv: ino %d lbn %lld wrong size (%ld != %d), try again\n",
				blkp->bi_inode, (long long)blkp->bi_lbn,
				(long) obsize, blkp->bi_size);
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.	 If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = fsbtodb(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != fs->lfs_bsize &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				    " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.	So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lblkno(fs, ninowritten * sizeof (struct ufs1_dinode))
		    > LFS_MARKV_MAX_BLOCKS) {
#ifdef DEBUG_LFS
			printf("lfs_markv: writing %d blks %d inos\n",
			    nblkwritten, ninowritten);
#endif
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
#ifdef DEBUG_LFS
		if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
			iwritten++;
#endif
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DEBUG_LFS
	printf("%d]",iwritten);
	if (numrefed != 0) {
		panic("lfs_markv: numrefed=%d", numrefed);
	}
#endif

#ifdef DEBUG_LFS
	printf("lfs_markv: writing %d blks %d inos (check point)\n",
	    nblkwritten, ninowritten);
#endif
	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	printf("lfs_markv err2\n");

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd.  too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp);
#ifdef DEBUG_LFS
	if (numrefed != 0) {
		panic("lfs_markv: numrefed=%d", numrefed);
	}
#endif

	return (error);
}

/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
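
/*
 * Illustrative userland sketch (hypothetical, based on this file's
 * copyin/copyout behavior): the cleaner typically calls lfs_bmapv(2)
 * on its candidate blocks first, and keeps for lfs_markv(2) only those
 * whose returned bi_daddr still matches the address where the cleaner
 * found them, i.e. the blocks that are still live in the segment:
 *
 *	if (lfs_bmapv(&fsid, bi, nblks) == 0) {
 *		for (i = 0; i < nblks; i++)
 *			if (bi[i].bi_daddr == seg_daddr[i])
 *				keep bi[i] for the markv pass;
 *	}
 */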
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_bmapv(struct proc *p, void *v, register_t *retval)
{
	struct sys_lfs_bmapv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_bmapv(p, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	free(blkiov, M_SEGMENT);
	return error;
}
#else
int
sys_lfs_bmapv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_bmapv_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	blkcnt = SCARG(uap, blkcnt);
	if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);
	blkiov = malloc(blkcnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	blkiov15 = malloc(blkcnt * sizeof(BLOCK_INFO_15), M_SEGMENT, M_WAITOK);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode     = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version   = blkiov15[i].bi_version;
		blkiov[i].bi_bp	       = blkiov15[i].bi_bp;
		blkiov[i].bi_size      = blkiov15[i].bi_size;
	}

	if ((error = lfs_bmapv(p, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode	 = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn	 = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr	 = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version	 = blkiov[i].bi_version;
			blkiov15[i].bi_bp	 = blkiov[i].bi_bp;
			blkiov15[i].bi_size	 = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	free(blkiov, M_SEGMENT);
	free(blkiov15, M_SEGMENT);
	return error;
}
#endif

int
lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ufsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	lfs_cleaner_pid = p->p_pid;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOUFS(mntp);
	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	cnt = blkcnt;

	fs = VFSTOUFS(mntp)->um_lfs;

	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			/*
			 * A regular call to VFS_VGET could deadlock
			 * here.  Instead, we try an unlocked access.
			 */
			vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
			if (vp != NULL && !(vp->v_flag & VXLOCK)) {
				ip = VTOI(vp);
				if (lfs_vref(vp)) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				numrefed++;
			} else {
				error = VFS_VGET(mntp, blkp->bi_inode, &vp);
				if (error) {
#ifdef DEBUG_LFS
					printf("lfs_bmapv: vget of ino %d failed with %d\n",blkp->bi_inode,error);
#endif
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				} else {
					KASSERT(VOP_ISLOCKED(vp));
					VOP_UNLOCK(vp, 0);
					numrefed++;
				}
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.	Note that we DO NOT set the
			 * bi_daddr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			/* blkp->bi_daddr = LFS_UNUSED_DADDR; */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			/* XXX ondisk32 */
			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = dbtofsb(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = fs->lfs_bsize;
		}
	}

	/*
	 * Finish the old file, if there was one.  The presence
	 * of a usable vnode in vp is signaled by a valid v_daddr.
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DEBUG_LFS
	if (numrefed != 0) {
		panic("lfs_bmapv: numrefed=%d", numrefed);
	}
#endif

	vfs_unbusy(mntp);

	return 0;
}

/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
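
/*
 * Illustrative userland sketch (hypothetical): once lfs_markv(2) has
 * rewritten everything live in a segment, the cleaner can retire it:
 *
 *	if (lfs_segclean(&fsid, segnum) < 0 && errno == EBUSY)
 *		the segment is active or still has live data; skip it;
 */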
int
sys_lfs_segclean(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_segclean_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */ *uap = v;
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;
	struct proc *p = l->l_proc;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOUFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
		return (error);

	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	vfs_unbusy(mntp);
	return error;
}

/*
 * Actually mark the segment clean.
 * Must be called with the segment lock held.
 */
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
	struct buf *bp;
	CLEANERINFO *cip;
	SEGUSE *sup;

	if (dtosn(fs, fs->lfs_curseg) == segnum) {
		return (EBUSY);
	}

	LFS_SEGENTRY(sup, fs, segnum, bp);
	if (sup->su_nbytes) {
		printf("lfs_segclean: not cleaning segment %lu: %d live bytes\n",
			segnum, sup->su_nbytes);
		brelse(bp);
		return (EBUSY);
	}
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		brelse(bp);
		return (EALREADY);
	}

	fs->lfs_avail += segtod(fs, 1);
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		fs->lfs_avail -= btofsb(fs, LFS_SBPAD);
	if (fs->lfs_version > 1 && segnum == 0 &&
	    fs->lfs_start < btofsb(fs, LFS_LABELPAD))
		fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start;
	fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
		btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
	if (fs->lfs_dmeta < 0)
		fs->lfs_dmeta = 0;
	sup->su_flags &= ~SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, segnum, bp);

	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	fs->lfs_nclean = cip->clean;
	cip->bfree = fs->lfs_bfree;
	cip->avail = fs->lfs_avail - fs->lfs_ravail;
	(void) LFS_BWRITE_LOG(bp);
	wakeup(&fs->lfs_avail);

	return (0);
}
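
/*
 * Worked example of the accounting above, under an assumed geometry
 * (not taken from any particular filesystem): with one-megabyte
 * segments and 512-byte frags, segtod(fs, 1) credits 2048 frags back
 * to lfs_avail; if the segment hosts a superblock, the btofsb(fs,
 * LFS_SBPAD) frags occupied by the superblock pad are debited again,
 * since they can never hold file data.
 */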

/*
 * This will block until a segment in file system fsid is written.  A timeout
 * may be specified, after which the cleaner will be awakened automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 */
int
lfs_segwait(fsid_t *fsidp, struct timeval *tv)
{
	struct mount *mntp;
	void *addr;
	u_long timeout;
	int error, s;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	/*
	 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
	 * XXX IS THAT WHAT IS INTENDED?
	 */
	s = splclock();
	timeradd(tv, &time, tv);
	timeout = hzto(tv);
	splx(s);
	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * sys_lfs_segwait:
 *
 * System call wrapper around lfs_segwait().
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is returned on error.
 */
int
sys_lfs_segwait(struct lwp *l, void *v, register_t *retval)
{
	struct sys_lfs_segwait_args /* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct timeval atv;
	fsid_t fsid;
	int error;

	/* XXX need we be su to segwait? */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) {
		return (error);
	}
	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
	} else /* NULL or invalid */
		atv.tv_sec = atv.tv_usec = 0;
	return lfs_segwait(&fsid, &atv);
}
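
/*
 * Illustrative userland sketch (hypothetical): a cleaner's main loop
 * might block here between passes, waking at least once per interval
 * even when no segment is written:
 *
 *	struct timeval tv = { 60, 0 };
 *	(void)lfs_segwait(&fsid, &tv);
 */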

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * we lfs_vref, and it is the caller's responsibility to lfs_vunref
 * when finished.
 */
extern struct lock ufs_hashlock;

int
lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp)
{

	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */
	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
		if ((*vpp)->v_flag & VXLOCK) {
			printf("lfs_fastvget: vnode VXLOCKed for ino %d\n",
			       ino);
			clean_vnlocked++;
#ifdef LFS_EAGAIN_FAIL
			return EAGAIN;
#endif
		}
		if (lfs_vref(*vpp)) {
			clean_inlocked++;
			return EAGAIN;
		}
	} else
		*vpp = NULL;

	return (0);
}

int
lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, struct ufs1_dinode *dinp)
{
	struct inode *ip;
	struct ufs1_dinode *dip;
	struct vnode *vp;
	struct ufsmount *ump;
	dev_t dev;
	int error, retries;
	struct buf *bp;
	struct lfs *fs;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_lfs;

	/*
	 * Wait until the filesystem is fully mounted before allowing vget
	 * to complete.	 This prevents possible problems with roll-forward.
	 */
	while (fs->lfs_flags & LFS_NOTYET) {
		tsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0);
	}
	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */

	error = lfs_fasthashget(dev, ino, vpp);
	if (error != 0 || *vpp != NULL)
		return (error);

	if ((error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}

	do {
		error = lfs_fasthashget(dev, ino, vpp);
		if (error != 0 || *vpp != NULL) {
			ungetnewvnode(vp);
			return (error);
		}
	} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));

	/* Allocate new vnode/inode. */
	lfs_vcreate(mp, ino, vp);

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);
	lockmgr(&ufs_hashlock, LK_RELEASE, 0);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = fs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		error = copyin(dinp, ip->i_din.ffs1_din, sizeof (struct ufs1_dinode));
		if (error) {
			printf("lfs_fastvget: dinode copyin failed for ino %d\n", ino);
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			*vpp = NULL;
			return (error);
		}
		if (ip->i_number != ino)
			panic("lfs_fastvget: I was fed the wrong inode!");
	} else {
		retries = 0;
	    again:
		error = bread(ump->um_devvp, fsbtodb(fs, daddr), fs->lfs_ibsize,
			      NOCRED, &bp);
		if (error) {
			printf("lfs_fastvget: bread failed with %d\n",error);
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * Iput() will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lockmgr(&vp->v_lock, LK_RELEASE, &vp->v_interlock);
			lfs_vunref(vp);
			brelse(bp);
			*vpp = NULL;
			return (error);
		}
		dip = lfs_ifind(ump->um_lfs, ino, bp);
		if (dip == NULL) {
			/* Assume write has not completed yet; try again */
			bp->b_flags |= B_INVAL;
			brelse(bp);
			++retries;
			if (retries > LFS_IFIND_RETRIES)
				panic("lfs_fastvget: dinode not found");
			printf("lfs_fastvget: dinode not found, retrying...\n");
			goto again;
		}
		*ip->i_din.ffs1_din = *dip;
		brelse(bp);
	}
	lfs_vinit(mp, &vp);

	*vpp = vp;

	KASSERT(VOP_ISLOCKED(vp));
	VOP_UNLOCK(vp, 0);

	return (0);
}

/*
 * Make up a "fake" cleaner buffer, copy the data from userland into it.
 */
struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uaddr)
{
	struct buf *bp;
	int error;

	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);

	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(fs, bp);
		return NULL;
	}
	KDASSERT(bp->b_iodone == lfs_callback);

#if 0
	++fs->lfs_iocount;
#endif
	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}
1197