/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>
#include <sys/atomic.h>

extern int		maxphys;
extern uint_t		bypass_snapshot_throttle_key;

extern struct kmem_cache	*lufs_sv;
extern struct kmem_cache	*lufs_bp;

static void
makebusy(ml_unit_t *ul, buf_t *bp)
{
	sema_p(&bp->b_sem);
	if ((bp->b_flags & B_ERROR) == 0)
		return;
	if (bp->b_flags & B_READ)
		ldl_seterror(ul, "Error reading ufs log");
	else
		ldl_seterror(ul, "Error writing ufs log");
}

static int
logdone(buf_t *bp)
{
	bp->b_flags |= B_DONE;

	if (bp->b_flags & B_WRITE)
		sema_v(&bp->b_sem);
	else
		/* wakeup the thread waiting on this buf */
		sema_v(&bp->b_io);
	return (0);
}

static int
ldl_strategy_done(buf_t *cb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*bp;

	ASSERT(SEMA_HELD(&cb->b_sem));
	ASSERT((cb->b_flags & B_DONE) == 0);

	/*
	 * Compute address of the ``save'' struct
	 */
	lbp = (lufs_buf_t *)cb;
	sv = (lufs_save_t *)lbp->lb_ptr;
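	/*
	 * The cast above is valid only because lb_buf is the first member
	 * of lufs_buf_t; the buf_t handed to this iodone routine is the
	 * clone created in ldl_strategy, so its address is also the
	 * address of the enclosing lufs_buf_t.
	 */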

	if (cb->b_flags & B_ERROR)
		sv->sv_error = 1;

	/*
	 * If this is the last request, release the resources and
	 * ``done'' the original buffer header.
	 */
	if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
		kmem_cache_free(lufs_bp, lbp);
		return (1);
	}
	/* Propagate any errors back to the original buffer header */
	bp = sv->sv_bp;
	if (sv->sv_error)
		bp->b_flags |= B_ERROR;
	kmem_cache_free(lufs_bp, lbp);
	kmem_cache_free(lufs_sv, sv);

	biodone(bp);
	return (0);
}

/*
 * Map the log logical block number to a physical disk block number
 */
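/*
 * The extent list (un_ebp->ic_extents) is sorted by ic_lbno, so this is
 * an iterative binary search: the probe index i is repeatedly moved to
 * the middle of the remaining [s, e) range until an extent containing
 * lblkno is found, or the range stops shrinking (ENOENT).  *pbcount is
 * clipped so the returned physical run never extends past the end of
 * the matched extent.
 */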
static int
map_frag(
	ml_unit_t	*ul,
	daddr_t		lblkno,
	size_t		bcount,
	daddr_t		*pblkno,
	size_t		*pbcount)
{
	ic_extent_t	*ext = ul->un_ebp->ic_extents;
	uint32_t	e = ul->un_ebp->ic_nextents;
	uint32_t	s = 0;
	uint32_t	i = e >> 1;
	uint32_t	lasti = i;
	uint32_t	bno_off;

again:
	if (ext[i].ic_lbno <= lblkno) {
		if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
			/* FOUND IT */
			bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
			*pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
			*pblkno = ext[i].ic_pbno + bno_off;
			return (0);
		} else
			s = i;
	} else
		e = i;
	i = s + ((e - s) >> 1);

	if (i == lasti) {
		*pbcount = bcount;
		return (ENOENT);
	}
	lasti = i;

	goto again;
}

/*
 * The log is a set of extents (which typically will be only one, but
 * may be more if the disk was close to full when the log was created),
 * and hence the logical offsets into the log have to be translated
 * into their real device locations before calling the device's
 * strategy routine. The translation may result in several IO requests
 * if this request spans extents.
 */
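/*
 * Illustrative example (made-up numbers): a 16k request at log sector
 * 100, against extents {lbno 0, nbno 104} and {lbno 104, nbno 400}, is
 * split into two cloned bufs - one for the 4 sectors (2k) left in the
 * first extent and one for the remaining 14k - each aimed at its own
 * physical start block.  ldl_strategy_done() runs as each clone
 * completes and biodone()s the original buf once sv_nb_left drains to
 * zero.
 */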
void
ldl_strategy(ml_unit_t *ul, buf_t *pb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*cb;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
	daddr_t		lblkno, pblkno;
	size_t		nb_left, pbcount;
	off_t		offset;
	dev_t		dev	= ul->un_dev;
	int		error;
	int		read = pb->b_flags & B_READ;

	/*
	 * Allocate and initialise the save structure.
	 */
	sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
	sv->sv_error = 0;
	sv->sv_bp = pb;
	nb_left = pb->b_bcount;
	sv->sv_nb_left = nb_left;

	lblkno = pb->b_blkno;
	offset = 0;

	do {
		error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);

		lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
		bioinit(&lbp->lb_buf);
		lbp->lb_ptr = sv;

		cb = bioclone(pb, offset, pbcount, dev,
		    pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);

		offset += pbcount;
		lblkno += btodb(pbcount);
		nb_left -= pbcount;

		if (error) {
			cb->b_flags |= B_ERROR;
			cb->b_resid = cb->b_bcount;
			biodone(cb);
		} else {
			if (read) {
				logstats.ls_ldlreads.value.ui64++;
				ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
				lwp_stat_update(LWP_STAT_INBLK, 1);
			} else {
				logstats.ls_ldlwrites.value.ui64++;
				lwp_stat_update(LWP_STAT_OUBLK, 1);
			}

			/*
			 * Write through the snapshot driver if necessary.
			 * We do not want this write to be throttled because
			 * we are holding the un_log mutex here. If we
			 * are throttled in fssnap_translate, the fssnap_taskq
			 * thread which can wake us up can get blocked on
			 * the un_log mutex resulting in a deadlock.
			 */
			if (ufsvfsp->vfs_snapshot) {
				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)1);
				fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);

				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)0);
			} else {
				(void) bdev_strategy(cb);
			}
		}

	} while (nb_left);
}

static void
writelog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * This is really a B_ASYNC write but we want Presto to
	 * cache this write.  The iodone routine, logdone, processes
	 * the buf correctly.
	 */
	bp->b_flags = B_WRITE;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/*
	 * return EIO for every IO if in hard error state
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	ldl_strategy(ul, bp);
}

static void
readlog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT(bp->b_bcount);

	bp->b_flags = B_READ;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/* all IO returns errors when in error state */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		(void) trans_wait(bp);
		return;
	}

	ldl_strategy(ul, bp);

	if (trans_wait(bp))
		ldl_seterror(ul, "Error reading ufs log");
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
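/*
 * push_dirty_bp() asynchronously writes out the buf currently being
 * filled (cb_dirty).  If that buf still has unused buffer space, the
 * leftover space is handed to a fresh bp (taken from the free list, or
 * newly allocated) which is linked in at the LRU position so it can be
 * reused for the next fill.
 */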
static void
push_dirty_bp(ml_unit_t *ul, buf_t *bp)
{
	buf_t		*newbp;
	cirbuf_t	*cb		= &ul->un_wrbuf;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
	ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);

	/*
	 * async write the buf
	 */
	writelog(ul, bp);

	/*
	 * no longer filling any buf
	 */
	cb->cb_dirty = NULL;

	/*
	 * no extra buffer space; all done
	 */
	if (bp->b_bcount == bp->b_bufsize)
		return;

	/*
	 * give extra buffer space to a new bp
	 * 	try to take buf off of free list
	 */
	if ((newbp = cb->cb_free) != NULL) {
		cb->cb_free = newbp->b_forw;
	} else {
		newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	}
	newbp->b_flags = 0;
	newbp->b_bcount = 0;
	newbp->b_file = NULL;
	newbp->b_offset = -1;
	newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
	newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
	bp->b_bufsize = bp->b_bcount;

	/*
	 * lock out readers and put new buf at LRU position
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	newbp->b_forw = bp->b_forw;
	newbp->b_back = bp;
	bp->b_forw->b_back = newbp;
	bp->b_forw = newbp;
	rw_exit(&cb->cb_rwlock);
}

static void
inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
{
	buf_t		*bp;
	off_t		elof	= lof + nb;
	off_t		buflof;
	off_t		bufelof;

	/*
	 * discard all bufs that overlap the range (lof, lof + nb)
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp;
	do {
		if (bp == cb->cb_dirty || bp->b_bcount == 0) {
			bp = bp->b_forw;
			continue;
		}
		buflof = dbtob(bp->b_blkno);
		bufelof = buflof + bp->b_bcount;
		if ((buflof < lof && bufelof <= lof) ||
		    (buflof >= elof && bufelof > elof)) {
			bp = bp->b_forw;
			continue;
		}
		makebusy(ul, bp);
		bp->b_flags = 0;
		bp->b_bcount = 0;
		sema_v(&bp->b_sem);
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
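/*
 * get_write_bp() returns, made busy, the buf the caller should continue
 * filling: the current dirty buf if there is one, otherwise the LRU buf
 * from the write ring re-aimed at the current log tail.  Any cached buf
 * overlapping the tail is invalidated first, since it is about to be
 * overwritten.
 */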
static buf_t *
get_write_bp(ml_unit_t *ul)
{
	cirbuf_t	*cb = &ul->un_wrbuf;
	buf_t		*bp;

	/*
	 * cb_dirty is the buffer we are currently filling, if any
	 */
	if ((bp = cb->cb_dirty) != NULL) {
		makebusy(ul, bp);
		return (bp);
	}
	/*
	 * discard any bp that overlaps the current tail since we are
	 * about to overwrite it.
	 */
	inval_range(ul, cb, ul->un_tail_lof, 1);

	/*
	 * steal LRU buf
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);

	cb->cb_dirty = bp;
	cb->cb_bp = bp;

	bp->b_flags = 0;
	bp->b_bcount = 0;
	bp->b_blkno = btodb(ul->un_tail_lof);
	ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
	rw_exit(&cb->cb_rwlock);

	/*
	 * NOTE:
	 *	1. un_tail_lof never addresses >= un_eol_lof
	 *	2. b_blkno + btodb(b_bufsize) may be > un_eol_lof
	 *		this case is handled in storebuf
	 */
	return (bp);
}

void
alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
{
	int	i;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * preallocate 3 bp's and put them on the free list.
	 */
	for (i = 0; i < 3; ++i) {
		bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_offset = -1;
		bp->b_forw = cb->cb_free;
		cb->cb_free = bp;
	}

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * first bp claims entire write buffer
	 */
	bp = cb->cb_free;
	cb->cb_free = bp->b_forw;

	bp->b_forw = bp;
	bp->b_back = bp;
	cb->cb_bp = bp;
	bp->b_un.b_addr = cb->cb_va;
	bp->b_bufsize = cb->cb_nb;

	rw_exit(&cb->cb_rwlock);
}

void
alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
{
	caddr_t	va;
	size_t	nb;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * preallocate N bufs that are hard-sized to blksize
	 *	in other words, the read buffer pool is a linked list
	 *	of statically sized bufs.
	 */
	va = cb->cb_va;
	while ((nb = bufsize) != 0) {
		if (nb > blksize)
			nb = blksize;
		bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
		bzero(bp, sizeof (buf_t));
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_un.b_addr = va;
		bp->b_bufsize = nb;
		if (cb->cb_bp) {
			bp->b_forw = cb->cb_bp->b_forw;
			bp->b_back = cb->cb_bp;
			cb->cb_bp->b_forw->b_back = bp;
			cb->cb_bp->b_forw = bp;
		} else
			bp->b_forw = bp->b_back = bp;
		cb->cb_bp = bp;
		bufsize -= nb;
		va += nb;
	}

	rw_exit(&cb->cb_rwlock);
}

void
free_cirbuf(cirbuf_t *cb)
{
	buf_t	*bp;

	if (cb->cb_nb == 0)
		return;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	ASSERT(cb->cb_dirty == NULL);

	/*
	 * free the active bufs
	 */
	while ((bp = cb->cb_bp) != NULL) {
		if (bp == bp->b_forw)
			cb->cb_bp = NULL;
		else
			cb->cb_bp = bp->b_forw;
		bp->b_back->b_forw = bp->b_forw;
		bp->b_forw->b_back = bp->b_back;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}

	/*
	 * free the free bufs
	 */
	while ((bp = cb->cb_free) != NULL) {
		cb->cb_free = bp->b_forw;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}
	kmem_free(cb->cb_va, cb->cb_nb);
	cb->cb_va = NULL;
	cb->cb_nb = 0;
	rw_exit(&cb->cb_rwlock);
	rw_destroy(&cb->cb_rwlock);
}

static int
within_range(off_t lof, daddr_t blkno, ulong_t bcount)
{
	off_t	blof	= dbtob(blkno);

	return ((lof >= blof) && (lof < (blof + bcount)));
}

static buf_t *
find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t *bp;

	/*
	 * find a buf that contains the offset lof
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bp = cb->cb_bp;
	do {
		if (bp->b_bcount &&
		    within_range(lof, bp->b_blkno, bp->b_bcount)) {
			makebusy(ul, bp);
			rw_exit(&cb->cb_rwlock);
			return (bp);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);

	return (NULL);
}

static off_t
find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp, *bpend;
	off_t	rlof;

	/*
	 * we mustn't:
	 *	o read past eol
	 *	o read past the tail
	 *	o read data that may be being written.
	 */
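	/*
	 * Find the log offset of the oldest data still held in the write
	 * buffers (scanning forward from the LRU buf); if none is found,
	 * use the tail.  A read starting at or before that offset must
	 * stop there; a read starting after it may run to end-of-log.
	 */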
	rw_enter(&cb->cb_rwlock, RW_READER);
	bpend = bp = cb->cb_bp->b_forw;
	rlof = ul->un_tail_lof;
	do {
		if (bp->b_bcount) {
			rlof = dbtob(bp->b_blkno);
			break;
		}
		bp = bp->b_forw;
	} while (bp != bpend);
	rw_exit(&cb->cb_rwlock);

	if (lof <= rlof)
		/* lof is prior to the range represented by the write buf */
		return (rlof);
	else
		/* lof follows the range represented by the write buf */
		return ((off_t)ul->un_eol_lof);
}

static buf_t *
get_read_bp(ml_unit_t *ul, off_t lof)
{
	cirbuf_t	*cb;
	buf_t		*bp;
	off_t		rlof;

	/*
	 * retrieve as much data as possible from the incore buffers
	 */
	if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}
	if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}

	/*
	 * steal the LRU buf
	 */
	cb = &ul->un_rdbuf;
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);
	bp->b_flags = 0;
	bp->b_bcount = 0;
	cb->cb_bp = bp;
	rw_exit(&cb->cb_rwlock);

	/*
	 * don't read past the tail or the end-of-log
	 */
	bp->b_blkno = btodb(lof);
	lof = dbtob(bp->b_blkno);
	rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
	bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
	readlog(ul, bp);
	return (bp);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
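/*
 * extend_write_bp() tries to grow the dirty buf by absorbing the buffer
 * space of the next bp in the ring when that space is virtually
 * contiguous with this one's.  Returns 1 if the buf was extended and
 * 0 otherwise; in either case bp is returned still held busy.
 */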
static int
extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
{
	buf_t	*bpforw	= bp->b_forw;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);

	/*
	 * there is no `next' bp; do nothing
	 */
	if (bpforw == bp)
		return (0);

	/*
	 * buffer space is not adjacent; do nothing
	 */
	if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
		return (0);

	/*
	 * locking protocol requires giving up any bp locks before
	 * acquiring cb_rwlock.  This is okay because we hold
	 * un_log_mutex.
	 */
	sema_v(&bp->b_sem);

	/*
	 * lock out ldl_read
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * wait for current IO to finish w/next bp, if necessary
	 */
	makebusy(ul, bpforw);

	/*
	 * free the next bp and steal its space
	 */
	bp->b_forw = bpforw->b_forw;
	bpforw->b_forw->b_back = bp;
	bp->b_bufsize += bpforw->b_bufsize;
	sema_v(&bpforw->b_sem);
	bpforw->b_forw = cb->cb_free;
	cb->cb_free = bpforw;
	makebusy(ul, bp);
	rw_exit(&cb->cb_rwlock);

	return (1);
}

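/*
 * storebuf() copies up to nb bytes from va into the write buf bp,
 * filling the current log sector.  Every DEV_BSIZE sector of the log
 * ends with a sect_trailer_t (transaction id and sector ident), which
 * is filled in here each time a sector boundary is crossed.  The buf
 * is pushed (async written) when the log wraps, or when the buf is
 * full and cannot be extended.  Returns the number of bytes consumed
 * from va; the caller loops until everything is stored.
 */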
static size_t
storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
{
	size_t		copy_nb;
	size_t		nb_in_sec;
	sect_trailer_t	*st;
	size_t		nb_left = nb;
	cirbuf_t	*cb	= &ul->un_wrbuf;

again:
	nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
	copy_nb = MIN(nb_left, nb_in_sec);

	ASSERT(copy_nb);

	bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
	bp->b_bcount += copy_nb;
	va += copy_nb;
	nb_left -= copy_nb;
	ul->un_tail_lof += copy_nb;

	if ((nb_in_sec -= copy_nb) == 0) {
		st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);

		st->st_tid = ul->un_logmap->mtm_tid;
		st->st_ident = ul->un_tail_ident++;
		bp->b_bcount += sizeof (sect_trailer_t);
		ul->un_tail_lof += sizeof (sect_trailer_t);
		/*
		 * log wrapped; async write this bp
		 */
		if (ul->un_tail_lof == ul->un_eol_lof) {
			ul->un_tail_lof = ul->un_bol_lof;
			push_dirty_bp(ul, bp);
			return (nb - nb_left);
		}
		/*
		 * out of bp space; get more or async write buf
		 */
		if (bp->b_bcount == bp->b_bufsize) {
			if (!extend_write_bp(ul, cb, bp)) {
				push_dirty_bp(ul, bp);
				return (nb - nb_left);
			}
		}
	}
	if (nb_left)
		goto again;

	sema_v(&bp->b_sem);
	return (nb);
}

static void
fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
{
	offset_t	src_mof	= me->me_mof;
	size_t		src_nb	= me->me_nb;

	if (src_mof > dst_mof) {
		ASSERT(src_mof < (dst_mof + dst_nb));
		dst_va += (src_mof - dst_mof);
		dst_nb -= (src_mof - dst_mof);
	} else {
		ASSERT(dst_mof < (src_mof + src_nb));
		src_nb -= (dst_mof - src_mof);
	}

	src_nb = MIN(src_nb, dst_nb);
	ASSERT(src_nb);
	bzero(dst_va, src_nb);
}

/*
 * dst_va == NULL means don't copy anything
 */
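/*
 * fetchbuf() copies log data from the (busy) buf bp into dst_va,
 * starting at log offset *dst_lofp and skipping over the sect_trailer_t
 * at the end of every sector.  It releases bp, updates *dst_lofp to the
 * next offset to read (wrapping from eol to bol), and returns the
 * number of bytes delivered; the caller loops until its request is
 * satisfied.
 */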
static ulong_t
fetchbuf(
	ml_unit_t *ul,
	buf_t *bp,
	caddr_t dst_va,
	size_t dst_nb,
	off_t *dst_lofp)
{
	caddr_t	copy_va;
	size_t	copy_nb;
	size_t	nb_sec;
	off_t	dst_lof		= *dst_lofp;
	ulong_t	sav_dst_nb	= dst_nb;
	ulong_t	src_nb		= bp->b_bcount;
	off_t	src_lof		= dbtob(bp->b_blkno);
	off_t	src_elof	= src_lof + src_nb;
	caddr_t	src_va		= bp->b_un.b_addr;

	/*
	 * copy from bp to dst_va
	 */
	while (dst_nb) {
		/*
		 * compute address within bp
		 */
		copy_va = src_va + (dst_lof - src_lof);

		/*
		 * adjust copy size to amount of data in bp
		 */
		copy_nb = MIN(dst_nb, src_elof - dst_lof);

		/*
		 * adjust copy size to amount of data in sector
		 */
		nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
		copy_nb = MIN(copy_nb, nb_sec);

		/*
		 * dst_va == NULL means don't do copy (see logseek())
		 */
		if (dst_va) {
			bcopy(copy_va, dst_va, copy_nb);
			dst_va += copy_nb;
		}
		dst_lof += copy_nb;
		dst_nb -= copy_nb;
		nb_sec -= copy_nb;

		/*
		 * advance over sector trailer
		 */
		if (nb_sec == 0)
			dst_lof += sizeof (sect_trailer_t);

		/*
		 * exhausted buffer
		 *	return current lof for next read
		 */
		if (dst_lof == src_elof) {
			sema_v(&bp->b_sem);
			if (dst_lof == ul->un_eol_lof)
				dst_lof = ul->un_bol_lof;
			*dst_lofp = dst_lof;
			return (sav_dst_nb - dst_nb);
		}
	}

	/*
	 * copy complete - return current lof
	 */
	sema_v(&bp->b_sem);
	*dst_lofp = dst_lof;
	return (sav_dst_nb);
}

void
ldl_round_commit(ml_unit_t *ul)
{
	int		wrapped;
	buf_t		*bp;
	sect_trailer_t	*st;
	size_t		bcount;
	cirbuf_t	*cb	= &ul->un_wrbuf;

	/*
	 * if nothing to write, then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);

	/*
	 * round up to sector boundary and set new tail
	 *	don't readjust st_ident if buf is already rounded
	 */
	bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
	if (bcount == bp->b_bcount) {
		sema_v(&bp->b_sem);
		return;
	}
	bp->b_bcount = bcount;
	ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
	wrapped = 0;
	if (ul->un_tail_lof == ul->un_eol_lof) {
		ul->un_tail_lof = ul->un_bol_lof;
		++wrapped;
	}
	ASSERT(ul->un_tail_lof != ul->un_head_lof);

	/*
	 * fix up the sector trailer
	 */
	/* LINTED */
	st = (sect_trailer_t *)
	    ((bp->b_un.b_addr + bcount) - sizeof (*st));
	st->st_tid = ul->un_logmap->mtm_tid;
	st->st_ident = ul->un_tail_ident++;

	/*
	 * if tail wrapped or we have exhausted this buffer
	 *	async write the buffer
	 */
	if (wrapped || bcount == bp->b_bufsize)
		push_dirty_bp(ul, bp);
	else
		sema_v(&bp->b_sem);
}

void
ldl_push_commit(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb	= &ul->un_wrbuf;

	/*
	 * if nothing to write, then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);
	push_dirty_bp(ul, bp);
}

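/*
 * Returns true when the outstanding log reservation exceeds roughly
 * three quarters of the maximum (un_maxresv - un_maxresv/4), signalling
 * that the caller should commit.
 */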
int
ldl_need_commit(ml_unit_t *ul)
{
	return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
}

int
ldl_has_space(ml_unit_t *ul, mapentry_t *me)
{
	off_t	nfb;
	off_t	nb;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/*
	 * Add up the size used by the deltas
	 * round nb up to a sector length plus an extra sector
	 *	w/o the extra sector we couldn't distinguish
	 *	a full log (head == tail) from an empty log (head == tail)
	 */
	for (nb = DEV_BSIZE; me; me = me->me_hash) {
		nb += sizeof (struct delta);
		if (me->me_dt != DT_CANCEL)
			nb += me->me_nb;
	}
	nb = P2ROUNDUP(nb, DEV_BSIZE);

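	/*
	 * nfb is the number of free bytes between the tail and the head:
	 * when the head is at or before the tail the free space wraps
	 * around end-of-log, otherwise it is simply head - tail.
	 */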
	if (ul->un_head_lof <= ul->un_tail_lof)
		nfb = (ul->un_head_lof - ul->un_bol_lof) +
		    (ul->un_eol_lof - ul->un_tail_lof);
	else
		nfb = ul->un_head_lof - ul->un_tail_lof;

	return (nb < nfb);
}

void
ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
{
	buf_t		*bp;
	caddr_t		va;
	size_t		nb;
	size_t		actual;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/* Write the delta */

	nb = sizeof (struct delta);
	va = (caddr_t)&me->me_delta;
	bp = get_write_bp(ul);

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}

	/* If a commit, cancel, or 0's; we're almost done */
	switch (me->me_dt) {
		case DT_COMMIT:
		case DT_CANCEL:
		case DT_ABZERO:
			/* roll needs to know where the next delta will go */
			me->me_lof = ul->un_tail_lof;
			return;
		default:
			break;
	}

	/* Now write the data */

	ASSERT(me->me_nb != 0);

	nb = me->me_nb;
	va = (me->me_mof - bufmof) + bufp;
	bp = get_write_bp(ul);

	/* Save where we will put the data */
	me->me_lof = ul->un_tail_lof;

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}
}

void
ldl_waito(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb	= &ul->un_wrbuf;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	/*
	 * wait for any outstanding log writes to complete (makebusy
	 * blocks until the buf's IO is done; then release it again)
	 */
	bp = cb->cb_bp;
	do {
		if ((bp->b_flags & B_DONE) == 0) {
			makebusy(ul, bp);
			sema_v(&bp->b_sem);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * seek nb bytes from location lof
 */
static int
logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
{
	buf_t	*bp;
	ulong_t	actual;

	while (nb) {
		bp = get_read_bp(ul, lof);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		actual = fetchbuf(ul, bp, NULL, nb, &lof);
		ASSERT(actual);
		nb -= actual;
	}
	*lofp = lof;
	ASSERT(nb == 0);
	return (0);
}

int
ldl_read(
	ml_unit_t *ul,		/* Log unit */
	caddr_t va,		/* address of buffer to read into */
	offset_t mof,		/* mof of buffer */
	off_t nb,		/* length of buffer */
	mapentry_t *me)		/* Map entry list */
{
	buf_t	*bp;
	crb_t   *crb;
	caddr_t	rva;			/* address to read into */
	size_t	rnb;			/* # of bytes to read */
	off_t	lof;			/* log device offset to read from */
	off_t   skip;
	ulong_t	actual;
	int	error;
	caddr_t	eva	= va + nb;	/* end of buffer */

	for (; me; me = me->me_agenext) {
		ASSERT(me->me_dt != DT_CANCEL);

		/*
		 * check for a cached roll buffer
		 */
		crb = me->me_crb;
		if (crb) {
			if (mof > crb->c_mof) {
				/*
				 * This mapentry overlaps with the beginning of
				 * the supplied buffer
				 */
				skip = mof - crb->c_mof;
				bcopy(crb->c_buf + skip, va,
				    MIN(nb, crb->c_nb - skip));
			} else {
				/*
				 * This mapentry starts at or after
				 * the supplied buffer.
				 */
				skip = crb->c_mof - mof;
				bcopy(crb->c_buf, va + skip,
				    MIN(crb->c_nb, nb - skip));
			}
			logstats.ls_lreadsinmem.value.ui64++;
			continue;
		}

		/*
		 * check for a delta full of zeroes - there's no log data
		 */
		if (me->me_dt == DT_ABZERO) {
			fetchzeroes(va, mof, nb, me);
			continue;
		}

		if (mof > me->me_mof) {
			rnb = (size_t)(mof - me->me_mof);
			error = logseek(ul, me->me_lof, rnb, &lof);
			if (error)
				return (EIO);
			rva = va;
			rnb = me->me_nb - rnb;
			rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
		} else {
			lof = me->me_lof;
			rva = (me->me_mof - mof) + va;
			rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb;
		}

		while (rnb) {
			bp = get_read_bp(ul, lof);
			if (bp->b_flags & B_ERROR) {
				sema_v(&bp->b_sem);
				return (EIO);
			}
			ASSERT(((me->me_flags & ME_ROLL) == 0) ||
			    (bp != ul->un_wrbuf.cb_dirty));
			actual = fetchbuf(ul, bp, rva, rnb, &lof);
			ASSERT(actual);
			rva += actual;
			rnb -= actual;
		}
	}
	return (0);
}

void
ldl_savestate(ml_unit_t *ul)
{
	int		error;
	buf_t		*bp	= ul->un_bp;
	ml_odunit_t	*ud	= (void *)bp->b_un.b_addr;
	ml_odunit_t	*ud2	= (void *)(bp->b_un.b_addr + DEV_BSIZE);

#if	DEBUG
	/*
	 * Scan test is running; don't update intermediate state
	 */
	if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
		return;
#endif	/* DEBUG */

	mutex_enter(&ul->un_state_mutex);
	bcopy(&ul->un_ondisk, ud, sizeof (*ud));
	ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
	bcopy(ud, ud2, sizeof (*ud));

	/* If a snapshot is enabled write through the snapshot driver. */
	if (ul->un_ufsvfs->vfs_snapshot)
		UFS_BWRITE2(ul->un_ufsvfs, bp);
	else
		BWRITE2(bp);
	logstats.ls_ldlwrites.value.ui64++;
	error = bp->b_flags & B_ERROR;
	mutex_exit(&ul->un_state_mutex);
	if (error)
		ldl_seterror(ul, "Error writing ufs log state");
}

/*
 * The head will be set to (new_lof - header) since ldl_sethead is
 * called with the new_lof of the data portion of a delta.
 */
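/*
 * A data_lof of -1 means the log is empty: the head is reset to the
 * tail and a fresh head ident is generated.  Otherwise the head is
 * backed up over the delta header (and over a sector trailer if the
 * header straddles a sector boundary), wrapping to the last log sector
 * when it would fall before bol.
 */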
12280Sstevel@tonic-gate void
ldl_sethead(ml_unit_t * ul,off_t data_lof,uint32_t tid)12290Sstevel@tonic-gate ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
12300Sstevel@tonic-gate {
12310Sstevel@tonic-gate 	off_t		nb;
12320Sstevel@tonic-gate 	off_t		new_lof;
12330Sstevel@tonic-gate 	uint32_t	new_ident;
12340Sstevel@tonic-gate 	daddr_t		beg_blkno;
12350Sstevel@tonic-gate 	daddr_t		end_blkno;
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
12380Sstevel@tonic-gate 
12390Sstevel@tonic-gate 	if (data_lof == -1) {
12400Sstevel@tonic-gate 		/* log is empty */
12417455SWolfgang.Schremser@Sun.COM 		new_ident = lufs_hd_genid(ul);
12420Sstevel@tonic-gate 		new_lof = ul->un_tail_lof;
12430Sstevel@tonic-gate 
12440Sstevel@tonic-gate 	} else {
12450Sstevel@tonic-gate 		/* compute header's lof */
12460Sstevel@tonic-gate 		new_ident = ul->un_head_ident;
12470Sstevel@tonic-gate 		new_lof = data_lof - sizeof (struct delta);
12480Sstevel@tonic-gate 
12490Sstevel@tonic-gate 		/* whoops, header spans sectors; subtract out sector trailer */
12500Sstevel@tonic-gate 		if (btodb(new_lof) != btodb(data_lof))
12510Sstevel@tonic-gate 			new_lof -= sizeof (sect_trailer_t);
12520Sstevel@tonic-gate 
12530Sstevel@tonic-gate 		/* whoops, header wrapped the log; go to last sector */
12540Sstevel@tonic-gate 		if (new_lof < ul->un_bol_lof) {
12550Sstevel@tonic-gate 			/* sector offset */
12560Sstevel@tonic-gate 			new_lof -= dbtob(btodb(new_lof));
12570Sstevel@tonic-gate 			/* add to last sector's lof */
12580Sstevel@tonic-gate 			new_lof += (ul->un_eol_lof - DEV_BSIZE);
12590Sstevel@tonic-gate 		}
12600Sstevel@tonic-gate 		ul->un_head_tid = tid;
12610Sstevel@tonic-gate 	}
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	/*
12640Sstevel@tonic-gate 	 * check for nop
12650Sstevel@tonic-gate 	 */
12660Sstevel@tonic-gate 	if (new_lof == ul->un_head_lof)
12670Sstevel@tonic-gate 		return;
12680Sstevel@tonic-gate 
12690Sstevel@tonic-gate 	/*
12700Sstevel@tonic-gate 	 * invalidate the affected bufs and calculate new ident
12710Sstevel@tonic-gate 	 */
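	/*
	 * idents are per-sector sequence numbers, so the new ident
	 * advances by one for each DEV_BSIZE block between the old
	 * and new head; when the head wraps, count head..eol plus
	 * bol..new head
	 */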
12720Sstevel@tonic-gate 	if (new_lof > ul->un_head_lof) {
12730Sstevel@tonic-gate 		nb = new_lof - ul->un_head_lof;
12740Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
12750Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
12760Sstevel@tonic-gate 
12770Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
12780Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
12790Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
12800Sstevel@tonic-gate 	} else {
12810Sstevel@tonic-gate 		nb = ul->un_eol_lof - ul->un_head_lof;
12820Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
12830Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
12840Sstevel@tonic-gate 
12850Sstevel@tonic-gate 		end_blkno = btodb(ul->un_eol_lof);
12860Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
12870Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
12880Sstevel@tonic-gate 
12890Sstevel@tonic-gate 		nb = new_lof - ul->un_bol_lof;
12900Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
12910Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
12940Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_bol_lof);
12950Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
12960Sstevel@tonic-gate 	}
12970Sstevel@tonic-gate 	/*
12980Sstevel@tonic-gate 	 * don't update the head if there has been an error
12990Sstevel@tonic-gate 	 */
13000Sstevel@tonic-gate 	if (ul->un_flags & LDL_ERROR)
13010Sstevel@tonic-gate 		return;
13020Sstevel@tonic-gate 
13030Sstevel@tonic-gate 	/* Fix up the head and ident */
13040Sstevel@tonic-gate 	ASSERT(new_lof >= ul->un_bol_lof);
13050Sstevel@tonic-gate 	ul->un_head_lof = new_lof;
13060Sstevel@tonic-gate 	ul->un_head_ident = new_ident;
13070Sstevel@tonic-gate 	if (data_lof == -1) {
13080Sstevel@tonic-gate 		ul->un_tail_ident = ul->un_head_ident;
13090Sstevel@tonic-gate 	}
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 	/* Commit to the database */
13130Sstevel@tonic-gate 	ldl_savestate(ul);
13140Sstevel@tonic-gate 
13150Sstevel@tonic-gate 	ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
13164662Sfrankho 	    ldl_sethead_debug(ul));
13170Sstevel@tonic-gate }
13180Sstevel@tonic-gate 
13190Sstevel@tonic-gate /*
13200Sstevel@tonic-gate  * The tail will be set to the sector following lof+nb
13210Sstevel@tonic-gate  *	lof + nb == size of the last delta + commit record
13220Sstevel@tonic-gate  *	this function is called once after the log scan has completed.
13230Sstevel@tonic-gate  */
13240Sstevel@tonic-gate void
13250Sstevel@tonic-gate ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
13260Sstevel@tonic-gate {
13270Sstevel@tonic-gate 	off_t		new_lof;
13280Sstevel@tonic-gate 	uint32_t	new_ident;
13290Sstevel@tonic-gate 	daddr_t		beg_blkno;
13300Sstevel@tonic-gate 	daddr_t		end_blkno;
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
13330Sstevel@tonic-gate 
13340Sstevel@tonic-gate 	if (lof == -1) {
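		/*
		 * lof == -1: mark the log empty by making the tail and
		 * head coincide at the head's sector, using a freshly
		 * generated ident for both
		 */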
13350Sstevel@tonic-gate 		ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
13360Sstevel@tonic-gate 		ul->un_head_lof = ul->un_tail_lof;
13377455SWolfgang.Schremser@Sun.COM 		ul->un_head_ident = lufs_hd_genid(ul);
13380Sstevel@tonic-gate 		ul->un_tail_ident = ul->un_head_ident;
13390Sstevel@tonic-gate 
13400Sstevel@tonic-gate 		/* Commit to the database */
13410Sstevel@tonic-gate 		ldl_savestate(ul);
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate 		return;
13440Sstevel@tonic-gate 	}
13450Sstevel@tonic-gate 
13460Sstevel@tonic-gate 	/*
13470Sstevel@tonic-gate 	 * new_lof is the offset of the sector following the last commit
13480Sstevel@tonic-gate 	 */
13490Sstevel@tonic-gate 	(void) logseek(ul, lof, nb, &new_lof);
13500Sstevel@tonic-gate 	ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));
13510Sstevel@tonic-gate 
13520Sstevel@tonic-gate 	/*
13530Sstevel@tonic-gate 	 * calculate new ident
13540Sstevel@tonic-gate 	 */
13550Sstevel@tonic-gate 	if (new_lof > ul->un_head_lof) {
13560Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
13570Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
13580Sstevel@tonic-gate 		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
13590Sstevel@tonic-gate 	} else {
13600Sstevel@tonic-gate 		end_blkno = btodb(ul->un_eol_lof);
13610Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
13620Sstevel@tonic-gate 		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
13630Sstevel@tonic-gate 
13640Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
13650Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_bol_lof);
13660Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
13670Sstevel@tonic-gate 	}
13680Sstevel@tonic-gate 
13690Sstevel@tonic-gate 	/* Fix up the tail and ident */
13700Sstevel@tonic-gate 	ul->un_tail_lof = new_lof;
13710Sstevel@tonic-gate 	ul->un_tail_ident = new_ident;
13720Sstevel@tonic-gate 
13730Sstevel@tonic-gate 	/* Commit to the database */
13740Sstevel@tonic-gate 	ldl_savestate(ul);
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate /*
13780Sstevel@tonic-gate  * LOGSCAN STUFF
13790Sstevel@tonic-gate  */
13800Sstevel@tonic-gate static int
13810Sstevel@tonic-gate ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
13820Sstevel@tonic-gate {
13830Sstevel@tonic-gate 	ulong_t		ident;
13840Sstevel@tonic-gate 	size_t		nblk, i;
13850Sstevel@tonic-gate 	sect_trailer_t	*st;
13860Sstevel@tonic-gate 
13870Sstevel@tonic-gate 	/*
13880Sstevel@tonic-gate 	 * compute ident for first sector in the buffer
13890Sstevel@tonic-gate 	 */
13900Sstevel@tonic-gate 	ident = ul->un_head_ident;
13910Sstevel@tonic-gate 	if (bp->b_blkno >= btodb(ul->un_head_lof)) {
13920Sstevel@tonic-gate 		ident += (bp->b_blkno - btodb(ul->un_head_lof));
13930Sstevel@tonic-gate 	} else {
13940Sstevel@tonic-gate 		ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
13950Sstevel@tonic-gate 		ident += (bp->b_blkno - btodb(ul->un_bol_lof));
13960Sstevel@tonic-gate 	}
13970Sstevel@tonic-gate 	/*
13980Sstevel@tonic-gate 	 * truncate the buffer down to the last valid sector
13990Sstevel@tonic-gate 	 */
14000Sstevel@tonic-gate 	nblk = btodb(bp->b_bcount);
14010Sstevel@tonic-gate 	bp->b_bcount = 0;
14020Sstevel@tonic-gate 	/* LINTED */
14030Sstevel@tonic-gate 	st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
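	/*
	 * st points at the trailer of the buffer's first sector; keep
	 * only the leading run of sectors whose trailer idents are in
	 * sequence
	 */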
14040Sstevel@tonic-gate 	for (i = 0; i < nblk; ++i) {
14050Sstevel@tonic-gate 		if (st->st_ident != ident)
14060Sstevel@tonic-gate 			break;
14070Sstevel@tonic-gate 
14080Sstevel@tonic-gate 		/* remember last valid tid for ldl_logscan_error() */
14090Sstevel@tonic-gate 		ul->un_tid = st->st_tid;
14100Sstevel@tonic-gate 
14110Sstevel@tonic-gate 		/* LINTED */
14120Sstevel@tonic-gate 		st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
14130Sstevel@tonic-gate 		++ident;
14140Sstevel@tonic-gate 		bp->b_bcount += DEV_BSIZE;
14150Sstevel@tonic-gate 	}
14160Sstevel@tonic-gate 	/*
14170Sstevel@tonic-gate 	 * make sure that lof is still within range
14180Sstevel@tonic-gate 	 */
14190Sstevel@tonic-gate 	return (within_range(lof, bp->b_blkno, bp->b_bcount));
14200Sstevel@tonic-gate }
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate ulong_t
14230Sstevel@tonic-gate ldl_logscan_nbcommit(off_t lof)
14240Sstevel@tonic-gate {
14250Sstevel@tonic-gate 	/*
14260Sstevel@tonic-gate 	 * lof is the offset following the commit header.  However,
14270Sstevel@tonic-gate 	 * if the commit header fell on the end-of-sector, then lof
14280Sstevel@tonic-gate 	 * has already been advanced to the beginning of the next
14290Sstevel@tonic-gate 	 * sector.  So do nothing.  Otherwise, return the remaining
14300Sstevel@tonic-gate 	 * bytes in the sector.
14310Sstevel@tonic-gate 	 */
14320Sstevel@tonic-gate 	if ((lof & (DEV_BSIZE - 1)) == 0)
14330Sstevel@tonic-gate 		return (0);
14340Sstevel@tonic-gate 	return (NB_LEFT_IN_SECTOR(lof));
14350Sstevel@tonic-gate }
14360Sstevel@tonic-gate 
14370Sstevel@tonic-gate int
14380Sstevel@tonic-gate ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
14390Sstevel@tonic-gate {
14400Sstevel@tonic-gate 	buf_t	*bp;
14410Sstevel@tonic-gate 	ulong_t	actual;
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 	ASSERT(ul->un_head_lof != ul->un_tail_lof);
14440Sstevel@tonic-gate 
14450Sstevel@tonic-gate 	/*
14460Sstevel@tonic-gate 	 * Check the log data doesn't go out of bounds
14470Sstevel@tonic-gate 	 */
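	/*
	 * when head < tail the live data is the single extent
	 * [head, tail) and the request must lie within it; otherwise
	 * the log wraps and only [tail, head) is dead, so the request
	 * must not overlap that region
	 */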
14480Sstevel@tonic-gate 	if (ul->un_head_lof < ul->un_tail_lof) {
14490Sstevel@tonic-gate 		if (!WITHIN(*lofp, nb, ul->un_head_lof,
14500Sstevel@tonic-gate 		    (ul->un_tail_lof - ul->un_head_lof))) {
14510Sstevel@tonic-gate 			return (EIO);
14520Sstevel@tonic-gate 		}
14530Sstevel@tonic-gate 	} else {
14540Sstevel@tonic-gate 		if (OVERLAP(*lofp, nb, ul->un_tail_lof,
14550Sstevel@tonic-gate 		    (ul->un_head_lof - ul->un_tail_lof))) {
14560Sstevel@tonic-gate 			return (EIO);
14570Sstevel@tonic-gate 		}
14580Sstevel@tonic-gate 	}
14590Sstevel@tonic-gate 
14600Sstevel@tonic-gate 	while (nb) {
14610Sstevel@tonic-gate 		bp = get_read_bp(ul, *lofp);
14620Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
14630Sstevel@tonic-gate 			sema_v(&bp->b_sem);
14640Sstevel@tonic-gate 			return (EIO);
14650Sstevel@tonic-gate 		}
14660Sstevel@tonic-gate 		/*
14670Sstevel@tonic-gate 		 * out-of-seq idents mean a partial transaction
14680Sstevel@tonic-gate 		 *	(panic, non-corrupting powerfail, ...)
14690Sstevel@tonic-gate 		 */
14700Sstevel@tonic-gate 		if (!ldl_logscan_ident(ul, bp, *lofp)) {
14710Sstevel@tonic-gate 			sema_v(&bp->b_sem);
14720Sstevel@tonic-gate 			return (EIO);
14730Sstevel@tonic-gate 		}
14740Sstevel@tonic-gate 		/*
14750Sstevel@tonic-gate 		 * copy the header into the caller's buf
14760Sstevel@tonic-gate 		 */
14770Sstevel@tonic-gate 		actual = fetchbuf(ul, bp, va, nb, lofp);
14780Sstevel@tonic-gate 		if (va)
14790Sstevel@tonic-gate 			va += actual;
14800Sstevel@tonic-gate 		nb -= actual;
14810Sstevel@tonic-gate 	}
14820Sstevel@tonic-gate 	return (0);
14830Sstevel@tonic-gate }
14840Sstevel@tonic-gate 
14850Sstevel@tonic-gate void
14860Sstevel@tonic-gate ldl_logscan_begin(ml_unit_t *ul)
14870Sstevel@tonic-gate {
14880Sstevel@tonic-gate 	size_t	bufsize;
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 	ASSERT(ul->un_wrbuf.cb_dirty == NULL);
14910Sstevel@tonic-gate 
14920Sstevel@tonic-gate 	/*
14930Sstevel@tonic-gate 	 * logscan has begun
14940Sstevel@tonic-gate 	 */
14950Sstevel@tonic-gate 	ul->un_flags |= LDL_SCAN;
14960Sstevel@tonic-gate 
14970Sstevel@tonic-gate 	/*
14980Sstevel@tonic-gate 	 * reset the circular bufs
14990Sstevel@tonic-gate 	 */
15000Sstevel@tonic-gate 	bufsize = ldl_bufsize(ul);
15010Sstevel@tonic-gate 	alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
15020Sstevel@tonic-gate 	alloc_wrbuf(&ul->un_wrbuf, bufsize);
15030Sstevel@tonic-gate 
15040Sstevel@tonic-gate 	/*
15050Sstevel@tonic-gate 	 * set the tail to reflect a full log
15060Sstevel@tonic-gate 	 */
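	/*
	 * i.e. place the tail in the sector just before the head
	 * (wrapping through bol/eol below) so that nearly the whole
	 * log is treated as live during the scan
	 */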
15070Sstevel@tonic-gate 	ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;
15080Sstevel@tonic-gate 
15090Sstevel@tonic-gate 	if (ul->un_tail_lof < ul->un_bol_lof)
15100Sstevel@tonic-gate 		ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
15110Sstevel@tonic-gate 	if (ul->un_tail_lof >= ul->un_eol_lof)
15120Sstevel@tonic-gate 		ul->un_tail_lof = ul->un_bol_lof;
15130Sstevel@tonic-gate 
15140Sstevel@tonic-gate 	/*
15150Sstevel@tonic-gate 	 * un_tid is used during error processing; it is initialized to
15160Sstevel@tonic-gate 	 * the tid of the delta at un_head_lof;
15170Sstevel@tonic-gate 	 */
15180Sstevel@tonic-gate 	ul->un_tid = ul->un_head_tid;
15190Sstevel@tonic-gate }
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate void
15220Sstevel@tonic-gate ldl_logscan_end(ml_unit_t *ul)
15230Sstevel@tonic-gate {
15240Sstevel@tonic-gate 	size_t	bufsize;
15250Sstevel@tonic-gate 
15260Sstevel@tonic-gate 	/*
15270Sstevel@tonic-gate 	 * reset the circular bufs
15280Sstevel@tonic-gate 	 */
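	/*
	 * the read buffer drops back to MAPBLOCKSIZE for normal
	 * operation; the write buffer keeps the full ldl_bufsize() size
	 */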
15290Sstevel@tonic-gate 	bufsize = ldl_bufsize(ul);
15300Sstevel@tonic-gate 	alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
15310Sstevel@tonic-gate 	alloc_wrbuf(&ul->un_wrbuf, bufsize);
15320Sstevel@tonic-gate 
15330Sstevel@tonic-gate 	/*
15340Sstevel@tonic-gate 	 * Done w/scan
15350Sstevel@tonic-gate 	 */
15360Sstevel@tonic-gate 	ul->un_flags &= ~LDL_SCAN;
15370Sstevel@tonic-gate }
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate int
15400Sstevel@tonic-gate ldl_need_roll(ml_unit_t *ul)
15410Sstevel@tonic-gate {
15420Sstevel@tonic-gate 	off_t	busybytes;
15430Sstevel@tonic-gate 	off_t	head;
15440Sstevel@tonic-gate 	off_t	tail;
15450Sstevel@tonic-gate 	off_t	bol;
15460Sstevel@tonic-gate 	off_t	eol;
15470Sstevel@tonic-gate 	off_t	nb;
15480Sstevel@tonic-gate 
15490Sstevel@tonic-gate 	/*
15500Sstevel@tonic-gate 	 * snapshot the log state
15510Sstevel@tonic-gate 	 */
15520Sstevel@tonic-gate 	head = ul->un_head_lof;
15530Sstevel@tonic-gate 	tail = ul->un_tail_lof;
15540Sstevel@tonic-gate 	bol = ul->un_bol_lof;
15550Sstevel@tonic-gate 	eol = ul->un_eol_lof;
15560Sstevel@tonic-gate 	nb = ul->un_logsize;
15570Sstevel@tonic-gate 
15580Sstevel@tonic-gate 	/*
15590Sstevel@tonic-gate 	 * compute number of busy (inuse) bytes
15600Sstevel@tonic-gate 	 */
15610Sstevel@tonic-gate 	if (head <= tail)
15620Sstevel@tonic-gate 		busybytes = tail - head;
15630Sstevel@tonic-gate 	else
15640Sstevel@tonic-gate 		busybytes = (eol - head) + (tail - bol);
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate 	/*
15670Sstevel@tonic-gate 	 * return TRUE if > 75% full
15680Sstevel@tonic-gate 	 */
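	/* nb - (nb >> 2) is 3/4 of the log size */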
15690Sstevel@tonic-gate 	return (busybytes > (nb - (nb >> 2)));
15700Sstevel@tonic-gate }
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate void
15730Sstevel@tonic-gate ldl_seterror(ml_unit_t *ul, char *why)
15740Sstevel@tonic-gate {
15750Sstevel@tonic-gate 	/*
15760Sstevel@tonic-gate 	 * already in error state; do nothing
15770Sstevel@tonic-gate 	 */
15780Sstevel@tonic-gate 	if (ul->un_flags & LDL_ERROR)
15790Sstevel@tonic-gate 		return;
15800Sstevel@tonic-gate 
15810Sstevel@tonic-gate 	ul->un_flags |= LDL_ERROR;	/* incore */
15820Sstevel@tonic-gate 	ul->un_badlog = 1;		/* ondisk (cleared by fsck) */
15830Sstevel@tonic-gate 
15840Sstevel@tonic-gate 	/*
15850Sstevel@tonic-gate 	 * Commit to state sectors
15860Sstevel@tonic-gate 	 */
15870Sstevel@tonic-gate 	uniqtime(&ul->un_timestamp);
15880Sstevel@tonic-gate 	ldl_savestate(ul);
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 	/* Pretty print */
15910Sstevel@tonic-gate 	cmn_err(CE_WARN, "%s", why);
15920Sstevel@tonic-gate 	cmn_err(CE_WARN, "ufs log for %s changed state to Error",
15930Sstevel@tonic-gate 	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
15940Sstevel@tonic-gate 	cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)",
15950Sstevel@tonic-gate 	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate 	/*
15980Sstevel@tonic-gate 	 * If we aren't in the middle of a scan (aka snarf), tell ufs
15990Sstevel@tonic-gate 	 * to hard lock itself.
16000Sstevel@tonic-gate 	 */
16010Sstevel@tonic-gate 	if ((ul->un_flags & LDL_SCAN) == 0)
16020Sstevel@tonic-gate 		ufs_trans_onerror();
16030Sstevel@tonic-gate }
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate size_t
16060Sstevel@tonic-gate ldl_bufsize(ml_unit_t *ul)
16070Sstevel@tonic-gate {
16080Sstevel@tonic-gate 	size_t		bufsize;
16090Sstevel@tonic-gate 	extern uint32_t	ldl_minbufsize;
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate 	/*
16120Sstevel@tonic-gate 	 * initial guess is the maxtransfer value for this log device
16130Sstevel@tonic-gate 	 * 	increase if too small
16140Sstevel@tonic-gate 	 * 	decrease if too large
16150Sstevel@tonic-gate 	 */
16160Sstevel@tonic-gate 	bufsize = dbtob(btod(ul->un_maxtransfer));
16170Sstevel@tonic-gate 	if (bufsize < ldl_minbufsize)
16180Sstevel@tonic-gate 		bufsize = ldl_minbufsize;
16190Sstevel@tonic-gate 	if (bufsize > maxphys)
16200Sstevel@tonic-gate 		bufsize = maxphys;
16210Sstevel@tonic-gate 	if (bufsize > ul->un_maxtransfer)
16220Sstevel@tonic-gate 		bufsize = ul->un_maxtransfer;
16230Sstevel@tonic-gate 	return (bufsize);
16240Sstevel@tonic-gate }
1625