xref: /onnv-gate/usr/src/uts/common/fs/ufs/lufs_log.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/systm.h>
30*0Sstevel@tonic-gate #include <sys/types.h>
31*0Sstevel@tonic-gate #include <sys/vnode.h>
32*0Sstevel@tonic-gate #include <sys/errno.h>
33*0Sstevel@tonic-gate #include <sys/sysmacros.h>
34*0Sstevel@tonic-gate #include <sys/debug.h>
35*0Sstevel@tonic-gate #include <sys/kmem.h>
36*0Sstevel@tonic-gate #include <sys/conf.h>
37*0Sstevel@tonic-gate #include <sys/proc.h>
38*0Sstevel@tonic-gate #include <sys/cmn_err.h>
39*0Sstevel@tonic-gate #include <sys/fssnap_if.h>
40*0Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
41*0Sstevel@tonic-gate #include <sys/fs/ufs_filio.h>
42*0Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
43*0Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
44*0Sstevel@tonic-gate #include <sys/atomic.h>
45*0Sstevel@tonic-gate 
46*0Sstevel@tonic-gate extern int		maxphys;
47*0Sstevel@tonic-gate extern uint_t		bypass_snapshot_throttle_key;
48*0Sstevel@tonic-gate 
49*0Sstevel@tonic-gate extern struct kmem_cache	*lufs_sv;
50*0Sstevel@tonic-gate extern struct kmem_cache	*lufs_bp;
51*0Sstevel@tonic-gate 
52*0Sstevel@tonic-gate static void
53*0Sstevel@tonic-gate makebusy(ml_unit_t *ul, buf_t *bp)
54*0Sstevel@tonic-gate {
55*0Sstevel@tonic-gate 	sema_p(&bp->b_sem);
56*0Sstevel@tonic-gate 	if ((bp->b_flags & B_ERROR) == 0)
57*0Sstevel@tonic-gate 		return;
58*0Sstevel@tonic-gate 	if (bp->b_flags & B_READ)
59*0Sstevel@tonic-gate 		ldl_seterror(ul, "Error reading ufs log");
60*0Sstevel@tonic-gate 	else
61*0Sstevel@tonic-gate 		ldl_seterror(ul, "Error writing ufs log");
62*0Sstevel@tonic-gate }
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate static int
65*0Sstevel@tonic-gate logdone(buf_t *bp)
66*0Sstevel@tonic-gate {
67*0Sstevel@tonic-gate 	bp->b_flags |= B_DONE;
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate 	if (bp->b_flags & B_WRITE)
70*0Sstevel@tonic-gate 		sema_v(&bp->b_sem);
71*0Sstevel@tonic-gate 	else
72*0Sstevel@tonic-gate 		/* wakeup the thread waiting on this buf */
73*0Sstevel@tonic-gate 		sema_v(&bp->b_io);
74*0Sstevel@tonic-gate 	return (0);
75*0Sstevel@tonic-gate }
76*0Sstevel@tonic-gate 
/*
 * Iodone handler for each cloned child buf issued by ldl_strategy().
 * Accounts this child's bytes against the parent IO's outstanding count
 * and, on the last child, propagates errors and biodone()s the original
 * buffer.  Returns 1 while children remain in flight, 0 on final child.
 */
static int
ldl_strategy_done(buf_t *cb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*bp;

	ASSERT(SEMA_HELD(&cb->b_sem));
	ASSERT((cb->b_flags & B_DONE) == 0);

	/*
	 * Compute address of the ``save'' struct.  NOTE(review): the cast
	 * presumably relies on the embedded buf being the first member of
	 * lufs_buf_t — confirm against the struct definition.
	 */
	lbp = (lufs_buf_t *)cb;
	sv = (lufs_save_t *)lbp->lb_ptr;

	if (cb->b_flags & B_ERROR)
		sv->sv_error = 1;

	/*
	 * Atomically subtract this child's byte count from the bytes
	 * still outstanding.  A non-zero result means other child IOs
	 * are in flight: free only this clone; the parent is not done.
	 */
	if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
		kmem_cache_free(lufs_bp, lbp);
		return (1);
	}
	/*
	 * Last request: propagate any errors back to the original buffer
	 * header, release the resources, and ``done'' the parent.
	 */
	bp = sv->sv_bp;
	if (sv->sv_error)
		bp->b_flags |= B_ERROR;
	kmem_cache_free(lufs_bp, lbp);
	kmem_cache_free(lufs_sv, sv);

	biodone(bp);
	return (0);
}
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate /*
116*0Sstevel@tonic-gate  * Map the log logical block number to a physical disk block number
117*0Sstevel@tonic-gate  */
118*0Sstevel@tonic-gate static int
119*0Sstevel@tonic-gate map_frag(
120*0Sstevel@tonic-gate 	ml_unit_t	*ul,
121*0Sstevel@tonic-gate 	daddr_t		lblkno,
122*0Sstevel@tonic-gate 	size_t		bcount,
123*0Sstevel@tonic-gate 	daddr_t		*pblkno,
124*0Sstevel@tonic-gate 	size_t		*pbcount)
125*0Sstevel@tonic-gate {
126*0Sstevel@tonic-gate 	ic_extent_t	*ext = ul->un_ebp->ic_extents;
127*0Sstevel@tonic-gate 	uint32_t	e = ul->un_ebp->ic_nextents;
128*0Sstevel@tonic-gate 	uint32_t	s = 0;
129*0Sstevel@tonic-gate 	uint32_t	i = e >> 1;
130*0Sstevel@tonic-gate 	uint32_t	lasti = i;
131*0Sstevel@tonic-gate 	uint32_t	bno_off;
132*0Sstevel@tonic-gate 
133*0Sstevel@tonic-gate again:
134*0Sstevel@tonic-gate 	if (ext[i].ic_lbno <= lblkno) {
135*0Sstevel@tonic-gate 		if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
136*0Sstevel@tonic-gate 			/* FOUND IT */
137*0Sstevel@tonic-gate 			bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
138*0Sstevel@tonic-gate 			*pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
139*0Sstevel@tonic-gate 			*pblkno = ext[i].ic_pbno + bno_off;
140*0Sstevel@tonic-gate 			return (0);
141*0Sstevel@tonic-gate 		} else
142*0Sstevel@tonic-gate 			s = i;
143*0Sstevel@tonic-gate 	} else
144*0Sstevel@tonic-gate 		e = i;
145*0Sstevel@tonic-gate 	i = s + ((e - s) >> 1);
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate 	if (i == lasti) {
148*0Sstevel@tonic-gate 		*pbcount = bcount;
149*0Sstevel@tonic-gate 		return (ENOENT);
150*0Sstevel@tonic-gate 	}
151*0Sstevel@tonic-gate 	lasti = i;
152*0Sstevel@tonic-gate 
153*0Sstevel@tonic-gate 	goto again;
154*0Sstevel@tonic-gate }
155*0Sstevel@tonic-gate 
/*
 * The log is a set of extents (which typically will be only one, but
 * may be more if the disk was close to full when the log was created)
 * and hence the logical offsets into the log
 * have to be translated into their real device locations before
 * calling the device's strategy routine. The translation may result
 * in several IO requests if this request spans extents.
 *
 * pb is the parent buf; completion of all cloned children is tracked
 * through a lufs_save_t and finished in ldl_strategy_done().
 */
void
ldl_strategy(ml_unit_t *ul, buf_t *pb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*cb;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
	daddr_t		lblkno, pblkno;
	size_t		nb_left, pbcount;
	off_t		offset;
	dev_t		dev	= ul->un_dev;
	int		error;
	int		read = pb->b_flags & B_READ;

	/*
	 * Allocate and initialise the save stucture that tracks how many
	 * bytes of the parent IO are still outstanding.
	 */
	sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
	sv->sv_error = 0;
	sv->sv_bp = pb;
	nb_left = pb->b_bcount;
	sv->sv_nb_left = nb_left;

	lblkno = pb->b_blkno;
	offset = 0;

	do {
		/* translate the next run of log blocks to a device extent */
		error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);

		lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
		bioinit(&lbp->lb_buf);
		lbp->lb_ptr = sv;

		cb = bioclone(pb, offset, pbcount, dev,
		    pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);

		offset += pbcount;
		lblkno += btodb(pbcount);
		nb_left -= pbcount;

		if (error) {
			/* translation failed: fail this child immediately */
			cb->b_flags |= B_ERROR;
			cb->b_resid = cb->b_bcount;
			biodone(cb);
		} else {
			if (read) {
				logstats.ls_ldlreads.value.ui64++;
				ufsvfsp->vfs_iotstamp = lbolt;
				lwp_stat_update(LWP_STAT_INBLK, 1);
			} else {
				logstats.ls_ldlwrites.value.ui64++;
				lwp_stat_update(LWP_STAT_OUBLK, 1);
			}

			/*
			 * write through the snapshot driver if necessary
			 * We do not want this write to be throttled because
			 * we are holding the un_log mutex here. If we
			 * are throttled in fssnap_translate, the fssnap_taskq
			 * thread which can wake us up can get blocked on
			 * the un_log mutex resulting in a deadlock.
			 */
			if (ufsvfsp->vfs_snapshot) {
				(void) tsd_set(bypass_snapshot_throttle_key, \
							(void *)1);
				fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);

				(void) tsd_set(bypass_snapshot_throttle_key, \
							(void *)0);
			} else {
				(void) bdev_strategy(cb);
			}
		}

	} while (nb_left);
}
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate static void
242*0Sstevel@tonic-gate writelog(ml_unit_t *ul, buf_t *bp)
243*0Sstevel@tonic-gate {
244*0Sstevel@tonic-gate 	ASSERT(SEMA_HELD(&bp->b_sem));
245*0Sstevel@tonic-gate 
246*0Sstevel@tonic-gate 	/*
247*0Sstevel@tonic-gate 	 * This is really an B_ASYNC write but we want Presto to
248*0Sstevel@tonic-gate 	 * cache this write.  The iodone routine, logdone, processes
249*0Sstevel@tonic-gate 	 * the buf correctly.
250*0Sstevel@tonic-gate 	 */
251*0Sstevel@tonic-gate 	bp->b_flags = B_WRITE;
252*0Sstevel@tonic-gate 	bp->b_edev = ul->un_dev;
253*0Sstevel@tonic-gate 	bp->b_iodone = logdone;
254*0Sstevel@tonic-gate 
255*0Sstevel@tonic-gate 	/*
256*0Sstevel@tonic-gate 	 * return EIO for every IO if in hard error state
257*0Sstevel@tonic-gate 	 */
258*0Sstevel@tonic-gate 	if (ul->un_flags & LDL_ERROR) {
259*0Sstevel@tonic-gate 		bp->b_flags |= B_ERROR;
260*0Sstevel@tonic-gate 		bp->b_error = EIO;
261*0Sstevel@tonic-gate 		biodone(bp);
262*0Sstevel@tonic-gate 		return;
263*0Sstevel@tonic-gate 	}
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate 	ldl_strategy(ul, bp);
266*0Sstevel@tonic-gate }
267*0Sstevel@tonic-gate 
268*0Sstevel@tonic-gate static void
269*0Sstevel@tonic-gate readlog(ml_unit_t *ul, buf_t *bp)
270*0Sstevel@tonic-gate {
271*0Sstevel@tonic-gate 	ASSERT(SEMA_HELD(&bp->b_sem));
272*0Sstevel@tonic-gate 	ASSERT(bp->b_bcount);
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate 	bp->b_flags = B_READ;
275*0Sstevel@tonic-gate 	bp->b_edev = ul->un_dev;
276*0Sstevel@tonic-gate 	bp->b_iodone = logdone;
277*0Sstevel@tonic-gate 
278*0Sstevel@tonic-gate 	/* all IO returns errors when in error state */
279*0Sstevel@tonic-gate 	if (ul->un_flags & LDL_ERROR) {
280*0Sstevel@tonic-gate 		bp->b_flags |= B_ERROR;
281*0Sstevel@tonic-gate 		bp->b_error = EIO;
282*0Sstevel@tonic-gate 		biodone(bp);
283*0Sstevel@tonic-gate 		(void) trans_wait(bp);
284*0Sstevel@tonic-gate 		return;
285*0Sstevel@tonic-gate 	}
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate 	ldl_strategy(ul, bp);
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 	if (trans_wait(bp))
290*0Sstevel@tonic-gate 		ldl_seterror(ul, "Error reading ufs log");
291*0Sstevel@tonic-gate }
292*0Sstevel@tonic-gate 
/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 *
 * Async-write the current dirty buf.  If the buf has unused memory
 * beyond b_bcount, hand that tail of memory to a fresh bp inserted
 * just after the written one so the next fill can use it.
 */
static void
push_dirty_bp(ml_unit_t *ul, buf_t *bp)
{
	buf_t		*newbp;
	cirbuf_t	*cb		= &ul->un_wrbuf;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
	ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);

	/*
	 * async write the buf
	 */
	writelog(ul, bp);

	/*
	 * no longer filling any buf
	 */
	cb->cb_dirty = NULL;

	/*
	 * no extra buffer space; all done
	 */
	if (bp->b_bcount == bp->b_bufsize)
		return;

	/*
	 * give extra buffer space to a new bp
	 * 	try to take buf off of free list, else allocate one
	 */
	if ((newbp = cb->cb_free) != NULL) {
		cb->cb_free = newbp->b_forw;
	} else {
		newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	}
	newbp->b_flags = 0;
	newbp->b_bcount = 0;
	newbp->b_file = NULL;
	newbp->b_offset = -1;
	/* the new bp takes over the unused tail of the old bp's memory */
	newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
	newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
	bp->b_bufsize = bp->b_bcount;

	/*
	 * lock out readers and put new buf at LRU position
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	newbp->b_forw = bp->b_forw;
	newbp->b_back = bp;
	bp->b_forw->b_back = newbp;
	bp->b_forw = newbp;
	rw_exit(&cb->cb_rwlock);
}
353*0Sstevel@tonic-gate 
/*
 * Invalidate any cached buf in the circular buffer whose contents
 * overlap the log byte range [lof, lof + nb).  The dirty buf being
 * filled and empty bufs are left alone.
 */
static void
inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
{
	buf_t		*bp;
	off_t		elof	= lof + nb;
	off_t		buflof;
	off_t		bufelof;

	/*
	 * discard all bufs that overlap the range (lof, lof + nb)
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp;
	do {
		if (bp == cb->cb_dirty || bp->b_bcount == 0) {
			bp = bp->b_forw;
			continue;
		}
		buflof = dbtob(bp->b_blkno);
		bufelof = buflof + bp->b_bcount;
		/* skip bufs entirely before or entirely after the range */
		if ((buflof < lof && bufelof <= lof) ||
		    (buflof >= elof && bufelof > elof)) {
			bp = bp->b_forw;
			continue;
		}
		/* acquire the buf (waits for any holder), mark it empty */
		makebusy(ul, bp);
		bp->b_flags = 0;
		bp->b_bcount = 0;
		sema_v(&bp->b_sem);
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}
387*0Sstevel@tonic-gate 
/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 *
 * Return the buf the next log write should fill, made busy for the
 * caller: the buf currently being filled (cb_dirty) if there is one,
 * otherwise the LRU buf re-targeted at the current log tail.
 */
static buf_t *
get_write_bp(ml_unit_t *ul)
{
	cirbuf_t	*cb = &ul->un_wrbuf;
	buf_t		*bp;

	/*
	 * cb_dirty is the buffer we are currently filling; if any
	 */
	if ((bp = cb->cb_dirty) != NULL) {
		makebusy(ul, bp);
		return (bp);
	}
	/*
	 * discard any bp that overlaps the current tail since we are
	 * about to overwrite it.
	 */
	inval_range(ul, cb, ul->un_tail_lof, 1);

	/*
	 * steal LRU buf
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);

	/* this buf becomes both the dirty buf and the list head */
	cb->cb_dirty = bp;
	cb->cb_bp = bp;

	bp->b_flags = 0;
	bp->b_bcount = 0;
	bp->b_blkno = btodb(ul->un_tail_lof);
	ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
	rw_exit(&cb->cb_rwlock);

	/*
	 * NOTE:
	 *	1. un_tail_lof never addresses >= un_eol_lof
	 *	2. b_blkno + btodb(b_bufsize) may > un_eol_lof
	 *		this case is handled in storebuf
	 */
	return (bp);
}
437*0Sstevel@tonic-gate 
438*0Sstevel@tonic-gate void
439*0Sstevel@tonic-gate alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
440*0Sstevel@tonic-gate {
441*0Sstevel@tonic-gate 	int	i;
442*0Sstevel@tonic-gate 	buf_t	*bp;
443*0Sstevel@tonic-gate 
444*0Sstevel@tonic-gate 	/*
445*0Sstevel@tonic-gate 	 * Clear previous allocation
446*0Sstevel@tonic-gate 	 */
447*0Sstevel@tonic-gate 	if (cb->cb_nb)
448*0Sstevel@tonic-gate 		free_cirbuf(cb);
449*0Sstevel@tonic-gate 
450*0Sstevel@tonic-gate 	bzero(cb, sizeof (*cb));
451*0Sstevel@tonic-gate 	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);
452*0Sstevel@tonic-gate 
453*0Sstevel@tonic-gate 	rw_enter(&cb->cb_rwlock, RW_WRITER);
454*0Sstevel@tonic-gate 
455*0Sstevel@tonic-gate 	/*
456*0Sstevel@tonic-gate 	 * preallocate 3 bp's and put them on the free list.
457*0Sstevel@tonic-gate 	 */
458*0Sstevel@tonic-gate 	for (i = 0; i < 3; ++i) {
459*0Sstevel@tonic-gate 		bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
460*0Sstevel@tonic-gate 		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
461*0Sstevel@tonic-gate 		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
462*0Sstevel@tonic-gate 		bp->b_offset = -1;
463*0Sstevel@tonic-gate 		bp->b_forw = cb->cb_free;
464*0Sstevel@tonic-gate 		cb->cb_free = bp;
465*0Sstevel@tonic-gate 	}
466*0Sstevel@tonic-gate 
467*0Sstevel@tonic-gate 	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
468*0Sstevel@tonic-gate 	cb->cb_nb = bufsize;
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate 	/*
471*0Sstevel@tonic-gate 	 * first bp claims entire write buffer
472*0Sstevel@tonic-gate 	 */
473*0Sstevel@tonic-gate 	bp = cb->cb_free;
474*0Sstevel@tonic-gate 	cb->cb_free = bp->b_forw;
475*0Sstevel@tonic-gate 
476*0Sstevel@tonic-gate 	bp->b_forw = bp;
477*0Sstevel@tonic-gate 	bp->b_back = bp;
478*0Sstevel@tonic-gate 	cb->cb_bp = bp;
479*0Sstevel@tonic-gate 	bp->b_un.b_addr = cb->cb_va;
480*0Sstevel@tonic-gate 	bp->b_bufsize = cb->cb_nb;
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate 	rw_exit(&cb->cb_rwlock);
483*0Sstevel@tonic-gate }
484*0Sstevel@tonic-gate 
/*
 * Allocate the read-side circular buffer: one contiguous chunk of
 * bufsize bytes carved into bufs of at most blksize bytes each,
 * linked into a circular list.
 */
void
alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
{
	caddr_t	va;
	size_t	nb;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * preallocate N bufs that are hard-sized to blksize
	 *	in other words, the read buffer pool is a linked list
	 *	of statically sized bufs.
	 */
	va = cb->cb_va;
	while ((nb = bufsize) != 0) {
		if (nb > blksize)
			nb = blksize;
		bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
		bzero(bp, sizeof (buf_t));
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_un.b_addr = va;
		bp->b_bufsize = nb;
		if (cb->cb_bp) {
			/* splice the new buf in after the current head */
			bp->b_forw = cb->cb_bp->b_forw;
			bp->b_back = cb->cb_bp;
			cb->cb_bp->b_forw->b_back = bp;
			cb->cb_bp->b_forw = bp;
		} else
			bp->b_forw = bp->b_back = bp;
		cb->cb_bp = bp;
		bufsize -= nb;
		va += nb;
	}

	rw_exit(&cb->cb_rwlock);
}
535*0Sstevel@tonic-gate 
536*0Sstevel@tonic-gate void
537*0Sstevel@tonic-gate free_cirbuf(cirbuf_t *cb)
538*0Sstevel@tonic-gate {
539*0Sstevel@tonic-gate 	buf_t	*bp;
540*0Sstevel@tonic-gate 
541*0Sstevel@tonic-gate 	if (cb->cb_nb == 0)
542*0Sstevel@tonic-gate 		return;
543*0Sstevel@tonic-gate 
544*0Sstevel@tonic-gate 	rw_enter(&cb->cb_rwlock, RW_WRITER);
545*0Sstevel@tonic-gate 	ASSERT(cb->cb_dirty == NULL);
546*0Sstevel@tonic-gate 
547*0Sstevel@tonic-gate 	/*
548*0Sstevel@tonic-gate 	 * free the active bufs
549*0Sstevel@tonic-gate 	 */
550*0Sstevel@tonic-gate 	while ((bp = cb->cb_bp) != NULL) {
551*0Sstevel@tonic-gate 		if (bp == bp->b_forw)
552*0Sstevel@tonic-gate 			cb->cb_bp = NULL;
553*0Sstevel@tonic-gate 		else
554*0Sstevel@tonic-gate 			cb->cb_bp = bp->b_forw;
555*0Sstevel@tonic-gate 		bp->b_back->b_forw = bp->b_forw;
556*0Sstevel@tonic-gate 		bp->b_forw->b_back = bp->b_back;
557*0Sstevel@tonic-gate 		sema_destroy(&bp->b_sem);
558*0Sstevel@tonic-gate 		sema_destroy(&bp->b_io);
559*0Sstevel@tonic-gate 		kmem_free(bp, sizeof (buf_t));
560*0Sstevel@tonic-gate 	}
561*0Sstevel@tonic-gate 
562*0Sstevel@tonic-gate 	/*
563*0Sstevel@tonic-gate 	 * free the free bufs
564*0Sstevel@tonic-gate 	 */
565*0Sstevel@tonic-gate 	while ((bp = cb->cb_free) != NULL) {
566*0Sstevel@tonic-gate 		cb->cb_free = bp->b_forw;
567*0Sstevel@tonic-gate 		sema_destroy(&bp->b_sem);
568*0Sstevel@tonic-gate 		sema_destroy(&bp->b_io);
569*0Sstevel@tonic-gate 		kmem_free(bp, sizeof (buf_t));
570*0Sstevel@tonic-gate 	}
571*0Sstevel@tonic-gate 	kmem_free(cb->cb_va, cb->cb_nb);
572*0Sstevel@tonic-gate 	cb->cb_va = NULL;
573*0Sstevel@tonic-gate 	cb->cb_nb = 0;
574*0Sstevel@tonic-gate 	rw_exit(&cb->cb_rwlock);
575*0Sstevel@tonic-gate 	rw_destroy(&cb->cb_rwlock);
576*0Sstevel@tonic-gate }
577*0Sstevel@tonic-gate 
578*0Sstevel@tonic-gate static int
579*0Sstevel@tonic-gate within_range(off_t lof, daddr_t blkno, ulong_t bcount)
580*0Sstevel@tonic-gate {
581*0Sstevel@tonic-gate 	off_t	blof	= dbtob(blkno);
582*0Sstevel@tonic-gate 
583*0Sstevel@tonic-gate 	return ((lof >= blof) && (lof < (blof + bcount)));
584*0Sstevel@tonic-gate }
585*0Sstevel@tonic-gate 
/*
 * Search a circular buffer for a buf whose cached range contains the
 * log offset lof.  On a hit the buf is returned busy (the caller must
 * release it); returns NULL when no buf covers lof.
 */
static buf_t *
find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t *bp;

	/*
	 * find a buf that contains the offset lof
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bp = cb->cb_bp;
	do {
		if (bp->b_bcount &&
		    within_range(lof, bp->b_blkno, bp->b_bcount)) {
			makebusy(ul, bp);
			rw_exit(&cb->cb_rwlock);
			return (bp);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);

	return (NULL);
}
609*0Sstevel@tonic-gate 
/*
 * Compute the exclusive upper bound for a read starting at log offset
 * lof.  We mustn't:
 *	o read past eol
 *	o read past the tail
 *	o read data that may be being written.
 * The bound is the start of the first non-empty write buf found (or
 * the tail if all are empty); if lof already lies at or past that
 * point, the read may instead extend to end-of-log.
 */
static off_t
find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp, *bpend;
	off_t	rlof;

	rw_enter(&cb->cb_rwlock, RW_READER);
	/* start the scan at the buf after the head and go around once */
	bpend = bp = cb->cb_bp->b_forw;
	rlof = ul->un_tail_lof;
	do {
		if (bp->b_bcount) {
			rlof = dbtob(bp->b_blkno);
			break;
		}
		bp = bp->b_forw;
	} while (bp != bpend);
	rw_exit(&cb->cb_rwlock);

	if (lof <= rlof)
		/* lof is prior to the range represented by the write buf */
		return (rlof);
	else
		/* lof follows the range represented by the write buf */
		return ((off_t)ul->un_eol_lof);
}
641*0Sstevel@tonic-gate 
/*
 * Return a busy buf containing log data at offset lof.  First try the
 * in-core write and read buffers; on a miss, steal the LRU read buf
 * and fill it from the log device, without reading past the tail or
 * end-of-log.
 */
static buf_t *
get_read_bp(ml_unit_t *ul, off_t lof)
{
	cirbuf_t	*cb;
	buf_t		*bp;
	off_t		rlof;

	/*
	 * retrieve as much data as possible from the incore buffers
	 */
	if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}
	if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}

	/*
	 * steal the LRU buf
	 */
	cb = &ul->un_rdbuf;
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);
	bp->b_flags = 0;
	bp->b_bcount = 0;
	cb->cb_bp = bp;
	rw_exit(&cb->cb_rwlock);

	/*
	 * don't read past the tail or the end-of-log
	 */
	bp->b_blkno = btodb(lof);
	lof = dbtob(bp->b_blkno);	/* round lof down to a block boundary */
	rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
	bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
	readlog(ul, bp);
	return (bp);
}
683*0Sstevel@tonic-gate 
/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 *
 * Try to grow the dirty buf by merging in the next buf on the circle
 * when its memory is physically adjacent.  Returns 1 if bp was
 * extended, 0 otherwise.  bp is busy on entry and remains busy on
 * return.
 */
static int
extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
{
	buf_t	*bpforw	= bp->b_forw;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);

	/*
	 * there is no `next' bp; do nothing
	 */
	if (bpforw == bp)
		return (0);

	/*
	 * buffer space is not adjacent; do nothing
	 */
	if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
		return (0);

	/*
	 * locking protocol requires giving up any bp locks before
	 * acquiring cb_rwlock.  This is okay because we hold
	 * un_log_mutex.
	 */
	sema_v(&bp->b_sem);

	/*
	 * lock out ldl_read
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * wait for current IO to finish w/next bp; if necessary
	 */
	makebusy(ul, bpforw);

	/*
	 * unlink the next bp, steal its buffer space, and move it to
	 * the free list; then re-acquire bp before returning.
	 */
	bp->b_forw = bpforw->b_forw;
	bpforw->b_forw->b_back = bp;
	bp->b_bufsize += bpforw->b_bufsize;
	sema_v(&bpforw->b_sem);
	bpforw->b_forw = cb->cb_free;
	cb->cb_free = bpforw;
	makebusy(ul, bp);
	rw_exit(&cb->cb_rwlock);

	return (1);
}
740*0Sstevel@tonic-gate 
741*0Sstevel@tonic-gate static size_t
742*0Sstevel@tonic-gate storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
743*0Sstevel@tonic-gate {
744*0Sstevel@tonic-gate 	size_t		copy_nb;
745*0Sstevel@tonic-gate 	size_t		nb_in_sec;
746*0Sstevel@tonic-gate 	sect_trailer_t	*st;
747*0Sstevel@tonic-gate 	size_t		nb_left = nb;
748*0Sstevel@tonic-gate 	cirbuf_t	*cb	= &ul->un_wrbuf;
749*0Sstevel@tonic-gate 
750*0Sstevel@tonic-gate again:
751*0Sstevel@tonic-gate 	nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
752*0Sstevel@tonic-gate 	copy_nb = MIN(nb_left, nb_in_sec);
753*0Sstevel@tonic-gate 
754*0Sstevel@tonic-gate 	ASSERT(copy_nb);
755*0Sstevel@tonic-gate 
756*0Sstevel@tonic-gate 	bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
757*0Sstevel@tonic-gate 	bp->b_bcount += copy_nb;
758*0Sstevel@tonic-gate 	va += copy_nb;
759*0Sstevel@tonic-gate 	nb_left -= copy_nb;
760*0Sstevel@tonic-gate 	ul->un_tail_lof += copy_nb;
761*0Sstevel@tonic-gate 
762*0Sstevel@tonic-gate 	if ((nb_in_sec -= copy_nb) == 0) {
763*0Sstevel@tonic-gate 		st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);
764*0Sstevel@tonic-gate 
765*0Sstevel@tonic-gate 		st->st_tid = ul->un_logmap->mtm_tid;
766*0Sstevel@tonic-gate 		st->st_ident = ul->un_tail_ident++;
767*0Sstevel@tonic-gate 		bp->b_bcount += sizeof (sect_trailer_t);
768*0Sstevel@tonic-gate 		ul->un_tail_lof += sizeof (sect_trailer_t);
769*0Sstevel@tonic-gate 		/*
770*0Sstevel@tonic-gate 		 * log wrapped; async write this bp
771*0Sstevel@tonic-gate 		 */
772*0Sstevel@tonic-gate 		if (ul->un_tail_lof == ul->un_eol_lof) {
773*0Sstevel@tonic-gate 			ul->un_tail_lof = ul->un_bol_lof;
774*0Sstevel@tonic-gate 			push_dirty_bp(ul, bp);
775*0Sstevel@tonic-gate 			return (nb - nb_left);
776*0Sstevel@tonic-gate 		}
777*0Sstevel@tonic-gate 		/*
778*0Sstevel@tonic-gate 		 * out of bp space; get more or async write buf
779*0Sstevel@tonic-gate 		 */
780*0Sstevel@tonic-gate 		if (bp->b_bcount == bp->b_bufsize) {
781*0Sstevel@tonic-gate 			if (!extend_write_bp(ul, cb, bp)) {
782*0Sstevel@tonic-gate 				push_dirty_bp(ul, bp);
783*0Sstevel@tonic-gate 				return (nb - nb_left);
784*0Sstevel@tonic-gate 			}
785*0Sstevel@tonic-gate 		}
786*0Sstevel@tonic-gate 	}
787*0Sstevel@tonic-gate 	if (nb_left)
788*0Sstevel@tonic-gate 		goto again;
789*0Sstevel@tonic-gate 
790*0Sstevel@tonic-gate 	sema_v(&bp->b_sem);
791*0Sstevel@tonic-gate 	return (nb);
792*0Sstevel@tonic-gate }
793*0Sstevel@tonic-gate 
794*0Sstevel@tonic-gate static void
795*0Sstevel@tonic-gate fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
796*0Sstevel@tonic-gate {
797*0Sstevel@tonic-gate 	offset_t	src_mof	= me->me_mof;
798*0Sstevel@tonic-gate 	size_t		src_nb	= me->me_nb;
799*0Sstevel@tonic-gate 
800*0Sstevel@tonic-gate 	if (src_mof > dst_mof) {
801*0Sstevel@tonic-gate 		ASSERT(src_mof < (dst_mof + dst_nb));
802*0Sstevel@tonic-gate 		dst_va += (src_mof - dst_mof);
803*0Sstevel@tonic-gate 		dst_nb -= (src_mof - dst_mof);
804*0Sstevel@tonic-gate 	} else {
805*0Sstevel@tonic-gate 		ASSERT(dst_mof < (src_mof + src_nb));
806*0Sstevel@tonic-gate 		src_nb -= (dst_mof - src_mof);
807*0Sstevel@tonic-gate 	}
808*0Sstevel@tonic-gate 
809*0Sstevel@tonic-gate 	src_nb = MIN(src_nb, dst_nb);
810*0Sstevel@tonic-gate 	ASSERT(src_nb);
811*0Sstevel@tonic-gate 	bzero(dst_va, src_nb);
812*0Sstevel@tonic-gate }
813*0Sstevel@tonic-gate 
814*0Sstevel@tonic-gate /*
815*0Sstevel@tonic-gate  * dst_va == NULL means don't copy anything
816*0Sstevel@tonic-gate  */
817*0Sstevel@tonic-gate static ulong_t
818*0Sstevel@tonic-gate fetchbuf(
819*0Sstevel@tonic-gate 	ml_unit_t *ul,
820*0Sstevel@tonic-gate 	buf_t *bp,
821*0Sstevel@tonic-gate 	caddr_t dst_va,
822*0Sstevel@tonic-gate 	size_t dst_nb,
823*0Sstevel@tonic-gate 	off_t *dst_lofp)
824*0Sstevel@tonic-gate {
825*0Sstevel@tonic-gate 	caddr_t	copy_va;
826*0Sstevel@tonic-gate 	size_t	copy_nb;
827*0Sstevel@tonic-gate 	size_t	nb_sec;
828*0Sstevel@tonic-gate 	off_t	dst_lof		= *dst_lofp;
829*0Sstevel@tonic-gate 	ulong_t	sav_dst_nb	= dst_nb;
830*0Sstevel@tonic-gate 	ulong_t	src_nb		= bp->b_bcount;
831*0Sstevel@tonic-gate 	off_t	src_lof		= dbtob(bp->b_blkno);
832*0Sstevel@tonic-gate 	off_t	src_elof	= src_lof + src_nb;
833*0Sstevel@tonic-gate 	caddr_t	src_va		= bp->b_un.b_addr;
834*0Sstevel@tonic-gate 
835*0Sstevel@tonic-gate 	/*
836*0Sstevel@tonic-gate 	 * copy from bp to dst_va
837*0Sstevel@tonic-gate 	 */
838*0Sstevel@tonic-gate 	while (dst_nb) {
839*0Sstevel@tonic-gate 		/*
840*0Sstevel@tonic-gate 		 * compute address within bp
841*0Sstevel@tonic-gate 		 */
842*0Sstevel@tonic-gate 		copy_va = src_va + (dst_lof - src_lof);
843*0Sstevel@tonic-gate 
844*0Sstevel@tonic-gate 		/*
845*0Sstevel@tonic-gate 		 * adjust copy size to amount of data in bp
846*0Sstevel@tonic-gate 		 */
847*0Sstevel@tonic-gate 		copy_nb = MIN(dst_nb, src_elof - dst_lof);
848*0Sstevel@tonic-gate 
849*0Sstevel@tonic-gate 		/*
850*0Sstevel@tonic-gate 		 * adjust copy size to amount of data in sector
851*0Sstevel@tonic-gate 		 */
852*0Sstevel@tonic-gate 		nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
853*0Sstevel@tonic-gate 		copy_nb = MIN(copy_nb, nb_sec);
854*0Sstevel@tonic-gate 
855*0Sstevel@tonic-gate 		/*
856*0Sstevel@tonic-gate 		 * dst_va == NULL means don't do copy (see logseek())
857*0Sstevel@tonic-gate 		 */
858*0Sstevel@tonic-gate 		if (dst_va) {
859*0Sstevel@tonic-gate 			bcopy(copy_va, dst_va, copy_nb);
860*0Sstevel@tonic-gate 			dst_va += copy_nb;
861*0Sstevel@tonic-gate 		}
862*0Sstevel@tonic-gate 		dst_lof += copy_nb;
863*0Sstevel@tonic-gate 		dst_nb -= copy_nb;
864*0Sstevel@tonic-gate 		nb_sec -= copy_nb;
865*0Sstevel@tonic-gate 
866*0Sstevel@tonic-gate 		/*
867*0Sstevel@tonic-gate 		 * advance over sector trailer
868*0Sstevel@tonic-gate 		 */
869*0Sstevel@tonic-gate 		if (nb_sec == 0)
870*0Sstevel@tonic-gate 			dst_lof += sizeof (sect_trailer_t);
871*0Sstevel@tonic-gate 
872*0Sstevel@tonic-gate 		/*
873*0Sstevel@tonic-gate 		 * exhausted buffer
874*0Sstevel@tonic-gate 		 *	return current lof for next read
875*0Sstevel@tonic-gate 		 */
876*0Sstevel@tonic-gate 		if (dst_lof == src_elof) {
877*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
878*0Sstevel@tonic-gate 			if (dst_lof == ul->un_eol_lof)
879*0Sstevel@tonic-gate 				dst_lof = ul->un_bol_lof;
880*0Sstevel@tonic-gate 			*dst_lofp = dst_lof;
881*0Sstevel@tonic-gate 			return (sav_dst_nb - dst_nb);
882*0Sstevel@tonic-gate 		}
883*0Sstevel@tonic-gate 	}
884*0Sstevel@tonic-gate 
885*0Sstevel@tonic-gate 	/*
886*0Sstevel@tonic-gate 	 * copy complete - return current lof
887*0Sstevel@tonic-gate 	 */
888*0Sstevel@tonic-gate 	sema_v(&bp->b_sem);
889*0Sstevel@tonic-gate 	*dst_lofp = dst_lof;
890*0Sstevel@tonic-gate 	return (sav_dst_nb);
891*0Sstevel@tonic-gate }
892*0Sstevel@tonic-gate 
893*0Sstevel@tonic-gate void
894*0Sstevel@tonic-gate ldl_round_commit(ml_unit_t *ul)
895*0Sstevel@tonic-gate {
896*0Sstevel@tonic-gate 	int		wrapped;
897*0Sstevel@tonic-gate 	buf_t		*bp;
898*0Sstevel@tonic-gate 	sect_trailer_t	*st;
899*0Sstevel@tonic-gate 	size_t		bcount;
900*0Sstevel@tonic-gate 	cirbuf_t	*cb	= &ul->un_wrbuf;
901*0Sstevel@tonic-gate 
902*0Sstevel@tonic-gate 	/*
903*0Sstevel@tonic-gate 	 * if nothing to write; then do nothing
904*0Sstevel@tonic-gate 	 */
905*0Sstevel@tonic-gate 	if ((bp = cb->cb_dirty) == NULL)
906*0Sstevel@tonic-gate 		return;
907*0Sstevel@tonic-gate 	makebusy(ul, bp);
908*0Sstevel@tonic-gate 
909*0Sstevel@tonic-gate 	/*
910*0Sstevel@tonic-gate 	 * round up to sector boundary and set new tail
911*0Sstevel@tonic-gate 	 *	don't readjust st_ident if buf is already rounded
912*0Sstevel@tonic-gate 	 */
913*0Sstevel@tonic-gate 	bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
914*0Sstevel@tonic-gate 	if (bcount == bp->b_bcount) {
915*0Sstevel@tonic-gate 		sema_v(&bp->b_sem);
916*0Sstevel@tonic-gate 		return;
917*0Sstevel@tonic-gate 	}
918*0Sstevel@tonic-gate 	bp->b_bcount = bcount;
919*0Sstevel@tonic-gate 	ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
920*0Sstevel@tonic-gate 	wrapped = 0;
921*0Sstevel@tonic-gate 	if (ul->un_tail_lof == ul->un_eol_lof) {
922*0Sstevel@tonic-gate 		ul->un_tail_lof = ul->un_bol_lof;
923*0Sstevel@tonic-gate 		++wrapped;
924*0Sstevel@tonic-gate 	}
925*0Sstevel@tonic-gate 	ASSERT(ul->un_tail_lof != ul->un_head_lof);
926*0Sstevel@tonic-gate 
927*0Sstevel@tonic-gate 	/*
928*0Sstevel@tonic-gate 	 * fix up the sector trailer
929*0Sstevel@tonic-gate 	 */
930*0Sstevel@tonic-gate 	/* LINTED */
931*0Sstevel@tonic-gate 	st = (sect_trailer_t *)
932*0Sstevel@tonic-gate 		((bp->b_un.b_addr + bcount) - sizeof (*st));
933*0Sstevel@tonic-gate 	st->st_tid = ul->un_logmap->mtm_tid;
934*0Sstevel@tonic-gate 	st->st_ident = ul->un_tail_ident++;
935*0Sstevel@tonic-gate 
936*0Sstevel@tonic-gate 	/*
937*0Sstevel@tonic-gate 	 * if tail wrapped or we have exhausted this buffer
938*0Sstevel@tonic-gate 	 *	async write the buffer
939*0Sstevel@tonic-gate 	 */
940*0Sstevel@tonic-gate 	if (wrapped || bcount == bp->b_bufsize)
941*0Sstevel@tonic-gate 		push_dirty_bp(ul, bp);
942*0Sstevel@tonic-gate 	else
943*0Sstevel@tonic-gate 		sema_v(&bp->b_sem);
944*0Sstevel@tonic-gate }
945*0Sstevel@tonic-gate 
946*0Sstevel@tonic-gate void
947*0Sstevel@tonic-gate ldl_push_commit(ml_unit_t *ul)
948*0Sstevel@tonic-gate {
949*0Sstevel@tonic-gate 	buf_t		*bp;
950*0Sstevel@tonic-gate 	cirbuf_t	*cb	= &ul->un_wrbuf;
951*0Sstevel@tonic-gate 
952*0Sstevel@tonic-gate 	/*
953*0Sstevel@tonic-gate 	 * if nothing to write; then do nothing
954*0Sstevel@tonic-gate 	 */
955*0Sstevel@tonic-gate 	if ((bp = cb->cb_dirty) == NULL)
956*0Sstevel@tonic-gate 		return;
957*0Sstevel@tonic-gate 	makebusy(ul, bp);
958*0Sstevel@tonic-gate 	push_dirty_bp(ul, bp);
959*0Sstevel@tonic-gate }
960*0Sstevel@tonic-gate 
961*0Sstevel@tonic-gate int
962*0Sstevel@tonic-gate ldl_need_commit(ml_unit_t *ul)
963*0Sstevel@tonic-gate {
964*0Sstevel@tonic-gate 	return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
965*0Sstevel@tonic-gate }
966*0Sstevel@tonic-gate 
967*0Sstevel@tonic-gate int
968*0Sstevel@tonic-gate ldl_has_space(ml_unit_t *ul, mapentry_t *me)
969*0Sstevel@tonic-gate {
970*0Sstevel@tonic-gate 	off_t	nfb;
971*0Sstevel@tonic-gate 	off_t	nb;
972*0Sstevel@tonic-gate 
973*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
974*0Sstevel@tonic-gate 
975*0Sstevel@tonic-gate 	/*
976*0Sstevel@tonic-gate 	 * Add up the size used by the deltas
977*0Sstevel@tonic-gate 	 * round nb up to a sector length plus an extra sector
978*0Sstevel@tonic-gate 	 *	w/o the extra sector we couldn't distinguish
979*0Sstevel@tonic-gate 	 *	a full log (head == tail) from an empty log (head == tail)
980*0Sstevel@tonic-gate 	 */
981*0Sstevel@tonic-gate 	for (nb = DEV_BSIZE; me; me = me->me_hash) {
982*0Sstevel@tonic-gate 		nb += sizeof (struct delta);
983*0Sstevel@tonic-gate 		if (me->me_dt != DT_CANCEL)
984*0Sstevel@tonic-gate 			nb += me->me_nb;
985*0Sstevel@tonic-gate 	}
986*0Sstevel@tonic-gate 	nb = P2ROUNDUP(nb, DEV_BSIZE);
987*0Sstevel@tonic-gate 
988*0Sstevel@tonic-gate 	if (ul->un_head_lof <= ul->un_tail_lof)
989*0Sstevel@tonic-gate 		nfb = (ul->un_head_lof - ul->un_bol_lof) +
990*0Sstevel@tonic-gate 			(ul->un_eol_lof - ul->un_tail_lof);
991*0Sstevel@tonic-gate 	else
992*0Sstevel@tonic-gate 		nfb = ul->un_head_lof - ul->un_tail_lof;
993*0Sstevel@tonic-gate 
994*0Sstevel@tonic-gate 	return (nb < nfb);
995*0Sstevel@tonic-gate }
996*0Sstevel@tonic-gate 
997*0Sstevel@tonic-gate void
998*0Sstevel@tonic-gate ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
999*0Sstevel@tonic-gate {
1000*0Sstevel@tonic-gate 	buf_t		*bp;
1001*0Sstevel@tonic-gate 	caddr_t		va;
1002*0Sstevel@tonic-gate 	size_t		nb;
1003*0Sstevel@tonic-gate 	size_t		actual;
1004*0Sstevel@tonic-gate 
1005*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1006*0Sstevel@tonic-gate 
1007*0Sstevel@tonic-gate 	/* Write the delta */
1008*0Sstevel@tonic-gate 
1009*0Sstevel@tonic-gate 	nb = sizeof (struct delta);
1010*0Sstevel@tonic-gate 	va = (caddr_t)&me->me_delta;
1011*0Sstevel@tonic-gate 	bp = get_write_bp(ul);
1012*0Sstevel@tonic-gate 
1013*0Sstevel@tonic-gate 	while (nb) {
1014*0Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR) {
1015*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1016*0Sstevel@tonic-gate 			return;
1017*0Sstevel@tonic-gate 		}
1018*0Sstevel@tonic-gate 		actual = storebuf(ul, bp, va, nb);
1019*0Sstevel@tonic-gate 		ASSERT(actual);
1020*0Sstevel@tonic-gate 		va += actual;
1021*0Sstevel@tonic-gate 		nb -= actual;
1022*0Sstevel@tonic-gate 		if (nb)
1023*0Sstevel@tonic-gate 			bp = get_write_bp(ul);
1024*0Sstevel@tonic-gate 	}
1025*0Sstevel@tonic-gate 
1026*0Sstevel@tonic-gate 	/* If a commit, cancel, or 0's; we're almost done */
1027*0Sstevel@tonic-gate 	switch (me->me_dt) {
1028*0Sstevel@tonic-gate 		case DT_COMMIT:
1029*0Sstevel@tonic-gate 		case DT_CANCEL:
1030*0Sstevel@tonic-gate 		case DT_ABZERO:
1031*0Sstevel@tonic-gate 			/* roll needs to know where the next delta will go */
1032*0Sstevel@tonic-gate 			me->me_lof = ul->un_tail_lof;
1033*0Sstevel@tonic-gate 			return;
1034*0Sstevel@tonic-gate 		default:
1035*0Sstevel@tonic-gate 			break;
1036*0Sstevel@tonic-gate 	}
1037*0Sstevel@tonic-gate 
1038*0Sstevel@tonic-gate 	/* Now write the data */
1039*0Sstevel@tonic-gate 
1040*0Sstevel@tonic-gate 	ASSERT(me->me_nb != 0);
1041*0Sstevel@tonic-gate 
1042*0Sstevel@tonic-gate 	nb = me->me_nb;
1043*0Sstevel@tonic-gate 	va = (me->me_mof - bufmof) + bufp;
1044*0Sstevel@tonic-gate 	bp = get_write_bp(ul);
1045*0Sstevel@tonic-gate 
1046*0Sstevel@tonic-gate 	/* Save where we will put the data */
1047*0Sstevel@tonic-gate 	me->me_lof = ul->un_tail_lof;
1048*0Sstevel@tonic-gate 
1049*0Sstevel@tonic-gate 	while (nb) {
1050*0Sstevel@tonic-gate 		if (ul->un_flags & LDL_ERROR) {
1051*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1052*0Sstevel@tonic-gate 			return;
1053*0Sstevel@tonic-gate 		}
1054*0Sstevel@tonic-gate 		actual = storebuf(ul, bp, va, nb);
1055*0Sstevel@tonic-gate 		ASSERT(actual);
1056*0Sstevel@tonic-gate 		va += actual;
1057*0Sstevel@tonic-gate 		nb -= actual;
1058*0Sstevel@tonic-gate 		if (nb)
1059*0Sstevel@tonic-gate 			bp = get_write_bp(ul);
1060*0Sstevel@tonic-gate 	}
1061*0Sstevel@tonic-gate }
1062*0Sstevel@tonic-gate 
1063*0Sstevel@tonic-gate void
1064*0Sstevel@tonic-gate ldl_waito(ml_unit_t *ul)
1065*0Sstevel@tonic-gate {
1066*0Sstevel@tonic-gate 	buf_t		*bp;
1067*0Sstevel@tonic-gate 	cirbuf_t	*cb	= &ul->un_wrbuf;
1068*0Sstevel@tonic-gate 
1069*0Sstevel@tonic-gate 	rw_enter(&cb->cb_rwlock, RW_WRITER);
1070*0Sstevel@tonic-gate 	/*
1071*0Sstevel@tonic-gate 	 * wait on them
1072*0Sstevel@tonic-gate 	 */
1073*0Sstevel@tonic-gate 	bp = cb->cb_bp;
1074*0Sstevel@tonic-gate 	do {
1075*0Sstevel@tonic-gate 		if ((bp->b_flags & B_DONE) == 0) {
1076*0Sstevel@tonic-gate 			makebusy(ul, bp);
1077*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1078*0Sstevel@tonic-gate 		}
1079*0Sstevel@tonic-gate 		bp = bp->b_forw;
1080*0Sstevel@tonic-gate 	} while (bp != cb->cb_bp);
1081*0Sstevel@tonic-gate 	rw_exit(&cb->cb_rwlock);
1082*0Sstevel@tonic-gate }
1083*0Sstevel@tonic-gate 
1084*0Sstevel@tonic-gate /*
1085*0Sstevel@tonic-gate  * seek nb bytes from location lof
1086*0Sstevel@tonic-gate  */
1087*0Sstevel@tonic-gate static int
1088*0Sstevel@tonic-gate logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
1089*0Sstevel@tonic-gate {
1090*0Sstevel@tonic-gate 	buf_t	*bp;
1091*0Sstevel@tonic-gate 	ulong_t	actual;
1092*0Sstevel@tonic-gate 
1093*0Sstevel@tonic-gate 	while (nb) {
1094*0Sstevel@tonic-gate 		bp = get_read_bp(ul, lof);
1095*0Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
1096*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1097*0Sstevel@tonic-gate 			return (EIO);
1098*0Sstevel@tonic-gate 		}
1099*0Sstevel@tonic-gate 		actual = fetchbuf(ul, bp, NULL, nb, &lof);
1100*0Sstevel@tonic-gate 		ASSERT(actual);
1101*0Sstevel@tonic-gate 		nb -= actual;
1102*0Sstevel@tonic-gate 	}
1103*0Sstevel@tonic-gate 	*lofp = lof;
1104*0Sstevel@tonic-gate 	ASSERT(nb == 0);
1105*0Sstevel@tonic-gate 	return (0);
1106*0Sstevel@tonic-gate }
1107*0Sstevel@tonic-gate 
1108*0Sstevel@tonic-gate int
1109*0Sstevel@tonic-gate ldl_read(
1110*0Sstevel@tonic-gate 	ml_unit_t *ul,		/* Log unit */
1111*0Sstevel@tonic-gate 	caddr_t va,		/* address of buffer to read into */
1112*0Sstevel@tonic-gate 	offset_t mof,		/* mof of buffer */
1113*0Sstevel@tonic-gate 	off_t nb,		/* length of buffer */
1114*0Sstevel@tonic-gate 	mapentry_t *me)		/* Map entry list */
1115*0Sstevel@tonic-gate {
1116*0Sstevel@tonic-gate 	buf_t	*bp;
1117*0Sstevel@tonic-gate 	crb_t   *crb;
1118*0Sstevel@tonic-gate 	caddr_t	rva;			/* address to read into */
1119*0Sstevel@tonic-gate 	size_t	rnb;			/* # of bytes to read */
1120*0Sstevel@tonic-gate 	off_t	lof;			/* log device offset to read from */
1121*0Sstevel@tonic-gate 	off_t   skip;
1122*0Sstevel@tonic-gate 	ulong_t	actual;
1123*0Sstevel@tonic-gate 	int	error;
1124*0Sstevel@tonic-gate 	caddr_t	eva	= va + nb;	/* end of buffer */
1125*0Sstevel@tonic-gate 
1126*0Sstevel@tonic-gate 	for (; me; me = me->me_agenext) {
1127*0Sstevel@tonic-gate 		ASSERT(me->me_dt != DT_CANCEL);
1128*0Sstevel@tonic-gate 
1129*0Sstevel@tonic-gate 		/*
1130*0Sstevel@tonic-gate 		 * check for an cached roll buffer
1131*0Sstevel@tonic-gate 		 */
1132*0Sstevel@tonic-gate 		crb = me->me_crb;
1133*0Sstevel@tonic-gate 		if (crb) {
1134*0Sstevel@tonic-gate 			if (mof > crb->c_mof) {
1135*0Sstevel@tonic-gate 				/*
1136*0Sstevel@tonic-gate 				 * This mapentry overlaps with the beginning of
1137*0Sstevel@tonic-gate 				 * the supplied buffer
1138*0Sstevel@tonic-gate 				 */
1139*0Sstevel@tonic-gate 				skip = mof - crb->c_mof;
1140*0Sstevel@tonic-gate 				bcopy(crb->c_buf + skip, va,
1141*0Sstevel@tonic-gate 				    MIN(nb, crb->c_nb - skip));
1142*0Sstevel@tonic-gate 			} else {
1143*0Sstevel@tonic-gate 				/*
1144*0Sstevel@tonic-gate 				 * This mapentry starts at or after
1145*0Sstevel@tonic-gate 				 * the supplied buffer.
1146*0Sstevel@tonic-gate 				 */
1147*0Sstevel@tonic-gate 				skip = crb->c_mof - mof;
1148*0Sstevel@tonic-gate 				bcopy(crb->c_buf, va + skip,
1149*0Sstevel@tonic-gate 				    MIN(crb->c_nb, nb - skip));
1150*0Sstevel@tonic-gate 			}
1151*0Sstevel@tonic-gate 			logstats.ls_lreadsinmem.value.ui64++;
1152*0Sstevel@tonic-gate 			continue;
1153*0Sstevel@tonic-gate 		}
1154*0Sstevel@tonic-gate 
1155*0Sstevel@tonic-gate 		/*
1156*0Sstevel@tonic-gate 		 * check for a delta full of zeroes - there's no log data
1157*0Sstevel@tonic-gate 		 */
1158*0Sstevel@tonic-gate 		if (me->me_dt == DT_ABZERO) {
1159*0Sstevel@tonic-gate 			fetchzeroes(va, mof, nb, me);
1160*0Sstevel@tonic-gate 			continue;
1161*0Sstevel@tonic-gate 		}
1162*0Sstevel@tonic-gate 
1163*0Sstevel@tonic-gate 		if (mof > me->me_mof) {
1164*0Sstevel@tonic-gate 			rnb = (size_t)(mof - me->me_mof);
1165*0Sstevel@tonic-gate 			error = logseek(ul, me->me_lof, rnb, &lof);
1166*0Sstevel@tonic-gate 			if (error)
1167*0Sstevel@tonic-gate 				return (EIO);
1168*0Sstevel@tonic-gate 			rva = va;
1169*0Sstevel@tonic-gate 			rnb = me->me_nb - rnb;
1170*0Sstevel@tonic-gate 			rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
1171*0Sstevel@tonic-gate 		} else {
1172*0Sstevel@tonic-gate 			lof = me->me_lof;
1173*0Sstevel@tonic-gate 			rva = (me->me_mof - mof) + va;
1174*0Sstevel@tonic-gate 			rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb;
1175*0Sstevel@tonic-gate 		}
1176*0Sstevel@tonic-gate 
1177*0Sstevel@tonic-gate 		while (rnb) {
1178*0Sstevel@tonic-gate 			bp = get_read_bp(ul, lof);
1179*0Sstevel@tonic-gate 			if (bp->b_flags & B_ERROR) {
1180*0Sstevel@tonic-gate 				sema_v(&bp->b_sem);
1181*0Sstevel@tonic-gate 				return (EIO);
1182*0Sstevel@tonic-gate 			}
1183*0Sstevel@tonic-gate 			ASSERT(((me->me_flags & ME_ROLL) == 0) ||
1184*0Sstevel@tonic-gate 				(bp != ul->un_wrbuf.cb_dirty));
1185*0Sstevel@tonic-gate 			actual = fetchbuf(ul, bp, rva, rnb, &lof);
1186*0Sstevel@tonic-gate 			ASSERT(actual);
1187*0Sstevel@tonic-gate 			rva += actual;
1188*0Sstevel@tonic-gate 			rnb -= actual;
1189*0Sstevel@tonic-gate 		}
1190*0Sstevel@tonic-gate 	}
1191*0Sstevel@tonic-gate 	return (0);
1192*0Sstevel@tonic-gate }
1193*0Sstevel@tonic-gate 
1194*0Sstevel@tonic-gate void
1195*0Sstevel@tonic-gate ldl_savestate(ml_unit_t *ul)
1196*0Sstevel@tonic-gate {
1197*0Sstevel@tonic-gate 	int		error;
1198*0Sstevel@tonic-gate 	buf_t		*bp	= ul->un_bp;
1199*0Sstevel@tonic-gate 	ml_odunit_t	*ud	= (void *)bp->b_un.b_addr;
1200*0Sstevel@tonic-gate 	ml_odunit_t	*ud2	= (void *)(bp->b_un.b_addr + DEV_BSIZE);
1201*0Sstevel@tonic-gate 
1202*0Sstevel@tonic-gate #if	DEBUG
1203*0Sstevel@tonic-gate 	/*
1204*0Sstevel@tonic-gate 	 * Scan test is running; don't update intermediate state
1205*0Sstevel@tonic-gate 	 */
1206*0Sstevel@tonic-gate 	if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
1207*0Sstevel@tonic-gate 		return;
1208*0Sstevel@tonic-gate #endif	/* DEBUG */
1209*0Sstevel@tonic-gate 
1210*0Sstevel@tonic-gate 	mutex_enter(&ul->un_state_mutex);
1211*0Sstevel@tonic-gate 	bcopy(&ul->un_ondisk, ud, sizeof (*ud));
1212*0Sstevel@tonic-gate 	ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
1213*0Sstevel@tonic-gate 	bcopy(ud, ud2, sizeof (*ud));
1214*0Sstevel@tonic-gate 
1215*0Sstevel@tonic-gate 	/* If a snapshot is enabled write through the shapshot driver. */
1216*0Sstevel@tonic-gate 	if (ul->un_ufsvfs->vfs_snapshot)
1217*0Sstevel@tonic-gate 		UFS_BWRITE2(ul->un_ufsvfs, bp);
1218*0Sstevel@tonic-gate 	else
1219*0Sstevel@tonic-gate 		BWRITE2(bp);
1220*0Sstevel@tonic-gate 	logstats.ls_ldlwrites.value.ui64++;
1221*0Sstevel@tonic-gate 	error = bp->b_flags & B_ERROR;
1222*0Sstevel@tonic-gate 	mutex_exit(&ul->un_state_mutex);
1223*0Sstevel@tonic-gate 	if (error)
1224*0Sstevel@tonic-gate 		ldl_seterror(ul, "Error writing ufs log state");
1225*0Sstevel@tonic-gate }
1226*0Sstevel@tonic-gate 
1227*0Sstevel@tonic-gate /*
1228*0Sstevel@tonic-gate  * The head will be set to (new_lof - header) since ldl_sethead is
1229*0Sstevel@tonic-gate  * called with the new_lof of the data portion of a delta.
1230*0Sstevel@tonic-gate  */
1231*0Sstevel@tonic-gate void
1232*0Sstevel@tonic-gate ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
1233*0Sstevel@tonic-gate {
1234*0Sstevel@tonic-gate 	off_t		nb;
1235*0Sstevel@tonic-gate 	off_t		new_lof;
1236*0Sstevel@tonic-gate 	uint32_t	new_ident;
1237*0Sstevel@tonic-gate 	daddr_t		beg_blkno;
1238*0Sstevel@tonic-gate 	daddr_t		end_blkno;
1239*0Sstevel@tonic-gate 	struct timeval	tv;
1240*0Sstevel@tonic-gate 
1241*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1242*0Sstevel@tonic-gate 
1243*0Sstevel@tonic-gate 	if (data_lof == -1) {
1244*0Sstevel@tonic-gate 		/* log is empty */
1245*0Sstevel@tonic-gate 		uniqtime(&tv);
1246*0Sstevel@tonic-gate 		if (tv.tv_usec == ul->un_head_ident) {
1247*0Sstevel@tonic-gate 			tv.tv_usec++;
1248*0Sstevel@tonic-gate 		}
1249*0Sstevel@tonic-gate 		last_loghead_ident = tv.tv_usec;
1250*0Sstevel@tonic-gate 		new_ident = tv.tv_usec;
1251*0Sstevel@tonic-gate 		new_lof = ul->un_tail_lof;
1252*0Sstevel@tonic-gate 
1253*0Sstevel@tonic-gate 	} else {
1254*0Sstevel@tonic-gate 		/* compute header's lof */
1255*0Sstevel@tonic-gate 		new_ident = ul->un_head_ident;
1256*0Sstevel@tonic-gate 		new_lof = data_lof - sizeof (struct delta);
1257*0Sstevel@tonic-gate 
1258*0Sstevel@tonic-gate 		/* whoops, header spans sectors; subtract out sector trailer */
1259*0Sstevel@tonic-gate 		if (btodb(new_lof) != btodb(data_lof))
1260*0Sstevel@tonic-gate 			new_lof -= sizeof (sect_trailer_t);
1261*0Sstevel@tonic-gate 
1262*0Sstevel@tonic-gate 		/* whoops, header wrapped the log; go to last sector */
1263*0Sstevel@tonic-gate 		if (new_lof < ul->un_bol_lof) {
1264*0Sstevel@tonic-gate 			/* sector offset */
1265*0Sstevel@tonic-gate 			new_lof -= dbtob(btodb(new_lof));
1266*0Sstevel@tonic-gate 			/* add to last sector's lof */
1267*0Sstevel@tonic-gate 			new_lof += (ul->un_eol_lof - DEV_BSIZE);
1268*0Sstevel@tonic-gate 		}
1269*0Sstevel@tonic-gate 		ul->un_head_tid = tid;
1270*0Sstevel@tonic-gate 	}
1271*0Sstevel@tonic-gate 
1272*0Sstevel@tonic-gate 	/*
1273*0Sstevel@tonic-gate 	 * check for nop
1274*0Sstevel@tonic-gate 	 */
1275*0Sstevel@tonic-gate 	if (new_lof == ul->un_head_lof)
1276*0Sstevel@tonic-gate 		return;
1277*0Sstevel@tonic-gate 
1278*0Sstevel@tonic-gate 	/*
1279*0Sstevel@tonic-gate 	 * invalidate the affected bufs and calculate new ident
1280*0Sstevel@tonic-gate 	 */
1281*0Sstevel@tonic-gate 	if (new_lof > ul->un_head_lof) {
1282*0Sstevel@tonic-gate 		nb = new_lof - ul->un_head_lof;
1283*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
1284*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
1285*0Sstevel@tonic-gate 
1286*0Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
1287*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
1288*0Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
1289*0Sstevel@tonic-gate 	} else {
1290*0Sstevel@tonic-gate 		nb = ul->un_eol_lof - ul->un_head_lof;
1291*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
1292*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);
1293*0Sstevel@tonic-gate 
1294*0Sstevel@tonic-gate 		end_blkno = btodb(ul->un_eol_lof);
1295*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
1296*0Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
1297*0Sstevel@tonic-gate 
1298*0Sstevel@tonic-gate 		nb = new_lof - ul->un_bol_lof;
1299*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
1300*0Sstevel@tonic-gate 		inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);
1301*0Sstevel@tonic-gate 
1302*0Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
1303*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_bol_lof);
1304*0Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
1305*0Sstevel@tonic-gate 	}
1306*0Sstevel@tonic-gate 	/*
1307*0Sstevel@tonic-gate 	 * don't update the head if there has been an error
1308*0Sstevel@tonic-gate 	 */
1309*0Sstevel@tonic-gate 	if (ul->un_flags & LDL_ERROR)
1310*0Sstevel@tonic-gate 		return;
1311*0Sstevel@tonic-gate 
1312*0Sstevel@tonic-gate 	/* Fix up the head and ident */
1313*0Sstevel@tonic-gate 	ASSERT(new_lof >= ul->un_bol_lof);
1314*0Sstevel@tonic-gate 	ul->un_head_lof = new_lof;
1315*0Sstevel@tonic-gate 	ul->un_head_ident = new_ident;
1316*0Sstevel@tonic-gate 	if (data_lof == -1) {
1317*0Sstevel@tonic-gate 		ul->un_tail_ident = ul->un_head_ident;
1318*0Sstevel@tonic-gate 	}
1319*0Sstevel@tonic-gate 
1320*0Sstevel@tonic-gate 
1321*0Sstevel@tonic-gate 	/* Commit to the database */
1322*0Sstevel@tonic-gate 	ldl_savestate(ul);
1323*0Sstevel@tonic-gate 
1324*0Sstevel@tonic-gate 	ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
1325*0Sstevel@tonic-gate 		ldl_sethead_debug(ul));
1326*0Sstevel@tonic-gate }
1327*0Sstevel@tonic-gate 
1328*0Sstevel@tonic-gate /*
1329*0Sstevel@tonic-gate  * The tail will be set to the sector following lof+nb
1330*0Sstevel@tonic-gate  *	lof + nb == size of the last delta + commit record
1331*0Sstevel@tonic-gate  *	this function is called once after the log scan has completed.
1332*0Sstevel@tonic-gate  */
1333*0Sstevel@tonic-gate void
1334*0Sstevel@tonic-gate ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
1335*0Sstevel@tonic-gate {
1336*0Sstevel@tonic-gate 	off_t		new_lof;
1337*0Sstevel@tonic-gate 	uint32_t	new_ident;
1338*0Sstevel@tonic-gate 	daddr_t		beg_blkno;
1339*0Sstevel@tonic-gate 	daddr_t		end_blkno;
1340*0Sstevel@tonic-gate 	struct timeval	tv;
1341*0Sstevel@tonic-gate 
1342*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
1343*0Sstevel@tonic-gate 
1344*0Sstevel@tonic-gate 	if (lof == -1) {
1345*0Sstevel@tonic-gate 		uniqtime(&tv);
1346*0Sstevel@tonic-gate 		if (tv.tv_usec == ul->un_head_ident) {
1347*0Sstevel@tonic-gate 			tv.tv_usec++;
1348*0Sstevel@tonic-gate 		}
1349*0Sstevel@tonic-gate 		last_loghead_ident = tv.tv_usec;
1350*0Sstevel@tonic-gate 		ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
1351*0Sstevel@tonic-gate 		ul->un_head_lof = ul->un_tail_lof;
1352*0Sstevel@tonic-gate 		ul->un_head_ident = tv.tv_usec;
1353*0Sstevel@tonic-gate 		ul->un_tail_ident = ul->un_head_ident;
1354*0Sstevel@tonic-gate 
1355*0Sstevel@tonic-gate 		/* Commit to the database */
1356*0Sstevel@tonic-gate 		ldl_savestate(ul);
1357*0Sstevel@tonic-gate 
1358*0Sstevel@tonic-gate 		return;
1359*0Sstevel@tonic-gate 	}
1360*0Sstevel@tonic-gate 
1361*0Sstevel@tonic-gate 	/*
1362*0Sstevel@tonic-gate 	 * new_lof is the offset of the sector following the last commit
1363*0Sstevel@tonic-gate 	 */
1364*0Sstevel@tonic-gate 	(void) logseek(ul, lof, nb, &new_lof);
1365*0Sstevel@tonic-gate 	ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));
1366*0Sstevel@tonic-gate 
1367*0Sstevel@tonic-gate 	/*
1368*0Sstevel@tonic-gate 	 * calculate new ident
1369*0Sstevel@tonic-gate 	 */
1370*0Sstevel@tonic-gate 	if (new_lof > ul->un_head_lof) {
1371*0Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
1372*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
1373*0Sstevel@tonic-gate 		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
1374*0Sstevel@tonic-gate 	} else {
1375*0Sstevel@tonic-gate 		end_blkno = btodb(ul->un_eol_lof);
1376*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_head_lof);
1377*0Sstevel@tonic-gate 		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
1378*0Sstevel@tonic-gate 
1379*0Sstevel@tonic-gate 		end_blkno = btodb(new_lof);
1380*0Sstevel@tonic-gate 		beg_blkno = btodb(ul->un_bol_lof);
1381*0Sstevel@tonic-gate 		new_ident += (end_blkno - beg_blkno);
1382*0Sstevel@tonic-gate 	}
1383*0Sstevel@tonic-gate 
1384*0Sstevel@tonic-gate 	/* Fix up the tail and ident */
1385*0Sstevel@tonic-gate 	ul->un_tail_lof = new_lof;
1386*0Sstevel@tonic-gate 	ul->un_tail_ident = new_ident;
1387*0Sstevel@tonic-gate 
1388*0Sstevel@tonic-gate 	/* Commit to the database */
1389*0Sstevel@tonic-gate 	ldl_savestate(ul);
1390*0Sstevel@tonic-gate }
1391*0Sstevel@tonic-gate 
1392*0Sstevel@tonic-gate /*
1393*0Sstevel@tonic-gate  * LOGSCAN STUFF
1394*0Sstevel@tonic-gate  */
1395*0Sstevel@tonic-gate static int
1396*0Sstevel@tonic-gate ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
1397*0Sstevel@tonic-gate {
1398*0Sstevel@tonic-gate 	ulong_t		ident;
1399*0Sstevel@tonic-gate 	size_t		nblk, i;
1400*0Sstevel@tonic-gate 	sect_trailer_t	*st;
1401*0Sstevel@tonic-gate 
1402*0Sstevel@tonic-gate 	/*
1403*0Sstevel@tonic-gate 	 * compute ident for first sector in the buffer
1404*0Sstevel@tonic-gate 	 */
1405*0Sstevel@tonic-gate 	ident = ul->un_head_ident;
1406*0Sstevel@tonic-gate 	if (bp->b_blkno >= btodb(ul->un_head_lof)) {
1407*0Sstevel@tonic-gate 		ident += (bp->b_blkno - btodb(ul->un_head_lof));
1408*0Sstevel@tonic-gate 	} else {
1409*0Sstevel@tonic-gate 		ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
1410*0Sstevel@tonic-gate 		ident += (bp->b_blkno - btodb(ul->un_bol_lof));
1411*0Sstevel@tonic-gate 	}
1412*0Sstevel@tonic-gate 	/*
1413*0Sstevel@tonic-gate 	 * truncate the buffer down to the last valid sector
1414*0Sstevel@tonic-gate 	 */
1415*0Sstevel@tonic-gate 	nblk = btodb(bp->b_bcount);
1416*0Sstevel@tonic-gate 	bp->b_bcount = 0;
1417*0Sstevel@tonic-gate 	/* LINTED */
1418*0Sstevel@tonic-gate 	st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
1419*0Sstevel@tonic-gate 	for (i = 0; i < nblk; ++i) {
1420*0Sstevel@tonic-gate 		if (st->st_ident != ident)
1421*0Sstevel@tonic-gate 			break;
1422*0Sstevel@tonic-gate 
1423*0Sstevel@tonic-gate 		/* remember last valid tid for ldl_logscan_error() */
1424*0Sstevel@tonic-gate 		ul->un_tid = st->st_tid;
1425*0Sstevel@tonic-gate 
1426*0Sstevel@tonic-gate 		/* LINTED */
1427*0Sstevel@tonic-gate 		st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
1428*0Sstevel@tonic-gate 		++ident;
1429*0Sstevel@tonic-gate 		bp->b_bcount += DEV_BSIZE;
1430*0Sstevel@tonic-gate 	}
1431*0Sstevel@tonic-gate 	/*
1432*0Sstevel@tonic-gate 	 * make sure that lof is still within range
1433*0Sstevel@tonic-gate 	 */
1434*0Sstevel@tonic-gate 	return (within_range(lof, bp->b_blkno, bp->b_bcount));
1435*0Sstevel@tonic-gate }
1436*0Sstevel@tonic-gate 
1437*0Sstevel@tonic-gate ulong_t
1438*0Sstevel@tonic-gate ldl_logscan_nbcommit(off_t lof)
1439*0Sstevel@tonic-gate {
1440*0Sstevel@tonic-gate 	/*
1441*0Sstevel@tonic-gate 	 * lof is the offset following the commit header.  However,
1442*0Sstevel@tonic-gate 	 * if the commit header fell on the end-of-sector, then lof
1443*0Sstevel@tonic-gate 	 * has already been advanced to the beginning of the next
1444*0Sstevel@tonic-gate 	 * sector.  So do nothing.  Otherwise, return the remaining
1445*0Sstevel@tonic-gate 	 * bytes in the sector.
1446*0Sstevel@tonic-gate 	 */
1447*0Sstevel@tonic-gate 	if ((lof & (DEV_BSIZE - 1)) == 0)
1448*0Sstevel@tonic-gate 		return (0);
1449*0Sstevel@tonic-gate 	return (NB_LEFT_IN_SECTOR(lof));
1450*0Sstevel@tonic-gate }
1451*0Sstevel@tonic-gate 
1452*0Sstevel@tonic-gate int
1453*0Sstevel@tonic-gate ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
1454*0Sstevel@tonic-gate {
1455*0Sstevel@tonic-gate 	buf_t	*bp;
1456*0Sstevel@tonic-gate 	ulong_t	actual;
1457*0Sstevel@tonic-gate 
1458*0Sstevel@tonic-gate 	ASSERT(ul->un_head_lof != ul->un_tail_lof);
1459*0Sstevel@tonic-gate 
1460*0Sstevel@tonic-gate 	/*
1461*0Sstevel@tonic-gate 	 * Check the log data doesn't go out of bounds
1462*0Sstevel@tonic-gate 	 */
1463*0Sstevel@tonic-gate 	if (ul->un_head_lof < ul->un_tail_lof) {
1464*0Sstevel@tonic-gate 		if (!WITHIN(*lofp, nb, ul->un_head_lof,
1465*0Sstevel@tonic-gate 		    (ul->un_tail_lof - ul->un_head_lof))) {
1466*0Sstevel@tonic-gate 			return (EIO);
1467*0Sstevel@tonic-gate 		}
1468*0Sstevel@tonic-gate 	} else {
1469*0Sstevel@tonic-gate 		if (OVERLAP(*lofp, nb, ul->un_tail_lof,
1470*0Sstevel@tonic-gate 		    (ul->un_head_lof - ul->un_tail_lof))) {
1471*0Sstevel@tonic-gate 			return (EIO);
1472*0Sstevel@tonic-gate 		}
1473*0Sstevel@tonic-gate 	}
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate 	while (nb) {
1476*0Sstevel@tonic-gate 		bp = get_read_bp(ul, *lofp);
1477*0Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
1478*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1479*0Sstevel@tonic-gate 			return (EIO);
1480*0Sstevel@tonic-gate 		}
1481*0Sstevel@tonic-gate 		/*
1482*0Sstevel@tonic-gate 		 * out-of-seq idents means partial transaction
1483*0Sstevel@tonic-gate 		 *	panic, non-corrupting powerfail, ...
1484*0Sstevel@tonic-gate 		 */
1485*0Sstevel@tonic-gate 		if (!ldl_logscan_ident(ul, bp, *lofp)) {
1486*0Sstevel@tonic-gate 			sema_v(&bp->b_sem);
1487*0Sstevel@tonic-gate 			return (EIO);
1488*0Sstevel@tonic-gate 		}
1489*0Sstevel@tonic-gate 		/*
1490*0Sstevel@tonic-gate 		 * copy the header into the caller's buf
1491*0Sstevel@tonic-gate 		 */
1492*0Sstevel@tonic-gate 		actual = fetchbuf(ul, bp, va, nb, lofp);
1493*0Sstevel@tonic-gate 		if (va)
1494*0Sstevel@tonic-gate 			va += actual;
1495*0Sstevel@tonic-gate 		nb -= actual;
1496*0Sstevel@tonic-gate 	}
1497*0Sstevel@tonic-gate 	return (0);
1498*0Sstevel@tonic-gate }
1499*0Sstevel@tonic-gate 
1500*0Sstevel@tonic-gate void
1501*0Sstevel@tonic-gate ldl_logscan_begin(ml_unit_t *ul)
1502*0Sstevel@tonic-gate {
1503*0Sstevel@tonic-gate 	size_t	bufsize;
1504*0Sstevel@tonic-gate 
1505*0Sstevel@tonic-gate 	ASSERT(ul->un_wrbuf.cb_dirty == NULL);
1506*0Sstevel@tonic-gate 
1507*0Sstevel@tonic-gate 	/*
1508*0Sstevel@tonic-gate 	 * logscan has begun
1509*0Sstevel@tonic-gate 	 */
1510*0Sstevel@tonic-gate 	ul->un_flags |= LDL_SCAN;
1511*0Sstevel@tonic-gate 
1512*0Sstevel@tonic-gate 	/*
1513*0Sstevel@tonic-gate 	 * reset the circular bufs
1514*0Sstevel@tonic-gate 	 */
1515*0Sstevel@tonic-gate 	bufsize = ldl_bufsize(ul);
1516*0Sstevel@tonic-gate 	alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
1517*0Sstevel@tonic-gate 	alloc_wrbuf(&ul->un_wrbuf, bufsize);
1518*0Sstevel@tonic-gate 
1519*0Sstevel@tonic-gate 	/*
1520*0Sstevel@tonic-gate 	 * set the tail to reflect a full log
1521*0Sstevel@tonic-gate 	 */
1522*0Sstevel@tonic-gate 	ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;
1523*0Sstevel@tonic-gate 
1524*0Sstevel@tonic-gate 	if (ul->un_tail_lof < ul->un_bol_lof)
1525*0Sstevel@tonic-gate 		ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
1526*0Sstevel@tonic-gate 	if (ul->un_tail_lof >= ul->un_eol_lof)
1527*0Sstevel@tonic-gate 		ul->un_tail_lof = ul->un_bol_lof;
1528*0Sstevel@tonic-gate 
1529*0Sstevel@tonic-gate 	/*
1530*0Sstevel@tonic-gate 	 * un_tid is used during error processing; it is initialized to
1531*0Sstevel@tonic-gate 	 * the tid of the delta at un_head_lof;
1532*0Sstevel@tonic-gate 	 */
1533*0Sstevel@tonic-gate 	ul->un_tid = ul->un_head_tid;
1534*0Sstevel@tonic-gate }
1535*0Sstevel@tonic-gate 
1536*0Sstevel@tonic-gate void
1537*0Sstevel@tonic-gate ldl_logscan_end(ml_unit_t *ul)
1538*0Sstevel@tonic-gate {
1539*0Sstevel@tonic-gate 	size_t	bufsize;
1540*0Sstevel@tonic-gate 
1541*0Sstevel@tonic-gate 	/*
1542*0Sstevel@tonic-gate 	 * reset the circular bufs
1543*0Sstevel@tonic-gate 	 */
1544*0Sstevel@tonic-gate 	bufsize = ldl_bufsize(ul);
1545*0Sstevel@tonic-gate 	alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
1546*0Sstevel@tonic-gate 	alloc_wrbuf(&ul->un_wrbuf, bufsize);
1547*0Sstevel@tonic-gate 
1548*0Sstevel@tonic-gate 	/*
1549*0Sstevel@tonic-gate 	 * Done w/scan
1550*0Sstevel@tonic-gate 	 */
1551*0Sstevel@tonic-gate 	ul->un_flags &= ~LDL_SCAN;
1552*0Sstevel@tonic-gate }
1553*0Sstevel@tonic-gate 
1554*0Sstevel@tonic-gate int
1555*0Sstevel@tonic-gate ldl_need_roll(ml_unit_t *ul)
1556*0Sstevel@tonic-gate {
1557*0Sstevel@tonic-gate 	off_t	busybytes;
1558*0Sstevel@tonic-gate 	off_t	head;
1559*0Sstevel@tonic-gate 	off_t	tail;
1560*0Sstevel@tonic-gate 	off_t	bol;
1561*0Sstevel@tonic-gate 	off_t	eol;
1562*0Sstevel@tonic-gate 	off_t	nb;
1563*0Sstevel@tonic-gate 
1564*0Sstevel@tonic-gate 	/*
1565*0Sstevel@tonic-gate 	 * snapshot the log state
1566*0Sstevel@tonic-gate 	 */
1567*0Sstevel@tonic-gate 	head = ul->un_head_lof;
1568*0Sstevel@tonic-gate 	tail = ul->un_tail_lof;
1569*0Sstevel@tonic-gate 	bol = ul->un_bol_lof;
1570*0Sstevel@tonic-gate 	eol = ul->un_eol_lof;
1571*0Sstevel@tonic-gate 	nb = ul->un_logsize;
1572*0Sstevel@tonic-gate 
1573*0Sstevel@tonic-gate 	/*
1574*0Sstevel@tonic-gate 	 * compute number of busy (inuse) bytes
1575*0Sstevel@tonic-gate 	 */
1576*0Sstevel@tonic-gate 	if (head <= tail)
1577*0Sstevel@tonic-gate 		busybytes = tail - head;
1578*0Sstevel@tonic-gate 	else
1579*0Sstevel@tonic-gate 		busybytes = (eol - head) + (tail - bol);
1580*0Sstevel@tonic-gate 
1581*0Sstevel@tonic-gate 	/*
1582*0Sstevel@tonic-gate 	 * return TRUE if > 75% full
1583*0Sstevel@tonic-gate 	 */
1584*0Sstevel@tonic-gate 	return (busybytes > (nb - (nb >> 2)));
1585*0Sstevel@tonic-gate }
1586*0Sstevel@tonic-gate 
/*
 * Move the log into the error state: set the incore and ondisk error
 * flags, commit them (with a timestamp) to the state sectors, warn the
 * operator, and — unless a log scan is in progress — tell UFS to hard
 * lock itself.  "why" is the message logged as the reason.
 */
void
ldl_seterror(ml_unit_t *ul, char *why)
{
	/*
	 * already in error state; do nothing
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	ul->un_flags |= LDL_ERROR;	/* incore */
	ul->un_badlog = 1;		/* ondisk (cleared by fsck) */

	/*
	 * Commit to state sectors
	 */
	uniqtime(&ul->un_timestamp);
	ldl_savestate(ul);

	/* Pretty print */
	cmn_err(CE_WARN, "%s", why);
	cmn_err(CE_WARN, "ufs log for %s changed state to Error",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
	cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);

	/*
	 * If we aren't in the middle of scan (aka snarf); tell ufs
	 * to hard lock itself.
	 */
	if ((ul->un_flags & LDL_SCAN) == 0)
		ufs_trans_onerror();
}
1619*0Sstevel@tonic-gate 
1620*0Sstevel@tonic-gate size_t
1621*0Sstevel@tonic-gate ldl_bufsize(ml_unit_t *ul)
1622*0Sstevel@tonic-gate {
1623*0Sstevel@tonic-gate 	size_t		bufsize;
1624*0Sstevel@tonic-gate 	extern uint32_t	ldl_minbufsize;
1625*0Sstevel@tonic-gate 
1626*0Sstevel@tonic-gate 	/*
1627*0Sstevel@tonic-gate 	 * initial guess is the maxtransfer value for this log device
1628*0Sstevel@tonic-gate 	 * 	increase if too small
1629*0Sstevel@tonic-gate 	 * 	decrease if too large
1630*0Sstevel@tonic-gate 	 */
1631*0Sstevel@tonic-gate 	bufsize = dbtob(btod(ul->un_maxtransfer));
1632*0Sstevel@tonic-gate 	if (bufsize < ldl_minbufsize)
1633*0Sstevel@tonic-gate 		bufsize = ldl_minbufsize;
1634*0Sstevel@tonic-gate 	if (bufsize > maxphys)
1635*0Sstevel@tonic-gate 		bufsize = maxphys;
1636*0Sstevel@tonic-gate 	if (bufsize > ul->un_maxtransfer)
1637*0Sstevel@tonic-gate 		bufsize = ul->un_maxtransfer;
1638*0Sstevel@tonic-gate 	return (bufsize);
1639*0Sstevel@tonic-gate }
1640