xref: /onnv-gate/usr/src/uts/common/fs/ufs/lufs_map.c (revision 0:68f95e015346)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/inttypes.h>
#include <sys/atomic.h>
#include <sys/tuneable.h>

/*
 * externs
 */
extern pri_t minclsyspri;
extern struct kmem_cache *lufs_bp;
extern int ufs_trans_push_quota();

/*
 * globals
 */
kmem_cache_t *mapentry_cache;

/*
 * logmap tuning constants
 */
long	logmap_maxnme_commit	= 2048;
long	logmap_maxnme_async	= 4096;
long	logmap_maxnme_sync	= 6144;
long	logmap_maxcfrag_commit	= 4;	/* Max canceled fragments per moby */


uint64_t ufs_crb_size = 0;		/* current size of all crb buffers */
uint64_t ufs_crb_max_size = 0;		/* highest crb buffer use so far */
size_t ufs_crb_limit;			/* max allowable size for crbs */
uint64_t ufs_crb_alloc_fails = 0;	/* crb allocation failures stat */
#define	UFS_MAX_CRB_DEFAULT_DIVISOR 10	/* max 1/10 kmem_maxavail() */
int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR; /* tunable */
void handle_dquot(mapentry_t *);

/*
 * GENERIC MAP ROUTINES
 */

#define	CRB_FREE(crb, me) \
	kmem_free(crb->c_buf, crb->c_nb); \
	atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \
	kmem_free(crb, sizeof (crb_t)); \
	(me)->me_crb = NULL;

#define	CRB_RELE(me) { \
	crb_t *crb = (me)->me_crb; \
	if (crb && (--crb->c_refcnt == 0)) { \
		CRB_FREE(crb, me) \
	} \
}

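/*
 * Illustrative lifecycle sketch (not a code path in this file): a crb
 * (cached roll buffer) is shared by several mapentries via me_crb, with
 * c_refcnt counting the sharers. Every teardown goes through CRB_RELE,
 * and only the last release frees the buffer:
 *
 *	CRB_RELE(me);	decrements c_refcnt; when it reaches 0, frees
 *			crb->c_buf and the crb itself, and subtracts
 *			c_nb from the global ufs_crb_size accounting.
 */
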
/*
 * Check that the old delta has an argument and a push function of
 * ufs_trans_push_quota(), then check that the old and new deltas differ.
 * If so we clean up with handle_dquot() before replacing the old delta.
 */
#define	HANDLE_DQUOT(me, melist) { \
	if ((me->me_arg) && \
	    (me->me_func == ufs_trans_push_quota)) { \
		if (!((me->me_dt == melist->me_dt) && \
		    (me->me_arg == melist->me_arg) && \
		    (me->me_func == melist->me_func))) { \
			handle_dquot(me); \
		} \
	} \
}

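/*
 * Background (assumption drawn from the comments in deltamap_add() and
 * logmap_add()): a quota mapentry holds a reference on its dquot that
 * the push function, ufs_trans_push_quota(), would normally release.
 * Replacing such an entry without calling handle_dquot() first would
 * leak that reference, which invalidatedq() later finds and panics on
 * at unmount time.
 */
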
/*
 * free up all the mapentries for a map
 */
void
map_free_entries(mt_map_t *mtm)
{
	int		i;
	mapentry_t	*me;

	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
		me->me_next->me_prev = me->me_prev;
		me->me_prev->me_next = me->me_next;
		CRB_RELE(me);
		kmem_cache_free(mapentry_cache, me);
	}
	for (i = 0; i < mtm->mtm_nhash; i++)
		mtm->mtm_hash[i] = NULL;
	mtm->mtm_nme = 0;
	mtm->mtm_nmet = 0;
}

/*
 * done with map; free if necessary
 */
mt_map_t *
map_put(mt_map_t *mtm)
{
	/*
	 * free up the map's memory
	 */
	map_free_entries(mtm);
	ASSERT(map_put_debug(mtm));
	kmem_free(mtm->mtm_hash,
		(size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash));
	mutex_destroy(&mtm->mtm_mutex);
	mutex_destroy(&mtm->mtm_scan_mutex);
	cv_destroy(&mtm->mtm_to_roll_cv);
	cv_destroy(&mtm->mtm_from_roll_cv);
	rw_destroy(&mtm->mtm_rwlock);
	mutex_destroy(&mtm->mtm_lock);
	cv_destroy(&mtm->mtm_cv_commit);
	cv_destroy(&mtm->mtm_cv_next);
	cv_destroy(&mtm->mtm_cv_eot);
	cv_destroy(&mtm->mtm_cv);
	kmem_free(mtm, sizeof (mt_map_t));
	return (NULL);
}
/*
 * Allocate a map.
 */
mt_map_t *
map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
{
	mt_map_t	*mtm;

	/*
	 * assume the map is not here and allocate the necessary structs
	 */
	mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);
	mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL);
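	/*
	 * The map structure itself doubles as the sentinel of a
	 * circular, doubly linked list of mapentries: an empty map has
	 * mtm_next/mtm_prev pointing back at the map itself (this is
	 * the termination test in map_free_entries()).
	 */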
	mtm->mtm_next = (mapentry_t *)mtm;
	mtm->mtm_prev = (mapentry_t *)mtm;
	mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh),
	    KM_SLEEP);
	mtm->mtm_nhash = nh;
	mtm->mtm_debug = ul->un_debug;
	mtm->mtm_type = maptype;

	mtm->mtm_cfrags = 0;
	mtm->mtm_cfragmax = logmap_maxcfrag_commit;

	/*
	 * for scan test
	 */
	mtm->mtm_ul = ul;

	/*
	 * Initialize locks
	 */
	mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL);
	ASSERT(map_get_debug(ul, mtm));

	return (mtm);
}

/*
 * DELTAMAP ROUTINES
 */
/*
 * deltamap tuning constants
 */
long	deltamap_maxnme	= 1024;	/* global so it can be set */

int
deltamap_need_commit(mt_map_t *mtm)
{
	return (mtm->mtm_nme > deltamap_maxnme);
}

/*
 * put a delta into a deltamap; may sleep on memory
 */
void
deltamap_add(
	mt_map_t *mtm,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg,
	threadtrans_t *tp)
{
	int32_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);

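	/*
	 * The loop below splits <mof, nb> into chunks that never cross
	 * a MAPBLOCKSIZE boundary. Worked example (illustrative,
	 * assuming MAPBLOCKSIZE = 8K): mof = 0x2a00, nb = 0x2000 gives
	 * hnb = 0x2000 - 0xa00 = 0x1600 up to the block edge, then
	 * hnb = 0xa00 for the remainder in the next block.
	 */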
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * Search for dup entry. We need to ensure that we don't
		 * replace a map entry which carries quota information
		 * with a map entry which doesn't. In that case we lose
		 * the reference to the dquot structure which will not be
		 * cleaned up by the push function me->me_func as this will
		 * never be called.
		 * The stray dquot would be found later by invalidatedq()
		 * causing a panic when the filesystem is unmounted.
		 */
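		/*
		 * MAP_HASH (behavior assumed from its uses in this file)
		 * yields the head of the hash chain for the block
		 * containing mof; chain entries are linked via me_hash.
		 */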
		mep = MAP_HASH(mof, mtm);
		for (me = *mep; me; me = me->me_hash) {
			if (DATAwithinME(mof, hnb, me)) {
			    if (me->me_func == ufs_trans_push_quota) {
				/*
				 * Don't remove quota entries which have
				 * incremented the ref count (those with a
				 * ufs_trans_push_quota push function).
				 * Let logmap_add[_buf] clean them up.
				 */
				continue;
			    }
			    break;
			}
			ASSERT((dtyp == DT_CANCEL) ||
				(!DATAoverlapME(mof, hnb, me)) ||
				MEwithinDATA(me, mof, hnb));
		}

		if (me) {
			/* already in map */
			continue;
		}

		/*
		 * Add up all the delta map deltas so we can compute
		 * an upper bound on the log size used.
		 * Note, some deltas get removed from the deltamap
		 * before the deltamap_push by lufs_write_strategy
		 * and so multiple deltas to the same mof offset
		 * don't get cancelled here but in the logmap.
		 * Thus we can't easily get an accurate count of
		 * the log space used - only an upper bound.
		 */
		if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
			ASSERT(dtyp != DT_CANCEL);
			if (dtyp == DT_ABZERO) {
				tp->deltas_size += sizeof (struct delta);
			} else {
				tp->deltas_size +=
				    (hnb + sizeof (struct delta));
			}
		}

		delta_stats[dtyp]++;

		/*
		 * get a mapentry
		 * May need to drop & re-grab the mtm_mutex
		 * and then recheck for a duplicate
		 */
		me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
		if (me == NULL) {
			mutex_exit(&mtm->mtm_mutex);
			me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
			mutex_enter(&mtm->mtm_mutex);
		}
		bzero(me, sizeof (mapentry_t));

		/*
		 * initialize and put in deltamap
		 */
		me->me_mof = mof;
		me->me_nb = hnb;
		me->me_func = func;
		me->me_arg = arg;
		me->me_dt = dtyp;
		me->me_flags = ME_HASH;
		me->me_tid = mtm->mtm_tid;

		me->me_hash = *mep;
		*mep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		mtm->mtm_nme++;
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}

/*
 * remove deltas within (mof, nb) and return as linked list
 */
mapentry_t *
deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
{
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mapentry_t	*mer;

	if (mtm == NULL)
		return (NULL);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);
	for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * remove entries from hash and return as an aged linked list
		 */
		mep = MAP_HASH(mof, mtm);
		while ((me = *mep) != 0) {
			if (MEwithinDATA(me, mof, hnb)) {
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				me->me_hash = mer;
				mer = me;
				me->me_flags |= ME_LIST;
				me->me_flags &= ~ME_HASH;
				mtm->mtm_nme--;
			} else
				mep = &me->me_hash;
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	return (mer);
}

/*
 * delete entries within (mof, nb)
 */
void
deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
{
	mapentry_t	*me;
	mapentry_t	*menext;

	menext = deltamap_remove(mtm, mof, nb);
	while ((me = menext) != 0) {
		menext = me->me_hash;
		kmem_cache_free(mapentry_cache, me);
	}
}

/*
 * Call the indicated function to cause deltas to move to the logmap.
 * top_end_sync() is the only caller of this function and
 * it has waited for the completion of all threads, so there can
 * be no other activity in the deltamap. Therefore we don't need to
 * hold the deltamap lock.
 */
void
deltamap_push(ml_unit_t *ul)
{
	delta_t		dtyp;
	int		(*func)();
	ulong_t		arg;
	mapentry_t	*me;
	offset_t	mof;
	off_t		nb;
	mt_map_t	*mtm	= ul->un_deltamap;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	/*
	 * for every entry in the deltamap
	 */
	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
		ASSERT(me->me_func);
		func = me->me_func;
		dtyp = me->me_dt;
		arg = me->me_arg;
		mof = me->me_mof;
		nb = me->me_nb;
		if ((ul->un_flags & LDL_ERROR) ||
		    (*func)(ul->un_ufsvfs, dtyp, arg))
			deltamap_del(mtm, mof, nb);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}

/*
 * LOGMAP ROUTINES
 */

int
logmap_need_commit(mt_map_t *mtm)
{
	return ((mtm->mtm_nmet > logmap_maxnme_commit) ||
		(mtm->mtm_cfrags >= mtm->mtm_cfragmax));
}

int
logmap_need_roll_async(mt_map_t *mtm)
{
	return (mtm->mtm_nme > logmap_maxnme_async);
}

int
logmap_need_roll_sync(mt_map_t *mtm)
{
	return (mtm->mtm_nme > logmap_maxnme_sync);
}

void
logmap_start_roll(ml_unit_t *ul)
{
	mt_map_t	*logmap	= ul->un_logmap;

	logmap_settail(logmap, ul);
	ASSERT(!(ul->un_flags & LDL_NOROLL));
	mutex_enter(&logmap->mtm_mutex);
	if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) {
		logmap->mtm_flags |= MTM_ROLL_RUNNING;
		logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
		(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
		    TS_RUN, minclsyspri);
	}
	mutex_exit(&logmap->mtm_mutex);
}

void
logmap_kill_roll(ml_unit_t *ul)
{
	mt_map_t	*mtm	= ul->un_logmap;

	if (mtm == NULL)
		return;

	mutex_enter(&mtm->mtm_mutex);

	while (mtm->mtm_flags & MTM_ROLL_RUNNING) {
		mtm->mtm_flags |= MTM_ROLL_EXIT;
		cv_signal(&mtm->mtm_to_roll_cv);
		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
	}
	mutex_exit(&mtm->mtm_mutex);
}

/*
 * kick the roll thread if it's not doing anything
 */
void
logmap_forceroll_nowait(mt_map_t *logmap)
{
	/*
	 * Don't need to lock mtm_mutex to read mtm_flags here as we
	 * don't care in the rare case when we get a transitional value
	 * of mtm_flags. Just by signalling the thread, it will wake up
	 * and notice it has too many logmap entries.
	 */
	ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));
	if ((logmap->mtm_flags & MTM_ROLLING) == 0) {
		cv_signal(&logmap->mtm_to_roll_cv);
	}
}

/*
 * kick the roll thread and wait for it to finish a cycle
 */
void
logmap_forceroll(mt_map_t *mtm)
{
	mutex_enter(&mtm->mtm_mutex);
	if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) {
		mtm->mtm_flags |= MTM_FORCE_ROLL;
		cv_signal(&mtm->mtm_to_roll_cv);
	}
	do {
		if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) {
			mtm->mtm_flags &= ~MTM_FORCE_ROLL;
			goto out;
		}
		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
	} while (mtm->mtm_flags & MTM_FORCE_ROLL);
out:
	mutex_exit(&mtm->mtm_mutex);
}

/*
 * remove rolled deltas within (mof, nb) and free them
 */
void
logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
{
	int		dolock = 0;
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	offset_t	savmof	= mof;
	off_t		savnb	= nb;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

again:
	if (dolock)
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
	mutex_enter(&mtm->mtm_mutex);
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * remove and free the rolled entries
		 */
		mep = MAP_HASH(mof, mtm);
		while ((me = *mep) != 0) {
			if ((me->me_flags & ME_ROLL) &&
			    (MEwithinDATA(me, mof, hnb))) {
				if (me->me_flags & ME_AGE) {
					ASSERT(dolock == 0);
					dolock = 1;
					mutex_exit(&mtm->mtm_mutex);
					mof = savmof;
					nb = savnb;
					goto again;
				}
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				me->me_flags &= ~(ME_HASH|ME_ROLL);
				ASSERT(!(me->me_flags & ME_USER));
				mtm->mtm_nme--;
				/*
				 * cancelled entries are handled by someone else
				 */
				if ((me->me_flags & ME_CANCEL) == 0) {
					roll_stats[me->me_dt]++;
					CRB_RELE(me);
					kmem_cache_free(mapentry_cache, me);
				}
			} else
				mep = &me->me_hash;
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	if (dolock)
		rw_exit(&mtm->mtm_rwlock);
}

/*
 * Find the disk offset of the next delta to roll.
 * Returns 0: no more deltas to roll or a transaction is being committed
 *	   1: a delta to roll has been found and *mofp points
 *	      to the master file disk offset
 */
int
logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
{
	mapentry_t *me;

	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(logmap));

	mutex_enter(&logmap->mtm_mutex);
	for (me = logmap->mtm_next; me != (mapentry_t *)logmap;
	    me = me->me_next) {
		/* already rolled */
		if (me->me_flags & ME_ROLL) {
			continue;
		}

		/* part of currently busy transaction; stop */
		if (me->me_tid == logmap->mtm_tid) {
			break;
		}

		/* part of commit-in-progress transaction; stop */
		if (me->me_tid == logmap->mtm_committid) {
			break;
		}

		/*
		 * We shouldn't see a DT_CANCEL mapentry whose tid is
		 * neither mtm_tid nor mtm_committid, since these are
		 * removed at the end of each committed transaction.
		 */
		ASSERT(!(me->me_dt == DT_CANCEL));

		*mofp = me->me_mof;
		mutex_exit(&logmap->mtm_mutex);
		return (1);
	}
	mutex_exit(&logmap->mtm_mutex);
	return (0);
}

/*
 * put mapentry on sorted age list
 */
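/*
 * (The list is kept in ascending me_age order; e.g. with ages 3 and 7
 * already listed, inserting age 5 yields 3 -> 5 -> 7, so callers walk
 * the deltas oldest-first.)
 */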
static void
logmap_list_age(mapentry_t **age, mapentry_t *meadd)
{
	mapentry_t	*me;

	ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));

	for (me = *age; me; age = &me->me_agenext, me = *age) {
		if (me->me_age > meadd->me_age)
			break;
	}
	meadd->me_agenext = me;
	meadd->me_flags |= ME_AGE;
	*age = meadd;
}

/*
 * get a list of deltas within <mof, mof+nb>
 *	returns with mtm_rwlock held
 *	return value says whether the entire mof range is covered by deltas
 */
int
logmap_list_get(
	mt_map_t *mtm,
	offset_t mof,
	off_t nb,
	mapentry_t **age)
{
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	int		rwtype	= RW_READER;
	offset_t	savmof	= mof;
	off_t		savnb	= nb;
	int		entire	= 0;
	crb_t		*crb;

	mtm->mtm_ref = 1;
again:

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	rw_enter(&mtm->mtm_rwlock, rwtype);
	*age = NULL;
	mutex_enter(&mtm->mtm_mutex);
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * find overlapping entries
		 */
		mep = MAP_HASH(mof, mtm);
		for (me = *mep; me; me = me->me_hash) {
			if (me->me_dt == DT_CANCEL)
				continue;
			if (!DATAoverlapME(mof, hnb, me))
				continue;
			/*
			 * check if map entry is in use
			 * (about to be rolled).
			 */
			if (me->me_flags & ME_AGE) {
				/*
				 * reset the age bit in the list,
				 * upgrade the lock, and try again
				 */
				for (me = *age; me; me = *age) {
					*age = me->me_agenext;
					me->me_flags &= ~ME_AGE;
				}
				mutex_exit(&mtm->mtm_mutex);
				rw_exit(&mtm->mtm_rwlock);
				rwtype = RW_WRITER;
				mof = savmof;
				nb = savnb;
				entire = 0;
				goto again;
			} else {
				/* add mapentry to age ordered list */
				logmap_list_age(age, me);
				crb = me->me_crb;
				if (crb) {
					if (DATAwithinCRB(savmof, savnb, crb)) {
						entire = 1;
					}
				} else {
					if (DATAwithinME(savmof, savnb, me)) {
						entire = 1;
					}
				}
			}
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	return (entire);
}

/*
 * Get a list of deltas for rolling - returns 0 on success, 1 on failure.
 * Also return the cached roll buffer if all deltas point to it.
 */
int
logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
{
	mapentry_t	*me, **mep, *age = NULL;
	crb_t		*crb = NULL;

	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(logmap));
	ASSERT((mof & MAPBLOCKOFF) == 0);

	rbp->rb_crb = NULL;

	/*
	 * find overlapping entries
	 */
	mutex_enter(&logmap->mtm_mutex);
	mep = MAP_HASH(mof, logmap);
	for (me = *mep; me; me = me->me_hash) {
		if (!DATAoverlapME(mof, MAPBLOCKSIZE, me))
			continue;
		if (me->me_tid == logmap->mtm_tid)
			continue;
		if (me->me_tid == logmap->mtm_committid)
			continue;
		if (me->me_dt == DT_CANCEL)
			continue;

		/*
		 * Check if map entry is in use (by lufs_read_strategy())
		 * and if so reset the age bit in the list,
		 * upgrade the lock, and try again
		 */
		if (me->me_flags & ME_AGE) {
			for (me = age; me; me = age) {
				age = me->me_agenext;
				me->me_flags &= ~ME_AGE;
			}
			mutex_exit(&logmap->mtm_mutex);
			return (1); /* failure */
		} else {
			/* add mapentry to age ordered list */
			logmap_list_age(&age, me);
		}
	}
	if (!age) {
		goto out;
	}

	/*
	 * Mark the deltas as being rolled.
	 */
	for (me = age; me; me = me->me_agenext) {
		me->me_flags |= ME_ROLL;
	}

	/*
	 * Test if all deltas are covered by one valid roll buffer
	 */
	crb = age->me_crb;
	if (crb && !(crb->c_invalid)) {
		for (me = age; me; me = me->me_agenext) {
			if (me->me_crb != crb) {
				crb = NULL;
				break;
			}
		}
		rbp->rb_crb = crb;
	}
out:
	rbp->rb_age = age;

	mutex_exit(&logmap->mtm_mutex);

	ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
		logmap_logscan_debug(logmap, age));
	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	return (0); /* success */
}
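/*
 * Note (inferred, not stated in the original): when rb_crb comes back
 * non-NULL, every delta in this MAPBLOCKSIZE block is backed by the
 * same valid cached roll buffer, so the caller can roll the block by
 * writing crb->c_buf to the master instead of reassembling the deltas
 * from the log.
 */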

void
logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
{
	mapentry_t	*me;

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	mutex_enter(&mtm->mtm_mutex);
	for (me = age; me; me = age) {
		age = me->me_agenext;
		me->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);
}

void
logmap_list_put(mt_map_t *mtm, mapentry_t *age)
{
	mapentry_t	*me;

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	mutex_enter(&mtm->mtm_mutex);
	for (me = age; me; me = age) {
		age = me->me_agenext;
		me->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);
	rw_exit(&mtm->mtm_rwlock);
}

#define	UFS_RW_BALANCE 2
int ufs_rw_balance = UFS_RW_BALANCE;

/*
 * Check if we need to read the master.
 * The master does not need to be read if the log deltas to the
 * block are for one contiguous set of full disk sectors.
 * Cylinder group bit maps (DT_CG, 8K), directory entries (512B),
 * and possibly others should not require master disk reads.
 * Calculate the sector map for writing later.
 */
int
logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
{
	offset_t mof;
	crb_t *crb;
	mapentry_t *me;
	int32_t nb;
	int i;
	int start_sec, end_sec;
	int read_needed = 0;
	int all_inodes = 1;
	int first_sec = INT_MAX;
	int last_sec = -1;
	rbsecmap_t secmap = 0;

	/* LINTED: warning: logical expression always true: op "||" */
	ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY));

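	/*
	 * With an 8K MAPBLOCKSIZE and 512-byte sectors (the sizes the
	 * header comment implies), a block holds 16 sectors, which is
	 * why secmap fits in a 16-bit rbsecmap_t; the ASSERT above
	 * verifies exactly this relationship.
	 */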
	for (me = age; me; me = me->me_agenext) {
		crb = me->me_crb;
		if (crb) {
			nb = crb->c_nb;
			mof = crb->c_mof;
		} else {
			nb = me->me_nb;
			mof = me->me_mof;
		}

		/*
		 * If the delta is not sector aligned then
		 * read the whole block.
		 */
		if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
			read_needed = 1;
		}

		/* Set sector map used in the MAPBLOCKSIZE block.  */
		start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
		end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT);
		for (i = start_sec; i <= end_sec; i++) {
			secmap |= UINT16_C(1) << i;
		}
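		/*
		 * Worked example (illustrative): a delta at block offset
		 * 0x600 with nb = 0x400 gives start_sec = 0x600 >> 9 = 3
		 * and end_sec = 3 + (0x3ff >> 9) = 4, so bits 3 and 4
		 * of secmap get set.
		 */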

		if (me->me_dt != DT_INODE) {
			all_inodes = 0;
		}
		if (start_sec < first_sec) {
			first_sec = start_sec;
		}
		if (end_sec > last_sec) {
			last_sec = end_sec;
		}
	}

	ASSERT(secmap);
	ASSERT(first_sec != INT_MAX);
	ASSERT(last_sec != -1);

	if (all_inodes) {
		/*
		 * Here we have a tradeoff choice. It must be better to
		 * do 2 writes in the same MAPBLOCKSIZE chunk than a
		 * read and a write. But what about 3 or more writes, versus
		 * a read+write? Where is the cut over? It will depend on
		 * the track caching, scsi driver and other activity.
		 * An unpublished tunable is defined (ufs_rw_balance) that
		 * currently defaults to 2.
		 */
		if (!read_needed) {
			int count = 0, gap = 0;
			int sector_set; /* write needed to this sector */

			/* Count the gaps (every 1 to 0 transition) */
			for (i = first_sec + 1; i < last_sec; i++) {
				sector_set = secmap & (UINT16_C(1) << i);
				if (!gap && !sector_set) {
					gap = 1;
					count++;
					if (count > ufs_rw_balance) {
						read_needed = 1;
						break;
					}
				} else if (gap && sector_set) {
					gap = 0;
				}
			}
		}
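		/*
		 * Example (illustrative): with set sectors {0, 4, 9, 12}
		 * the scan between the first and last set bits finds 3
		 * gaps; that exceeds the default ufs_rw_balance of 2, so
		 * reading and rewriting the whole block wins over issuing
		 * 4 discontiguous sector writes.
		 */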

		/*
		 * Inodes commonly make up the majority (~85%) of deltas.
		 * They cannot contain embedded user data, so it's safe to
		 * read and write them all in one IO.
		 * But for directory entries, shadow inode data, and
		 * quota record data, the user data fragments can be
		 * embedded between that metadata, and so it's not safe to
		 * read, modify and then write the entire range, as
		 * asynchronous user data writes could get overwritten with
		 * old data.
		 * Thus we have to create a segment map of the metadata
		 * that needs to get written.
		 *
		 * If user data was logged then this issue would go away.
		 */
		if (read_needed) {
			for (i = first_sec + 1; i < last_sec; i++) {
				secmap |= (UINT16_C(1) << i);
			}
		}
	}
	rbp->rb_secmap = secmap;
	return (read_needed);
}
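/*
 * (Assumed from the structure above: the roll code consumes rb_secmap
 * by writing only the sectors whose bits are set, so metadata sectors
 * are rewritten while untouched user-data sectors in the same block
 * are left alone.)
 */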

/*
 * Abort the load of a set of log map deltas, i.e.,
 * clear out all mapentries on this unit's log map
 * which have a tid (transaction id) equal to the
 * parameter tid.  Walk the cancel list, taking everything
 * off it, too.
 */
static void
logmap_abort(ml_unit_t *ul, uint32_t tid)
{
	struct mt_map	*mtm = ul->un_logmap;	/* Log map */
	mapentry_t	*me,
			**mep;
	int		i;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	/*
	 * wait for any outstanding reads to finish; lock out future reads
	 */
	rw_enter(&mtm->mtm_rwlock, RW_WRITER);

	mutex_enter(&mtm->mtm_mutex);
	/* Take everything off cancel list */
	while ((me = mtm->mtm_cancel) != NULL) {
		mtm->mtm_cancel = me->me_cancel;
		me->me_flags &= ~ME_CANCEL;
		me->me_cancel = NULL;
	}

	/*
	 * Now take out all mapentries with the current tid and committid,
	 * as this function is called from logmap_logscan and logmap_commit.
	 * When it is called from logmap_logscan, mtm_tid == mtm_committid.
	 * But when logmap_abort is called from logmap_commit it is
	 * because the log errored when trying to write the commit record,
	 * after the async ops have been allowed to start in top_end_sync.
	 * So we also need to remove all mapentries from the transaction whose
	 * commit failed.
	 */
	for (i = 0; i < mtm->mtm_nhash; i++) {
		mep = &mtm->mtm_hash[i];
		while ((me = *mep) != NULL) {
			if (me->me_tid == tid ||
				me->me_tid == mtm->mtm_committid) {
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				if (!(me->me_flags & ME_USER)) {
					mtm->mtm_nme--;
				}
				CRB_RELE(me);
				kmem_cache_free(mapentry_cache, me);
				continue;
			}
			mep = &me->me_hash;
		}
	}

	if (!(ul->un_flags & LDL_SCAN))
		mtm->mtm_flags |= MTM_CANCELED;
	mutex_exit(&mtm->mtm_mutex);
	mtm->mtm_dirty = 0;
	mtm->mtm_nmet = 0;
	rw_exit(&mtm->mtm_rwlock);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}

static void
logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
{
	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	while (!ldl_has_space(ul, me)) {
		ASSERT(!(ul->un_flags & LDL_NOROLL));
		mutex_exit(&ul->un_log_mutex);
		logmap_forceroll(mtm);
		mutex_enter(&ul->un_log_mutex);
		if (ul->un_flags & LDL_ERROR)
			break;
	}

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
}
1079*0Sstevel@tonic-gate 
1080*0Sstevel@tonic-gate /*
1081*0Sstevel@tonic-gate  * put a list of deltas into a logmap
1082*0Sstevel@tonic-gate  * If va == NULL, don't write to the log.
1083*0Sstevel@tonic-gate  */
1084*0Sstevel@tonic-gate void
1085*0Sstevel@tonic-gate logmap_add(
1086*0Sstevel@tonic-gate 	ml_unit_t *ul,
1087*0Sstevel@tonic-gate 	char *va,			/* Ptr to buf w/deltas & data */
1088*0Sstevel@tonic-gate 	offset_t vamof,			/* Offset on master of buf start */
1089*0Sstevel@tonic-gate 	mapentry_t *melist)		/* Entries to add */
1090*0Sstevel@tonic-gate {
1091*0Sstevel@tonic-gate 	offset_t	mof;
1092*0Sstevel@tonic-gate 	off_t		nb;
1093*0Sstevel@tonic-gate 	mapentry_t	*me;
1094*0Sstevel@tonic-gate 	mapentry_t	**mep;
1095*0Sstevel@tonic-gate 	mapentry_t	**savmep;
1096*0Sstevel@tonic-gate 	uint32_t	tid;
1097*0Sstevel@tonic-gate 	mt_map_t	*mtm	= ul->un_logmap;
1098*0Sstevel@tonic-gate 
1099*0Sstevel@tonic-gate 	mutex_enter(&ul->un_log_mutex);
1100*0Sstevel@tonic-gate 	if (va)
1101*0Sstevel@tonic-gate 		logmap_wait_space(mtm, ul, melist);
1102*0Sstevel@tonic-gate 
1103*0Sstevel@tonic-gate 	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1104*0Sstevel@tonic-gate 		map_check_linkage(mtm));
1105*0Sstevel@tonic-gate 
1106*0Sstevel@tonic-gate 	mtm->mtm_ref = 1;
1107*0Sstevel@tonic-gate 	mtm->mtm_dirty++;
1108*0Sstevel@tonic-gate 	tid = mtm->mtm_tid;
1109*0Sstevel@tonic-gate 	while (melist) {
1110*0Sstevel@tonic-gate 		mof = melist->me_mof;
1111*0Sstevel@tonic-gate 		nb  = melist->me_nb;
1112*0Sstevel@tonic-gate 
1113*0Sstevel@tonic-gate 		/*
1114*0Sstevel@tonic-gate 		 * search for overlaping entries
1115*0Sstevel@tonic-gate 		 */
1116*0Sstevel@tonic-gate 		savmep = mep = MAP_HASH(mof, mtm);
1117*0Sstevel@tonic-gate 		mutex_enter(&mtm->mtm_mutex);
1118*0Sstevel@tonic-gate 		while ((me = *mep) != 0) {
1119*0Sstevel@tonic-gate 			/*
1120*0Sstevel@tonic-gate 			 * Data consumes old map entry; cancel map entry.
1121*0Sstevel@tonic-gate 			 * Take care when we replace an old map entry
1122*0Sstevel@tonic-gate 			 * which carries quota information with a newer entry
1123*0Sstevel@tonic-gate 			 * which does not. In that case the push function
1124*0Sstevel@tonic-gate 			 * would not be called to clean up the dquot structure.
1125*0Sstevel@tonic-gate 			 * This would be found later by invalidatedq() causing
1126*0Sstevel@tonic-gate 			 * a panic when the filesystem in unmounted.
1127*0Sstevel@tonic-gate 			 * We clean up the dquot manually and then replace
1128*0Sstevel@tonic-gate 			 * the map entry.
1129*0Sstevel@tonic-gate 			 */
			if (MEwithinDATA(me, mof, nb) &&
			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
				if (tid == me->me_tid &&
				    ((me->me_flags & ME_AGE) == 0)) {
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					ASSERT(!(me->me_flags & ME_USER));
					mtm->mtm_nme--;
					/*
					 * Special case if the mapentry
					 * carries a dquot and a push function.
					 * We have to clean up the quota info
					 * before replacing the mapentry.
					 */
					if (me->me_dt == DT_QR)
						HANDLE_DQUOT(me, melist);

					kmem_cache_free(mapentry_cache, me);
					continue;
				}
				me->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = me;
				me->me_flags |= ME_CANCEL;
			}
			mep = &(*mep)->me_hash;
		}
		mutex_exit(&mtm->mtm_mutex);

		/*
		 * remove from list
		 */
		me = melist;
		melist = melist->me_hash;
		me->me_flags &= ~ME_LIST;
		/*
		 * If va != NULL, put in the log.
		 */
		if (va)
			ldl_write(ul, va, vamof, me);
		if (ul->un_flags & LDL_ERROR) {
			kmem_cache_free(mapentry_cache, me);
			continue;
		}
		ASSERT((va == NULL) ||
			((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
			map_check_ldl_write(ul, va, vamof, me));

		/*
		 * put on hash
		 */
		mutex_enter(&mtm->mtm_mutex);
		me->me_hash = *savmep;
		*savmep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		me->me_flags |= ME_HASH;
		me->me_tid = tid;
		me->me_age = mtm->mtm_age++;
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
		mutex_exit(&mtm->mtm_mutex);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
	mutex_exit(&ul->un_log_mutex);
}
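
/*
 * Illustrative sketch, not built into the driver: the "put on hash"
 * block above links each mapentry into two structures at once -- a
 * singly linked hash chain and a circular doubly linked age list
 * whose sentinel is the mt_map_t itself (cast to mapentry_t *).
 * The sentinel idiom is shown below with hypothetical types; because
 * the head is a real node, tail insertion needs no NULL checks.
 */
#ifdef LUFS_MAP_EXAMPLE
struct ex_node {
	struct ex_node	*next;
	struct ex_node	*prev;
};

static void
ex_tail_insert(struct ex_node *head, struct ex_node *me)
{
	me->next = head;		/* me becomes the last node */
	me->prev = head->prev;
	head->prev->next = me;
	head->prev = me;
}
#endif /* LUFS_MAP_EXAMPLE */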

/*
 * Add the delta(s) into the log.
 * Create one cached roll buffer logmap entry, and reference count the
 * number of mapentries referring to it.
 * Cancel previous logmap entries.
 * logmap_add_buf is tolerant of failure to allocate a cached roll buffer.
 */
void
logmap_add_buf(
	ml_unit_t *ul,
	char *va,			/* Ptr to buf w/deltas & data */
	offset_t bufmof,		/* Offset on master of buf start */
	mapentry_t *melist,		/* Entries to add */
	caddr_t	buf,			/* Buffer containing delta(s) */
	uint32_t bufsz)			/* Size of buf */
{
	offset_t	mof;
	offset_t	vamof = bufmof + (va - buf);
	off_t		nb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mapentry_t	**savmep;
	uint32_t	tid;
	mt_map_t	*mtm	= ul->un_logmap;
	crb_t		*crb;
	crb_t		*crbsav = NULL;

	ASSERT((bufsz & DEV_BMASK) == 0);
	mutex_enter(&ul->un_log_mutex);
	logmap_wait_space(mtm, ul, melist);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;
	tid = mtm->mtm_tid;
	while (melist) {
		mof = melist->me_mof;
		nb  = melist->me_nb;

		/*
		 * search for overlapping entries
		 */
		savmep = mep = MAP_HASH(mof, mtm);
		mutex_enter(&mtm->mtm_mutex);
		while ((me = *mep) != 0) {
			/*
			 * Data consumes old map entry; cancel map entry.
			 * Take care when we replace an old map entry
			 * which carries quota information with a newer entry
			 * which does not. In that case the push function
			 * would not be called to clean up the dquot structure.
			 * This would be found later by invalidatedq() causing
			 * a panic when the filesystem is unmounted.
			 * We clean up the dquot manually and then replace
			 * the map entry.
			 */
			crb = me->me_crb;
			if (MEwithinDATA(me, mof, nb) &&
			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
				if (tid == me->me_tid &&
				    ((me->me_flags & ME_AGE) == 0)) {
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					ASSERT(!(me->me_flags & ME_USER));
					mtm->mtm_nme--;
					/*
					 * Special case if the mapentry
					 * carries a dquot and a push function.
					 * We have to clean up the quota info
					 * before replacing the mapentry.
					 */
					if (me->me_dt == DT_QR)
						HANDLE_DQUOT(me, melist);

					/*
					 * If this soon-to-be-deleted mapentry
					 * has a suitable roll buffer, then
					 * re-use it.
					 */
					if (crb && (--crb->c_refcnt == 0)) {
						if (crbsav ||
						    (crb->c_nb != bufsz)) {
							CRB_FREE(crb, me);
						} else {
							bcopy(buf, crb->c_buf,
							    bufsz);
							crb->c_invalid = 0;
							crb->c_mof = bufmof;
							crbsav = crb;
							me->me_crb = NULL;
						}
					}
					kmem_cache_free(mapentry_cache, me);
					continue;
				}
				me->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = me;
				me->me_flags |= ME_CANCEL;
			}

			/*
			 * Inode deltas within the same fs block come
			 * in individually as separate calls to logmap_add().
			 * All others come in as one call. So check for an
			 * existing entry where we can re-use the crb.
			 */
			if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
			    !crbsav && crb &&
			    WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
				ASSERT(crb->c_mof == bufmof);
				ASSERT(crb->c_nb == bufsz);
				bcopy(buf, crb->c_buf, bufsz);
				crbsav = crb;
			}
			mep = &(*mep)->me_hash;
		}
		mutex_exit(&mtm->mtm_mutex);

		/*
		 * If we don't already have a crb then allocate one
		 * and copy the incoming buffer. Only do this once
		 * for all the incoming deltas.
		 */
		if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
			/*
			 * Only use a cached roll buffer if we
			 * have enough memory, and check for failures.
			 */
			if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
			    (kmem_avail() > bufsz)) {
				crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
			} else {
				ufs_crb_alloc_fails++;
			}
			if (crbsav) {
				crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
				if (crbsav->c_buf) {
					atomic_add_64(&ufs_crb_size,
					    (uint64_t)bufsz);
					if (ufs_crb_size > ufs_crb_max_size) {
						ufs_crb_max_size = ufs_crb_size;
					}
					bcopy(buf, crbsav->c_buf, bufsz);
					crbsav->c_nb = bufsz;
					crbsav->c_refcnt = 0;
					crbsav->c_invalid = 0;
					ASSERT((bufmof & DEV_BMASK) == 0);
					crbsav->c_mof = bufmof;
				} else {
					kmem_free(crbsav, sizeof (crb_t));
					crbsav = NULL;
				}
			}
		}

		/*
		 * remove from list
		 */
		me = melist;
		melist = melist->me_hash;
		me->me_flags &= ~ME_LIST;
		me->me_crb = crbsav;
		if (crbsav) {
			crbsav->c_refcnt++;
		}
		crbsav = NULL;

		ASSERT(va);
		ldl_write(ul, va, vamof, me); /* add to on-disk log */
		if (ul->un_flags & LDL_ERROR) {
			CRB_RELE(me);
			kmem_cache_free(mapentry_cache, me);
			continue;
		}
		ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
			map_check_ldl_write(ul, va, vamof, me));

		/*
		 * put on hash
		 */
		mutex_enter(&mtm->mtm_mutex);
		me->me_hash = *savmep;
		*savmep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		me->me_flags |= ME_HASH;
		me->me_tid = tid;
		me->me_age = mtm->mtm_age++;
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
		mutex_exit(&mtm->mtm_mutex);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
	mutex_exit(&ul->un_log_mutex);
}
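
/*
 * Illustrative sketch, not built into the driver: the crb allocation
 * above is budgeted -- a global byte count (ufs_crb_size) is checked
 * against a limit, the allocation itself may fail (KM_NOSLEEP), and
 * failure simply means the deltas are logged without a cached roll
 * buffer.  The names below (ex_*) are hypothetical stand-ins.
 */
#ifdef LUFS_MAP_EXAMPLE
static uint64_t	ex_crb_size;			/* bytes currently cached */
static uint64_t	ex_crb_limit = 1024 * 1024;	/* arbitrary budget */

static void *
ex_budgeted_copy(const void *buf, size_t bufsz)
{
	void *p = NULL;

	if (ex_crb_size + bufsz < ex_crb_limit)
		p = kmem_alloc(bufsz, KM_NOSLEEP);	/* may return NULL */
	if (p != NULL) {
		atomic_add_64(&ex_crb_size, (uint64_t)bufsz);
		bcopy(buf, p, bufsz);
	}
	return (p);		/* NULL: caller logs without a crb */
}
#endif /* LUFS_MAP_EXAMPLE */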

/*
 * free up any cancelled deltas
 */
void
logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
{
	int		dolock	= 0;
	mapentry_t	*me;
	mapentry_t	**mep;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

again:
	if (dolock)
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);

	/*
	 * At EOT, cancel the indicated deltas
	 */
	mutex_enter(&mtm->mtm_mutex);
	if (mtm->mtm_flags & MTM_CANCELED) {
		mtm->mtm_flags &= ~MTM_CANCELED;
		ASSERT(dolock == 0);
		mutex_exit(&mtm->mtm_mutex);
		return;
	}

	while ((me = *cancelhead) != NULL) {
		/*
		 * roll forward or read collision; wait and try again
		 */
		if (me->me_flags & ME_AGE) {
			ASSERT(dolock == 0);
			mutex_exit(&mtm->mtm_mutex);
			dolock = 1;
			goto again;
		}
		/*
		 * remove from cancel list
		 */
		*cancelhead = me->me_cancel;
		me->me_cancel = NULL;
		me->me_flags &= ~(ME_CANCEL);

		/*
		 * logmap_remove_roll handles ME_ROLL entries later
		 *	we leave them around for logmap_iscancel
		 *	XXX is this necessary?
		 */
		if (me->me_flags & ME_ROLL)
			continue;

		/*
		 * remove from hash (if necessary)
		 */
		if (me->me_flags & ME_HASH) {
			mep = MAP_HASH(me->me_mof, mtm);
			while (*mep) {
				if (*mep == me) {
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					me->me_flags &= ~(ME_HASH);
					if (!(me->me_flags & ME_USER)) {
						mtm->mtm_nme--;
					}
					break;
				} else
					mep = &(*mep)->me_hash;
			}
		}
		/*
		 * put the entry on the free list
		 */
		CRB_RELE(me);
		kmem_cache_free(mapentry_cache, me);
	}
	mutex_exit(&mtm->mtm_mutex);
	if (dolock)
		rw_exit(&mtm->mtm_rwlock);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}
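
/*
 * Illustrative sketch, not built into the driver: the retry idiom in
 * logmap_free_cancel() above.  The first pass runs without
 * mtm_rwlock; if a busy entry is met (ME_AGE, i.e. the roll thread
 * owns it), the pass restarts as a writer so it serializes with the
 * roller.  The predicate ex_busy() below is a hypothetical stand-in
 * for the ME_AGE test.
 */
#ifdef LUFS_MAP_EXAMPLE
static void
ex_retry_locked(mt_map_t *mtm, int (*ex_busy)(mapentry_t *), mapentry_t *me)
{
	int dolock = 0;
again:
	if (dolock)
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
	mutex_enter(&mtm->mtm_mutex);
	if (ex_busy(me) && !dolock) {
		mutex_exit(&mtm->mtm_mutex);
		dolock = 1;
		goto again;	/* second pass waits out the roller */
	}
	/* ... safe to tear the entry down here ... */
	mutex_exit(&mtm->mtm_mutex);
	if (dolock)
		rw_exit(&mtm->mtm_rwlock);
}
#endif /* LUFS_MAP_EXAMPLE */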


void
logmap_commit(ml_unit_t *ul, uint32_t tid)
{
	mapentry_t	me;
	mt_map_t	*mtm	= ul->un_logmap;


	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/*
	 * async'ly write a commit rec into the log
	 */
	if (mtm->mtm_dirty) {
		/*
		 * put commit record into log
		 */
		me.me_mof = mtm->mtm_tid;
		me.me_dt = DT_COMMIT;
		me.me_nb = 0;
		me.me_hash = NULL;
		logmap_wait_space(mtm, ul, &me);
		ldl_write(ul, NULL, (offset_t)0, &me);
		ldl_round_commit(ul);

		/*
		 * abort on error; else reset dirty flag
		 */
		if (ul->un_flags & LDL_ERROR)
			logmap_abort(ul, tid);
		else {
			mtm->mtm_dirty = 0;
			mtm->mtm_nmet = 0;
			mtm->mtm_cfrags = 0;
		}
		/* push commit */
		ldl_push_commit(ul);
	}
}

void
logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
{
	off_t		lof;
	uint32_t	tid	= 0;	/* not passed uninitialized below */
	mapentry_t	*me;

	/*
	 * Move the head forward so the log knows how full it is.
	 * Make sure to skip any mapentry whose me_lof is 0; these
	 * are just placeholders for DT_CANCEL entries covering freed
	 * user blocks for the current moby.
	 */
	mutex_enter(&ul->un_log_mutex);
	mutex_enter(&mtm->mtm_mutex);
	me = mtm->mtm_next;
	while (me != (mapentry_t *)mtm && me->me_lof == 0) {
		me = me->me_next;
	}

	if (me == (mapentry_t *)mtm)
		lof = -1;
	else {
		lof = me->me_lof;
		tid = me->me_tid;
	}
	mutex_exit(&mtm->mtm_mutex);
	ldl_sethead(ul, lof, tid);
	if (lof == -1)
		mtm->mtm_age = 0;
	mutex_exit(&ul->un_log_mutex);
}

void
logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
{
	off_t		lof;
	size_t		nb	= 0;	/* not passed uninitialized below */

	/*
	 * set the tail after the logmap_abort
	 */
	mutex_enter(&ul->un_log_mutex);
	mutex_enter(&mtm->mtm_mutex);
	if (mtm->mtm_prev == (mapentry_t *)mtm)
		lof = -1;
	else {
		/*
		 * set the tail to the end of the last commit
		 */
		lof = mtm->mtm_tail_lof;
		nb = mtm->mtm_tail_nb;
	}
	mutex_exit(&mtm->mtm_mutex);
	ldl_settail(ul, lof, nb);
	mutex_exit(&ul->un_log_mutex);
}

/*
 * when resetting a device, roll the log until every
 * delta has been rolled forward
 */
void
logmap_roll_dev(ml_unit_t *ul)
{
	mt_map_t	*mtm	= ul->un_logmap;
	mapentry_t	*me;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;

again:
	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
	if (ul->un_flags & (LDL_ERROR|LDL_NOROLL))
		return;

	/*
	 * look for deltas
	 */
	mutex_enter(&mtm->mtm_mutex);
	for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) {
		if (me->me_flags & ME_ROLL)
			break;
		if (me->me_tid == mtm->mtm_tid)
			continue;
		if (me->me_tid == mtm->mtm_committid)
			continue;
		break;
	}

	/*
	 * found a delta; kick the roll thread
	 * but only if the thread is running... (jmh)
	 */
	if (me != (mapentry_t *)mtm) {
		mutex_exit(&mtm->mtm_mutex);
		logmap_forceroll(mtm);
		goto again;
	}

	/*
	 * no more deltas, return
	 */
	mutex_exit(&mtm->mtm_mutex);
	(void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}

static void
logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
{
	mapentry_t	*me;
	mapentry_t	**mep;
	mt_map_t	*mtm	= ul->un_logmap;
	int		frags;

	/*
	 * map has been referenced and is dirty
	 */
	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;

	/*
	 * get a mapentry
	 */
	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(me, sizeof (mapentry_t));

	/*
	 * initialize cancel record and put in logmap
	 */
	me->me_mof = mof;
	me->me_nb = nb;
	me->me_dt = DT_CANCEL;
	me->me_tid = mtm->mtm_tid;
	me->me_hash = NULL;

	/*
	 * Write the delta to the log if it is for metadata.  If it is
	 * not metadata, it is user data and we are just putting a cancel
	 * mapentry into the hash for a freed user block, so that the
	 * block cannot be reallocated within this moby.  This cancel
	 * entry prevents user data corruption if we happen to crash
	 * before this moby is committed.
	 */
	mutex_enter(&ul->un_log_mutex);
	if (metadata) {
		logmap_wait_space(mtm, ul, me);
		ldl_write(ul, NULL, (offset_t)0, me);
		if (ul->un_flags & LDL_ERROR) {
			kmem_cache_free(mapentry_cache, me);
			mutex_exit(&ul->un_log_mutex);
			return;
		}
	}

	/*
	 * put in hash and on cancel list
	 */
	mep = MAP_HASH(mof, mtm);
	mutex_enter(&mtm->mtm_mutex);
	me->me_age = mtm->mtm_age++;
	me->me_hash = *mep;
	*mep = me;
	me->me_next = (mapentry_t *)mtm;
	me->me_prev = mtm->mtm_prev;
	mtm->mtm_prev->me_next = me;
	mtm->mtm_prev = me;
	me->me_cancel = mtm->mtm_cancel;
	mtm->mtm_cancel = me;
	if (metadata) {
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
	} else {
		me->me_flags = ME_USER;
	}
	me->me_flags |= (ME_HASH|ME_CANCEL);
	if (!(metadata)) {
		frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
		if (frags)
			mtm->mtm_cfrags += numfrags(ul->un_ufsvfs->vfs_fs,
				frags);
	}
	mutex_exit(&mtm->mtm_mutex);

	mutex_exit(&ul->un_log_mutex);
}

/*
 * cancel entries in a logmap (entries are freed at EOT)
 */
void
logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
{
	int32_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mt_map_t	*mtm	= ul->un_logmap;
	crb_t		*crb;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * find overlapping entries
		 */
		mep = MAP_HASH(mof, mtm);
		mutex_enter(&mtm->mtm_mutex);
		for (me = *mep; me; me = me->me_hash) {
			if (!DATAoverlapME(mof, hnb, me))
				continue;

			ASSERT(MEwithinDATA(me, mof, hnb));

			if ((me->me_flags & ME_CANCEL) == 0) {
				me->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = me;
				me->me_flags |= ME_CANCEL;
				crb = me->me_crb;
				if (crb) {
					crb->c_invalid = 1;
				}
			}
		}
		mutex_exit(&mtm->mtm_mutex);

		/*
		 * put a cancel record into the log
		 */
		logmap_cancel_delta(ul, mof, hnb, metadata);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}
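
/*
 * Illustrative sketch, not built into the driver: the loop above
 * never lets a cancel record span a map block.  The step below
 * computes how many bytes remain in the current MAPBLOCKSIZE-aligned
 * block, clamped to what is left of the range; ex_visit() is a
 * hypothetical consumer.  With the stock 8K MAPBLOCKSIZE, a 12K
 * range at offset 0 splits into an 8K chunk and a 4K chunk.
 */
#ifdef LUFS_MAP_EXAMPLE
static void
ex_for_each_chunk(offset_t mof, off_t nb,
    void (*ex_visit)(offset_t, int32_t))
{
	int32_t hnb;

	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		/* bytes left in the current map block */
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		ex_visit(mof, hnb);
	}
}
#endif /* LUFS_MAP_EXAMPLE */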

/*
 * check for overlap w/cancel delta
 */
int
logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
{
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;

	mutex_enter(&mtm->mtm_mutex);
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * search for dup entry
		 */
		mep = MAP_HASH(mof, mtm);
		for (me = *mep; me; me = me->me_hash) {
			if (((me->me_flags & ME_ROLL) == 0) &&
			    (me->me_dt != DT_CANCEL))
				continue;
			if (DATAoverlapME(mof, hnb, me))
				break;
		}

		/*
		 * overlap detected
		 */
		if (me) {
			mutex_exit(&mtm->mtm_mutex);
			return (1);
		}
	}
	mutex_exit(&mtm->mtm_mutex);
	return (0);
}

static int
logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
{
	mapentry_t	*me;
	int		error;
	mt_map_t	*mtm	= ul->un_logmap;

	/*
	 * verify delta header; failure == mediafail
	 */
	error = 0;
	/* delta type */
	if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
		error = EINVAL;
	if (dp->d_typ == DT_COMMIT) {
		if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
			error = EINVAL;
	} else {
		/* length of delta */
		if ((dp->d_nb < INT32_C(0)) ||
		    (dp->d_nb > INT32_C(MAPBLOCKSIZE)))
			error = EINVAL;

		/* offset on master device */
		if (dp->d_mof < INT64_C(0))
			error = EINVAL;
	}

	if (error) {
		ldl_seterror(ul, "Error processing ufs log data during scan");
		return (error);
	}

	/*
	 * process commit record
	 */
	if (dp->d_typ == DT_COMMIT) {
		if (mtm->mtm_dirty) {
			ASSERT(dp->d_nb == INT32_C(0));
			logmap_free_cancel(mtm, &mtm->mtm_cancel);
			mtm->mtm_dirty = 0;
			mtm->mtm_nmet = 0;
			mtm->mtm_tid++;
			mtm->mtm_committid = mtm->mtm_tid;
			ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
				logmap_logscan_commit_debug(lof, mtm));
		}
		/*
		 * return #bytes to next sector (next delta header)
		 */
		*nbp = ldl_logscan_nbcommit(lof);
		mtm->mtm_tail_lof = lof;
		mtm->mtm_tail_nb = *nbp;
		return (0);
	}

	/*
	 * add delta to logmap
	 */
	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(me, sizeof (mapentry_t));
	me->me_lof = lof;
	me->me_mof = dp->d_mof;
	me->me_nb = dp->d_nb;
	me->me_tid = mtm->mtm_tid;
	me->me_dt = dp->d_typ;
	me->me_hash = NULL;
	me->me_flags = (ME_LIST | ME_SCAN);
	logmap_add(ul, NULL, 0, me);
	switch (dp->d_typ) {
	case DT_CANCEL:
		me->me_flags |= ME_CANCEL;
		me->me_cancel = mtm->mtm_cancel;
		mtm->mtm_cancel = me;
		break;
	default:
		ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
			logmap_logscan_add_debug(dp, mtm));
		break;
	}

	/*
	 * return #bytes till next delta header
	 */
	if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
		*nbp = 0;
	else
		*nbp = dp->d_nb;
	return (0);
}
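
/*
 * Illustrative sketch, not built into the driver: the on-disk framing
 * that the scan depends on.  Every record starts with a struct delta
 * header; most types are followed by d_nb bytes of data, DT_CANCEL
 * and DT_ABZERO carry no data at all, and a DT_COMMIT is padded to
 * the next sector (computed by ldl_logscan_nbcommit(), not by this
 * hypothetical helper).
 */
#ifdef LUFS_MAP_EXAMPLE
static size_t
ex_bytes_after_header(const struct delta *dp)
{
	if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
		return (0);		/* header only */
	return ((size_t)dp->d_nb);	/* delta data to skip over */
}
#endif /* LUFS_MAP_EXAMPLE */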

void
logmap_logscan(ml_unit_t *ul)
{
	size_t		nb, nbd;
	off_t		lof;
	struct delta	delta;
	mt_map_t	*logmap	= ul->un_logmap;

	ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);

	/*
	 * prepare the log for a logscan
	 */
	ldl_logscan_begin(ul);

	/*
	 * prepare the logmap for a logscan
	 */
	(void) map_free_entries(logmap);
	logmap->mtm_tid = 0;
	logmap->mtm_committid = UINT32_C(0);
	logmap->mtm_age = 0;
	logmap->mtm_dirty = 0;
	logmap->mtm_ref = 0;

	/*
	 * while not at end of log
	 *	read delta header
	 *	add to logmap
	 *	seek to beginning of next delta
	 */
	lof = ul->un_head_lof;
	nbd = sizeof (delta);
	while (lof != ul->un_tail_lof) {

		/* read delta header */
		if (ldl_logscan_read(ul, &lof, nbd, (caddr_t)&delta))
			break;

		/* add to logmap */
		if (logmap_logscan_add(ul, &delta, lof, &nb))
			break;

		/* seek to next header (skip data) */
		if (ldl_logscan_read(ul, &lof, nb, NULL))
			break;
	}

	/*
	 * remove the last partial transaction from the logmap
	 */
	logmap_abort(ul, logmap->mtm_tid);

	ldl_logscan_end(ul);
}

void
_init_map(void)
{
	/*
	 * Initialise the mapentry cache. No constructor or destructor
	 * is needed. Also no reclaim function is supplied as reclaiming
	 * current entries is not possible.
	 */
	mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
	    sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
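
/*
 * Illustrative sketch, not built into the driver: the alloc/free
 * pattern used with mapentry_cache throughout this file.  Entries
 * are zeroed by hand because the cache has no constructor.
 */
#ifdef LUFS_MAP_EXAMPLE
static mapentry_t *
ex_new_mapentry(void)
{
	mapentry_t *me;

	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(me, sizeof (mapentry_t));
	return (me);	/* released with kmem_cache_free(mapentry_cache, me) */
}
#endif /* LUFS_MAP_EXAMPLE */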

/*
 * Special case when we replace an old map entry which carries quota
 * information with a newer entry which does not.
 * In that case the push function would not be called to clean up the
 * dquot structure. This would be found later by invalidatedq() causing
 * a panic when the filesystem is unmounted.
 * We clean up the dquot manually before replacing the map entry.
 */
void
handle_dquot(mapentry_t *me)
{
	int dolock = 0;
	int domutex = 0;
	struct dquot *dqp;

	dqp = (struct dquot *)me->me_arg;

	/*
	 * We need vfs_dqrwlock to call dqput()
	 */
	dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
	if (dolock)
		rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);

	domutex = (!MUTEX_HELD(&dqp->dq_lock));
	if (domutex)
		mutex_enter(&dqp->dq_lock);

	/*
	 * Only clean up if the dquot is referenced
	 */
	if (dqp->dq_cnt == 0) {
		if (domutex)
			mutex_exit(&dqp->dq_lock);
		if (dolock)
			rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
		return;
	}

	dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
	dqput(dqp);

	if (domutex)
		mutex_exit(&dqp->dq_lock);

	if (dolock)
		rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
}
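
/*
 * Illustrative sketch, not built into the driver: the conditional
 * locking idiom handle_dquot() uses for callers that may already
 * hold vfs_dqrwlock or dq_lock.  Note that RW_LOCK_HELD() and
 * MUTEX_HELD() only report that the lock is held by someone, so the
 * idiom is safe only when a held lock is known to belong to the
 * current call chain.
 */
#ifdef LUFS_MAP_EXAMPLE
static void
ex_cond_rw_read(krwlock_t *rwp)
{
	int dolock = !RW_LOCK_HELD(rwp);

	if (dolock)
		rw_enter(rwp, RW_READER);
	/* ... protected work ... */
	if (dolock)
		rw_exit(rwp);
}
#endif /* LUFS_MAP_EXAMPLE */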