/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/inttypes.h>
#include <sys/atomic.h>
#include <sys/tuneable.h>

/*
 * externs
 */
extern pri_t minclsyspri;
extern struct kmem_cache *lufs_bp;
extern int ufs_trans_push_quota();

/*
 * globals
 */
kmem_cache_t *mapentry_cache;

/*
 * logmap tuning constants
 */
long logmap_maxnme_commit = 2048;
long logmap_maxnme_async = 4096;
long logmap_maxnme_sync = 6144;
long logmap_maxcfrag_commit = 4;	/* Max canceled fragments per moby */


uint64_t ufs_crb_size = 0;		/* current size of all crb buffers */
uint64_t ufs_crb_max_size = 0;		/* highest crb buffer use so far */
size_t ufs_crb_limit;			/* max allowable size for crbs */
uint64_t ufs_crb_alloc_fails = 0;	/* crb allocation failures stat */
#define	UFS_MAX_CRB_DEFAULT_DIVISOR 10	/* max 1/10 kmem_maxavail() */
int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR; /* tunable */
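/* forward reference; cleans up the dquot carried by a quota mapentry */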
void handle_dquot(mapentry_t *);

/*
 * GENERIC MAP ROUTINES
 */

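/*
 * A crb is a cached roll buffer: an in-core copy of logged data that
 * lets the roll thread update the master without re-reading the log.
 * Each crb is reference counted by the mapentries that point at it
 * (me_crb).  CRB_FREE releases the buffer and its global size
 * accounting; CRB_RELE drops one reference and frees the crb on the
 * last release.
 */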
#define	CRB_FREE(crb, me) \
	kmem_free(crb->c_buf, crb->c_nb); \
	atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \
	kmem_free(crb, sizeof (crb_t)); \
	(me)->me_crb = NULL;

#define	CRB_RELE(me) { \
	crb_t *crb = (me)->me_crb; \
	if (crb && (--crb->c_refcnt == 0)) { \
		CRB_FREE(crb, me) \
	} \
}

/*
 * Check that the old delta has an argument and a push function of
 * ufs_trans_push_quota(), then check that the old and new deltas differ.
 * If so we clean up with handle_dquot() before replacing the old delta.
 */
#define	HANDLE_DQUOT(me, melist) { \
	if ((me->me_arg) && \
	    (me->me_func == ufs_trans_push_quota)) { \
		if (!((me->me_dt == melist->me_dt) && \
		    (me->me_arg == melist->me_arg) && \
		    (me->me_func == melist->me_func))) { \
			handle_dquot(me); \
		} \
	} \
}

/*
 * free up all the mapentries for a map
 */
void
map_free_entries(mt_map_t *mtm)
{
	int		i;
	mapentry_t	*me;

	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
		me->me_next->me_prev = me->me_prev;
		me->me_prev->me_next = me->me_next;
		CRB_RELE(me);
		kmem_cache_free(mapentry_cache, me);
	}
	for (i = 0; i < mtm->mtm_nhash; i++)
		mtm->mtm_hash[i] = NULL;
	mtm->mtm_nme = 0;
	mtm->mtm_nmet = 0;
}

/*
 * done with map; free if necessary
 */
mt_map_t *
map_put(mt_map_t *mtm)
{
	/*
	 * free up the map's memory
	 */
	map_free_entries(mtm);
	ASSERT(map_put_debug(mtm));
	kmem_free(mtm->mtm_hash,
	    (size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash));
	mutex_destroy(&mtm->mtm_mutex);
	mutex_destroy(&mtm->mtm_scan_mutex);
	cv_destroy(&mtm->mtm_to_roll_cv);
	cv_destroy(&mtm->mtm_from_roll_cv);
	rw_destroy(&mtm->mtm_rwlock);
	mutex_destroy(&mtm->mtm_lock);
	cv_destroy(&mtm->mtm_cv_commit);
	cv_destroy(&mtm->mtm_cv_next);
	cv_destroy(&mtm->mtm_cv_eot);
	cv_destroy(&mtm->mtm_cv);
	kmem_free(mtm, sizeof (mt_map_t));
	return (NULL);
}
/*
 * Allocate a map;
 */
mt_map_t *
map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
{
	mt_map_t	*mtm;

	/*
	 * assume the map is not here and allocate the necessary structs
	 */
	mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);
	mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL);
	mtm->mtm_next = (mapentry_t *)mtm;
	mtm->mtm_prev = (mapentry_t *)mtm;
	mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh),
	    KM_SLEEP);
	mtm->mtm_nhash = nh;
	mtm->mtm_debug = ul->un_debug;
	mtm->mtm_type = maptype;

	mtm->mtm_cfrags = 0;
	mtm->mtm_cfragmax = logmap_maxcfrag_commit;

	/*
	 * for scan test
	 */
	mtm->mtm_ul = ul;

	/*
	 * Initialize locks
	 */
	mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
	cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL);
	ASSERT(map_get_debug(ul, mtm));

	return (mtm);
}

/*
 * DELTAMAP ROUTINES
 */
/*
 * deltamap tuning constants
 */
long deltamap_maxnme = 1024;	/* global so it can be set */

int
deltamap_need_commit(mt_map_t *mtm)
{
	return (mtm->mtm_nme > deltamap_maxnme);
}

/*
 * put a delta into a deltamap; may sleep on memory
 */
void
deltamap_add(
	mt_map_t *mtm,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg,
	threadtrans_t *tp)
{
	int32_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);

	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * Search for dup entry. We need to ensure that we don't
		 * replace a map entry which carries quota information
		 * with a map entry which doesn't. In that case we lose
		 * the reference to the dquot structure, which will not be
		 * cleaned up by the push function me->me_func as this will
		 * never be called.
		 * The stray dquot would be found later by invalidatedq()
		 * causing a panic when the filesystem is unmounted.
		 */
		mep = MAP_HASH(mof, mtm);
		for (me = *mep; me; me = me->me_hash) {
			if (DATAwithinME(mof, hnb, me)) {
				/*
				 * Don't remove quota entries which have
				 * incremented the ref count (those with a
				 * ufs_trans_push_quota push function).
				 * Let logmap_add[_buf] clean them up.
				 */
				if (me->me_func == ufs_trans_push_quota) {
					continue;
				}
				break;
			}
			ASSERT((dtyp == DT_CANCEL) ||
			    (!DATAoverlapME(mof, hnb, me)) ||
			    MEwithinDATA(me, mof, hnb));
		}

		if (me) {
			/* already in map */
			continue;
		}

		/*
		 * Add up all the delta map deltas so we can compute
		 * an upper bound on the log size used.
		 * Note, some deltas get removed from the deltamap
		 * before the deltamap_push by lufs_write_strategy
		 * and so multiple deltas to the same mof offset
		 * don't get cancelled here but in the logmap.
		 * Thus we can't easily get an accurate count of
		 * the log space used - only an upper bound.
		 */
		if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
			ASSERT(dtyp != DT_CANCEL);
			if (dtyp == DT_ABZERO) {
				tp->deltas_size += sizeof (struct delta);
			} else {
				tp->deltas_size +=
				    (hnb + sizeof (struct delta));
			}
		}

		delta_stats[dtyp]++;

		/*
		 * get a mapentry
		 * May need to drop & re-grab the mtm_mutex
		 * and then recheck for a duplicate
		 */
		me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
		if (me == NULL) {
			mutex_exit(&mtm->mtm_mutex);
			me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
			mutex_enter(&mtm->mtm_mutex);
		}
		bzero(me, sizeof (mapentry_t));

		/*
		 * initialize and put in deltamap
		 */
		me->me_mof = mof;
		me->me_nb = hnb;
		me->me_func = func;
		me->me_arg = arg;
		me->me_dt = dtyp;
		me->me_flags = ME_HASH;
		me->me_tid = mtm->mtm_tid;

		me->me_hash = *mep;
		*mep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		mtm->mtm_nme++;
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}

/*
 * remove deltas within (mof, nb) and return as linked list
 */
mapentry_t *
deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
{
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mapentry_t	*mer;

	if (mtm == NULL)
		return (NULL);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);
	for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * remove entries from the hash and return them as an
		 * age-ordered linked list
		 */
		mep = MAP_HASH(mof, mtm);
		while ((me = *mep) != 0) {
			if (MEwithinDATA(me, mof, hnb)) {
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				me->me_hash = mer;
				mer = me;
				me->me_flags |= ME_LIST;
				me->me_flags &= ~ME_HASH;
				mtm->mtm_nme--;
			} else
				mep = &me->me_hash;
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	return (mer);
}

/*
 * delete entries within (mof, nb)
 */
void
deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
{
	mapentry_t	*me;
	mapentry_t	*menext;

	menext = deltamap_remove(mtm, mof, nb);
	while ((me = menext) != 0) {
		menext = me->me_hash;
		kmem_cache_free(mapentry_cache, me);
	}
}

/*
 * Call the indicated function to cause deltas to move to the logmap.
 * top_end_sync() is the only caller of this function and
 * it has waited for the completion of all threads, so there can
 * be no other activity in the deltamap. Therefore we don't need to
 * hold the deltamap lock.
 */
void
deltamap_push(ml_unit_t *ul)
{
	delta_t		dtyp;
	int		(*func)();
	ulong_t		arg;
	mapentry_t	*me;
	offset_t	mof;
	off_t		nb;
	mt_map_t	*mtm = ul->un_deltamap;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	/*
	 * for every entry in the deltamap
	 */
	while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
		ASSERT(me->me_func);
		func = me->me_func;
		dtyp = me->me_dt;
		arg = me->me_arg;
		mof = me->me_mof;
		nb = me->me_nb;
		if ((ul->un_flags & LDL_ERROR) ||
		    (*func)(ul->un_ufsvfs, dtyp, arg))
			deltamap_del(mtm, mof, nb);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}

/*
 * LOGMAP ROUTINES
 */

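/*
 * The current transaction should be committed once it has accumulated
 * too many mapentries (logmap_maxnme_commit) or too many canceled
 * fragments (mtm_cfragmax); the roll thread is driven by the async
 * and sync thresholds above.
 */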
int
logmap_need_commit(mt_map_t *mtm)
{
	return ((mtm->mtm_nmet > logmap_maxnme_commit) ||
	    (mtm->mtm_cfrags >= mtm->mtm_cfragmax));
}

int
logmap_need_roll_async(mt_map_t *mtm)
{
	return (mtm->mtm_nme > logmap_maxnme_async);
}

int
logmap_need_roll_sync(mt_map_t *mtm)
{
	return (mtm->mtm_nme > logmap_maxnme_sync);
}

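/*
 * start the roll thread if it isn't already running
 */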
void
logmap_start_roll(ml_unit_t *ul)
{
	mt_map_t	*logmap = ul->un_logmap;

	logmap_settail(logmap, ul);
	ASSERT(!(ul->un_flags & LDL_NOROLL));
	mutex_enter(&logmap->mtm_mutex);
	if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) {
		logmap->mtm_flags |= MTM_ROLL_RUNNING;
		logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
		(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
		    TS_RUN, minclsyspri);
	}
	mutex_exit(&logmap->mtm_mutex);
}

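/*
 * ask the roll thread to exit and wait until it does
 */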
void
logmap_kill_roll(ml_unit_t *ul)
{
	mt_map_t	*mtm = ul->un_logmap;

	if (mtm == NULL)
		return;

	mutex_enter(&mtm->mtm_mutex);

	while (mtm->mtm_flags & MTM_ROLL_RUNNING) {
		mtm->mtm_flags |= MTM_ROLL_EXIT;
		cv_signal(&mtm->mtm_to_roll_cv);
		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
	}
	mutex_exit(&mtm->mtm_mutex);
}

/*
 * kick the roll thread if it's not doing anything
 */
void
logmap_forceroll_nowait(mt_map_t *logmap)
{
	/*
	 * Don't need to lock mtm_mutex to read mtm_flags here as we
	 * don't care in the rare case when we get a transitional value
	 * of mtm_flags. Just signalling the thread is enough: it will
	 * wake up and notice it has too many logmap entries.
	 */
	ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));
	if ((logmap->mtm_flags & MTM_ROLLING) == 0) {
		cv_signal(&logmap->mtm_to_roll_cv);
	}
}

/*
 * kick the roll thread and wait for it to finish a cycle
 */
void
logmap_forceroll(mt_map_t *mtm)
{
	mutex_enter(&mtm->mtm_mutex);
	if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) {
		mtm->mtm_flags |= MTM_FORCE_ROLL;
		cv_signal(&mtm->mtm_to_roll_cv);
	}
	do {
		if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) {
			mtm->mtm_flags &= ~MTM_FORCE_ROLL;
			goto out;
		}
		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
	} while (mtm->mtm_flags & MTM_FORCE_ROLL);
out:
	mutex_exit(&mtm->mtm_mutex);
}

/*
 * remove rolled deltas within (mof, nb) and free them
 */
void
logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
{
	int		dolock = 0;
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	offset_t	savmof	= mof;
	off_t		savnb	= nb;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

again:
	if (dolock)
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
	mutex_enter(&mtm->mtm_mutex);
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * remove and free the rolled entries
		 */
		mep = MAP_HASH(mof, mtm);
		while ((me = *mep) != 0) {
			if ((me->me_flags & ME_ROLL) &&
			    (MEwithinDATA(me, mof, hnb))) {
				if (me->me_flags & ME_AGE) {
					ASSERT(dolock == 0);
					dolock = 1;
					mutex_exit(&mtm->mtm_mutex);
					mof = savmof;
					nb = savnb;
					goto again;
				}
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				me->me_flags &= ~(ME_HASH|ME_ROLL);
				ASSERT(!(me->me_flags & ME_USER));
				mtm->mtm_nme--;
				/*
				 * cancelled entries are handled by
				 * someone else
				 */
				if ((me->me_flags & ME_CANCEL) == 0) {
					roll_stats[me->me_dt]++;
					CRB_RELE(me);
					kmem_cache_free(mapentry_cache, me);
				}
			} else
				mep = &me->me_hash;
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	if (dolock)
		rw_exit(&mtm->mtm_rwlock);
}

/*
 * Find the disk offset of the next delta to roll.
 * Returns 0: no more deltas to roll or a transaction is being committed
 *	   1: a delta to roll has been found and *mofp points
 *	      to the master file disk offset
 */
int
logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
{
	mapentry_t *me;

	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(logmap));

	mutex_enter(&logmap->mtm_mutex);
	for (me = logmap->mtm_next; me != (mapentry_t *)logmap;
	    me = me->me_next) {
		/* already rolled */
		if (me->me_flags & ME_ROLL) {
			continue;
		}

		/* part of currently busy transaction; stop */
		if (me->me_tid == logmap->mtm_tid) {
			break;
		}

		/* part of commit-in-progress transaction; stop */
		if (me->me_tid == logmap->mtm_committid) {
			break;
		}

		/*
		 * We shouldn't see a DT_CANCEL mapentry whose
		 * tid != mtm_committid, or != mtm_tid since
		 * these are removed at the end of each committed
		 * transaction.
		 */
		ASSERT(!(me->me_dt == DT_CANCEL));

		*mofp = me->me_mof;
		mutex_exit(&logmap->mtm_mutex);
		return (1);
	}
	mutex_exit(&logmap->mtm_mutex);
	return (0);
}

/*
 * put mapentry on sorted age list
 */
static void
logmap_list_age(mapentry_t **age, mapentry_t *meadd)
{
	mapentry_t	*me;

	ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));

	for (me = *age; me; age = &me->me_agenext, me = *age) {
		if (me->me_age > meadd->me_age)
			break;
	}
	meadd->me_agenext = me;
	meadd->me_flags |= ME_AGE;
	*age = meadd;
}

/*
 * get a list of deltas within <mof, mof+nb>
 * returns with mtm_rwlock held
 * return value says whether the entire mof range is covered by deltas
 */
int
logmap_list_get(
	mt_map_t *mtm,
	offset_t mof,
	off_t nb,
	mapentry_t **age)
{
	off_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	int		rwtype	= RW_READER;
	offset_t	savmof	= mof;
	off_t		savnb	= nb;
	int		entire	= 0;
	crb_t		*crb;

	mtm->mtm_ref = 1;
again:

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	rw_enter(&mtm->mtm_rwlock, rwtype);
	*age = NULL;
	mutex_enter(&mtm->mtm_mutex);
	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * find overlapping entries
		 */
		mep = MAP_HASH(mof, mtm);
		for (me = *mep; me; me = me->me_hash) {
			if (me->me_dt == DT_CANCEL)
				continue;
			if (!DATAoverlapME(mof, hnb, me))
				continue;
			/*
			 * check if map entry is in use
			 * (about to be rolled).
			 */
			if (me->me_flags & ME_AGE) {
				/*
				 * reset the age bit in the list,
				 * upgrade the lock, and try again
				 */
				for (me = *age; me; me = *age) {
					*age = me->me_agenext;
					me->me_flags &= ~ME_AGE;
				}
				mutex_exit(&mtm->mtm_mutex);
				rw_exit(&mtm->mtm_rwlock);
				rwtype = RW_WRITER;
				mof = savmof;
				nb = savnb;
				entire = 0;
				goto again;
			} else {
				/* add mapentry to age ordered list */
				logmap_list_age(age, me);
				crb = me->me_crb;
				if (crb) {
					if (DATAwithinCRB(savmof, savnb, crb)) {
						entire = 1;
					}
				} else {
					if (DATAwithinME(savmof, savnb, me)) {
						entire = 1;
					}
				}
			}
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	return (entire);
}

/*
 * Get a list of deltas for rolling - returns success or failure.
 * Also return the cached roll buffer if all deltas point to it.
 */
int
logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
{
	mapentry_t	*me, **mep, *age = NULL;
	crb_t		*crb = NULL;

	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(logmap));
	ASSERT((mof & MAPBLOCKOFF) == 0);

	rbp->rb_crb = NULL;

	/*
	 * find overlapping entries
	 */
	mutex_enter(&logmap->mtm_mutex);
	mep = MAP_HASH(mof, logmap);
	for (me = *mep; me; me = me->me_hash) {
		if (!DATAoverlapME(mof, MAPBLOCKSIZE, me))
			continue;
		if (me->me_tid == logmap->mtm_tid)
			continue;
		if (me->me_tid == logmap->mtm_committid)
			continue;
		if (me->me_dt == DT_CANCEL)
			continue;

		/*
		 * Check if map entry is in use (by lufs_read_strategy())
		 * and if so reset the age bit in the list,
		 * upgrade the lock, and try again
		 */
		if (me->me_flags & ME_AGE) {
			for (me = age; me; me = age) {
				age = me->me_agenext;
				me->me_flags &= ~ME_AGE;
			}
			mutex_exit(&logmap->mtm_mutex);
			return (1); /* failure */
		} else {
			/* add mapentry to age ordered list */
			logmap_list_age(&age, me);
		}
	}
	if (!age) {
		goto out;
	}

	/*
	 * Mark the deltas as being rolled.
	 */
	for (me = age; me; me = me->me_agenext) {
		me->me_flags |= ME_ROLL;
	}

	/*
	 * Test if all deltas are covered by one valid roll buffer
	 */
	crb = age->me_crb;
	if (crb && !(crb->c_invalid)) {
		for (me = age; me; me = me->me_agenext) {
			if (me->me_crb != crb) {
				crb = NULL;
				break;
			}
		}
		rbp->rb_crb = crb;
	}
out:
	rbp->rb_age = age;

	mutex_exit(&logmap->mtm_mutex);

	ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
	    logmap_logscan_debug(logmap, age));
	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	return (0); /* success */
}

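/*
 * clear the age bits on a delta list from logmap_list_get_roll();
 * the caller continues to hold mtm_rwlock
 */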
void
logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
{
	mapentry_t	*me;

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	mutex_enter(&mtm->mtm_mutex);
	for (me = age; me; me = age) {
		age = me->me_agenext;
		me->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);
}

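/*
 * release a delta list from logmap_list_get() and drop mtm_rwlock
 */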
void
logmap_list_put(mt_map_t *mtm, mapentry_t *age)
{
	mapentry_t	*me;

	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
	mutex_enter(&mtm->mtm_mutex);
	for (me = age; me; me = age) {
		age = me->me_agenext;
		me->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);
	rw_exit(&mtm->mtm_rwlock);
}

#define	UFS_RW_BALANCE 2
int ufs_rw_balance = UFS_RW_BALANCE;

/*
 * Check if we need to read the master.
 * The master does not need to be read if the log deltas to the
 * block are for one contiguous set of full disk sectors.
 * Both cylinder group bit maps (DT_CG, 8K) and directory entries (512B),
 * and possibly others, should not require master disk reads.
 * Calculate the sector map for writing later.
 */
int
logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
{
	offset_t	mof;
	crb_t		*crb;
	mapentry_t	*me;
	int32_t		nb;
	int		i;
	int		start_sec, end_sec;
	int		read_needed = 0;
	int		all_inodes = 1;
	int		first_sec = INT_MAX;
	int		last_sec = -1;
	rbsecmap_t	secmap = 0;

	/* LINTED: warning: logical expression always true: op "||" */
	ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY));

	for (me = age; me; me = me->me_agenext) {
		crb = me->me_crb;
		if (crb) {
			nb = crb->c_nb;
			mof = crb->c_mof;
		} else {
			nb = me->me_nb;
			mof = me->me_mof;
		}

		/*
		 * If the delta is not sector aligned then
		 * read the whole block.
		 */
		if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
			read_needed = 1;
		}

		/* Set sector map used in the MAPBLOCKSIZE block. */
		start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
		end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT);
		for (i = start_sec; i <= end_sec; i++) {
			secmap |= UINT16_C(1) << i;
		}

		if (me->me_dt != DT_INODE) {
			all_inodes = 0;
		}
		if (start_sec < first_sec) {
			first_sec = start_sec;
		}
		if (end_sec > last_sec) {
			last_sec = end_sec;
		}
	}

	ASSERT(secmap);
	ASSERT(first_sec != INT_MAX);
	ASSERT(last_sec != -1);

	if (all_inodes) {
		/*
		 * Here we have a tradeoff choice. It must be better to
		 * do 2 writes in the same MAPBLOCKSIZE chunk than a
		 * read and a write. But what about 3 or more writes, versus
		 * a read+write? Where is the cut over? It will depend on
		 * the track caching, scsi driver and other activity.
		 * An unpublished tunable is defined (ufs_rw_balance) that
		 * currently defaults to 2.
		 */
		if (!read_needed) {
			int count = 0, gap = 0;
			int sector_set; /* write needed to this sector */

			/* Count the gaps (every 1 to 0 transition) */
			for (i = first_sec + 1; i < last_sec; i++) {
				sector_set = secmap & (UINT16_C(1) << i);
				if (!gap && !sector_set) {
					gap = 1;
					count++;
					if (count > ufs_rw_balance) {
						read_needed = 1;
						break;
					}
				} else if (gap && sector_set) {
					gap = 0;
				}
			}
		}

		/*
		 * Inodes commonly make up the majority (~85%) of deltas.
		 * They cannot contain embedded user data, so it's safe to
		 * read and write them all in one IO.
		 * But for directory entries, shadow inode data, and
		 * quota record data the user data fragments can be embedded
		 * between those metadata, and so it's not safe to read,
		 * modify, then write the entire range, as asynchronous user
		 * data writes could get overwritten with old data.
		 * Thus we have to create a segment map of meta data that
		 * needs to get written.
		 *
		 * If user data was logged then this issue would go away.
		 */
		if (read_needed) {
			for (i = first_sec + 1; i < last_sec; i++) {
				secmap |= (UINT16_C(1) << i);
			}
		}
	}
	rbp->rb_secmap = secmap;
	return (read_needed);
}

/*
 * Abort the load of a set of log map deltas.
 * i.e.,
 * Clear out all mapentries on this unit's log map
 * which have a tid (transaction id) equal to the
 * parameter tid. Walk the cancel list, taking everything
 * off it, too.
 */
static void
logmap_abort(ml_unit_t *ul, uint32_t tid)
{
	struct mt_map	*mtm = ul->un_logmap;	/* Log map */
	mapentry_t	*me, **mep;
	int		i;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	/*
	 * wait for any outstanding reads to finish; lock out future reads
	 */
	rw_enter(&mtm->mtm_rwlock, RW_WRITER);

	mutex_enter(&mtm->mtm_mutex);
	/* Take everything off cancel list */
	while ((me = mtm->mtm_cancel) != NULL) {
		mtm->mtm_cancel = me->me_cancel;
		me->me_flags &= ~ME_CANCEL;
		me->me_cancel = NULL;
	}

	/*
	 * Now take out all mapentries with current tid, and committid,
	 * as this function is called from logmap_logscan and logmap_commit.
	 * When it is called from logmap_logscan, mtm_tid == mtm_committid.
	 * But when logmap_abort is called from logmap_commit it is
	 * because the log errored when trying to write the commit record,
	 * after the async ops have been allowed to start in top_end_sync.
	 * So we also need to remove all mapentries from the transaction whose
	 * commit failed.
	 */
	for (i = 0; i < mtm->mtm_nhash; i++) {
		mep = &mtm->mtm_hash[i];
		while ((me = *mep) != NULL) {
			if (me->me_tid == tid ||
			    me->me_tid == mtm->mtm_committid) {
				*mep = me->me_hash;
				me->me_next->me_prev = me->me_prev;
				me->me_prev->me_next = me->me_next;
				if (!(me->me_flags & ME_USER)) {
					mtm->mtm_nme--;
				}
				CRB_RELE(me);
				kmem_cache_free(mapentry_cache, me);
				continue;
			}
			mep = &me->me_hash;
		}
	}

	if (!(ul->un_flags & LDL_SCAN))
		mtm->mtm_flags |= MTM_CANCELED;
	mutex_exit(&mtm->mtm_mutex);
	mtm->mtm_dirty = 0;
	mtm->mtm_nmet = 0;
	rw_exit(&mtm->mtm_rwlock);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}

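/*
 * wait until the log has space for this mapentry, forcing rolls as
 * needed; gives up if the log has errored
 */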
static void
logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
{
	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	while (!ldl_has_space(ul, me)) {
		ASSERT(!(ul->un_flags & LDL_NOROLL));
		mutex_exit(&ul->un_log_mutex);
		logmap_forceroll(mtm);
		mutex_enter(&ul->un_log_mutex);
		if (ul->un_flags & LDL_ERROR)
			break;
	}

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
}

/*
 * put a list of deltas into a logmap
 * If va == NULL, don't write to the log.
 */
void
logmap_add(
	ml_unit_t *ul,
	char *va,		/* Ptr to buf w/deltas & data */
	offset_t vamof,		/* Offset on master of buf start */
	mapentry_t *melist)	/* Entries to add */
{
	offset_t	mof;
	off_t		nb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mapentry_t	**savmep;
	uint32_t	tid;
	mt_map_t	*mtm	= ul->un_logmap;

	mutex_enter(&ul->un_log_mutex);
	if (va)
		logmap_wait_space(mtm, ul, melist);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;
	tid = mtm->mtm_tid;
	while (melist) {
		mof = melist->me_mof;
		nb = melist->me_nb;

		/*
		 * search for overlapping entries
		 */
		savmep = mep = MAP_HASH(mof, mtm);
		mutex_enter(&mtm->mtm_mutex);
		while ((me = *mep) != 0) {
			/*
			 * Data consumes old map entry; cancel map entry.
			 * Take care when we replace an old map entry
			 * which carries quota information with a newer entry
			 * which does not. In that case the push function
			 * would not be called to clean up the dquot structure.
			 * This would be found later by invalidatedq() causing
			 * a panic when the filesystem is unmounted.
			 * We clean up the dquot manually and then replace
			 * the map entry.
			 */
			if (MEwithinDATA(me, mof, nb) &&
			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
				if (tid == me->me_tid &&
				    ((me->me_flags & ME_AGE) == 0)) {
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					ASSERT(!(me->me_flags & ME_USER));
					mtm->mtm_nme--;
					/*
					 * Special case if the mapentry
					 * carries a dquot and a push function.
					 * We have to clean up the quota info
					 * before replacing the mapentry.
					 */
					if (me->me_dt == DT_QR)
						HANDLE_DQUOT(me, melist);

					kmem_cache_free(mapentry_cache, me);
					continue;
				}
				me->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = me;
				me->me_flags |= ME_CANCEL;
			}
			mep = &(*mep)->me_hash;
		}
		mutex_exit(&mtm->mtm_mutex);

		/*
		 * remove from list
		 */
		me = melist;
		melist = melist->me_hash;
		me->me_flags &= ~ME_LIST;
		/*
		 * If va != NULL, put in the log.
		 */
		if (va)
			ldl_write(ul, va, vamof, me);
		if (ul->un_flags & LDL_ERROR) {
			kmem_cache_free(mapentry_cache, me);
			continue;
		}
		ASSERT((va == NULL) ||
		    ((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
		    map_check_ldl_write(ul, va, vamof, me));

		/*
		 * put on hash
		 */
		mutex_enter(&mtm->mtm_mutex);
		me->me_hash = *savmep;
		*savmep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		me->me_flags |= ME_HASH;
		me->me_tid = tid;
		me->me_age = mtm->mtm_age++;
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
		mutex_exit(&mtm->mtm_mutex);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
	mutex_exit(&ul->un_log_mutex);
}

11990Sstevel@tonic-gate /*
12000Sstevel@tonic-gate * Add the delta(s) into the log.
12010Sstevel@tonic-gate * Create one cached roll buffer logmap entry, and reference count the
12020Sstevel@tonic-gate * number of mapentries refering to it.
12030Sstevel@tonic-gate * Cancel previous logmap entries.
12040Sstevel@tonic-gate * logmap_add is tolerant of failure to allocate a cached roll buffer.
12050Sstevel@tonic-gate */
12060Sstevel@tonic-gate void
logmap_add_buf(ml_unit_t * ul,char * va,offset_t bufmof,mapentry_t * melist,caddr_t buf,uint32_t bufsz)12070Sstevel@tonic-gate logmap_add_buf(
12080Sstevel@tonic-gate ml_unit_t *ul,
12090Sstevel@tonic-gate char *va, /* Ptr to buf w/deltas & data */
12100Sstevel@tonic-gate offset_t bufmof, /* Offset on master of buf start */
12110Sstevel@tonic-gate mapentry_t *melist, /* Entries to add */
12120Sstevel@tonic-gate caddr_t buf, /* Buffer containing delta(s) */
12130Sstevel@tonic-gate uint32_t bufsz) /* Size of buf */
12140Sstevel@tonic-gate {
12150Sstevel@tonic-gate offset_t mof;
12160Sstevel@tonic-gate offset_t vamof = bufmof + (va - buf);
12170Sstevel@tonic-gate off_t nb;
12180Sstevel@tonic-gate mapentry_t *me;
12190Sstevel@tonic-gate mapentry_t **mep;
12200Sstevel@tonic-gate mapentry_t **savmep;
12210Sstevel@tonic-gate uint32_t tid;
12220Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
12230Sstevel@tonic-gate crb_t *crb;
12240Sstevel@tonic-gate crb_t *crbsav = NULL;
12250Sstevel@tonic-gate
12260Sstevel@tonic-gate ASSERT((bufsz & DEV_BMASK) == 0);
12270Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex);
12280Sstevel@tonic-gate logmap_wait_space(mtm, ul, melist);
12290Sstevel@tonic-gate
12300Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1231*4662Sfrankho map_check_linkage(mtm));
12320Sstevel@tonic-gate
12330Sstevel@tonic-gate mtm->mtm_ref = 1;
12340Sstevel@tonic-gate mtm->mtm_dirty++;
12350Sstevel@tonic-gate tid = mtm->mtm_tid;
12360Sstevel@tonic-gate while (melist) {
12370Sstevel@tonic-gate mof = melist->me_mof;
12380Sstevel@tonic-gate nb = melist->me_nb;
12390Sstevel@tonic-gate
12400Sstevel@tonic-gate /*
12410Sstevel@tonic-gate * search for overlapping entries
12420Sstevel@tonic-gate */
12430Sstevel@tonic-gate savmep = mep = MAP_HASH(mof, mtm);
12440Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
12450Sstevel@tonic-gate while ((me = *mep) != 0) {
12460Sstevel@tonic-gate /*
12470Sstevel@tonic-gate * Data consumes old map entry; cancel map entry.
12480Sstevel@tonic-gate * Take care when we replace an old map entry
12490Sstevel@tonic-gate * which carries quota information with a newer entry
12500Sstevel@tonic-gate * which does not. In that case the push function
12510Sstevel@tonic-gate * would not be called to clean up the dquot structure.
12520Sstevel@tonic-gate * This would be found later by invalidatedq() causing
12530Sstevel@tonic-gate 			 * a panic when the filesystem is unmounted.
12540Sstevel@tonic-gate * We clean up the dquot manually and then replace
12550Sstevel@tonic-gate * the map entry.
12560Sstevel@tonic-gate */
12570Sstevel@tonic-gate crb = me->me_crb;
12580Sstevel@tonic-gate if (MEwithinDATA(me, mof, nb) &&
12590Sstevel@tonic-gate ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
12600Sstevel@tonic-gate if (tid == me->me_tid &&
12610Sstevel@tonic-gate ((me->me_flags & ME_AGE) == 0)) {
12620Sstevel@tonic-gate *mep = me->me_hash;
12630Sstevel@tonic-gate me->me_next->me_prev = me->me_prev;
12640Sstevel@tonic-gate me->me_prev->me_next = me->me_next;
12650Sstevel@tonic-gate ASSERT(!(me->me_flags & ME_USER));
12660Sstevel@tonic-gate mtm->mtm_nme--;
12670Sstevel@tonic-gate /*
12680Sstevel@tonic-gate * Special case if the mapentry
12690Sstevel@tonic-gate * carries a dquot and a push function.
12700Sstevel@tonic-gate * We have to clean up the quota info
12710Sstevel@tonic-gate * before replacing the mapentry.
12720Sstevel@tonic-gate */
12730Sstevel@tonic-gate if (me->me_dt == DT_QR)
12740Sstevel@tonic-gate HANDLE_DQUOT(me, melist);
12750Sstevel@tonic-gate
12760Sstevel@tonic-gate /*
12770Sstevel@tonic-gate 				 * If this soon-to-be-deleted mapentry
12780Sstevel@tonic-gate * has a suitable roll buffer then
12790Sstevel@tonic-gate * re-use it.
12800Sstevel@tonic-gate */
12810Sstevel@tonic-gate if (crb && (--crb->c_refcnt == 0)) {
12820Sstevel@tonic-gate if (crbsav ||
12830Sstevel@tonic-gate (crb->c_nb != bufsz)) {
12840Sstevel@tonic-gate CRB_FREE(crb, me);
12850Sstevel@tonic-gate } else {
12860Sstevel@tonic-gate bcopy(buf, crb->c_buf,
12870Sstevel@tonic-gate bufsz);
12880Sstevel@tonic-gate crb->c_invalid = 0;
12890Sstevel@tonic-gate crb->c_mof = bufmof;
12900Sstevel@tonic-gate crbsav = crb;
12910Sstevel@tonic-gate me->me_crb = NULL;
12920Sstevel@tonic-gate }
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me);
12950Sstevel@tonic-gate continue;
12960Sstevel@tonic-gate }
12970Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel;
12980Sstevel@tonic-gate mtm->mtm_cancel = me;
12990Sstevel@tonic-gate me->me_flags |= ME_CANCEL;
13000Sstevel@tonic-gate }
13010Sstevel@tonic-gate
13020Sstevel@tonic-gate /*
13030Sstevel@tonic-gate * Inode deltas within the same fs block come
13040Sstevel@tonic-gate * in individually as separate calls to logmap_add().
13050Sstevel@tonic-gate * All others come in as one call. So check for an
13060Sstevel@tonic-gate * existing entry where we can re-use the crb.
13070Sstevel@tonic-gate */
13080Sstevel@tonic-gate if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
13090Sstevel@tonic-gate !crbsav && crb &&
13100Sstevel@tonic-gate WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
13110Sstevel@tonic-gate ASSERT(crb->c_mof == bufmof);
13120Sstevel@tonic-gate ASSERT(crb->c_nb == bufsz);
13130Sstevel@tonic-gate bcopy(buf, crb->c_buf, bufsz);
13140Sstevel@tonic-gate crbsav = crb;
13150Sstevel@tonic-gate }
13160Sstevel@tonic-gate mep = &(*mep)->me_hash;
13170Sstevel@tonic-gate }
13180Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
13190Sstevel@tonic-gate
13200Sstevel@tonic-gate /*
13210Sstevel@tonic-gate * If we don't already have a crb then allocate one
13220Sstevel@tonic-gate * and copy the incoming buffer. Only do this once
13230Sstevel@tonic-gate * for all the incoming deltas.
13240Sstevel@tonic-gate */
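		/*
		 * Both allocations below are KM_NOSLEEP since this path
		 * runs with un_log_mutex held; as noted above
		 * logmap_add_buf(), allocation failure is tolerated and
		 * the delta is simply logged without a cached roll buffer.
		 */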
13250Sstevel@tonic-gate if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
13260Sstevel@tonic-gate /*
13270Sstevel@tonic-gate * Only use a cached roll buffer if we
13280Sstevel@tonic-gate * have enough memory, and check for failures.
13290Sstevel@tonic-gate */
13300Sstevel@tonic-gate if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
13310Sstevel@tonic-gate (kmem_avail() > bufsz)) {
13320Sstevel@tonic-gate crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
13330Sstevel@tonic-gate } else {
13340Sstevel@tonic-gate ufs_crb_alloc_fails++;
13350Sstevel@tonic-gate }
13360Sstevel@tonic-gate if (crbsav) {
13370Sstevel@tonic-gate crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
13380Sstevel@tonic-gate if (crbsav->c_buf) {
13390Sstevel@tonic-gate atomic_add_64(&ufs_crb_size,
13400Sstevel@tonic-gate (uint64_t)bufsz);
13410Sstevel@tonic-gate if (ufs_crb_size > ufs_crb_max_size) {
13420Sstevel@tonic-gate ufs_crb_max_size = ufs_crb_size;
13430Sstevel@tonic-gate }
13440Sstevel@tonic-gate bcopy(buf, crbsav->c_buf, bufsz);
13450Sstevel@tonic-gate crbsav->c_nb = bufsz;
13460Sstevel@tonic-gate crbsav->c_refcnt = 0;
13470Sstevel@tonic-gate crbsav->c_invalid = 0;
13480Sstevel@tonic-gate ASSERT((bufmof & DEV_BMASK) == 0);
13490Sstevel@tonic-gate crbsav->c_mof = bufmof;
13500Sstevel@tonic-gate } else {
13510Sstevel@tonic-gate kmem_free(crbsav, sizeof (crb_t));
13520Sstevel@tonic-gate crbsav = NULL;
13530Sstevel@tonic-gate }
13540Sstevel@tonic-gate }
13550Sstevel@tonic-gate }
13560Sstevel@tonic-gate
13570Sstevel@tonic-gate /*
13580Sstevel@tonic-gate * remove from list
13590Sstevel@tonic-gate */
13600Sstevel@tonic-gate me = melist;
13610Sstevel@tonic-gate melist = melist->me_hash;
13620Sstevel@tonic-gate me->me_flags &= ~ME_LIST;
13630Sstevel@tonic-gate me->me_crb = crbsav;
13640Sstevel@tonic-gate if (crbsav) {
13650Sstevel@tonic-gate crbsav->c_refcnt++;
13660Sstevel@tonic-gate }
13670Sstevel@tonic-gate crbsav = NULL;
13680Sstevel@tonic-gate
13690Sstevel@tonic-gate ASSERT(va);
13700Sstevel@tonic-gate ldl_write(ul, va, vamof, me); /* add to on-disk log */
13710Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) {
13720Sstevel@tonic-gate CRB_RELE(me);
13730Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me);
13740Sstevel@tonic-gate continue;
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
1377*4662Sfrankho map_check_ldl_write(ul, va, vamof, me));
13780Sstevel@tonic-gate
13790Sstevel@tonic-gate /*
13800Sstevel@tonic-gate * put on hash
13810Sstevel@tonic-gate */
13820Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
13830Sstevel@tonic-gate me->me_hash = *savmep;
13840Sstevel@tonic-gate *savmep = me;
13850Sstevel@tonic-gate me->me_next = (mapentry_t *)mtm;
13860Sstevel@tonic-gate me->me_prev = mtm->mtm_prev;
13870Sstevel@tonic-gate mtm->mtm_prev->me_next = me;
13880Sstevel@tonic-gate mtm->mtm_prev = me;
13890Sstevel@tonic-gate me->me_flags |= ME_HASH;
13900Sstevel@tonic-gate me->me_tid = tid;
13910Sstevel@tonic-gate me->me_age = mtm->mtm_age++;
13920Sstevel@tonic-gate mtm->mtm_nme++;
13930Sstevel@tonic-gate mtm->mtm_nmet++;
13940Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
13950Sstevel@tonic-gate }
13960Sstevel@tonic-gate
13970Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1398*4662Sfrankho map_check_linkage(mtm));
13990Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex);
14000Sstevel@tonic-gate }
14010Sstevel@tonic-gate
14020Sstevel@tonic-gate /*
14030Sstevel@tonic-gate * free up any cancelled deltas
14040Sstevel@tonic-gate */
14050Sstevel@tonic-gate void
14060Sstevel@tonic-gate logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
14070Sstevel@tonic-gate {
14080Sstevel@tonic-gate int dolock = 0;
14090Sstevel@tonic-gate mapentry_t *me;
14100Sstevel@tonic-gate mapentry_t **mep;
14110Sstevel@tonic-gate
14120Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1413*4662Sfrankho map_check_linkage(mtm));
14140Sstevel@tonic-gate
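	/*
	 * An entry marked ME_AGE is busy with a roll or a read. When one
	 * is found, the loop below drops mtm_mutex and retries with
	 * mtm_rwlock held as writer (dolock) so the collision can drain
	 * before the canceled entries are freed.
	 */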
14150Sstevel@tonic-gate again:
14160Sstevel@tonic-gate if (dolock)
14170Sstevel@tonic-gate rw_enter(&mtm->mtm_rwlock, RW_WRITER);
14180Sstevel@tonic-gate
14190Sstevel@tonic-gate /*
14200Sstevel@tonic-gate * At EOT, cancel the indicated deltas
14210Sstevel@tonic-gate */
14220Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
14230Sstevel@tonic-gate if (mtm->mtm_flags & MTM_CANCELED) {
14240Sstevel@tonic-gate mtm->mtm_flags &= ~MTM_CANCELED;
14250Sstevel@tonic-gate ASSERT(dolock == 0);
14260Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
14270Sstevel@tonic-gate return;
14280Sstevel@tonic-gate }
14290Sstevel@tonic-gate
14300Sstevel@tonic-gate while ((me = *cancelhead) != NULL) {
14310Sstevel@tonic-gate /*
14320Sstevel@tonic-gate * roll forward or read collision; wait and try again
14330Sstevel@tonic-gate */
14340Sstevel@tonic-gate if (me->me_flags & ME_AGE) {
14350Sstevel@tonic-gate ASSERT(dolock == 0);
14360Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
14370Sstevel@tonic-gate dolock = 1;
14380Sstevel@tonic-gate goto again;
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate /*
14410Sstevel@tonic-gate * remove from cancel list
14420Sstevel@tonic-gate */
14430Sstevel@tonic-gate *cancelhead = me->me_cancel;
14440Sstevel@tonic-gate me->me_cancel = NULL;
14450Sstevel@tonic-gate me->me_flags &= ~(ME_CANCEL);
14460Sstevel@tonic-gate
14470Sstevel@tonic-gate /*
14480Sstevel@tonic-gate * logmap_remove_roll handles ME_ROLL entries later
14490Sstevel@tonic-gate * we leave them around for logmap_iscancel
14500Sstevel@tonic-gate * XXX is this necessary?
14510Sstevel@tonic-gate */
14520Sstevel@tonic-gate if (me->me_flags & ME_ROLL)
14530Sstevel@tonic-gate continue;
14540Sstevel@tonic-gate
14550Sstevel@tonic-gate /*
14560Sstevel@tonic-gate * remove from hash (if necessary)
14570Sstevel@tonic-gate */
14580Sstevel@tonic-gate if (me->me_flags & ME_HASH) {
14590Sstevel@tonic-gate mep = MAP_HASH(me->me_mof, mtm);
14600Sstevel@tonic-gate while (*mep) {
14610Sstevel@tonic-gate if (*mep == me) {
14620Sstevel@tonic-gate *mep = me->me_hash;
14630Sstevel@tonic-gate me->me_next->me_prev = me->me_prev;
14640Sstevel@tonic-gate me->me_prev->me_next = me->me_next;
14650Sstevel@tonic-gate me->me_flags &= ~(ME_HASH);
14660Sstevel@tonic-gate if (!(me->me_flags & ME_USER)) {
14670Sstevel@tonic-gate mtm->mtm_nme--;
14680Sstevel@tonic-gate }
14690Sstevel@tonic-gate break;
14700Sstevel@tonic-gate } else
14710Sstevel@tonic-gate mep = &(*mep)->me_hash;
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate }
14740Sstevel@tonic-gate /*
14750Sstevel@tonic-gate * put the entry on the free list
14760Sstevel@tonic-gate */
14770Sstevel@tonic-gate CRB_RELE(me);
14780Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me);
14790Sstevel@tonic-gate }
14800Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
14810Sstevel@tonic-gate if (dolock)
14820Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock);
14830Sstevel@tonic-gate
14840Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1485*4662Sfrankho map_check_linkage(mtm));
14860Sstevel@tonic-gate }
14870Sstevel@tonic-gate
14880Sstevel@tonic-gate
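/*
 * Write a commit record for the current transaction into the log and
 * push it to the log device. Caller must hold un_log_mutex; on a log
 * device error the transaction is aborted via logmap_abort() instead
 * of being marked clean.
 */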
14890Sstevel@tonic-gate void
14900Sstevel@tonic-gate logmap_commit(ml_unit_t *ul, uint32_t tid)
14910Sstevel@tonic-gate {
14920Sstevel@tonic-gate mapentry_t me;
14930Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
14940Sstevel@tonic-gate
14950Sstevel@tonic-gate
14960Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex));
14970Sstevel@tonic-gate
14980Sstevel@tonic-gate /*
14990Sstevel@tonic-gate * async'ly write a commit rec into the log
15000Sstevel@tonic-gate */
15010Sstevel@tonic-gate if (mtm->mtm_dirty) {
15020Sstevel@tonic-gate /*
15030Sstevel@tonic-gate * put commit record into log
15040Sstevel@tonic-gate */
15050Sstevel@tonic-gate me.me_mof = mtm->mtm_tid;
15060Sstevel@tonic-gate me.me_dt = DT_COMMIT;
15070Sstevel@tonic-gate me.me_nb = 0;
15080Sstevel@tonic-gate me.me_hash = NULL;
15090Sstevel@tonic-gate logmap_wait_space(mtm, ul, &me);
15100Sstevel@tonic-gate ldl_write(ul, NULL, (offset_t)0, &me);
15110Sstevel@tonic-gate ldl_round_commit(ul);
15120Sstevel@tonic-gate
15130Sstevel@tonic-gate /*
15140Sstevel@tonic-gate * abort on error; else reset dirty flag
15150Sstevel@tonic-gate */
15160Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR)
15170Sstevel@tonic-gate logmap_abort(ul, tid);
15180Sstevel@tonic-gate else {
15190Sstevel@tonic-gate mtm->mtm_dirty = 0;
15200Sstevel@tonic-gate mtm->mtm_nmet = 0;
15210Sstevel@tonic-gate mtm->mtm_cfrags = 0;
15220Sstevel@tonic-gate }
15230Sstevel@tonic-gate /* push commit */
15240Sstevel@tonic-gate ldl_push_commit(ul);
15250Sstevel@tonic-gate }
15260Sstevel@tonic-gate }
15270Sstevel@tonic-gate
15280Sstevel@tonic-gate void
15290Sstevel@tonic-gate logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
15300Sstevel@tonic-gate {
15310Sstevel@tonic-gate off_t lof;
15320Sstevel@tonic-gate uint32_t tid;
15330Sstevel@tonic-gate mapentry_t *me;
15340Sstevel@tonic-gate
15350Sstevel@tonic-gate /*
15360Sstevel@tonic-gate * move the head forward so the log knows how full it is
15370Sstevel@tonic-gate 	 * Make sure to skip any mapentry whose me_lof is 0; these
15380Sstevel@tonic-gate 	 * are just placeholders for DT_CANCELED freed user blocks
15390Sstevel@tonic-gate * for the current moby.
15400Sstevel@tonic-gate */
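	/*
	 * Note the mt_map itself is the sentinel of its circular
	 * mapentry list, hence the (mapentry_t *)mtm comparisons below.
	 */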
15410Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex);
15420Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
15430Sstevel@tonic-gate me = mtm->mtm_next;
15440Sstevel@tonic-gate while (me != (mapentry_t *)mtm && me->me_lof == 0) {
15450Sstevel@tonic-gate me = me->me_next;
15460Sstevel@tonic-gate }
15470Sstevel@tonic-gate
15480Sstevel@tonic-gate if (me == (mapentry_t *)mtm)
15490Sstevel@tonic-gate lof = -1;
15500Sstevel@tonic-gate else {
15510Sstevel@tonic-gate lof = me->me_lof;
15520Sstevel@tonic-gate tid = me->me_tid;
15530Sstevel@tonic-gate }
15540Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
15550Sstevel@tonic-gate ldl_sethead(ul, lof, tid);
15560Sstevel@tonic-gate if (lof == -1)
15570Sstevel@tonic-gate mtm->mtm_age = 0;
15580Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex);
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate
15610Sstevel@tonic-gate void
15620Sstevel@tonic-gate logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
15630Sstevel@tonic-gate {
15640Sstevel@tonic-gate off_t lof;
15650Sstevel@tonic-gate size_t nb;
15660Sstevel@tonic-gate
15670Sstevel@tonic-gate /*
15680Sstevel@tonic-gate * set the tail after the logmap_abort
15690Sstevel@tonic-gate */
15700Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex);
15710Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
15720Sstevel@tonic-gate if (mtm->mtm_prev == (mapentry_t *)mtm)
15730Sstevel@tonic-gate lof = -1;
15740Sstevel@tonic-gate else {
15750Sstevel@tonic-gate /*
15760Sstevel@tonic-gate * set the tail to the end of the last commit
15770Sstevel@tonic-gate */
15780Sstevel@tonic-gate lof = mtm->mtm_tail_lof;
15790Sstevel@tonic-gate nb = mtm->mtm_tail_nb;
15800Sstevel@tonic-gate }
15810Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
15820Sstevel@tonic-gate ldl_settail(ul, lof, nb);
15830Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex);
15840Sstevel@tonic-gate }
15850Sstevel@tonic-gate
15860Sstevel@tonic-gate /*
15870Sstevel@tonic-gate  * when resetting a device; roll the log until every
15880Sstevel@tonic-gate * delta has been rolled forward
15890Sstevel@tonic-gate */
15900Sstevel@tonic-gate void
15910Sstevel@tonic-gate logmap_roll_dev(ml_unit_t *ul)
15920Sstevel@tonic-gate {
15930Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
15940Sstevel@tonic-gate mapentry_t *me;
15950Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
15960Sstevel@tonic-gate
15970Sstevel@tonic-gate again:
15980Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1599*4662Sfrankho map_check_linkage(mtm));
16000Sstevel@tonic-gate if (ul->un_flags & (LDL_ERROR|LDL_NOROLL))
16010Sstevel@tonic-gate return;
16020Sstevel@tonic-gate
16030Sstevel@tonic-gate /*
16040Sstevel@tonic-gate * look for deltas
16050Sstevel@tonic-gate */
16060Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
16070Sstevel@tonic-gate for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) {
16080Sstevel@tonic-gate if (me->me_flags & ME_ROLL)
16090Sstevel@tonic-gate break;
16100Sstevel@tonic-gate if (me->me_tid == mtm->mtm_tid)
16110Sstevel@tonic-gate continue;
16120Sstevel@tonic-gate if (me->me_tid == mtm->mtm_committid)
16130Sstevel@tonic-gate continue;
16140Sstevel@tonic-gate break;
16150Sstevel@tonic-gate }
16160Sstevel@tonic-gate
16170Sstevel@tonic-gate /*
16180Sstevel@tonic-gate * found a delta; kick the roll thread
16190Sstevel@tonic-gate * but only if the thread is running... (jmh)
16200Sstevel@tonic-gate */
16210Sstevel@tonic-gate if (me != (mapentry_t *)mtm) {
16220Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
16230Sstevel@tonic-gate logmap_forceroll(mtm);
16240Sstevel@tonic-gate goto again;
16250Sstevel@tonic-gate }
16260Sstevel@tonic-gate
16270Sstevel@tonic-gate /*
16280Sstevel@tonic-gate * no more deltas, return
16290Sstevel@tonic-gate */
16300Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
16310Sstevel@tonic-gate (void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);
16320Sstevel@tonic-gate
16330Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1634*4662Sfrankho map_check_linkage(mtm));
16350Sstevel@tonic-gate }
16360Sstevel@tonic-gate
16370Sstevel@tonic-gate static void
16380Sstevel@tonic-gate logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
16390Sstevel@tonic-gate {
16400Sstevel@tonic-gate mapentry_t *me;
16410Sstevel@tonic-gate mapentry_t **mep;
16420Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
16430Sstevel@tonic-gate int frags;
16440Sstevel@tonic-gate
16450Sstevel@tonic-gate /*
16460Sstevel@tonic-gate * map has been referenced and is dirty
16470Sstevel@tonic-gate */
16480Sstevel@tonic-gate mtm->mtm_ref = 1;
16490Sstevel@tonic-gate mtm->mtm_dirty++;
16500Sstevel@tonic-gate
16510Sstevel@tonic-gate /*
16520Sstevel@tonic-gate * get a mapentry
16530Sstevel@tonic-gate */
16540Sstevel@tonic-gate me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
16550Sstevel@tonic-gate bzero(me, sizeof (mapentry_t));
16560Sstevel@tonic-gate
16570Sstevel@tonic-gate /*
16580Sstevel@tonic-gate * initialize cancel record and put in logmap
16590Sstevel@tonic-gate */
16600Sstevel@tonic-gate me->me_mof = mof;
16610Sstevel@tonic-gate me->me_nb = nb;
16620Sstevel@tonic-gate me->me_dt = DT_CANCEL;
16630Sstevel@tonic-gate me->me_tid = mtm->mtm_tid;
16640Sstevel@tonic-gate me->me_hash = NULL;
16650Sstevel@tonic-gate
16660Sstevel@tonic-gate /*
16670Sstevel@tonic-gate * Write delta to log if this delta is for metadata. If this is not
16680Sstevel@tonic-gate * metadata it is user data and we are just putting a cancel
16690Sstevel@tonic-gate * mapentry into the hash to cancel a user block deletion
16700Sstevel@tonic-gate * in which we do not want the block to be allocated
16710Sstevel@tonic-gate * within this moby. This cancel entry will prevent the block from
16720Sstevel@tonic-gate * being allocated within the moby and prevent user data corruption
16730Sstevel@tonic-gate * if we happen to crash before this moby is committed.
16740Sstevel@tonic-gate */
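	/*
	 * Illustrative user-data case: a moby frees one of a file's data
	 * blocks. The cancel entry added below keeps the allocator from
	 * handing that block out again within the same moby (it can be
	 * detected via logmap_iscancel(), below), so a crash before the
	 * commit cannot corrupt freshly written user data.
	 */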
16750Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex);
16760Sstevel@tonic-gate if (metadata) {
16770Sstevel@tonic-gate logmap_wait_space(mtm, ul, me);
16780Sstevel@tonic-gate ldl_write(ul, NULL, (offset_t)0, me);
16790Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) {
16800Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me);
16810Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex);
16820Sstevel@tonic-gate return;
16830Sstevel@tonic-gate }
16840Sstevel@tonic-gate }
16850Sstevel@tonic-gate
16860Sstevel@tonic-gate /*
16870Sstevel@tonic-gate * put in hash and on cancel list
16880Sstevel@tonic-gate */
16890Sstevel@tonic-gate mep = MAP_HASH(mof, mtm);
16900Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
16910Sstevel@tonic-gate me->me_age = mtm->mtm_age++;
16920Sstevel@tonic-gate me->me_hash = *mep;
16930Sstevel@tonic-gate *mep = me;
16940Sstevel@tonic-gate me->me_next = (mapentry_t *)mtm;
16950Sstevel@tonic-gate me->me_prev = mtm->mtm_prev;
16960Sstevel@tonic-gate mtm->mtm_prev->me_next = me;
16970Sstevel@tonic-gate mtm->mtm_prev = me;
16980Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel;
16990Sstevel@tonic-gate mtm->mtm_cancel = me;
17000Sstevel@tonic-gate if (metadata) {
17010Sstevel@tonic-gate mtm->mtm_nme++;
17020Sstevel@tonic-gate mtm->mtm_nmet++;
17030Sstevel@tonic-gate } else {
17040Sstevel@tonic-gate me->me_flags = ME_USER;
17050Sstevel@tonic-gate }
17060Sstevel@tonic-gate me->me_flags |= (ME_HASH|ME_CANCEL);
17070Sstevel@tonic-gate if (!(metadata)) {
17080Sstevel@tonic-gate frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
17090Sstevel@tonic-gate if (frags)
1710*4662Sfrankho mtm->mtm_cfrags +=
1711*4662Sfrankho numfrags(ul->un_ufsvfs->vfs_fs, frags);
17120Sstevel@tonic-gate }
17130Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
17140Sstevel@tonic-gate
17150Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex);
17160Sstevel@tonic-gate }
17170Sstevel@tonic-gate
17180Sstevel@tonic-gate /*
17190Sstevel@tonic-gate * cancel entries in a logmap (entries are freed at EOT)
17200Sstevel@tonic-gate */
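/*
 * The loop below carves [mof, mof + nb) into chunks that never cross
 * a MAPBLOCKSIZE boundary; e.g. (hypothetical numbers) with an 8k
 * MAPBLOCKSIZE, a 12k cancel starting 4k into a map block is
 * processed as a 4k chunk followed by an 8k chunk.
 */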
17210Sstevel@tonic-gate void
17220Sstevel@tonic-gate logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
17230Sstevel@tonic-gate {
17240Sstevel@tonic-gate int32_t hnb;
17250Sstevel@tonic-gate mapentry_t *me;
17260Sstevel@tonic-gate mapentry_t **mep;
17270Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
17280Sstevel@tonic-gate crb_t *crb;
17290Sstevel@tonic-gate
17300Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1731*4662Sfrankho map_check_linkage(mtm));
17320Sstevel@tonic-gate
17330Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) {
17340Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
17350Sstevel@tonic-gate if (hnb > nb)
17360Sstevel@tonic-gate hnb = nb;
17370Sstevel@tonic-gate /*
17382040Sjkennedy * Find overlapping metadata entries. Don't search through
17392040Sjkennedy * the hash chains if this is user data because it is only
17402040Sjkennedy * possible to have overlapping map entries for metadata,
17412040Sjkennedy * and the search can become expensive for large files.
17420Sstevel@tonic-gate */
17432040Sjkennedy if (metadata) {
17442040Sjkennedy mep = MAP_HASH(mof, mtm);
17452040Sjkennedy mutex_enter(&mtm->mtm_mutex);
17462040Sjkennedy for (me = *mep; me; me = me->me_hash) {
17472040Sjkennedy if (!DATAoverlapME(mof, hnb, me))
17482040Sjkennedy continue;
17490Sstevel@tonic-gate
17502040Sjkennedy ASSERT(MEwithinDATA(me, mof, hnb));
17510Sstevel@tonic-gate
17522040Sjkennedy if ((me->me_flags & ME_CANCEL) == 0) {
17532040Sjkennedy me->me_cancel = mtm->mtm_cancel;
17542040Sjkennedy mtm->mtm_cancel = me;
17552040Sjkennedy me->me_flags |= ME_CANCEL;
17562040Sjkennedy crb = me->me_crb;
17572040Sjkennedy if (crb) {
17582040Sjkennedy crb->c_invalid = 1;
17592040Sjkennedy }
17600Sstevel@tonic-gate }
17610Sstevel@tonic-gate }
17622040Sjkennedy mutex_exit(&mtm->mtm_mutex);
17630Sstevel@tonic-gate }
17640Sstevel@tonic-gate
17650Sstevel@tonic-gate /*
17660Sstevel@tonic-gate * put a cancel record into the log
17670Sstevel@tonic-gate */
17680Sstevel@tonic-gate logmap_cancel_delta(ul, mof, hnb, metadata);
17690Sstevel@tonic-gate }
17700Sstevel@tonic-gate
17710Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
1772*4662Sfrankho map_check_linkage(mtm));
17730Sstevel@tonic-gate }
17740Sstevel@tonic-gate
17750Sstevel@tonic-gate /*
17760Sstevel@tonic-gate * check for overlap w/cancel delta
17770Sstevel@tonic-gate */
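/*
 * A sketch of intended use: before reusing a range of the master
 * device, a caller can check whether it still overlaps a canceled or
 * still-rolling delta, e.g.
 *
 *	if (logmap_iscancel(ul->un_logmap, mof, nb))
 *		return (EAGAIN);	(EAGAIN is illustrative only)
 */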
17780Sstevel@tonic-gate int
17790Sstevel@tonic-gate logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
17800Sstevel@tonic-gate {
17810Sstevel@tonic-gate off_t hnb;
17820Sstevel@tonic-gate mapentry_t *me;
17830Sstevel@tonic-gate mapentry_t **mep;
17840Sstevel@tonic-gate
17850Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex);
17860Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) {
17870Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
17880Sstevel@tonic-gate if (hnb > nb)
17890Sstevel@tonic-gate hnb = nb;
17900Sstevel@tonic-gate /*
17910Sstevel@tonic-gate * search for dup entry
17920Sstevel@tonic-gate */
17930Sstevel@tonic-gate mep = MAP_HASH(mof, mtm);
17940Sstevel@tonic-gate for (me = *mep; me; me = me->me_hash) {
17950Sstevel@tonic-gate if (((me->me_flags & ME_ROLL) == 0) &&
17960Sstevel@tonic-gate (me->me_dt != DT_CANCEL))
17970Sstevel@tonic-gate continue;
17980Sstevel@tonic-gate if (DATAoverlapME(mof, hnb, me))
17990Sstevel@tonic-gate break;
18000Sstevel@tonic-gate }
18010Sstevel@tonic-gate
18020Sstevel@tonic-gate /*
18030Sstevel@tonic-gate * overlap detected
18040Sstevel@tonic-gate */
18050Sstevel@tonic-gate if (me) {
18060Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
18070Sstevel@tonic-gate return (1);
18080Sstevel@tonic-gate }
18090Sstevel@tonic-gate }
18100Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex);
18110Sstevel@tonic-gate return (0);
18120Sstevel@tonic-gate }
18130Sstevel@tonic-gate
18140Sstevel@tonic-gate static int
18150Sstevel@tonic-gate logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
18160Sstevel@tonic-gate {
18170Sstevel@tonic-gate mapentry_t *me;
18180Sstevel@tonic-gate int error;
18190Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap;
18200Sstevel@tonic-gate
18210Sstevel@tonic-gate /*
18220Sstevel@tonic-gate * verify delta header; failure == mediafail
18230Sstevel@tonic-gate */
18240Sstevel@tonic-gate error = 0;
18250Sstevel@tonic-gate /* delta type */
18260Sstevel@tonic-gate if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
18270Sstevel@tonic-gate error = EINVAL;
18280Sstevel@tonic-gate if (dp->d_typ == DT_COMMIT) {
18290Sstevel@tonic-gate if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
18300Sstevel@tonic-gate error = EINVAL;
18310Sstevel@tonic-gate } else {
18320Sstevel@tonic-gate /* length of delta */
18330Sstevel@tonic-gate if ((dp->d_nb < INT32_C(0)) ||
18340Sstevel@tonic-gate (dp->d_nb > INT32_C(MAPBLOCKSIZE)))
18350Sstevel@tonic-gate error = EINVAL;
18360Sstevel@tonic-gate
18370Sstevel@tonic-gate /* offset on master device */
18380Sstevel@tonic-gate if (dp->d_mof < INT64_C(0))
18390Sstevel@tonic-gate error = EINVAL;
18400Sstevel@tonic-gate }
18410Sstevel@tonic-gate
18420Sstevel@tonic-gate if (error) {
18430Sstevel@tonic-gate ldl_seterror(ul, "Error processing ufs log data during scan");
18440Sstevel@tonic-gate return (error);
18450Sstevel@tonic-gate }
18460Sstevel@tonic-gate
18470Sstevel@tonic-gate /*
18480Sstevel@tonic-gate * process commit record
18490Sstevel@tonic-gate */
18500Sstevel@tonic-gate if (dp->d_typ == DT_COMMIT) {
18510Sstevel@tonic-gate if (mtm->mtm_dirty) {
18520Sstevel@tonic-gate ASSERT(dp->d_nb == INT32_C(0));
18530Sstevel@tonic-gate logmap_free_cancel(mtm, &mtm->mtm_cancel);
18540Sstevel@tonic-gate mtm->mtm_dirty = 0;
18550Sstevel@tonic-gate mtm->mtm_nmet = 0;
18560Sstevel@tonic-gate mtm->mtm_tid++;
18570Sstevel@tonic-gate mtm->mtm_committid = mtm->mtm_tid;
18580Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
1859*4662Sfrankho logmap_logscan_commit_debug(lof, mtm));
18600Sstevel@tonic-gate }
18610Sstevel@tonic-gate /*
18620Sstevel@tonic-gate * return #bytes to next sector (next delta header)
18630Sstevel@tonic-gate */
18640Sstevel@tonic-gate *nbp = ldl_logscan_nbcommit(lof);
18650Sstevel@tonic-gate mtm->mtm_tail_lof = lof;
18660Sstevel@tonic-gate mtm->mtm_tail_nb = *nbp;
18670Sstevel@tonic-gate return (0);
18680Sstevel@tonic-gate }
18690Sstevel@tonic-gate
18700Sstevel@tonic-gate /*
18710Sstevel@tonic-gate * add delta to logmap
18720Sstevel@tonic-gate */
18730Sstevel@tonic-gate me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
18740Sstevel@tonic-gate bzero(me, sizeof (mapentry_t));
18750Sstevel@tonic-gate me->me_lof = lof;
18760Sstevel@tonic-gate me->me_mof = dp->d_mof;
18770Sstevel@tonic-gate me->me_nb = dp->d_nb;
18780Sstevel@tonic-gate me->me_tid = mtm->mtm_tid;
18790Sstevel@tonic-gate me->me_dt = dp->d_typ;
18800Sstevel@tonic-gate me->me_hash = NULL;
18810Sstevel@tonic-gate me->me_flags = (ME_LIST | ME_SCAN);
18820Sstevel@tonic-gate logmap_add(ul, NULL, 0, me);
18830Sstevel@tonic-gate switch (dp->d_typ) {
18840Sstevel@tonic-gate case DT_CANCEL:
18850Sstevel@tonic-gate me->me_flags |= ME_CANCEL;
18860Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel;
18870Sstevel@tonic-gate mtm->mtm_cancel = me;
18880Sstevel@tonic-gate break;
18890Sstevel@tonic-gate default:
18900Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
1891*4662Sfrankho logmap_logscan_add_debug(dp, mtm));
18920Sstevel@tonic-gate break;
18930Sstevel@tonic-gate }
18940Sstevel@tonic-gate
18950Sstevel@tonic-gate sizeofdelta:
18960Sstevel@tonic-gate /*
18970Sstevel@tonic-gate * return #bytes till next delta header
18980Sstevel@tonic-gate */
18990Sstevel@tonic-gate if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
19000Sstevel@tonic-gate *nbp = 0;
19010Sstevel@tonic-gate else
19020Sstevel@tonic-gate *nbp = dp->d_nb;
19030Sstevel@tonic-gate return (0);
19040Sstevel@tonic-gate }
19050Sstevel@tonic-gate
19060Sstevel@tonic-gate void
19070Sstevel@tonic-gate logmap_logscan(ml_unit_t *ul)
19080Sstevel@tonic-gate {
19090Sstevel@tonic-gate size_t nb, nbd;
19100Sstevel@tonic-gate off_t lof;
19110Sstevel@tonic-gate struct delta delta;
19120Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap;
19130Sstevel@tonic-gate
19140Sstevel@tonic-gate ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);
19150Sstevel@tonic-gate
19160Sstevel@tonic-gate /*
19170Sstevel@tonic-gate * prepare the log for a logscan
19180Sstevel@tonic-gate */
19190Sstevel@tonic-gate ldl_logscan_begin(ul);
19200Sstevel@tonic-gate
19210Sstevel@tonic-gate /*
19220Sstevel@tonic-gate * prepare the logmap for a logscan
19230Sstevel@tonic-gate */
19240Sstevel@tonic-gate (void) map_free_entries(logmap);
19250Sstevel@tonic-gate logmap->mtm_tid = 0;
19260Sstevel@tonic-gate logmap->mtm_committid = UINT32_C(0);
19270Sstevel@tonic-gate logmap->mtm_age = 0;
19280Sstevel@tonic-gate logmap->mtm_dirty = 0;
19290Sstevel@tonic-gate logmap->mtm_ref = 0;
19300Sstevel@tonic-gate
19310Sstevel@tonic-gate /*
19320Sstevel@tonic-gate * while not at end of log
19330Sstevel@tonic-gate * read delta header
19340Sstevel@tonic-gate * add to logmap
19350Sstevel@tonic-gate * seek to beginning of next delta
19360Sstevel@tonic-gate */
19370Sstevel@tonic-gate lof = ul->un_head_lof;
19380Sstevel@tonic-gate nbd = sizeof (delta);
19390Sstevel@tonic-gate while (lof != ul->un_tail_lof) {
19400Sstevel@tonic-gate
19410Sstevel@tonic-gate /* read delta header */
19420Sstevel@tonic-gate if (ldl_logscan_read(ul, &lof, nbd, (caddr_t)&delta))
19430Sstevel@tonic-gate break;
19440Sstevel@tonic-gate
19450Sstevel@tonic-gate /* add to logmap */
19460Sstevel@tonic-gate if (logmap_logscan_add(ul, &delta, lof, &nb))
19470Sstevel@tonic-gate break;
19480Sstevel@tonic-gate
19490Sstevel@tonic-gate /* seek to next header (skip data) */
19500Sstevel@tonic-gate if (ldl_logscan_read(ul, &lof, nb, NULL))
19510Sstevel@tonic-gate break;
19520Sstevel@tonic-gate }
19530Sstevel@tonic-gate
19540Sstevel@tonic-gate /*
19550Sstevel@tonic-gate * remove the last partial transaction from the logmap
19560Sstevel@tonic-gate */
19570Sstevel@tonic-gate logmap_abort(ul, logmap->mtm_tid);
19580Sstevel@tonic-gate
19590Sstevel@tonic-gate ldl_logscan_end(ul);
19600Sstevel@tonic-gate }
19610Sstevel@tonic-gate
19620Sstevel@tonic-gate void
19630Sstevel@tonic-gate _init_map(void)
19640Sstevel@tonic-gate {
19650Sstevel@tonic-gate /*
19660Sstevel@tonic-gate 	 * Initialise the mapentry cache. No constructor or destructor
19670Sstevel@tonic-gate * is needed. Also no reclaim function is supplied as reclaiming
19680Sstevel@tonic-gate * current entries is not possible.
19690Sstevel@tonic-gate */
19700Sstevel@tonic-gate mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
19710Sstevel@tonic-gate sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
19720Sstevel@tonic-gate }
19730Sstevel@tonic-gate
19740Sstevel@tonic-gate /*
19750Sstevel@tonic-gate * Special case when we replace an old map entry which carries quota
19760Sstevel@tonic-gate * information with a newer entry which does not.
19770Sstevel@tonic-gate * In that case the push function would not be called to clean up the
19780Sstevel@tonic-gate * dquot structure. This would be found later by invalidatedq() causing
19790Sstevel@tonic-gate  * a panic when the filesystem is unmounted.
19800Sstevel@tonic-gate * We clean up the dquot manually before replacing the map entry.
19810Sstevel@tonic-gate */
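/*
 * handle_dquot() is reached through the HANDLE_DQUOT() macro when a
 * DT_QR mapentry is replaced (see logmap_add_buf() above); the dquot
 * pointer is carried in me_arg.
 */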
19820Sstevel@tonic-gate void
19830Sstevel@tonic-gate handle_dquot(mapentry_t *me)
19840Sstevel@tonic-gate {
19850Sstevel@tonic-gate int dolock = 0;
19860Sstevel@tonic-gate int domutex = 0;
19870Sstevel@tonic-gate struct dquot *dqp;
19880Sstevel@tonic-gate
19890Sstevel@tonic-gate dqp = (struct dquot *)me->me_arg;
19900Sstevel@tonic-gate
19910Sstevel@tonic-gate /*
19920Sstevel@tonic-gate * We need vfs_dqrwlock to call dqput()
19930Sstevel@tonic-gate */
19940Sstevel@tonic-gate dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
19950Sstevel@tonic-gate if (dolock)
19960Sstevel@tonic-gate rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);
19970Sstevel@tonic-gate
19980Sstevel@tonic-gate domutex = (!MUTEX_HELD(&dqp->dq_lock));
19990Sstevel@tonic-gate if (domutex)
20000Sstevel@tonic-gate mutex_enter(&dqp->dq_lock);
20010Sstevel@tonic-gate
20020Sstevel@tonic-gate /*
20030Sstevel@tonic-gate * Only clean up if the dquot is referenced
20040Sstevel@tonic-gate */
20050Sstevel@tonic-gate if (dqp->dq_cnt == 0) {
20060Sstevel@tonic-gate if (domutex)
20070Sstevel@tonic-gate mutex_exit(&dqp->dq_lock);
20080Sstevel@tonic-gate if (dolock)
20090Sstevel@tonic-gate rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
20100Sstevel@tonic-gate return;
20110Sstevel@tonic-gate }
20120Sstevel@tonic-gate
20130Sstevel@tonic-gate dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
20140Sstevel@tonic-gate dqput(dqp);
20150Sstevel@tonic-gate
20160Sstevel@tonic-gate if (domutex)
20170Sstevel@tonic-gate mutex_exit(&dqp->dq_lock);
20180Sstevel@tonic-gate
20190Sstevel@tonic-gate if (dolock)
20200Sstevel@tonic-gate rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
20210Sstevel@tonic-gate
20220Sstevel@tonic-gate }