10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*2040Sjkennedy * Common Development and Distribution License (the "License"). 6*2040Sjkennedy * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 220Sstevel@tonic-gate 230Sstevel@tonic-gate /* 24*2040Sjkennedy * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 250Sstevel@tonic-gate * Use is subject to license terms. 
260Sstevel@tonic-gate */ 270Sstevel@tonic-gate 280Sstevel@tonic-gate #include <sys/systm.h> 290Sstevel@tonic-gate #include <sys/types.h> 300Sstevel@tonic-gate #include <sys/vnode.h> 310Sstevel@tonic-gate #include <sys/errno.h> 320Sstevel@tonic-gate #include <sys/sysmacros.h> 330Sstevel@tonic-gate #include <sys/debug.h> 340Sstevel@tonic-gate #include <sys/kmem.h> 350Sstevel@tonic-gate #include <sys/conf.h> 360Sstevel@tonic-gate #include <sys/proc.h> 370Sstevel@tonic-gate #include <sys/cmn_err.h> 380Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 390Sstevel@tonic-gate #include <sys/fs/ufs_filio.h> 400Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 410Sstevel@tonic-gate #include <sys/inttypes.h> 420Sstevel@tonic-gate #include <sys/atomic.h> 430Sstevel@tonic-gate #include <sys/tuneable.h> 440Sstevel@tonic-gate 450Sstevel@tonic-gate /* 460Sstevel@tonic-gate * externs 470Sstevel@tonic-gate */ 480Sstevel@tonic-gate extern pri_t minclsyspri; 490Sstevel@tonic-gate extern struct kmem_cache *lufs_bp; 500Sstevel@tonic-gate extern int ufs_trans_push_quota(); 510Sstevel@tonic-gate 520Sstevel@tonic-gate /* 530Sstevel@tonic-gate * globals 540Sstevel@tonic-gate */ 550Sstevel@tonic-gate kmem_cache_t *mapentry_cache; 560Sstevel@tonic-gate 570Sstevel@tonic-gate /* 580Sstevel@tonic-gate * logmap tuning constants 590Sstevel@tonic-gate */ 600Sstevel@tonic-gate long logmap_maxnme_commit = 2048; 610Sstevel@tonic-gate long logmap_maxnme_async = 4096; 620Sstevel@tonic-gate long logmap_maxnme_sync = 6144; 630Sstevel@tonic-gate long logmap_maxcfrag_commit = 4; /* Max canceled fragments per moby */ 640Sstevel@tonic-gate 650Sstevel@tonic-gate 660Sstevel@tonic-gate uint64_t ufs_crb_size = 0; /* current size of all crb buffers */ 670Sstevel@tonic-gate uint64_t ufs_crb_max_size = 0; /* highest crb buffer use so far */ 680Sstevel@tonic-gate size_t ufs_crb_limit; /* max allowable size for crbs */ 690Sstevel@tonic-gate uint64_t ufs_crb_alloc_fails = 0; /* crb allocation failures stat */ 
700Sstevel@tonic-gate #define UFS_MAX_CRB_DEFAULT_DIVISOR 10 /* max 1/10 kmem_maxavail() */ 710Sstevel@tonic-gate int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR; /* tunable */ 720Sstevel@tonic-gate void handle_dquot(mapentry_t *); 730Sstevel@tonic-gate 740Sstevel@tonic-gate /* 750Sstevel@tonic-gate * GENERIC MAP ROUTINES 760Sstevel@tonic-gate */ 770Sstevel@tonic-gate 780Sstevel@tonic-gate #define CRB_FREE(crb, me) \ 790Sstevel@tonic-gate kmem_free(crb->c_buf, crb->c_nb); \ 800Sstevel@tonic-gate atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \ 810Sstevel@tonic-gate kmem_free(crb, sizeof (crb_t)); \ 820Sstevel@tonic-gate (me)->me_crb = NULL; 830Sstevel@tonic-gate 840Sstevel@tonic-gate #define CRB_RELE(me) { \ 850Sstevel@tonic-gate crb_t *crb = (me)->me_crb; \ 860Sstevel@tonic-gate if (crb && (--crb->c_refcnt == 0)) { \ 870Sstevel@tonic-gate CRB_FREE(crb, me) \ 880Sstevel@tonic-gate } \ 890Sstevel@tonic-gate } 900Sstevel@tonic-gate 910Sstevel@tonic-gate /* 920Sstevel@tonic-gate * Check that the old delta has an argument and a push function of 930Sstevel@tonic-gate * ufs_trans_push_quota(), then check that the old and new deltas differ. 940Sstevel@tonic-gate * If so we clean up with handle_dquot() before replacing the old delta. 
950Sstevel@tonic-gate */ 960Sstevel@tonic-gate #define HANDLE_DQUOT(me, melist) { \ 970Sstevel@tonic-gate if ((me->me_arg) && \ 980Sstevel@tonic-gate (me->me_func == ufs_trans_push_quota)) { \ 990Sstevel@tonic-gate if (!((me->me_dt == melist->me_dt) && \ 1000Sstevel@tonic-gate (me->me_arg == melist->me_arg) && \ 1010Sstevel@tonic-gate (me->me_func == melist->me_func))) { \ 1020Sstevel@tonic-gate handle_dquot(me); \ 1030Sstevel@tonic-gate } \ 1040Sstevel@tonic-gate } \ 1050Sstevel@tonic-gate } 1060Sstevel@tonic-gate 1070Sstevel@tonic-gate /* 1080Sstevel@tonic-gate * free up all the mapentries for a map 1090Sstevel@tonic-gate */ 1100Sstevel@tonic-gate void 1110Sstevel@tonic-gate map_free_entries(mt_map_t *mtm) 1120Sstevel@tonic-gate { 1130Sstevel@tonic-gate int i; 1140Sstevel@tonic-gate mapentry_t *me; 1150Sstevel@tonic-gate 1160Sstevel@tonic-gate while ((me = mtm->mtm_next) != (mapentry_t *)mtm) { 1170Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 1180Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 1190Sstevel@tonic-gate CRB_RELE(me); 1200Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 1210Sstevel@tonic-gate } 1220Sstevel@tonic-gate for (i = 0; i < mtm->mtm_nhash; i++) 1230Sstevel@tonic-gate mtm->mtm_hash[i] = NULL; 1240Sstevel@tonic-gate mtm->mtm_nme = 0; 1250Sstevel@tonic-gate mtm->mtm_nmet = 0; 1260Sstevel@tonic-gate } 1270Sstevel@tonic-gate 1280Sstevel@tonic-gate /* 1290Sstevel@tonic-gate * done with map; free if necessary 1300Sstevel@tonic-gate */ 1310Sstevel@tonic-gate mt_map_t * 1320Sstevel@tonic-gate map_put(mt_map_t *mtm) 1330Sstevel@tonic-gate { 1340Sstevel@tonic-gate /* 1350Sstevel@tonic-gate * free up the map's memory 1360Sstevel@tonic-gate */ 1370Sstevel@tonic-gate map_free_entries(mtm); 1380Sstevel@tonic-gate ASSERT(map_put_debug(mtm)); 1390Sstevel@tonic-gate kmem_free(mtm->mtm_hash, 1400Sstevel@tonic-gate (size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash)); 1410Sstevel@tonic-gate mutex_destroy(&mtm->mtm_mutex); 
1420Sstevel@tonic-gate mutex_destroy(&mtm->mtm_scan_mutex); 1430Sstevel@tonic-gate cv_destroy(&mtm->mtm_to_roll_cv); 1440Sstevel@tonic-gate cv_destroy(&mtm->mtm_from_roll_cv); 1450Sstevel@tonic-gate rw_destroy(&mtm->mtm_rwlock); 1460Sstevel@tonic-gate mutex_destroy(&mtm->mtm_lock); 1470Sstevel@tonic-gate cv_destroy(&mtm->mtm_cv_commit); 1480Sstevel@tonic-gate cv_destroy(&mtm->mtm_cv_next); 1490Sstevel@tonic-gate cv_destroy(&mtm->mtm_cv_eot); 1500Sstevel@tonic-gate cv_destroy(&mtm->mtm_cv); 1510Sstevel@tonic-gate kmem_free(mtm, sizeof (mt_map_t)); 1520Sstevel@tonic-gate return (NULL); 1530Sstevel@tonic-gate } 1540Sstevel@tonic-gate /* 1550Sstevel@tonic-gate * Allocate a map; 1560Sstevel@tonic-gate */ 1570Sstevel@tonic-gate mt_map_t * 1580Sstevel@tonic-gate map_get(ml_unit_t *ul, enum maptypes maptype, int nh) 1590Sstevel@tonic-gate { 1600Sstevel@tonic-gate mt_map_t *mtm; 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate /* 1630Sstevel@tonic-gate * assume the map is not here and allocate the necessary structs 1640Sstevel@tonic-gate */ 1650Sstevel@tonic-gate mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP); 1660Sstevel@tonic-gate mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL); 1670Sstevel@tonic-gate mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL); 1680Sstevel@tonic-gate cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL); 1690Sstevel@tonic-gate cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL); 1700Sstevel@tonic-gate rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL); 1710Sstevel@tonic-gate mtm->mtm_next = (mapentry_t *)mtm; 1720Sstevel@tonic-gate mtm->mtm_prev = (mapentry_t *)mtm; 1730Sstevel@tonic-gate mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh), 1740Sstevel@tonic-gate KM_SLEEP); 1750Sstevel@tonic-gate mtm->mtm_nhash = nh; 1760Sstevel@tonic-gate mtm->mtm_debug = ul->un_debug; 1770Sstevel@tonic-gate mtm->mtm_type = maptype; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate mtm->mtm_cfrags = 0; 1800Sstevel@tonic-gate 
mtm->mtm_cfragmax = logmap_maxcfrag_commit; 1810Sstevel@tonic-gate 1820Sstevel@tonic-gate /* 1830Sstevel@tonic-gate * for scan test 1840Sstevel@tonic-gate */ 1850Sstevel@tonic-gate mtm->mtm_ul = ul; 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate /* 1880Sstevel@tonic-gate * Initialize locks 1890Sstevel@tonic-gate */ 1900Sstevel@tonic-gate mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL); 1910Sstevel@tonic-gate cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL); 1920Sstevel@tonic-gate cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL); 1930Sstevel@tonic-gate cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL); 1940Sstevel@tonic-gate cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL); 1950Sstevel@tonic-gate ASSERT(map_get_debug(ul, mtm)); 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate return (mtm); 1980Sstevel@tonic-gate } 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate /* 2010Sstevel@tonic-gate * DELTAMAP ROUTINES 2020Sstevel@tonic-gate */ 2030Sstevel@tonic-gate /* 2040Sstevel@tonic-gate * deltamap tuning constants 2050Sstevel@tonic-gate */ 2060Sstevel@tonic-gate long deltamap_maxnme = 1024; /* global so it can be set */ 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate int 2090Sstevel@tonic-gate deltamap_need_commit(mt_map_t *mtm) 2100Sstevel@tonic-gate { 2110Sstevel@tonic-gate return (mtm->mtm_nme > deltamap_maxnme); 2120Sstevel@tonic-gate } 2130Sstevel@tonic-gate 2140Sstevel@tonic-gate /* 2150Sstevel@tonic-gate * put a delta into a deltamap; may sleep on memory 2160Sstevel@tonic-gate */ 2170Sstevel@tonic-gate void 2180Sstevel@tonic-gate deltamap_add( 2190Sstevel@tonic-gate mt_map_t *mtm, 2200Sstevel@tonic-gate offset_t mof, 2210Sstevel@tonic-gate off_t nb, 2220Sstevel@tonic-gate delta_t dtyp, 2230Sstevel@tonic-gate int (*func)(), 2240Sstevel@tonic-gate ulong_t arg, 2250Sstevel@tonic-gate threadtrans_t *tp) 2260Sstevel@tonic-gate { 2270Sstevel@tonic-gate int32_t hnb; 2280Sstevel@tonic-gate mapentry_t *me; 2290Sstevel@tonic-gate mapentry_t **mep; 2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 2320Sstevel@tonic-gate map_check_linkage(mtm)); 2330Sstevel@tonic-gate 2340Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 2350Sstevel@tonic-gate 2360Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) { 2370Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF); 2380Sstevel@tonic-gate if (hnb > nb) 2390Sstevel@tonic-gate hnb = nb; 2400Sstevel@tonic-gate /* 2410Sstevel@tonic-gate * Search for dup entry. We need to ensure that we don't 2420Sstevel@tonic-gate * replace a map entry which carries quota information 2430Sstevel@tonic-gate * with a map entry which doesn't. In that case we lose 2440Sstevel@tonic-gate * reference the the dquot structure which will not be 2450Sstevel@tonic-gate * cleaned up by the push function me->me_func as this will 2460Sstevel@tonic-gate * never be called. 2470Sstevel@tonic-gate * The stray dquot would be found later by invalidatedq() 2480Sstevel@tonic-gate * causing a panic when the filesystem is unmounted. 2490Sstevel@tonic-gate */ 2500Sstevel@tonic-gate mep = MAP_HASH(mof, mtm); 2510Sstevel@tonic-gate for (me = *mep; me; me = me->me_hash) { 2520Sstevel@tonic-gate if (DATAwithinME(mof, hnb, me)) { 2530Sstevel@tonic-gate if (me->me_func == ufs_trans_push_quota) { 2540Sstevel@tonic-gate /* 2550Sstevel@tonic-gate * Don't remove quota entries which have 2560Sstevel@tonic-gate * incremented the ref count (those with a 2570Sstevel@tonic-gate * ufs_trans_push_quota push function). 2580Sstevel@tonic-gate * Let logmap_add[_buf] clean them up. 
2590Sstevel@tonic-gate */ 2600Sstevel@tonic-gate continue; 2610Sstevel@tonic-gate } 2620Sstevel@tonic-gate break; 2630Sstevel@tonic-gate } 2640Sstevel@tonic-gate ASSERT((dtyp == DT_CANCEL) || 2650Sstevel@tonic-gate (!DATAoverlapME(mof, hnb, me)) || 2660Sstevel@tonic-gate MEwithinDATA(me, mof, hnb)); 2670Sstevel@tonic-gate } 2680Sstevel@tonic-gate 2690Sstevel@tonic-gate if (me) { 2700Sstevel@tonic-gate /* already in map */ 2710Sstevel@tonic-gate continue; 2720Sstevel@tonic-gate } 2730Sstevel@tonic-gate 2740Sstevel@tonic-gate /* 2750Sstevel@tonic-gate * Add up all the delta map deltas so we can compute 2760Sstevel@tonic-gate * an upper bound on the log size used. 2770Sstevel@tonic-gate * Note, some deltas get removed from the deltamap 2780Sstevel@tonic-gate * before the deltamap_push by lufs_write_strategy 2790Sstevel@tonic-gate * and so multiple deltas to the same mof offset 2800Sstevel@tonic-gate * don't get cancelled here but in the logmap. 2810Sstevel@tonic-gate * Thus we can't easily get a accurate count of 2820Sstevel@tonic-gate * the log space used - only an upper bound. 
2830Sstevel@tonic-gate */ 2840Sstevel@tonic-gate if (tp && (mtm->mtm_ul->un_deltamap == mtm)) { 2850Sstevel@tonic-gate ASSERT(dtyp != DT_CANCEL); 2860Sstevel@tonic-gate if (dtyp == DT_ABZERO) { 2870Sstevel@tonic-gate tp->deltas_size += sizeof (struct delta); 2880Sstevel@tonic-gate } else { 2890Sstevel@tonic-gate tp->deltas_size += 2900Sstevel@tonic-gate (hnb + sizeof (struct delta)); 2910Sstevel@tonic-gate } 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate 2940Sstevel@tonic-gate delta_stats[dtyp]++; 2950Sstevel@tonic-gate 2960Sstevel@tonic-gate /* 2970Sstevel@tonic-gate * get a mapentry 2980Sstevel@tonic-gate * May need to drop & re-grab the mtm_mutex 2990Sstevel@tonic-gate * and then recheck for a duplicate 3000Sstevel@tonic-gate */ 3010Sstevel@tonic-gate me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP); 3020Sstevel@tonic-gate if (me == NULL) { 3030Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 3040Sstevel@tonic-gate me = kmem_cache_alloc(mapentry_cache, KM_SLEEP); 3050Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 3060Sstevel@tonic-gate } 3070Sstevel@tonic-gate bzero(me, sizeof (mapentry_t)); 3080Sstevel@tonic-gate 3090Sstevel@tonic-gate /* 3100Sstevel@tonic-gate * initialize and put in deltamap 3110Sstevel@tonic-gate */ 3120Sstevel@tonic-gate me->me_mof = mof; 3130Sstevel@tonic-gate me->me_nb = hnb; 3140Sstevel@tonic-gate me->me_func = func; 3150Sstevel@tonic-gate me->me_arg = arg; 3160Sstevel@tonic-gate me->me_dt = dtyp; 3170Sstevel@tonic-gate me->me_flags = ME_HASH; 3180Sstevel@tonic-gate me->me_tid = mtm->mtm_tid; 3190Sstevel@tonic-gate 3200Sstevel@tonic-gate me->me_hash = *mep; 3210Sstevel@tonic-gate *mep = me; 3220Sstevel@tonic-gate me->me_next = (mapentry_t *)mtm; 3230Sstevel@tonic-gate me->me_prev = mtm->mtm_prev; 3240Sstevel@tonic-gate mtm->mtm_prev->me_next = me; 3250Sstevel@tonic-gate mtm->mtm_prev = me; 3260Sstevel@tonic-gate mtm->mtm_nme++; 3270Sstevel@tonic-gate } 3280Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 3310Sstevel@tonic-gate map_check_linkage(mtm)); 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate 3340Sstevel@tonic-gate /* 3350Sstevel@tonic-gate * remove deltas within (mof, nb) and return as linked list 3360Sstevel@tonic-gate */ 3370Sstevel@tonic-gate mapentry_t * 3380Sstevel@tonic-gate deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb) 3390Sstevel@tonic-gate { 3400Sstevel@tonic-gate off_t hnb; 3410Sstevel@tonic-gate mapentry_t *me; 3420Sstevel@tonic-gate mapentry_t **mep; 3430Sstevel@tonic-gate mapentry_t *mer; 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate if (mtm == NULL) 3460Sstevel@tonic-gate return (NULL); 3470Sstevel@tonic-gate 3480Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 3490Sstevel@tonic-gate map_check_linkage(mtm)); 3500Sstevel@tonic-gate 3510Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 3520Sstevel@tonic-gate for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) { 3530Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF); 3540Sstevel@tonic-gate if (hnb > nb) 3550Sstevel@tonic-gate hnb = nb; 3560Sstevel@tonic-gate /* 3570Sstevel@tonic-gate * remove entries from hash and return as a aged linked list 3580Sstevel@tonic-gate */ 3590Sstevel@tonic-gate mep = MAP_HASH(mof, mtm); 3600Sstevel@tonic-gate while ((me = *mep) != 0) { 3610Sstevel@tonic-gate if (MEwithinDATA(me, mof, hnb)) { 3620Sstevel@tonic-gate *mep = me->me_hash; 3630Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 3640Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 3650Sstevel@tonic-gate me->me_hash = mer; 3660Sstevel@tonic-gate mer = me; 3670Sstevel@tonic-gate me->me_flags |= ME_LIST; 3680Sstevel@tonic-gate me->me_flags &= ~ME_HASH; 3690Sstevel@tonic-gate mtm->mtm_nme--; 3700Sstevel@tonic-gate } else 3710Sstevel@tonic-gate mep = &me->me_hash; 3720Sstevel@tonic-gate } 3730Sstevel@tonic-gate } 3740Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 3770Sstevel@tonic-gate map_check_linkage(mtm)); 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate return (mer); 3800Sstevel@tonic-gate } 3810Sstevel@tonic-gate 3820Sstevel@tonic-gate /* 3830Sstevel@tonic-gate * delete entries within (mof, nb) 3840Sstevel@tonic-gate */ 3850Sstevel@tonic-gate void 3860Sstevel@tonic-gate deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb) 3870Sstevel@tonic-gate { 3880Sstevel@tonic-gate mapentry_t *me; 3890Sstevel@tonic-gate mapentry_t *menext; 3900Sstevel@tonic-gate 3910Sstevel@tonic-gate menext = deltamap_remove(mtm, mof, nb); 3920Sstevel@tonic-gate while ((me = menext) != 0) { 3930Sstevel@tonic-gate menext = me->me_hash; 3940Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 3950Sstevel@tonic-gate } 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate 3980Sstevel@tonic-gate /* 3990Sstevel@tonic-gate * Call the indicated function to cause deltas to move to the logmap. 4000Sstevel@tonic-gate * top_end_sync() is the only caller of this function and 4010Sstevel@tonic-gate * it has waited for the completion of all threads, so there can 4020Sstevel@tonic-gate * be no other activity in the deltamap. Therefore we don't need to 4030Sstevel@tonic-gate * hold the deltamap lock. 
4040Sstevel@tonic-gate */ 4050Sstevel@tonic-gate void 4060Sstevel@tonic-gate deltamap_push(ml_unit_t *ul) 4070Sstevel@tonic-gate { 4080Sstevel@tonic-gate delta_t dtyp; 4090Sstevel@tonic-gate int (*func)(); 4100Sstevel@tonic-gate ulong_t arg; 4110Sstevel@tonic-gate mapentry_t *me; 4120Sstevel@tonic-gate offset_t mof; 4130Sstevel@tonic-gate off_t nb; 4140Sstevel@tonic-gate mt_map_t *mtm = ul->un_deltamap; 4150Sstevel@tonic-gate 4160Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 4170Sstevel@tonic-gate map_check_linkage(mtm)); 4180Sstevel@tonic-gate 4190Sstevel@tonic-gate /* 4200Sstevel@tonic-gate * for every entry in the deltamap 4210Sstevel@tonic-gate */ 4220Sstevel@tonic-gate while ((me = mtm->mtm_next) != (mapentry_t *)mtm) { 4230Sstevel@tonic-gate ASSERT(me->me_func); 4240Sstevel@tonic-gate func = me->me_func; 4250Sstevel@tonic-gate dtyp = me->me_dt; 4260Sstevel@tonic-gate arg = me->me_arg; 4270Sstevel@tonic-gate mof = me->me_mof; 4280Sstevel@tonic-gate nb = me->me_nb; 4290Sstevel@tonic-gate if ((ul->un_flags & LDL_ERROR) || 4300Sstevel@tonic-gate (*func)(ul->un_ufsvfs, dtyp, arg)) 4310Sstevel@tonic-gate deltamap_del(mtm, mof, nb); 4320Sstevel@tonic-gate } 4330Sstevel@tonic-gate 4340Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 4350Sstevel@tonic-gate map_check_linkage(mtm)); 4360Sstevel@tonic-gate } 4370Sstevel@tonic-gate 4380Sstevel@tonic-gate /* 4390Sstevel@tonic-gate * LOGMAP ROUTINES 4400Sstevel@tonic-gate */ 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate int 4430Sstevel@tonic-gate logmap_need_commit(mt_map_t *mtm) 4440Sstevel@tonic-gate { 4450Sstevel@tonic-gate return ((mtm->mtm_nmet > logmap_maxnme_commit) || 4460Sstevel@tonic-gate (mtm->mtm_cfrags >= mtm->mtm_cfragmax)); 4470Sstevel@tonic-gate } 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate int 4500Sstevel@tonic-gate logmap_need_roll_async(mt_map_t *mtm) 4510Sstevel@tonic-gate { 4520Sstevel@tonic-gate return (mtm->mtm_nme > logmap_maxnme_async); 
4530Sstevel@tonic-gate } 4540Sstevel@tonic-gate 4550Sstevel@tonic-gate int 4560Sstevel@tonic-gate logmap_need_roll_sync(mt_map_t *mtm) 4570Sstevel@tonic-gate { 4580Sstevel@tonic-gate return (mtm->mtm_nme > logmap_maxnme_sync); 4590Sstevel@tonic-gate } 4600Sstevel@tonic-gate 4610Sstevel@tonic-gate void 4620Sstevel@tonic-gate logmap_start_roll(ml_unit_t *ul) 4630Sstevel@tonic-gate { 4640Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap; 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate logmap_settail(logmap, ul); 4670Sstevel@tonic-gate ASSERT(!(ul->un_flags & LDL_NOROLL)); 4680Sstevel@tonic-gate mutex_enter(&logmap->mtm_mutex); 4690Sstevel@tonic-gate if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) { 4700Sstevel@tonic-gate logmap->mtm_flags |= MTM_ROLL_RUNNING; 4710Sstevel@tonic-gate logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT); 4720Sstevel@tonic-gate (void) thread_create(NULL, 0, trans_roll, ul, 0, &p0, 4730Sstevel@tonic-gate TS_RUN, minclsyspri); 4740Sstevel@tonic-gate } 4750Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 4760Sstevel@tonic-gate } 4770Sstevel@tonic-gate 4780Sstevel@tonic-gate void 4790Sstevel@tonic-gate logmap_kill_roll(ml_unit_t *ul) 4800Sstevel@tonic-gate { 4810Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 4820Sstevel@tonic-gate 4830Sstevel@tonic-gate if (mtm == NULL) 4840Sstevel@tonic-gate return; 4850Sstevel@tonic-gate 4860Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate while (mtm->mtm_flags & MTM_ROLL_RUNNING) { 4890Sstevel@tonic-gate mtm->mtm_flags |= MTM_ROLL_EXIT; 4900Sstevel@tonic-gate cv_signal(&mtm->mtm_to_roll_cv); 4910Sstevel@tonic-gate cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex); 4920Sstevel@tonic-gate } 4930Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 4940Sstevel@tonic-gate } 4950Sstevel@tonic-gate 4960Sstevel@tonic-gate /* 4970Sstevel@tonic-gate * kick the roll thread if it's not doing anything 4980Sstevel@tonic-gate */ 4990Sstevel@tonic-gate void 
5000Sstevel@tonic-gate logmap_forceroll_nowait(mt_map_t *logmap) 5010Sstevel@tonic-gate { 5020Sstevel@tonic-gate /* 5030Sstevel@tonic-gate * Don't need to lock mtm_mutex to read mtm_flags here as we 5040Sstevel@tonic-gate * don't care in the rare case when we get a transitional value 5050Sstevel@tonic-gate * of mtm_flags. Just by signalling the thread it will wakeup 5060Sstevel@tonic-gate * and notice it has too many logmap entries. 5070Sstevel@tonic-gate */ 5080Sstevel@tonic-gate ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL)); 5090Sstevel@tonic-gate if ((logmap->mtm_flags & MTM_ROLLING) == 0) { 5100Sstevel@tonic-gate cv_signal(&logmap->mtm_to_roll_cv); 5110Sstevel@tonic-gate } 5120Sstevel@tonic-gate } 5130Sstevel@tonic-gate 5140Sstevel@tonic-gate /* 5150Sstevel@tonic-gate * kick the roll thread and wait for it to finish a cycle 5160Sstevel@tonic-gate */ 5170Sstevel@tonic-gate void 5180Sstevel@tonic-gate logmap_forceroll(mt_map_t *mtm) 5190Sstevel@tonic-gate { 5200Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 5210Sstevel@tonic-gate if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) { 5220Sstevel@tonic-gate mtm->mtm_flags |= MTM_FORCE_ROLL; 5230Sstevel@tonic-gate cv_signal(&mtm->mtm_to_roll_cv); 5240Sstevel@tonic-gate } 5250Sstevel@tonic-gate do { 5260Sstevel@tonic-gate if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) { 5270Sstevel@tonic-gate mtm->mtm_flags &= ~MTM_FORCE_ROLL; 5280Sstevel@tonic-gate goto out; 5290Sstevel@tonic-gate } 5300Sstevel@tonic-gate cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex); 5310Sstevel@tonic-gate } while (mtm->mtm_flags & MTM_FORCE_ROLL); 5320Sstevel@tonic-gate out: 5330Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 5340Sstevel@tonic-gate } 5350Sstevel@tonic-gate 5360Sstevel@tonic-gate /* 5370Sstevel@tonic-gate * remove rolled deltas within (mof, nb) and free them 5380Sstevel@tonic-gate */ 5390Sstevel@tonic-gate void 5400Sstevel@tonic-gate logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb) 5410Sstevel@tonic-gate { 
5420Sstevel@tonic-gate int dolock = 0; 5430Sstevel@tonic-gate off_t hnb; 5440Sstevel@tonic-gate mapentry_t *me; 5450Sstevel@tonic-gate mapentry_t **mep; 5460Sstevel@tonic-gate offset_t savmof = mof; 5470Sstevel@tonic-gate off_t savnb = nb; 5480Sstevel@tonic-gate 5490Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 5500Sstevel@tonic-gate map_check_linkage(mtm)); 5510Sstevel@tonic-gate 5520Sstevel@tonic-gate again: 5530Sstevel@tonic-gate if (dolock) 5540Sstevel@tonic-gate rw_enter(&mtm->mtm_rwlock, RW_WRITER); 5550Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 5560Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) { 5570Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF); 5580Sstevel@tonic-gate if (hnb > nb) 5590Sstevel@tonic-gate hnb = nb; 5600Sstevel@tonic-gate /* 5610Sstevel@tonic-gate * remove and free the rolled entries 5620Sstevel@tonic-gate */ 5630Sstevel@tonic-gate mep = MAP_HASH(mof, mtm); 5640Sstevel@tonic-gate while ((me = *mep) != 0) { 5650Sstevel@tonic-gate if ((me->me_flags & ME_ROLL) && 5660Sstevel@tonic-gate (MEwithinDATA(me, mof, hnb))) { 5670Sstevel@tonic-gate if (me->me_flags & ME_AGE) { 5680Sstevel@tonic-gate ASSERT(dolock == 0); 5690Sstevel@tonic-gate dolock = 1; 5700Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 5710Sstevel@tonic-gate mof = savmof; 5720Sstevel@tonic-gate nb = savnb; 5730Sstevel@tonic-gate goto again; 5740Sstevel@tonic-gate } 5750Sstevel@tonic-gate *mep = me->me_hash; 5760Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 5770Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 5780Sstevel@tonic-gate me->me_flags &= ~(ME_HASH|ME_ROLL); 5790Sstevel@tonic-gate ASSERT(!(me->me_flags & ME_USER)); 5800Sstevel@tonic-gate mtm->mtm_nme--; 5810Sstevel@tonic-gate /* 5820Sstevel@tonic-gate * cancelled entries are handled by someone else 5830Sstevel@tonic-gate */ 5840Sstevel@tonic-gate if ((me->me_flags & ME_CANCEL) == 0) { 5850Sstevel@tonic-gate roll_stats[me->me_dt]++; 5860Sstevel@tonic-gate 
CRB_RELE(me); 5870Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 5880Sstevel@tonic-gate } 5890Sstevel@tonic-gate } else 5900Sstevel@tonic-gate mep = &me->me_hash; 5910Sstevel@tonic-gate } 5920Sstevel@tonic-gate } 5930Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 5940Sstevel@tonic-gate 5950Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 5960Sstevel@tonic-gate map_check_linkage(mtm)); 5970Sstevel@tonic-gate 5980Sstevel@tonic-gate if (dolock) 5990Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock); 6000Sstevel@tonic-gate } 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate /* 6030Sstevel@tonic-gate * Find the disk offset of the next delta to roll. 6040Sstevel@tonic-gate * Returns 0: no more deltas to roll or a transaction is being committed 6050Sstevel@tonic-gate * 1: a delta to roll has been found and *mofp points 6060Sstevel@tonic-gate * to the master file disk offset 6070Sstevel@tonic-gate */ 6080Sstevel@tonic-gate int 6090Sstevel@tonic-gate logmap_next_roll(mt_map_t *logmap, offset_t *mofp) 6100Sstevel@tonic-gate { 6110Sstevel@tonic-gate mapentry_t *me; 6120Sstevel@tonic-gate 6130Sstevel@tonic-gate ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) || 6140Sstevel@tonic-gate map_check_linkage(logmap)); 6150Sstevel@tonic-gate 6160Sstevel@tonic-gate mutex_enter(&logmap->mtm_mutex); 6170Sstevel@tonic-gate for (me = logmap->mtm_next; me != (mapentry_t *)logmap; 6180Sstevel@tonic-gate me = me->me_next) { 6190Sstevel@tonic-gate /* already rolled */ 6200Sstevel@tonic-gate if (me->me_flags & ME_ROLL) { 6210Sstevel@tonic-gate continue; 6220Sstevel@tonic-gate } 6230Sstevel@tonic-gate 6240Sstevel@tonic-gate /* part of currently busy transaction; stop */ 6250Sstevel@tonic-gate if (me->me_tid == logmap->mtm_tid) { 6260Sstevel@tonic-gate break; 6270Sstevel@tonic-gate } 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate /* part of commit-in-progress transaction; stop */ 6300Sstevel@tonic-gate if (me->me_tid == logmap->mtm_committid) { 6310Sstevel@tonic-gate break; 
6320Sstevel@tonic-gate } 6330Sstevel@tonic-gate 6340Sstevel@tonic-gate /* 6350Sstevel@tonic-gate * We shouldn't see a DT_CANCEL mapentry whose 6360Sstevel@tonic-gate * tid != mtm_committid, or != mtm_tid since 6370Sstevel@tonic-gate * these are removed at the end of each committed 6380Sstevel@tonic-gate * transaction. 6390Sstevel@tonic-gate */ 6400Sstevel@tonic-gate ASSERT(!(me->me_dt == DT_CANCEL)); 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate *mofp = me->me_mof; 6430Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 6440Sstevel@tonic-gate return (1); 6450Sstevel@tonic-gate } 6460Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 6470Sstevel@tonic-gate return (0); 6480Sstevel@tonic-gate } 6490Sstevel@tonic-gate 6500Sstevel@tonic-gate /* 6510Sstevel@tonic-gate * put mapentry on sorted age list 6520Sstevel@tonic-gate */ 6530Sstevel@tonic-gate static void 6540Sstevel@tonic-gate logmap_list_age(mapentry_t **age, mapentry_t *meadd) 6550Sstevel@tonic-gate { 6560Sstevel@tonic-gate mapentry_t *me; 6570Sstevel@tonic-gate 6580Sstevel@tonic-gate ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST))); 6590Sstevel@tonic-gate 6600Sstevel@tonic-gate for (me = *age; me; age = &me->me_agenext, me = *age) { 6610Sstevel@tonic-gate if (me->me_age > meadd->me_age) 6620Sstevel@tonic-gate break; 6630Sstevel@tonic-gate } 6640Sstevel@tonic-gate meadd->me_agenext = me; 6650Sstevel@tonic-gate meadd->me_flags |= ME_AGE; 6660Sstevel@tonic-gate *age = meadd; 6670Sstevel@tonic-gate } 6680Sstevel@tonic-gate 6690Sstevel@tonic-gate /* 6700Sstevel@tonic-gate * get a list of deltas within <mof, mof+nb> 6710Sstevel@tonic-gate * returns with mtm_rwlock held 6720Sstevel@tonic-gate * return value says whether the entire mof range is covered by deltas 6730Sstevel@tonic-gate */ 6740Sstevel@tonic-gate int 6750Sstevel@tonic-gate logmap_list_get( 6760Sstevel@tonic-gate mt_map_t *mtm, 6770Sstevel@tonic-gate offset_t mof, 6780Sstevel@tonic-gate off_t nb, 6790Sstevel@tonic-gate mapentry_t **age) 6800Sstevel@tonic-gate 
{ 6810Sstevel@tonic-gate off_t hnb; 6820Sstevel@tonic-gate mapentry_t *me; 6830Sstevel@tonic-gate mapentry_t **mep; 6840Sstevel@tonic-gate int rwtype = RW_READER; 6850Sstevel@tonic-gate offset_t savmof = mof; 6860Sstevel@tonic-gate off_t savnb = nb; 6870Sstevel@tonic-gate int entire = 0; 6880Sstevel@tonic-gate crb_t *crb; 6890Sstevel@tonic-gate 6900Sstevel@tonic-gate mtm->mtm_ref = 1; 6910Sstevel@tonic-gate again: 6920Sstevel@tonic-gate 6930Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 6940Sstevel@tonic-gate map_check_linkage(mtm)); 6950Sstevel@tonic-gate 6960Sstevel@tonic-gate rw_enter(&mtm->mtm_rwlock, rwtype); 6970Sstevel@tonic-gate *age = NULL; 6980Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 6990Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) { 7000Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF); 7010Sstevel@tonic-gate if (hnb > nb) 7020Sstevel@tonic-gate hnb = nb; 7030Sstevel@tonic-gate /* 7040Sstevel@tonic-gate * find overlapping entries 7050Sstevel@tonic-gate */ 7060Sstevel@tonic-gate mep = MAP_HASH(mof, mtm); 7070Sstevel@tonic-gate for (me = *mep; me; me = me->me_hash) { 7080Sstevel@tonic-gate if (me->me_dt == DT_CANCEL) 7090Sstevel@tonic-gate continue; 7100Sstevel@tonic-gate if (!DATAoverlapME(mof, hnb, me)) 7110Sstevel@tonic-gate continue; 7120Sstevel@tonic-gate /* 7130Sstevel@tonic-gate * check if map entry is in use 7140Sstevel@tonic-gate * (about to be rolled). 
7150Sstevel@tonic-gate */ 7160Sstevel@tonic-gate if (me->me_flags & ME_AGE) { 7170Sstevel@tonic-gate /* 7180Sstevel@tonic-gate * reset the age bit in the list, 7190Sstevel@tonic-gate * upgrade the lock, and try again 7200Sstevel@tonic-gate */ 7210Sstevel@tonic-gate for (me = *age; me; me = *age) { 7220Sstevel@tonic-gate *age = me->me_agenext; 7230Sstevel@tonic-gate me->me_flags &= ~ME_AGE; 7240Sstevel@tonic-gate } 7250Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 7260Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock); 7270Sstevel@tonic-gate rwtype = RW_WRITER; 7280Sstevel@tonic-gate mof = savmof; 7290Sstevel@tonic-gate nb = savnb; 7300Sstevel@tonic-gate entire = 0; 7310Sstevel@tonic-gate goto again; 7320Sstevel@tonic-gate } else { 7330Sstevel@tonic-gate /* add mapentry to age ordered list */ 7340Sstevel@tonic-gate logmap_list_age(age, me); 7350Sstevel@tonic-gate crb = me->me_crb; 7360Sstevel@tonic-gate if (crb) { 7370Sstevel@tonic-gate if (DATAwithinCRB(savmof, savnb, crb)) { 7380Sstevel@tonic-gate entire = 1; 7390Sstevel@tonic-gate } 7400Sstevel@tonic-gate } else { 7410Sstevel@tonic-gate if (DATAwithinME(savmof, savnb, me)) { 7420Sstevel@tonic-gate entire = 1; 7430Sstevel@tonic-gate } 7440Sstevel@tonic-gate } 7450Sstevel@tonic-gate } 7460Sstevel@tonic-gate } 7470Sstevel@tonic-gate } 7480Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 7490Sstevel@tonic-gate 7500Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock)); 7510Sstevel@tonic-gate return (entire); 7520Sstevel@tonic-gate } 7530Sstevel@tonic-gate 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * Get a list of deltas for rolling - returns sucess or failure. 7560Sstevel@tonic-gate * Also return the cached roll buffer if all deltas point to it. 
7570Sstevel@tonic-gate */ 7580Sstevel@tonic-gate int 7590Sstevel@tonic-gate logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp) 7600Sstevel@tonic-gate { 7610Sstevel@tonic-gate mapentry_t *me, **mep, *age = NULL; 7620Sstevel@tonic-gate crb_t *crb = NULL; 7630Sstevel@tonic-gate 7640Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock)); 7650Sstevel@tonic-gate ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) || 7660Sstevel@tonic-gate map_check_linkage(logmap)); 7670Sstevel@tonic-gate ASSERT((mof & MAPBLOCKOFF) == 0); 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate rbp->rb_crb = NULL; 7700Sstevel@tonic-gate 7710Sstevel@tonic-gate /* 7720Sstevel@tonic-gate * find overlapping entries 7730Sstevel@tonic-gate */ 7740Sstevel@tonic-gate mutex_enter(&logmap->mtm_mutex); 7750Sstevel@tonic-gate mep = MAP_HASH(mof, logmap); 7760Sstevel@tonic-gate for (me = *mep; me; me = me->me_hash) { 7770Sstevel@tonic-gate if (!DATAoverlapME(mof, MAPBLOCKSIZE, me)) 7780Sstevel@tonic-gate continue; 7790Sstevel@tonic-gate if (me->me_tid == logmap->mtm_tid) 7800Sstevel@tonic-gate continue; 7810Sstevel@tonic-gate if (me->me_tid == logmap->mtm_committid) 7820Sstevel@tonic-gate continue; 7830Sstevel@tonic-gate if (me->me_dt == DT_CANCEL) 7840Sstevel@tonic-gate continue; 7850Sstevel@tonic-gate 7860Sstevel@tonic-gate /* 7870Sstevel@tonic-gate * Check if map entry is in use (by lufs_read_strategy()) 7880Sstevel@tonic-gate * and if so reset the age bit in the list, 7890Sstevel@tonic-gate * upgrade the lock, and try again 7900Sstevel@tonic-gate */ 7910Sstevel@tonic-gate if (me->me_flags & ME_AGE) { 7920Sstevel@tonic-gate for (me = age; me; me = age) { 7930Sstevel@tonic-gate age = me->me_agenext; 7940Sstevel@tonic-gate me->me_flags &= ~ME_AGE; 7950Sstevel@tonic-gate } 7960Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 7970Sstevel@tonic-gate return (1); /* failure */ 7980Sstevel@tonic-gate } else { 7990Sstevel@tonic-gate /* add mapentry to age ordered list */ 8000Sstevel@tonic-gate 
logmap_list_age(&age, me); 8010Sstevel@tonic-gate } 8020Sstevel@tonic-gate } 8030Sstevel@tonic-gate if (!age) { 8040Sstevel@tonic-gate goto out; 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate /* 8080Sstevel@tonic-gate * Mark the deltas as being rolled. 8090Sstevel@tonic-gate */ 8100Sstevel@tonic-gate for (me = age; me; me = me->me_agenext) { 8110Sstevel@tonic-gate me->me_flags |= ME_ROLL; 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate 8140Sstevel@tonic-gate /* 8150Sstevel@tonic-gate * Test if all deltas are covered by one valid roll buffer 8160Sstevel@tonic-gate */ 8170Sstevel@tonic-gate crb = age->me_crb; 8180Sstevel@tonic-gate if (crb && !(crb->c_invalid)) { 8190Sstevel@tonic-gate for (me = age; me; me = me->me_agenext) { 8200Sstevel@tonic-gate if (me->me_crb != crb) { 8210Sstevel@tonic-gate crb = NULL; 8220Sstevel@tonic-gate break; 8230Sstevel@tonic-gate } 8240Sstevel@tonic-gate } 8250Sstevel@tonic-gate rbp->rb_crb = crb; 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate out: 8280Sstevel@tonic-gate rbp->rb_age = age; 8290Sstevel@tonic-gate 8300Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 8310Sstevel@tonic-gate 8320Sstevel@tonic-gate ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) || 8330Sstevel@tonic-gate logmap_logscan_debug(logmap, age)); 8340Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock)); 8350Sstevel@tonic-gate return (0); /* success */ 8360Sstevel@tonic-gate } 8370Sstevel@tonic-gate 8380Sstevel@tonic-gate void 8390Sstevel@tonic-gate logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age) 8400Sstevel@tonic-gate { 8410Sstevel@tonic-gate mapentry_t *me; 8420Sstevel@tonic-gate 8430Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock)); 8440Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 8450Sstevel@tonic-gate for (me = age; me; me = age) { 8460Sstevel@tonic-gate age = me->me_agenext; 8470Sstevel@tonic-gate me->me_flags &= ~ME_AGE; 8480Sstevel@tonic-gate } 8490Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 
8500Sstevel@tonic-gate } 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate void 8530Sstevel@tonic-gate logmap_list_put(mt_map_t *mtm, mapentry_t *age) 8540Sstevel@tonic-gate { 8550Sstevel@tonic-gate mapentry_t *me; 8560Sstevel@tonic-gate 8570Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock)); 8580Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 8590Sstevel@tonic-gate for (me = age; me; me = age) { 8600Sstevel@tonic-gate age = me->me_agenext; 8610Sstevel@tonic-gate me->me_flags &= ~ME_AGE; 8620Sstevel@tonic-gate } 8630Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 8640Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock); 8650Sstevel@tonic-gate } 8660Sstevel@tonic-gate 8670Sstevel@tonic-gate #define UFS_RW_BALANCE 2 8680Sstevel@tonic-gate int ufs_rw_balance = UFS_RW_BALANCE; 8690Sstevel@tonic-gate 8700Sstevel@tonic-gate /* 8710Sstevel@tonic-gate * Check if we need to read the master. 8720Sstevel@tonic-gate * The master does not need to be read if the log deltas to the 8730Sstevel@tonic-gate * block are for one contiguous set of full disk sectors. 8740Sstevel@tonic-gate * Both cylinder group bit maps DT_CG (8K); directory entries (512B); 8750Sstevel@tonic-gate * and possibly others should not require master disk reads. 8760Sstevel@tonic-gate * Calculate the sector map for writing later. 
8770Sstevel@tonic-gate */ 8780Sstevel@tonic-gate int 8790Sstevel@tonic-gate logmap_setup_read(mapentry_t *age, rollbuf_t *rbp) 8800Sstevel@tonic-gate { 8810Sstevel@tonic-gate offset_t mof; 8820Sstevel@tonic-gate crb_t *crb; 8830Sstevel@tonic-gate mapentry_t *me; 8840Sstevel@tonic-gate int32_t nb; 8850Sstevel@tonic-gate int i; 8860Sstevel@tonic-gate int start_sec, end_sec; 8870Sstevel@tonic-gate int read_needed = 0; 8880Sstevel@tonic-gate int all_inodes = 1; 8890Sstevel@tonic-gate int first_sec = INT_MAX; 8900Sstevel@tonic-gate int last_sec = -1; 8910Sstevel@tonic-gate rbsecmap_t secmap = 0; 8920Sstevel@tonic-gate 8930Sstevel@tonic-gate /* LINTED: warning: logical expression always true: op "||" */ 8940Sstevel@tonic-gate ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY)); 8950Sstevel@tonic-gate 8960Sstevel@tonic-gate for (me = age; me; me = me->me_agenext) { 8970Sstevel@tonic-gate crb = me->me_crb; 8980Sstevel@tonic-gate if (crb) { 8990Sstevel@tonic-gate nb = crb->c_nb; 9000Sstevel@tonic-gate mof = crb->c_mof; 9010Sstevel@tonic-gate } else { 9020Sstevel@tonic-gate nb = me->me_nb; 9030Sstevel@tonic-gate mof = me->me_mof; 9040Sstevel@tonic-gate } 9050Sstevel@tonic-gate 9060Sstevel@tonic-gate /* 9070Sstevel@tonic-gate * If the delta is not sector aligned then 9080Sstevel@tonic-gate * read the whole block. 9090Sstevel@tonic-gate */ 9100Sstevel@tonic-gate if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) { 9110Sstevel@tonic-gate read_needed = 1; 9120Sstevel@tonic-gate } 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate /* Set sector map used in the MAPBLOCKSIZE block. 
*/ 9150Sstevel@tonic-gate start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT; 9160Sstevel@tonic-gate end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT); 9170Sstevel@tonic-gate for (i = start_sec; i <= end_sec; i++) { 9180Sstevel@tonic-gate secmap |= UINT16_C(1) << i; 9190Sstevel@tonic-gate } 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate if (me->me_dt != DT_INODE) { 9220Sstevel@tonic-gate all_inodes = 0; 9230Sstevel@tonic-gate } 9240Sstevel@tonic-gate if (start_sec < first_sec) { 9250Sstevel@tonic-gate first_sec = start_sec; 9260Sstevel@tonic-gate } 9270Sstevel@tonic-gate if (end_sec > last_sec) { 9280Sstevel@tonic-gate last_sec = end_sec; 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate } 9310Sstevel@tonic-gate 9320Sstevel@tonic-gate ASSERT(secmap); 9330Sstevel@tonic-gate ASSERT(first_sec != INT_MAX); 9340Sstevel@tonic-gate ASSERT(last_sec != -1); 9350Sstevel@tonic-gate 9360Sstevel@tonic-gate if (all_inodes) { 9370Sstevel@tonic-gate /* 9380Sstevel@tonic-gate * Here we have a tradeoff choice. It must be better to 9390Sstevel@tonic-gate * do 2 writes * in the same MAPBLOCKSIZE chunk, than a 9400Sstevel@tonic-gate * read and a write. But what about 3 or more writes, versus 9410Sstevel@tonic-gate * a read+write? * Where is the cut over? It will depend on 9420Sstevel@tonic-gate * the track caching, scsi driver and other activity. 9430Sstevel@tonic-gate * A unpublished tunable is defined (ufs_rw_balance) that 9440Sstevel@tonic-gate * currently defaults to 2. 
9450Sstevel@tonic-gate */ 9460Sstevel@tonic-gate if (!read_needed) { 9470Sstevel@tonic-gate int count = 0, gap = 0; 9480Sstevel@tonic-gate int sector_set; /* write needed to this sector */ 9490Sstevel@tonic-gate 9500Sstevel@tonic-gate /* Count the gaps (every 1 to 0 transation) */ 9510Sstevel@tonic-gate for (i = first_sec + 1; i < last_sec; i++) { 9520Sstevel@tonic-gate sector_set = secmap & (UINT16_C(1) << i); 9530Sstevel@tonic-gate if (!gap && !sector_set) { 9540Sstevel@tonic-gate gap = 1; 9550Sstevel@tonic-gate count++; 9560Sstevel@tonic-gate if (count > ufs_rw_balance) { 9570Sstevel@tonic-gate read_needed = 1; 9580Sstevel@tonic-gate break; 9590Sstevel@tonic-gate } 9600Sstevel@tonic-gate } else if (gap && sector_set) { 9610Sstevel@tonic-gate gap = 0; 9620Sstevel@tonic-gate } 9630Sstevel@tonic-gate } 9640Sstevel@tonic-gate } 9650Sstevel@tonic-gate 9660Sstevel@tonic-gate /* 9670Sstevel@tonic-gate * Inodes commonly make up the majority (~85%) of deltas. 9680Sstevel@tonic-gate * They cannot contain embedded user data, so its safe to 9690Sstevel@tonic-gate * read and write them all in one IO. 9700Sstevel@tonic-gate * But for directory entries, shadow inode data, and 9710Sstevel@tonic-gate * quota record data the user data fragments can be embedded 9720Sstevel@tonic-gate * betwen those metadata, and so its not safe to read, modify 9730Sstevel@tonic-gate * then write the entire range as user asynchronous user data 9740Sstevel@tonic-gate * writes could get overwritten with old data. 9750Sstevel@tonic-gate * Thus we have to create a segment map of meta data that 9760Sstevel@tonic-gate * needs to get written. 9770Sstevel@tonic-gate * 9780Sstevel@tonic-gate * If user data was logged then this issue would go away. 
9790Sstevel@tonic-gate */ 9800Sstevel@tonic-gate if (read_needed) { 9810Sstevel@tonic-gate for (i = first_sec + 1; i < last_sec; i++) { 9820Sstevel@tonic-gate secmap |= (UINT16_C(1) << i); 9830Sstevel@tonic-gate } 9840Sstevel@tonic-gate } 9850Sstevel@tonic-gate } 9860Sstevel@tonic-gate rbp->rb_secmap = secmap; 9870Sstevel@tonic-gate return (read_needed); 9880Sstevel@tonic-gate } 9890Sstevel@tonic-gate 9900Sstevel@tonic-gate /* 9910Sstevel@tonic-gate * Abort the load of a set of log map delta's. 9920Sstevel@tonic-gate * ie, 9930Sstevel@tonic-gate * Clear out all mapentries on this unit's log map 9940Sstevel@tonic-gate * which have a tid (transaction id) equal to the 9950Sstevel@tonic-gate * parameter tid. Walk the cancel list, taking everything 9960Sstevel@tonic-gate * off it, too. 9970Sstevel@tonic-gate */ 9980Sstevel@tonic-gate static void 9990Sstevel@tonic-gate logmap_abort(ml_unit_t *ul, uint32_t tid) 10000Sstevel@tonic-gate { 10010Sstevel@tonic-gate struct mt_map *mtm = ul->un_logmap; /* Log map */ 10020Sstevel@tonic-gate mapentry_t *me, 10030Sstevel@tonic-gate **mep; 10040Sstevel@tonic-gate int i; 10050Sstevel@tonic-gate 10060Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 10070Sstevel@tonic-gate map_check_linkage(mtm)); 10080Sstevel@tonic-gate 10090Sstevel@tonic-gate /* 10100Sstevel@tonic-gate * wait for any outstanding reads to finish; lock out future reads 10110Sstevel@tonic-gate */ 10120Sstevel@tonic-gate rw_enter(&mtm->mtm_rwlock, RW_WRITER); 10130Sstevel@tonic-gate 10140Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 10150Sstevel@tonic-gate /* Take everything off cancel list */ 10160Sstevel@tonic-gate while ((me = mtm->mtm_cancel) != NULL) { 10170Sstevel@tonic-gate mtm->mtm_cancel = me->me_cancel; 10180Sstevel@tonic-gate me->me_flags &= ~ME_CANCEL; 10190Sstevel@tonic-gate me->me_cancel = NULL; 10200Sstevel@tonic-gate } 10210Sstevel@tonic-gate 10220Sstevel@tonic-gate /* 10230Sstevel@tonic-gate * Now take out all mapentries with 
current tid, and committid 10240Sstevel@tonic-gate * as this function is called from logmap_logscan and logmap_commit 10250Sstevel@tonic-gate * When it is called from logmap_logscan mtm_tid == mtm_committid 10260Sstevel@tonic-gate * But when logmap_abort is called from logmap_commit it is 10270Sstevel@tonic-gate * because the log errored when trying to write the commit record, 10280Sstevel@tonic-gate * after the async ops have been allowed to start in top_end_sync. 10290Sstevel@tonic-gate * So we also need to remove all mapentries from the transaction whose 10300Sstevel@tonic-gate * commit failed. 10310Sstevel@tonic-gate */ 10320Sstevel@tonic-gate for (i = 0; i < mtm->mtm_nhash; i++) { 10330Sstevel@tonic-gate mep = &mtm->mtm_hash[i]; 10340Sstevel@tonic-gate while ((me = *mep) != NULL) { 10350Sstevel@tonic-gate if (me->me_tid == tid || 10360Sstevel@tonic-gate me->me_tid == mtm->mtm_committid) { 10370Sstevel@tonic-gate *mep = me->me_hash; 10380Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 10390Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 10400Sstevel@tonic-gate if (!(me->me_flags & ME_USER)) { 10410Sstevel@tonic-gate mtm->mtm_nme--; 10420Sstevel@tonic-gate } 10430Sstevel@tonic-gate CRB_RELE(me); 10440Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 10450Sstevel@tonic-gate continue; 10460Sstevel@tonic-gate } 10470Sstevel@tonic-gate mep = &me->me_hash; 10480Sstevel@tonic-gate } 10490Sstevel@tonic-gate } 10500Sstevel@tonic-gate 10510Sstevel@tonic-gate if (!(ul->un_flags & LDL_SCAN)) 10520Sstevel@tonic-gate mtm->mtm_flags |= MTM_CANCELED; 10530Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 10540Sstevel@tonic-gate mtm->mtm_dirty = 0; 10550Sstevel@tonic-gate mtm->mtm_nmet = 0; 10560Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock); 10570Sstevel@tonic-gate 10580Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 10590Sstevel@tonic-gate map_check_linkage(mtm)); 10600Sstevel@tonic-gate } 10610Sstevel@tonic-gate 10620Sstevel@tonic-gate 
static void 10630Sstevel@tonic-gate logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me) 10640Sstevel@tonic-gate { 10650Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 10660Sstevel@tonic-gate 10670Sstevel@tonic-gate while (!ldl_has_space(ul, me)) { 10680Sstevel@tonic-gate ASSERT(!(ul->un_flags & LDL_NOROLL)); 10690Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex); 10700Sstevel@tonic-gate logmap_forceroll(mtm); 10710Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex); 10720Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) 10730Sstevel@tonic-gate break; 10740Sstevel@tonic-gate } 10750Sstevel@tonic-gate 10760Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 10770Sstevel@tonic-gate } 10780Sstevel@tonic-gate 10790Sstevel@tonic-gate /* 10800Sstevel@tonic-gate * put a list of deltas into a logmap 10810Sstevel@tonic-gate * If va == NULL, don't write to the log. 10820Sstevel@tonic-gate */ 10830Sstevel@tonic-gate void 10840Sstevel@tonic-gate logmap_add( 10850Sstevel@tonic-gate ml_unit_t *ul, 10860Sstevel@tonic-gate char *va, /* Ptr to buf w/deltas & data */ 10870Sstevel@tonic-gate offset_t vamof, /* Offset on master of buf start */ 10880Sstevel@tonic-gate mapentry_t *melist) /* Entries to add */ 10890Sstevel@tonic-gate { 10900Sstevel@tonic-gate offset_t mof; 10910Sstevel@tonic-gate off_t nb; 10920Sstevel@tonic-gate mapentry_t *me; 10930Sstevel@tonic-gate mapentry_t **mep; 10940Sstevel@tonic-gate mapentry_t **savmep; 10950Sstevel@tonic-gate uint32_t tid; 10960Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 10970Sstevel@tonic-gate 10980Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex); 10990Sstevel@tonic-gate if (va) 11000Sstevel@tonic-gate logmap_wait_space(mtm, ul, melist); 11010Sstevel@tonic-gate 11020Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 11030Sstevel@tonic-gate map_check_linkage(mtm)); 11040Sstevel@tonic-gate 11050Sstevel@tonic-gate mtm->mtm_ref = 1; 11060Sstevel@tonic-gate mtm->mtm_dirty++; 11070Sstevel@tonic-gate 
tid = mtm->mtm_tid; 11080Sstevel@tonic-gate while (melist) { 11090Sstevel@tonic-gate mof = melist->me_mof; 11100Sstevel@tonic-gate nb = melist->me_nb; 11110Sstevel@tonic-gate 11120Sstevel@tonic-gate /* 11130Sstevel@tonic-gate * search for overlaping entries 11140Sstevel@tonic-gate */ 11150Sstevel@tonic-gate savmep = mep = MAP_HASH(mof, mtm); 11160Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 11170Sstevel@tonic-gate while ((me = *mep) != 0) { 11180Sstevel@tonic-gate /* 11190Sstevel@tonic-gate * Data consumes old map entry; cancel map entry. 11200Sstevel@tonic-gate * Take care when we replace an old map entry 11210Sstevel@tonic-gate * which carries quota information with a newer entry 11220Sstevel@tonic-gate * which does not. In that case the push function 11230Sstevel@tonic-gate * would not be called to clean up the dquot structure. 11240Sstevel@tonic-gate * This would be found later by invalidatedq() causing 11250Sstevel@tonic-gate * a panic when the filesystem in unmounted. 11260Sstevel@tonic-gate * We clean up the dquot manually and then replace 11270Sstevel@tonic-gate * the map entry. 11280Sstevel@tonic-gate */ 11290Sstevel@tonic-gate if (MEwithinDATA(me, mof, nb) && 11300Sstevel@tonic-gate ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) { 11310Sstevel@tonic-gate if (tid == me->me_tid && 11320Sstevel@tonic-gate ((me->me_flags & ME_AGE) == 0)) { 11330Sstevel@tonic-gate *mep = me->me_hash; 11340Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 11350Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 11360Sstevel@tonic-gate ASSERT(!(me->me_flags & ME_USER)); 11370Sstevel@tonic-gate mtm->mtm_nme--; 11380Sstevel@tonic-gate /* 11390Sstevel@tonic-gate * Special case if the mapentry 11400Sstevel@tonic-gate * carries a dquot and a push function. 11410Sstevel@tonic-gate * We have to clean up the quota info 11420Sstevel@tonic-gate * before replacing the mapentry. 
11430Sstevel@tonic-gate */ 11440Sstevel@tonic-gate if (me->me_dt == DT_QR) 11450Sstevel@tonic-gate HANDLE_DQUOT(me, melist); 11460Sstevel@tonic-gate 11470Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 11480Sstevel@tonic-gate continue; 11490Sstevel@tonic-gate } 11500Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel; 11510Sstevel@tonic-gate mtm->mtm_cancel = me; 11520Sstevel@tonic-gate me->me_flags |= ME_CANCEL; 11530Sstevel@tonic-gate } 11540Sstevel@tonic-gate mep = &(*mep)->me_hash; 11550Sstevel@tonic-gate } 11560Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 11570Sstevel@tonic-gate 11580Sstevel@tonic-gate /* 11590Sstevel@tonic-gate * remove from list 11600Sstevel@tonic-gate */ 11610Sstevel@tonic-gate me = melist; 11620Sstevel@tonic-gate melist = melist->me_hash; 11630Sstevel@tonic-gate me->me_flags &= ~ME_LIST; 11640Sstevel@tonic-gate /* 11650Sstevel@tonic-gate * If va != NULL, put in the log. 11660Sstevel@tonic-gate */ 11670Sstevel@tonic-gate if (va) 11680Sstevel@tonic-gate ldl_write(ul, va, vamof, me); 11690Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 11700Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 11710Sstevel@tonic-gate continue; 11720Sstevel@tonic-gate } 11730Sstevel@tonic-gate ASSERT((va == NULL) || 11740Sstevel@tonic-gate ((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) || 11750Sstevel@tonic-gate map_check_ldl_write(ul, va, vamof, me)); 11760Sstevel@tonic-gate 11770Sstevel@tonic-gate /* 11780Sstevel@tonic-gate * put on hash 11790Sstevel@tonic-gate */ 11800Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 11810Sstevel@tonic-gate me->me_hash = *savmep; 11820Sstevel@tonic-gate *savmep = me; 11830Sstevel@tonic-gate me->me_next = (mapentry_t *)mtm; 11840Sstevel@tonic-gate me->me_prev = mtm->mtm_prev; 11850Sstevel@tonic-gate mtm->mtm_prev->me_next = me; 11860Sstevel@tonic-gate mtm->mtm_prev = me; 11870Sstevel@tonic-gate me->me_flags |= ME_HASH; 11880Sstevel@tonic-gate me->me_tid = tid; 11890Sstevel@tonic-gate me->me_age = mtm->mtm_age++; 
11900Sstevel@tonic-gate mtm->mtm_nme++; 11910Sstevel@tonic-gate mtm->mtm_nmet++; 11920Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 11930Sstevel@tonic-gate } 11940Sstevel@tonic-gate 11950Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 11960Sstevel@tonic-gate map_check_linkage(mtm)); 11970Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex); 11980Sstevel@tonic-gate } 11990Sstevel@tonic-gate 12000Sstevel@tonic-gate /* 12010Sstevel@tonic-gate * Add the delta(s) into the log. 12020Sstevel@tonic-gate * Create one cached roll buffer logmap entry, and reference count the 12030Sstevel@tonic-gate * number of mapentries refering to it. 12040Sstevel@tonic-gate * Cancel previous logmap entries. 12050Sstevel@tonic-gate * logmap_add is tolerant of failure to allocate a cached roll buffer. 12060Sstevel@tonic-gate */ 12070Sstevel@tonic-gate void 12080Sstevel@tonic-gate logmap_add_buf( 12090Sstevel@tonic-gate ml_unit_t *ul, 12100Sstevel@tonic-gate char *va, /* Ptr to buf w/deltas & data */ 12110Sstevel@tonic-gate offset_t bufmof, /* Offset on master of buf start */ 12120Sstevel@tonic-gate mapentry_t *melist, /* Entries to add */ 12130Sstevel@tonic-gate caddr_t buf, /* Buffer containing delta(s) */ 12140Sstevel@tonic-gate uint32_t bufsz) /* Size of buf */ 12150Sstevel@tonic-gate { 12160Sstevel@tonic-gate offset_t mof; 12170Sstevel@tonic-gate offset_t vamof = bufmof + (va - buf); 12180Sstevel@tonic-gate off_t nb; 12190Sstevel@tonic-gate mapentry_t *me; 12200Sstevel@tonic-gate mapentry_t **mep; 12210Sstevel@tonic-gate mapentry_t **savmep; 12220Sstevel@tonic-gate uint32_t tid; 12230Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 12240Sstevel@tonic-gate crb_t *crb; 12250Sstevel@tonic-gate crb_t *crbsav = NULL; 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate ASSERT((bufsz & DEV_BMASK) == 0); 12280Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex); 12290Sstevel@tonic-gate logmap_wait_space(mtm, ul, melist); 12300Sstevel@tonic-gate 12310Sstevel@tonic-gate 
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 12320Sstevel@tonic-gate map_check_linkage(mtm)); 12330Sstevel@tonic-gate 12340Sstevel@tonic-gate mtm->mtm_ref = 1; 12350Sstevel@tonic-gate mtm->mtm_dirty++; 12360Sstevel@tonic-gate tid = mtm->mtm_tid; 12370Sstevel@tonic-gate while (melist) { 12380Sstevel@tonic-gate mof = melist->me_mof; 12390Sstevel@tonic-gate nb = melist->me_nb; 12400Sstevel@tonic-gate 12410Sstevel@tonic-gate /* 12420Sstevel@tonic-gate * search for overlapping entries 12430Sstevel@tonic-gate */ 12440Sstevel@tonic-gate savmep = mep = MAP_HASH(mof, mtm); 12450Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 12460Sstevel@tonic-gate while ((me = *mep) != 0) { 12470Sstevel@tonic-gate /* 12480Sstevel@tonic-gate * Data consumes old map entry; cancel map entry. 12490Sstevel@tonic-gate * Take care when we replace an old map entry 12500Sstevel@tonic-gate * which carries quota information with a newer entry 12510Sstevel@tonic-gate * which does not. In that case the push function 12520Sstevel@tonic-gate * would not be called to clean up the dquot structure. 12530Sstevel@tonic-gate * This would be found later by invalidatedq() causing 12540Sstevel@tonic-gate * a panic when the filesystem in unmounted. 12550Sstevel@tonic-gate * We clean up the dquot manually and then replace 12560Sstevel@tonic-gate * the map entry. 
12570Sstevel@tonic-gate */ 12580Sstevel@tonic-gate crb = me->me_crb; 12590Sstevel@tonic-gate if (MEwithinDATA(me, mof, nb) && 12600Sstevel@tonic-gate ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) { 12610Sstevel@tonic-gate if (tid == me->me_tid && 12620Sstevel@tonic-gate ((me->me_flags & ME_AGE) == 0)) { 12630Sstevel@tonic-gate *mep = me->me_hash; 12640Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 12650Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 12660Sstevel@tonic-gate ASSERT(!(me->me_flags & ME_USER)); 12670Sstevel@tonic-gate mtm->mtm_nme--; 12680Sstevel@tonic-gate /* 12690Sstevel@tonic-gate * Special case if the mapentry 12700Sstevel@tonic-gate * carries a dquot and a push function. 12710Sstevel@tonic-gate * We have to clean up the quota info 12720Sstevel@tonic-gate * before replacing the mapentry. 12730Sstevel@tonic-gate */ 12740Sstevel@tonic-gate if (me->me_dt == DT_QR) 12750Sstevel@tonic-gate HANDLE_DQUOT(me, melist); 12760Sstevel@tonic-gate 12770Sstevel@tonic-gate /* 12780Sstevel@tonic-gate * If this soon to be deleted mapentry 12790Sstevel@tonic-gate * has a suitable roll buffer then 12800Sstevel@tonic-gate * re-use it. 
12810Sstevel@tonic-gate */ 12820Sstevel@tonic-gate if (crb && (--crb->c_refcnt == 0)) { 12830Sstevel@tonic-gate if (crbsav || 12840Sstevel@tonic-gate (crb->c_nb != bufsz)) { 12850Sstevel@tonic-gate CRB_FREE(crb, me); 12860Sstevel@tonic-gate } else { 12870Sstevel@tonic-gate bcopy(buf, crb->c_buf, 12880Sstevel@tonic-gate bufsz); 12890Sstevel@tonic-gate crb->c_invalid = 0; 12900Sstevel@tonic-gate crb->c_mof = bufmof; 12910Sstevel@tonic-gate crbsav = crb; 12920Sstevel@tonic-gate me->me_crb = NULL; 12930Sstevel@tonic-gate } 12940Sstevel@tonic-gate } 12950Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 12960Sstevel@tonic-gate continue; 12970Sstevel@tonic-gate } 12980Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel; 12990Sstevel@tonic-gate mtm->mtm_cancel = me; 13000Sstevel@tonic-gate me->me_flags |= ME_CANCEL; 13010Sstevel@tonic-gate } 13020Sstevel@tonic-gate 13030Sstevel@tonic-gate /* 13040Sstevel@tonic-gate * Inode deltas within the same fs block come 13050Sstevel@tonic-gate * in individually as separate calls to logmap_add(). 13060Sstevel@tonic-gate * All others come in as one call. So check for an 13070Sstevel@tonic-gate * existing entry where we can re-use the crb. 13080Sstevel@tonic-gate */ 13090Sstevel@tonic-gate if ((me->me_dt == DT_INODE) && (tid == me->me_tid) && 13100Sstevel@tonic-gate !crbsav && crb && 13110Sstevel@tonic-gate WITHIN(mof, nb, crb->c_mof, crb->c_nb)) { 13120Sstevel@tonic-gate ASSERT(crb->c_mof == bufmof); 13130Sstevel@tonic-gate ASSERT(crb->c_nb == bufsz); 13140Sstevel@tonic-gate bcopy(buf, crb->c_buf, bufsz); 13150Sstevel@tonic-gate crbsav = crb; 13160Sstevel@tonic-gate } 13170Sstevel@tonic-gate mep = &(*mep)->me_hash; 13180Sstevel@tonic-gate } 13190Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 13200Sstevel@tonic-gate 13210Sstevel@tonic-gate /* 13220Sstevel@tonic-gate * If we don't already have a crb then allocate one 13230Sstevel@tonic-gate * and copy the incoming buffer. 
Only do this once 13240Sstevel@tonic-gate * for all the incoming deltas. 13250Sstevel@tonic-gate */ 13260Sstevel@tonic-gate if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) { 13270Sstevel@tonic-gate /* 13280Sstevel@tonic-gate * Only use a cached roll buffer if we 13290Sstevel@tonic-gate * have enough memory, and check for failures. 13300Sstevel@tonic-gate */ 13310Sstevel@tonic-gate if (((ufs_crb_size + bufsz) < ufs_crb_limit) && 13320Sstevel@tonic-gate (kmem_avail() > bufsz)) { 13330Sstevel@tonic-gate crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP); 13340Sstevel@tonic-gate } else { 13350Sstevel@tonic-gate ufs_crb_alloc_fails++; 13360Sstevel@tonic-gate } 13370Sstevel@tonic-gate if (crbsav) { 13380Sstevel@tonic-gate crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP); 13390Sstevel@tonic-gate if (crbsav->c_buf) { 13400Sstevel@tonic-gate atomic_add_64(&ufs_crb_size, 13410Sstevel@tonic-gate (uint64_t)bufsz); 13420Sstevel@tonic-gate if (ufs_crb_size > ufs_crb_max_size) { 13430Sstevel@tonic-gate ufs_crb_max_size = ufs_crb_size; 13440Sstevel@tonic-gate } 13450Sstevel@tonic-gate bcopy(buf, crbsav->c_buf, bufsz); 13460Sstevel@tonic-gate crbsav->c_nb = bufsz; 13470Sstevel@tonic-gate crbsav->c_refcnt = 0; 13480Sstevel@tonic-gate crbsav->c_invalid = 0; 13490Sstevel@tonic-gate ASSERT((bufmof & DEV_BMASK) == 0); 13500Sstevel@tonic-gate crbsav->c_mof = bufmof; 13510Sstevel@tonic-gate } else { 13520Sstevel@tonic-gate kmem_free(crbsav, sizeof (crb_t)); 13530Sstevel@tonic-gate crbsav = NULL; 13540Sstevel@tonic-gate } 13550Sstevel@tonic-gate } 13560Sstevel@tonic-gate } 13570Sstevel@tonic-gate 13580Sstevel@tonic-gate /* 13590Sstevel@tonic-gate * remove from list 13600Sstevel@tonic-gate */ 13610Sstevel@tonic-gate me = melist; 13620Sstevel@tonic-gate melist = melist->me_hash; 13630Sstevel@tonic-gate me->me_flags &= ~ME_LIST; 13640Sstevel@tonic-gate me->me_crb = crbsav; 13650Sstevel@tonic-gate if (crbsav) { 13660Sstevel@tonic-gate crbsav->c_refcnt++; 13670Sstevel@tonic-gate } 
/*
 * logmap_free_cancel - free up any cancelled deltas.
 *
 * Walks the singly-linked cancel list rooted at *cancelhead, unlinking
 * each entry from the cancel list (and from the logmap hash/age lists if
 * still hashed) and returning it to mapentry_cache.
 *
 * Locking: normally runs with only mtm_mutex.  If an entry is marked
 * ME_AGE (in use by roll-forward or a read), we drop mtm_mutex, take
 * mtm_rwlock as WRITER to wait out the user, and retry from the top
 * (the ME_AGE user holds mtm_rwlock as READER — assumption based on the
 * retry pattern here; confirm against the roll/read paths).
 */
void
logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
{
	int		dolock = 0;	/* 1 once we hold mtm_rwlock WRITER */
	mapentry_t	*me;
	mapentry_t	**mep;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

again:
	if (dolock)
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);

	/*
	 * At EOT, cancel the indicated deltas
	 */
	mutex_enter(&mtm->mtm_mutex);
	if (mtm->mtm_flags & MTM_CANCELED) {
		/* someone else already processed the cancel list */
		mtm->mtm_flags &= ~MTM_CANCELED;
		ASSERT(dolock == 0);
		mutex_exit(&mtm->mtm_mutex);
		return;
	}

	while ((me = *cancelhead) != NULL) {
		/*
		 * roll forward or read collision; wait and try again
		 */
		if (me->me_flags & ME_AGE) {
			ASSERT(dolock == 0);
			mutex_exit(&mtm->mtm_mutex);
			dolock = 1;
			goto again;
		}
		/*
		 * remove from cancel list
		 */
		*cancelhead = me->me_cancel;
		me->me_cancel = NULL;
		me->me_flags &= ~(ME_CANCEL);

		/*
		 * logmap_remove_roll handles ME_ROLL entries later
		 * we leave them around for logmap_iscancel
		 * XXX is this necessary?
		 */
		if (me->me_flags & ME_ROLL)
			continue;

		/*
		 * remove from hash (if necessary)
		 */
		if (me->me_flags & ME_HASH) {
			mep = MAP_HASH(me->me_mof, mtm);
			while (*mep) {
				if (*mep == me) {
					/* unlink from hash chain + age list */
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					me->me_flags &= ~(ME_HASH);
					/*
					 * user-block cancel entries are not
					 * counted in mtm_nme (see
					 * logmap_cancel_delta)
					 */
					if (!(me->me_flags & ME_USER)) {
						mtm->mtm_nme--;
					}
					break;
				} else
					mep = &(*mep)->me_hash;
			}
		}
		/*
		 * put the entry on the free list
		 */
		CRB_RELE(me);
		kmem_cache_free(mapentry_cache, me);
	}
	mutex_exit(&mtm->mtm_mutex);
	if (dolock)
		rw_exit(&mtm->mtm_rwlock);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}
14520Sstevel@tonic-gate */ 14530Sstevel@tonic-gate if (me->me_flags & ME_ROLL) 14540Sstevel@tonic-gate continue; 14550Sstevel@tonic-gate 14560Sstevel@tonic-gate /* 14570Sstevel@tonic-gate * remove from hash (if necessary) 14580Sstevel@tonic-gate */ 14590Sstevel@tonic-gate if (me->me_flags & ME_HASH) { 14600Sstevel@tonic-gate mep = MAP_HASH(me->me_mof, mtm); 14610Sstevel@tonic-gate while (*mep) { 14620Sstevel@tonic-gate if (*mep == me) { 14630Sstevel@tonic-gate *mep = me->me_hash; 14640Sstevel@tonic-gate me->me_next->me_prev = me->me_prev; 14650Sstevel@tonic-gate me->me_prev->me_next = me->me_next; 14660Sstevel@tonic-gate me->me_flags &= ~(ME_HASH); 14670Sstevel@tonic-gate if (!(me->me_flags & ME_USER)) { 14680Sstevel@tonic-gate mtm->mtm_nme--; 14690Sstevel@tonic-gate } 14700Sstevel@tonic-gate break; 14710Sstevel@tonic-gate } else 14720Sstevel@tonic-gate mep = &(*mep)->me_hash; 14730Sstevel@tonic-gate } 14740Sstevel@tonic-gate } 14750Sstevel@tonic-gate /* 14760Sstevel@tonic-gate * put the entry on the free list 14770Sstevel@tonic-gate */ 14780Sstevel@tonic-gate CRB_RELE(me); 14790Sstevel@tonic-gate kmem_cache_free(mapentry_cache, me); 14800Sstevel@tonic-gate } 14810Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 14820Sstevel@tonic-gate if (dolock) 14830Sstevel@tonic-gate rw_exit(&mtm->mtm_rwlock); 14840Sstevel@tonic-gate 14850Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 14860Sstevel@tonic-gate map_check_linkage(mtm)); 14870Sstevel@tonic-gate } 14880Sstevel@tonic-gate 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate void 14910Sstevel@tonic-gate logmap_commit(ml_unit_t *ul, uint32_t tid) 14920Sstevel@tonic-gate { 14930Sstevel@tonic-gate mapentry_t me; 14940Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 14950Sstevel@tonic-gate 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 14980Sstevel@tonic-gate 14990Sstevel@tonic-gate /* 15000Sstevel@tonic-gate * async'ly write a commit rec into the log 
15010Sstevel@tonic-gate */ 15020Sstevel@tonic-gate if (mtm->mtm_dirty) { 15030Sstevel@tonic-gate /* 15040Sstevel@tonic-gate * put commit record into log 15050Sstevel@tonic-gate */ 15060Sstevel@tonic-gate me.me_mof = mtm->mtm_tid; 15070Sstevel@tonic-gate me.me_dt = DT_COMMIT; 15080Sstevel@tonic-gate me.me_nb = 0; 15090Sstevel@tonic-gate me.me_hash = NULL; 15100Sstevel@tonic-gate logmap_wait_space(mtm, ul, &me); 15110Sstevel@tonic-gate ldl_write(ul, NULL, (offset_t)0, &me); 15120Sstevel@tonic-gate ldl_round_commit(ul); 15130Sstevel@tonic-gate 15140Sstevel@tonic-gate /* 15150Sstevel@tonic-gate * abort on error; else reset dirty flag 15160Sstevel@tonic-gate */ 15170Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) 15180Sstevel@tonic-gate logmap_abort(ul, tid); 15190Sstevel@tonic-gate else { 15200Sstevel@tonic-gate mtm->mtm_dirty = 0; 15210Sstevel@tonic-gate mtm->mtm_nmet = 0; 15220Sstevel@tonic-gate mtm->mtm_cfrags = 0; 15230Sstevel@tonic-gate } 15240Sstevel@tonic-gate /* push commit */ 15250Sstevel@tonic-gate ldl_push_commit(ul); 15260Sstevel@tonic-gate } 15270Sstevel@tonic-gate } 15280Sstevel@tonic-gate 15290Sstevel@tonic-gate void 15300Sstevel@tonic-gate logmap_sethead(mt_map_t *mtm, ml_unit_t *ul) 15310Sstevel@tonic-gate { 15320Sstevel@tonic-gate off_t lof; 15330Sstevel@tonic-gate uint32_t tid; 15340Sstevel@tonic-gate mapentry_t *me; 15350Sstevel@tonic-gate 15360Sstevel@tonic-gate /* 15370Sstevel@tonic-gate * move the head forward so the log knows how full it is 15380Sstevel@tonic-gate * Make sure to skip any mapentry whose me_lof is 0, these 15390Sstevel@tonic-gate * are just place holders for DT_CANCELED freed user blocks 15400Sstevel@tonic-gate * for the current moby. 
15410Sstevel@tonic-gate */ 15420Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex); 15430Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 15440Sstevel@tonic-gate me = mtm->mtm_next; 15450Sstevel@tonic-gate while (me != (mapentry_t *)mtm && me->me_lof == 0) { 15460Sstevel@tonic-gate me = me->me_next; 15470Sstevel@tonic-gate } 15480Sstevel@tonic-gate 15490Sstevel@tonic-gate if (me == (mapentry_t *)mtm) 15500Sstevel@tonic-gate lof = -1; 15510Sstevel@tonic-gate else { 15520Sstevel@tonic-gate lof = me->me_lof; 15530Sstevel@tonic-gate tid = me->me_tid; 15540Sstevel@tonic-gate } 15550Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 15560Sstevel@tonic-gate ldl_sethead(ul, lof, tid); 15570Sstevel@tonic-gate if (lof == -1) 15580Sstevel@tonic-gate mtm->mtm_age = 0; 15590Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex); 15600Sstevel@tonic-gate } 15610Sstevel@tonic-gate 15620Sstevel@tonic-gate void 15630Sstevel@tonic-gate logmap_settail(mt_map_t *mtm, ml_unit_t *ul) 15640Sstevel@tonic-gate { 15650Sstevel@tonic-gate off_t lof; 15660Sstevel@tonic-gate size_t nb; 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate /* 15690Sstevel@tonic-gate * set the tail after the logmap_abort 15700Sstevel@tonic-gate */ 15710Sstevel@tonic-gate mutex_enter(&ul->un_log_mutex); 15720Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 15730Sstevel@tonic-gate if (mtm->mtm_prev == (mapentry_t *)mtm) 15740Sstevel@tonic-gate lof = -1; 15750Sstevel@tonic-gate else { 15760Sstevel@tonic-gate /* 15770Sstevel@tonic-gate * set the tail to the end of the last commit 15780Sstevel@tonic-gate */ 15790Sstevel@tonic-gate lof = mtm->mtm_tail_lof; 15800Sstevel@tonic-gate nb = mtm->mtm_tail_nb; 15810Sstevel@tonic-gate } 15820Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 15830Sstevel@tonic-gate ldl_settail(ul, lof, nb); 15840Sstevel@tonic-gate mutex_exit(&ul->un_log_mutex); 15850Sstevel@tonic-gate } 15860Sstevel@tonic-gate 15870Sstevel@tonic-gate /* 15880Sstevel@tonic-gate * when reseting a device; roll the log until every 
15890Sstevel@tonic-gate * delta has been rolled forward 15900Sstevel@tonic-gate */ 15910Sstevel@tonic-gate void 15920Sstevel@tonic-gate logmap_roll_dev(ml_unit_t *ul) 15930Sstevel@tonic-gate { 15940Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 15950Sstevel@tonic-gate mapentry_t *me; 15960Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ul->un_ufsvfs; 15970Sstevel@tonic-gate 15980Sstevel@tonic-gate again: 15990Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 16000Sstevel@tonic-gate map_check_linkage(mtm)); 16010Sstevel@tonic-gate if (ul->un_flags & (LDL_ERROR|LDL_NOROLL)) 16020Sstevel@tonic-gate return; 16030Sstevel@tonic-gate 16040Sstevel@tonic-gate /* 16050Sstevel@tonic-gate * look for deltas 16060Sstevel@tonic-gate */ 16070Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 16080Sstevel@tonic-gate for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) { 16090Sstevel@tonic-gate if (me->me_flags & ME_ROLL) 16100Sstevel@tonic-gate break; 16110Sstevel@tonic-gate if (me->me_tid == mtm->mtm_tid) 16120Sstevel@tonic-gate continue; 16130Sstevel@tonic-gate if (me->me_tid == mtm->mtm_committid) 16140Sstevel@tonic-gate continue; 16150Sstevel@tonic-gate break; 16160Sstevel@tonic-gate } 16170Sstevel@tonic-gate 16180Sstevel@tonic-gate /* 16190Sstevel@tonic-gate * found a delta; kick the roll thread 16200Sstevel@tonic-gate * but only if the thread is running... 
/*
 * logmap_cancel_delta - create a DT_CANCEL mapentry for [mof, mof+nb)
 * and insert it into the logmap hash and cancel list.
 *
 * For metadata the cancel record is also written to the on-disk log.
 * For user data only the in-core entry is created: it prevents the
 * freed block from being reallocated within the current moby, which
 * would corrupt user data if we crashed before the moby committed.
 */
static void
logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
{
	mapentry_t	*me;
	mapentry_t	**mep;
	mt_map_t	*mtm	= ul->un_logmap;
	int		frags;

	/*
	 * map has been referenced and is dirty
	 */
	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;

	/*
	 * get a mapentry
	 */
	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(me, sizeof (mapentry_t));

	/*
	 * initialize cancel record and put in logmap
	 */
	me->me_mof = mof;
	me->me_nb = nb;
	me->me_dt = DT_CANCEL;
	me->me_tid = mtm->mtm_tid;
	me->me_hash = NULL;

	/*
	 * Write delta to log if this delta is for metadata.  If this is not
	 * metadata it is user data and we are just putting a cancel
	 * mapentry into the hash to cancel a user block deletion
	 * in which we do not want the block to be allocated
	 * within this moby.  This cancel entry will prevent the block from
	 * being allocated within the moby and prevent user data corruption
	 * if we happen to crash before this moby is committed.
	 */
	mutex_enter(&ul->un_log_mutex);
	if (metadata) {
		logmap_wait_space(mtm, ul, me);
		ldl_write(ul, NULL, (offset_t)0, me);
		if (ul->un_flags & LDL_ERROR) {
			/* log write failed; discard the entry */
			kmem_cache_free(mapentry_cache, me);
			mutex_exit(&ul->un_log_mutex);
			return;
		}
	}

	/*
	 * put in hash and on cancel list
	 */
	mep = MAP_HASH(mof, mtm);
	mutex_enter(&mtm->mtm_mutex);
	me->me_age = mtm->mtm_age++;
	me->me_hash = *mep;
	*mep = me;
	me->me_next = (mapentry_t *)mtm;
	me->me_prev = mtm->mtm_prev;
	mtm->mtm_prev->me_next = me;
	mtm->mtm_prev = me;
	me->me_cancel = mtm->mtm_cancel;
	mtm->mtm_cancel = me;
	if (metadata) {
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
	} else {
		/* user-data cancel: not counted in mtm_nme */
		me->me_flags = ME_USER;
	}
	me->me_flags |= (ME_HASH|ME_CANCEL);
	if (!(metadata)) {
		/*
		 * account canceled fragments so the commit path can cap
		 * them per moby (see logmap_maxcfrag_commit)
		 */
		frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
		if (frags)
			mtm->mtm_cfrags += numfrags(ul->un_ufsvfs->vfs_fs,
				frags);
	}
	mutex_exit(&mtm->mtm_mutex);

	mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_cancel - cancel entries in a logmap (entries are freed at EOT).
 *
 * Processes [mof, mof+nb) in MAPBLOCKSIZE-aligned chunks.  For each
 * chunk, any overlapping metadata entries are marked ME_CANCEL (and
 * their cached roll buffers invalidated), then a cancel record is put
 * into the log via logmap_cancel_delta().
 */
void
logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
{
	int32_t		hnb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mt_map_t	*mtm	= ul->un_logmap;
	crb_t		*crb;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));

	for (hnb = 0; nb; nb -= hnb, mof += hnb) {
		/* clamp the chunk to the current map block boundary */
		hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (hnb > nb)
			hnb = nb;
		/*
		 * Find overlapping metadata entries.  Don't search through
		 * the hash chains if this is user data because it is only
		 * possible to have overlapping map entries for metadata,
		 * and the search can become expensive for large files.
		 */
		if (metadata) {
			mep = MAP_HASH(mof, mtm);
			mutex_enter(&mtm->mtm_mutex);
			for (me = *mep; me; me = me->me_hash) {
				if (!DATAoverlapME(mof, hnb, me))
					continue;

				ASSERT(MEwithinDATA(me, mof, hnb));

				if ((me->me_flags & ME_CANCEL) == 0) {
					/* put on the cancel list */
					me->me_cancel = mtm->mtm_cancel;
					mtm->mtm_cancel = me;
					me->me_flags |= ME_CANCEL;
					/* stale cached roll buffer */
					crb = me->me_crb;
					if (crb) {
						crb->c_invalid = 1;
					}
				}
			}
			mutex_exit(&mtm->mtm_mutex);
		}

		/*
		 * put a cancel record into the log
		 */
		logmap_cancel_delta(ul, mof, hnb, metadata);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		map_check_linkage(mtm));
}
17430Sstevel@tonic-gate */ 1744*2040Sjkennedy if (metadata) { 1745*2040Sjkennedy mep = MAP_HASH(mof, mtm); 1746*2040Sjkennedy mutex_enter(&mtm->mtm_mutex); 1747*2040Sjkennedy for (me = *mep; me; me = me->me_hash) { 1748*2040Sjkennedy if (!DATAoverlapME(mof, hnb, me)) 1749*2040Sjkennedy continue; 17500Sstevel@tonic-gate 1751*2040Sjkennedy ASSERT(MEwithinDATA(me, mof, hnb)); 17520Sstevel@tonic-gate 1753*2040Sjkennedy if ((me->me_flags & ME_CANCEL) == 0) { 1754*2040Sjkennedy me->me_cancel = mtm->mtm_cancel; 1755*2040Sjkennedy mtm->mtm_cancel = me; 1756*2040Sjkennedy me->me_flags |= ME_CANCEL; 1757*2040Sjkennedy crb = me->me_crb; 1758*2040Sjkennedy if (crb) { 1759*2040Sjkennedy crb->c_invalid = 1; 1760*2040Sjkennedy } 17610Sstevel@tonic-gate } 17620Sstevel@tonic-gate } 1763*2040Sjkennedy mutex_exit(&mtm->mtm_mutex); 17640Sstevel@tonic-gate } 17650Sstevel@tonic-gate 17660Sstevel@tonic-gate /* 17670Sstevel@tonic-gate * put a cancel record into the log 17680Sstevel@tonic-gate */ 17690Sstevel@tonic-gate logmap_cancel_delta(ul, mof, hnb, metadata); 17700Sstevel@tonic-gate } 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) || 17730Sstevel@tonic-gate map_check_linkage(mtm)); 17740Sstevel@tonic-gate } 17750Sstevel@tonic-gate 17760Sstevel@tonic-gate /* 17770Sstevel@tonic-gate * check for overlap w/cancel delta 17780Sstevel@tonic-gate */ 17790Sstevel@tonic-gate int 17800Sstevel@tonic-gate logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb) 17810Sstevel@tonic-gate { 17820Sstevel@tonic-gate off_t hnb; 17830Sstevel@tonic-gate mapentry_t *me; 17840Sstevel@tonic-gate mapentry_t **mep; 17850Sstevel@tonic-gate 17860Sstevel@tonic-gate mutex_enter(&mtm->mtm_mutex); 17870Sstevel@tonic-gate for (hnb = 0; nb; nb -= hnb, mof += hnb) { 17880Sstevel@tonic-gate hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF); 17890Sstevel@tonic-gate if (hnb > nb) 17900Sstevel@tonic-gate hnb = nb; 17910Sstevel@tonic-gate /* 17920Sstevel@tonic-gate * search for dup entry 
17930Sstevel@tonic-gate */ 17940Sstevel@tonic-gate mep = MAP_HASH(mof, mtm); 17950Sstevel@tonic-gate for (me = *mep; me; me = me->me_hash) { 17960Sstevel@tonic-gate if (((me->me_flags & ME_ROLL) == 0) && 17970Sstevel@tonic-gate (me->me_dt != DT_CANCEL)) 17980Sstevel@tonic-gate continue; 17990Sstevel@tonic-gate if (DATAoverlapME(mof, hnb, me)) 18000Sstevel@tonic-gate break; 18010Sstevel@tonic-gate } 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate /* 18040Sstevel@tonic-gate * overlap detected 18050Sstevel@tonic-gate */ 18060Sstevel@tonic-gate if (me) { 18070Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 18080Sstevel@tonic-gate return (1); 18090Sstevel@tonic-gate } 18100Sstevel@tonic-gate } 18110Sstevel@tonic-gate mutex_exit(&mtm->mtm_mutex); 18120Sstevel@tonic-gate return (0); 18130Sstevel@tonic-gate } 18140Sstevel@tonic-gate 18150Sstevel@tonic-gate static int 18160Sstevel@tonic-gate logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp) 18170Sstevel@tonic-gate { 18180Sstevel@tonic-gate mapentry_t *me; 18190Sstevel@tonic-gate int error; 18200Sstevel@tonic-gate mt_map_t *mtm = ul->un_logmap; 18210Sstevel@tonic-gate 18220Sstevel@tonic-gate /* 18230Sstevel@tonic-gate * verify delta header; failure == mediafail 18240Sstevel@tonic-gate */ 18250Sstevel@tonic-gate error = 0; 18260Sstevel@tonic-gate /* delta type */ 18270Sstevel@tonic-gate if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX)) 18280Sstevel@tonic-gate error = EINVAL; 18290Sstevel@tonic-gate if (dp->d_typ == DT_COMMIT) { 18300Sstevel@tonic-gate if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1)) 18310Sstevel@tonic-gate error = EINVAL; 18320Sstevel@tonic-gate } else { 18330Sstevel@tonic-gate /* length of delta */ 18340Sstevel@tonic-gate if ((dp->d_nb < INT32_C(0)) || 18350Sstevel@tonic-gate (dp->d_nb > INT32_C(MAPBLOCKSIZE))) 18360Sstevel@tonic-gate error = EINVAL; 18370Sstevel@tonic-gate 18380Sstevel@tonic-gate /* offset on master device */ 18390Sstevel@tonic-gate if (dp->d_mof < 
INT64_C(0)) 18400Sstevel@tonic-gate error = EINVAL; 18410Sstevel@tonic-gate } 18420Sstevel@tonic-gate 18430Sstevel@tonic-gate if (error) { 18440Sstevel@tonic-gate ldl_seterror(ul, "Error processing ufs log data during scan"); 18450Sstevel@tonic-gate return (error); 18460Sstevel@tonic-gate } 18470Sstevel@tonic-gate 18480Sstevel@tonic-gate /* 18490Sstevel@tonic-gate * process commit record 18500Sstevel@tonic-gate */ 18510Sstevel@tonic-gate if (dp->d_typ == DT_COMMIT) { 18520Sstevel@tonic-gate if (mtm->mtm_dirty) { 18530Sstevel@tonic-gate ASSERT(dp->d_nb == INT32_C(0)); 18540Sstevel@tonic-gate logmap_free_cancel(mtm, &mtm->mtm_cancel); 18550Sstevel@tonic-gate mtm->mtm_dirty = 0; 18560Sstevel@tonic-gate mtm->mtm_nmet = 0; 18570Sstevel@tonic-gate mtm->mtm_tid++; 18580Sstevel@tonic-gate mtm->mtm_committid = mtm->mtm_tid; 18590Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) || 18600Sstevel@tonic-gate logmap_logscan_commit_debug(lof, mtm)); 18610Sstevel@tonic-gate } 18620Sstevel@tonic-gate /* 18630Sstevel@tonic-gate * return #bytes to next sector (next delta header) 18640Sstevel@tonic-gate */ 18650Sstevel@tonic-gate *nbp = ldl_logscan_nbcommit(lof); 18660Sstevel@tonic-gate mtm->mtm_tail_lof = lof; 18670Sstevel@tonic-gate mtm->mtm_tail_nb = *nbp; 18680Sstevel@tonic-gate return (0); 18690Sstevel@tonic-gate } 18700Sstevel@tonic-gate 18710Sstevel@tonic-gate /* 18720Sstevel@tonic-gate * add delta to logmap 18730Sstevel@tonic-gate */ 18740Sstevel@tonic-gate me = kmem_cache_alloc(mapentry_cache, KM_SLEEP); 18750Sstevel@tonic-gate bzero(me, sizeof (mapentry_t)); 18760Sstevel@tonic-gate me->me_lof = lof; 18770Sstevel@tonic-gate me->me_mof = dp->d_mof; 18780Sstevel@tonic-gate me->me_nb = dp->d_nb; 18790Sstevel@tonic-gate me->me_tid = mtm->mtm_tid; 18800Sstevel@tonic-gate me->me_dt = dp->d_typ; 18810Sstevel@tonic-gate me->me_hash = NULL; 18820Sstevel@tonic-gate me->me_flags = (ME_LIST | ME_SCAN); 18830Sstevel@tonic-gate logmap_add(ul, NULL, 0, me); 18840Sstevel@tonic-gate 
switch (dp->d_typ) { 18850Sstevel@tonic-gate case DT_CANCEL: 18860Sstevel@tonic-gate me->me_flags |= ME_CANCEL; 18870Sstevel@tonic-gate me->me_cancel = mtm->mtm_cancel; 18880Sstevel@tonic-gate mtm->mtm_cancel = me; 18890Sstevel@tonic-gate break; 18900Sstevel@tonic-gate default: 18910Sstevel@tonic-gate ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) || 18920Sstevel@tonic-gate logmap_logscan_add_debug(dp, mtm)); 18930Sstevel@tonic-gate break; 18940Sstevel@tonic-gate } 18950Sstevel@tonic-gate 18960Sstevel@tonic-gate sizeofdelta: 18970Sstevel@tonic-gate /* 18980Sstevel@tonic-gate * return #bytes till next delta header 18990Sstevel@tonic-gate */ 19000Sstevel@tonic-gate if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO)) 19010Sstevel@tonic-gate *nbp = 0; 19020Sstevel@tonic-gate else 19030Sstevel@tonic-gate *nbp = dp->d_nb; 19040Sstevel@tonic-gate return (0); 19050Sstevel@tonic-gate } 19060Sstevel@tonic-gate 19070Sstevel@tonic-gate void 19080Sstevel@tonic-gate logmap_logscan(ml_unit_t *ul) 19090Sstevel@tonic-gate { 19100Sstevel@tonic-gate size_t nb, nbd; 19110Sstevel@tonic-gate off_t lof; 19120Sstevel@tonic-gate struct delta delta; 19130Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap; 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap); 19160Sstevel@tonic-gate 19170Sstevel@tonic-gate /* 19180Sstevel@tonic-gate * prepare the log for a logscan 19190Sstevel@tonic-gate */ 19200Sstevel@tonic-gate ldl_logscan_begin(ul); 19210Sstevel@tonic-gate 19220Sstevel@tonic-gate /* 19230Sstevel@tonic-gate * prepare the logmap for a logscan 19240Sstevel@tonic-gate */ 19250Sstevel@tonic-gate (void) map_free_entries(logmap); 19260Sstevel@tonic-gate logmap->mtm_tid = 0; 19270Sstevel@tonic-gate logmap->mtm_committid = UINT32_C(0); 19280Sstevel@tonic-gate logmap->mtm_age = 0; 19290Sstevel@tonic-gate logmap->mtm_dirty = 0; 19300Sstevel@tonic-gate logmap->mtm_ref = 0; 19310Sstevel@tonic-gate 19320Sstevel@tonic-gate /* 
/*
 * _init_map - one-time initialization of the mapentry allocation cache.
 */
void
_init_map(void)
{
	/*
	 * Initialise the mapentry cache.  No constructor or deconstructor
	 * is needed.  Also no reclaim function is supplied as reclaiming
	 * current entries is not possible.
	 */
	mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
	    sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
/*
 * Special case when we replace an old map entry which carries quota
 * information with a newer entry which does not.
 * In that case the push function would not be called to clean up the
 * dquot structure.  This would be found later by invalidatedq() causing
 * a panic when the filesystem in unmounted.
 * We clean up the dquot manually before replacing the map entry.
 *
 * me->me_arg is assumed to point at the struct dquot carried by the
 * old entry.  Both vfs_dqrwlock and dq_lock are taken only if the
 * caller does not already hold them, and released symmetrically.
 */
void
handle_dquot(mapentry_t *me)
{
	int dolock = 0;		/* 1 if we acquired vfs_dqrwlock here */
	int domutex = 0;	/* 1 if we acquired dq_lock here */
	struct dquot *dqp;

	dqp = (struct dquot *)me->me_arg;

	/*
	 * We need vfs_dqrwlock to call dqput()
	 */
	dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
	if (dolock)
		rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);

	domutex = (!MUTEX_HELD(&dqp->dq_lock));
	if (domutex)
		mutex_enter(&dqp->dq_lock);

	/*
	 * Only clean up if the dquot is referenced
	 */
	if (dqp->dq_cnt == 0) {
		/* unreferenced: nothing to release; drop locks and return */
		if (domutex)
			mutex_exit(&dqp->dq_lock);
		if (dolock)
			rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
		return;
	}

	/* clear transaction state and drop the reference */
	dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
	dqput(dqp);

	if (domutex)
		mutex_exit(&dqp->dq_lock);

	if (dolock)
		rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);

}