/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - segment management.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>

/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
	{ "MADV_FREE_hit",	KSTAT_DATA_ULONG },
	{ "MADV_FREE_miss",	KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);

/* #define	PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int	pdebug = 0;
#else
#define	pdebug	0
#endif	/* PDEBUG */

#define	PPRINTF				if (pdebug) printf
#define	PPRINT(x)			PPRINTF(x)
#define	PPRINT1(x, a)			PPRINTF(x, a)
#define	PPRINT2(x, a, b)		PPRINTF(x, a, b)
#define	PPRINT3(x, a, b, c)		PPRINTF(x, a, b, c)
#define	PPRINT4(x, a, b, c, d)		PPRINTF(x, a, b, c, d)
#define	PPRINT5(x, a, b, c, d, e)	PPRINTF(x, a, b, c, d, e)

#define	P_HASHMASK		(p_hashsize - 1)
#define	P_BASESHIFT		6

/*
 * entry in the segment page cache
 */
struct seg_pcache {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_active;			/* active count */
	int p_ref;			/* ref bit */
	size_t p_len;			/* segment length */
	caddr_t p_addr;			/* base address */
	struct seg *p_seg;		/* segment */
	struct page **p_pp;		/* pp shadow list */
	enum seg_rw p_rw;		/* rw */
	uint_t p_flags;			/* bit flags */
	int (*p_callback)(struct seg *, caddr_t, size_t,
	    struct page **, enum seg_rw);
};

struct seg_phash {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_qlen;			/* Q length */
	kmutex_t p_hmutex;		/* protects hash bucket */
};
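
/*
 * A bucket header doubles as the sentinel of its circular doubly-linked
 * list: the code below casts a seg_phash to (struct seg_pcache *), which
 * is safe only because p_hnext and p_hprev are the first two members of
 * both structures. An empty bucket points p_hnext/p_hprev at itself.
 */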

static int seg_preap_time = 20;	/* reclaim every 20 secs */
static int seg_pmaxqlen = 5;	/* max Q length in hash list */
static int seg_ppcount = 5;	/* max # of purges per reclaim interval */
static int seg_plazy = 1;	/* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow;	/* max # of pages that can be cached */
static pgcnt_t seg_plocked;	/* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;

static uint_t seg_pdisable = 0;	/* if not 0, caching temporarily disabled */

static int seg_pupdate_active = 1;	/* background reclaim thread */
static clock_t seg_preap_interval;	/* reap interval in ticks */

static kmutex_t seg_pcache;	/* protects the whole pagelock cache */
static kmutex_t seg_pmem;	/* protects window counter */
static ksema_t seg_psaync_sem;	/* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;

#define	p_hash(seg) \
	(P_HASHMASK & \
	((uintptr_t)(seg) >> P_BASESHIFT))

#define	p_match(pcp, seg, addr, len, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

#define	p_match_pp(pcp, seg, addr, len, pp, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_pp == (pp) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)
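
/*
 * Illustrative sketch (hypothetical driver code, not part of the
 * interfaces below): a segment driver's pagelock entry point is
 * expected to use the cache roughly as
 *
 *	pplist = seg_plookup(seg, addr, len, rw);
 *	if (pplist != NULL)
 *		return (pplist);		cache hit
 *	... lock the pages and build the shadow list pplist ...
 *	(void) seg_pinsert(seg, addr, len, pplist, rw, flags, xx_reclaim);
 *
 * and on pageunlock to call seg_pinactive(seg, addr, len, pplist, rw,
 * xx_reclaim), where xx_reclaim is the driver's real reclaim callback.
 */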

/*
 * lookup an address range in pagelock cache. Return shadow list
 * and bump up active count.
 */
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	/*
	 * Skip pagelock cache, while DR is in progress or
	 * seg_pcache is off.
	 */
	if (seg_pdisable || seg_plazy == 0) {
		return (NULL);
	}

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match(pcp, seg, addr, len, rw)) {
			pcp->p_active++;
			mutex_exit(&hp->p_hmutex);

			PPRINT5("seg_plookup hit: seg %p, addr %p, "
			    "len %lx, count %d, pplist %p \n",
			    (void *)seg, (void *)addr, len, pcp->p_active,
			    (void *)pcp->p_pp);

			return (pcp->p_pp);
		}
	}
	mutex_exit(&hp->p_hmutex);

	PPRINT("seg_plookup miss:\n");

	return (NULL);
}
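
/*
 * A nonzero p_active pins a cache entry: the purge routines never free
 * active entries, so a shadow list returned by seg_plookup() stays
 * valid until the matching seg_pinactive() call drops the count.
 */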

/*
 * mark address range inactive. If the cache is off or the address
 * range is not in the cache we call the segment driver to reclaim
 * the pages. Otherwise just decrement active count and set ref bit.
 */
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		(void) (*callback)(seg, addr, len, pp, rw);
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
			pcp->p_active--;
			ASSERT(pcp->p_active >= 0);
			if (pcp->p_active == 0 && seg_pdisable) {
				int npages;

				ASSERT(callback == pcp->p_callback);
				/* free the entry */
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				mutex_exit(&hp->p_hmutex);
				npages = pcp->p_len >> PAGESHIFT;
				mutex_enter(&seg_pmem);
				seg_plocked -= npages;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					seg_plocked_window -= npages;
				}
				mutex_exit(&seg_pmem);
				kmem_free(pcp, sizeof (struct seg_pcache));
				goto out;
			}
			pcp->p_ref = 1;
			mutex_exit(&hp->p_hmutex);
			return;
		}
	}
	mutex_exit(&hp->p_hmutex);
out:
	(void) (*callback)(seg, addr, len, pp, rw);
}

/*
 * The seg_pinsert_check() is used by segment drivers to predict whether
 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
 */

int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		pgcnt_t npages;

		npages = len >> PAGESHIFT;
		if ((seg_plocked_window + npages) > seg_pwindow) {
			return (SEGP_FAIL);
		}
	}
	return (SEGP_SUCCESS);
}
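
/*
 * Since seg_pinsert_check() peeks at p_qlen and seg_plocked_window
 * without taking the corresponding locks, its answer is only a hint;
 * the race is benign because seg_pinsert() repeats the checks (the
 * window check under seg_pmem) and callers must handle SEGP_FAIL
 * from it anyway.
 */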

/*
 * insert address range with shadow list into pagelock cache. If
 * the cache is off or caching is temporarily disabled or the allowed
 * 'window' is exceeded - return SEGP_FAIL. Otherwise return
 * SEGP_SUCCESS.
 */
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
    size_t, struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	npages = len >> PAGESHIFT;
	mutex_enter(&seg_pmem);
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		seg_plocked_window += npages;
		if (seg_plocked_window > seg_pwindow) {
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem);
			return (SEGP_FAIL);
		}
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem);

	pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
	pcp->p_seg = seg;
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_pp = pp;
	pcp->p_rw = rw;
	pcp->p_callback = callback;
	pcp->p_active = 1;
	pcp->p_flags = flags;

	PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
	    (void *)seg, (void *)addr, len, (void *)pp);

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	hp->p_qlen++;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;
	mutex_exit(&hp->p_hmutex);
	return (SEGP_SUCCESS);
}

/*
 * purge all entries from the pagelock cache if not active
 * and not recently used. Drop all locks and call through
 * the address space into the segment driver to reclaim
 * the pages. This makes sure we get the address space
 * and segment driver locking right.
 */
static void
seg_ppurge_all(int force)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	int purge_count = 0;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;

		/*
		 * While 'force' is set, seg_pasync_thread is not
		 * throttled. This is to speed up flushing of seg_pcache
		 * in preparation for DR.
		 *
		 * In the normal case, when 'force' is not set, we
		 * throttle seg_pasync_thread so that we don't spend
		 * all the time purging the cache.
		 */
		while ((pcp != (struct seg_pcache *)hp) &&
		    (force || (purge_count <= seg_ppcount))) {

			/*
			 * purge entries which are not active and
			 * have not been used recently and
			 * have the SEGP_ASYNC_FLUSH flag.
			 *
			 * In the 'force' case, we ignore the
			 * SEGP_ASYNC_FLUSH flag.
			 */
			if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
				pcp->p_ref = 1;
			if (force)
				pcp->p_ref = 0;
			if (!pcp->p_ref && !pcp->p_active) {
				struct as *as = pcp->p_seg->s_as;

				/*
				 * try to get the readers lock on the address
				 * space before taking out the cache element.
				 * This ensures as_pagereclaim() can actually
				 * call through the address space and free
				 * the pages. If we don't get the lock, just
				 * skip this entry. The pages will be reclaimed
				 * by the segment driver at unmap time.
				 */
				if (AS_LOCK_TRYENTER(as, &as->a_lock,
				    RW_READER)) {
					hp->p_qlen--;
					pcp->p_hprev->p_hnext = pcp->p_hnext;
					pcp->p_hnext->p_hprev = pcp->p_hprev;
					pcp->p_hprev = delcallb_list;
					delcallb_list = pcp;
					purge_count++;
				}
			} else {
				pcp->p_ref = 0;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
		if (!force && purge_count > seg_ppcount)
			break;
	}

	/*
	 * run the delayed callback list. We don't want to hold the
	 * cache lock during a call through the address space.
	 */
	while (delcallb_list != NULL) {
		struct as *as;

		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		as = pcp->p_seg->s_as;

		PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
		    pcp->p_len, pcp->p_rw);
		AS_LOCK_EXIT(as, &as->a_lock);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
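
/*
 * Once an entry has been unlinked from its hash bucket it is private
 * to the purging thread, so its p_hprev field can be reused to chain
 * entries onto the delcallb_list work list that is walked after the
 * bucket lock is dropped; seg_ppurge() below uses the same trick.
 */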

/*
 * Remove cached pages for a segment's entries from the hashtable.
 * The segments are identified by a given client's callback function.
 * This is useful for multiple segments cached on behalf of a dummy
 * segment (ISM/DISM) with a common callback function. The client's
 * callback function may return status indicating that the last
 * segment's entry has been purged. In such a case seg_ppurge_seg()
 * stops searching the hashtable and exits. Otherwise all hashtable
 * entries are scanned.
 */
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp, *npcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;
	int done = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	mutex_enter(&seg_pcache);
	seg_pdisable++;
	mutex_exit(&seg_pcache);

	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {

		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {

			/*
			 * purge entries which are not active
			 */
			npcp = pcp->p_hnext;
			if (!pcp->p_active && pcp->p_callback == callback) {
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;

				if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
				    pcp->p_len, pcp->p_pp, pcp->p_rw)) {
					done = 1;
				}

				npages += pcp->p_len >> PAGESHIFT;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					npages_window +=
					    pcp->p_len >> PAGESHIFT;
				}
				kmem_free(pcp, sizeof (struct seg_pcache));
			}
			pcp = npcp;
			if (done)
				break;
		}
		mutex_exit(&hp->p_hmutex);
		if (done)
			break;
	}

	mutex_enter(&seg_pcache);
	seg_pdisable--;
	mutex_exit(&seg_pcache);

	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

/*
 * purge all entries for a given segment. Since we
 * callback into the segment driver directly for page
 * reclaim the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	if (seg_plazy == 0) {
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	pcp = hp->p_hnext;
	while (pcp != (struct seg_pcache *)hp) {
		if (pcp->p_seg == seg) {
			if (pcp->p_active) {
				break;
			}
			hp->p_qlen--;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
		}
		pcp = pcp->p_hnext;
	}
	mutex_exit(&hp->p_hmutex);
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;

		PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		ASSERT(seg == pcp->p_seg);
		(void) (*pcp->p_callback)(seg, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_rw);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

static void seg_pinit_mem_config(void);

/*
 * setup the pagelock cache
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	int i;
	uint_t physmegs;

	sema_init(&seg_psaync_sem, 0, NULL, SEMA_DEFAULT, NULL);

	mutex_enter(&seg_pcache);
	if (p_hashtab == NULL) {
		physmegs = physmem >> (20 - PAGESHIFT);

		/* If p_hashsize was not set in /etc/system ... */
		if (p_hashsize == 0) {
			/*
			 * Choose p_hashsize based on physmem.
			 */
			if (physmegs < 64) {
				p_hashsize = 64;
			} else if (physmegs < 1024) {
				p_hashsize = 1024;
			} else if (physmegs < 10 * 1024) {
				p_hashsize = 8192;
			} else if (physmegs < 20 * 1024) {
				p_hashsize = 2 * 8192;
				seg_pmaxqlen = 16;
			} else {
				p_hashsize = 128 * 1024;
				seg_pmaxqlen = 128;
			}
		}

		p_hashtab = kmem_zalloc(
		    p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
		for (i = 0; i < p_hashsize; i++) {
			hp = (struct seg_phash *)&p_hashtab[i];
			hp->p_hnext = (struct seg_pcache *)hp;
			hp->p_hprev = (struct seg_pcache *)hp;
			mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
		}
		if (seg_pwindow == 0) {
			if (physmegs < 24) {
				/* don't use cache */
				seg_plazy = 0;
			} else if (physmegs < 64) {
				seg_pwindow = physmem >> 5; /* 3% of memory */
			} else if (physmegs < 10 * 1024) {
				seg_pwindow = physmem >> 3; /* 12% of memory */
			} else {
				seg_pwindow = physmem >> 1;
			}
		}
	}
	mutex_exit(&seg_pcache);

	seg_pinit_mem_config();
}

/*
 * called by pageout if memory is low
 */
void
seg_preap(void)
{
	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plocked == 0 || seg_plazy == 0) {
		return;
	}
	sema_v(&seg_psaync_sem);
}

static void seg_pupdate(void *);
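
/*
 * The reclaim machinery is a simple producer/consumer arrangement:
 * seg_preap() (called from pageout) and the seg_pupdate() timeout
 * both sema_v() seg_psaync_sem, and seg_pasync_thread() below consumes
 * the semaphore, purging the cache once per wakeup. The timeout
 * re-arms itself, so a throttled purge pass runs every
 * seg_preap_interval ticks.
 */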

/*
 * run as a background thread and reclaim pagelock
 * pages which have not been used recently
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;
	kmutex_t pasync_lock;	/* just for CPR stuff */

	mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &pasync_lock,
	    callb_generic_cpr, "seg_pasync");

	if (seg_preap_interval == 0) {
		seg_preap_interval = seg_preap_time * hz;
	} else {
		seg_preap_interval *= hz;
	}
	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, NULL, seg_preap_interval);
	}

	for (;;) {
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		mutex_exit(&pasync_lock);
		sema_p(&seg_psaync_sem);
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
		mutex_exit(&pasync_lock);

		seg_ppurge_all(0);
	}
}

static void
seg_pupdate(void *dummy)
{
	sema_v(&seg_psaync_sem);

	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, dummy, seg_preap_interval);
	}
}

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
	kstat_t *ksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
	    segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(ksp);
	}

	seg_pinit();
}
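
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * a segment creation path is expected to pair seg_alloc() with a
 * driver create routine, and to clean up with seg_free() on failure:
 *
 *	seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	if ((error = xx_create(seg, xx_args)) != 0)
 *		seg_free(seg);		driver never attached s_data
 *
 * where the hypothetical xx_create() fills in seg->s_ops and
 * seg->s_data.
 */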

/*
 * Allocate a segment to cover [base, base+size]
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}

/*
 * Attach a segment to the address space. Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	/*
	 * as_addseg() will add the segment at the appropriate point
	 * in the list. It will return -1 if there is overlap with
	 * an already existing segment.
	 */
	return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment. (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif /* DEBUG */

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Shouldn't have called seg_unmap if mapping isn't yet established */
	ASSERT(seg->s_data != NULL);

	/* Unmap the whole mapping */
#ifdef DEBUG
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#else
	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap() should
 * be called instead.
 */
void
seg_free(struct seg *seg)
{
	register struct as *as = seg->s_as;
	struct seg *tseg = as_removeseg(as, seg);

	ASSERT(tseg == seg);

	/*
	 * If the segment private data field is NULL,
	 * then segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		SEGOP_FREE(seg);

	kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
}

/*
 * Attempt to purge seg_pcache. May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisable flag has been set so at least we won't
 *	cache anymore locks and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result, the worst that will happen is that the
 *	DR code will time out and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t old_plocked;
	int stall_count = 0;

	mutex_enter(&seg_pcache);
	seg_pdisable++;
	ASSERT(seg_pdisable != 0);
	mutex_exit(&seg_pcache);

	/*
	 * Attempt to empty the cache. Terminate if seg_plocked does not
	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
	 */
	while (seg_plocked != 0) {
		old_plocked = seg_plocked;
		seg_ppurge_all(1);
		if (seg_plocked == old_plocked) {
			if (stall_count++ > SEGP_STALL_THRESHOLD) {
				cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
				    " pagelock cache - continuing");
				break;
			}
		} else
			stall_count = 0;
		if (seg_plocked != 0)
			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
	}
	return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	mutex_enter(&seg_pcache);
	ASSERT(seg_pdisable != 0);
	seg_pdisable--;
	mutex_exit(&seg_pcache);
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	seg_p_mem_config_post_add,
	seg_p_mem_config_pre_del,
	seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	/*
	 * Want to catch this in the debug kernel. At run time, if the
	 * callbacks don't get run all will be OK as the disable just makes
	 * it more likely that the pages can be collected.
	 */
	ASSERT(ret == 0);
}

extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

/*
 * Verify that segment is not a shared anonymous segment which reserves
 * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be
 * transferred from one zone to another if any segments are shared. This
 * is because the last process to exit will credit the swap reservation.
 * This could lead to the swap being reserved by one zone, and credited
 * to another.
 */
boolean_t
seg_can_change_zones(struct seg *seg)
{
	struct segvn_data *svd;

	if (seg->s_ops == &segspt_shmops)
		return (B_FALSE);

	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_SHARED &&
		    svd->amp != NULL &&
		    svd->amp->swresv > 0)
			return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Return swap reserved by a segment backing a private mapping.
 */
size_t
seg_swresv(struct seg *seg)
{
	struct segvn_data *svd;
	size_t swap = 0;

	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_PRIVATE && svd->swresv > 0)
			swap = svd->swresv;
	}
	return (swap);
}
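
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * zone swap accounting would total an address space's private
 * reservations roughly as follows, holding the as lock as a reader
 * while iterating the segment list:
 *
 *	size_t swresv = 0;
 *	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 *	for (seg = AS_SEGFIRST(as); seg != NULL;
 *	    seg = AS_SEGNEXT(as, seg))
 *		swresv += seg_swresv(seg);
 *	AS_LOCK_EXIT(as, &as->a_lock);
 */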