/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - segment management.
 */
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>

/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
	{ "MADV_FREE_hit", KSTAT_DATA_ULONG },
	{ "MADV_FREE_miss", KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);

/* #define	PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int pdebug = 0;
#else
#define	pdebug	0
#endif	/* PDEBUG */

#define	PPRINTF				if (pdebug) printf
#define	PPRINT(x)			PPRINTF(x)
#define	PPRINT1(x, a)			PPRINTF(x, a)
#define	PPRINT2(x, a, b)		PPRINTF(x, a, b)
#define	PPRINT3(x, a, b, c)		PPRINTF(x, a, b, c)
#define	PPRINT4(x, a, b, c, d)		PPRINTF(x, a, b, c, d)
#define	PPRINT5(x, a, b, c, d, e)	PPRINTF(x, a, b, c, d, e)

#define	P_HASHMASK		(p_hashsize - 1)
#define	P_BASESHIFT		6

/*
 * entry in the segment page cache
 */
struct seg_pcache {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_active;			/* active count */
	int p_ref;			/* ref bit */
	size_t p_len;			/* segment length */
	caddr_t p_addr;			/* base address */
	struct seg *p_seg;		/* segment */
	struct page **p_pp;		/* pp shadow list */
	enum seg_rw p_rw;		/* rw */
	uint_t p_flags;			/* bit flags */
	int (*p_callback)(struct seg *, caddr_t, size_t,
	    struct page **, enum seg_rw);
};

struct seg_phash {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_qlen;			/* Q length */
	kmutex_t p_hmutex;		/* protects hash bucket */
};
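/*
 * Each hash bucket doubles as the head of its doubly-linked list: the
 * p_hnext/p_hprev fields of struct seg_phash line up with the first two
 * fields of struct seg_pcache, so the bucket itself serves as a
 * (struct seg_pcache *) sentinel.  An empty bucket points at itself,
 * which is why the scan loops throughout this file take the form
 * (illustrative):
 *
 *	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
 *	    pcp = pcp->p_hnext)
 *		...
 */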
static int seg_preap_time = 20;	/* reclaim every 20 secs */
static int seg_pmaxqlen = 5;	/* max Q length in hash list */
static int seg_ppcount = 5;	/* max # of purges per reclaim interval */
static int seg_plazy = 1;	/* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow;	/* max # of pages that can be cached */
static pgcnt_t seg_plocked;	/* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;

static uint_t seg_pdisable = 0;	/* if not 0, caching temporarily disabled */

static int seg_pupdate_active = 1; /* background reclaim thread */
static clock_t seg_preap_interval; /* reap interval in ticks */

static kmutex_t seg_pcache;	/* protects the whole pagelock cache */
static kmutex_t seg_pmem;	/* protects window counter */
static ksema_t seg_pasync_sem;	/* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;

#define	p_hash(seg) \
	(P_HASHMASK & \
	((uintptr_t)(seg) >> P_BASESHIFT))

#define	p_match(pcp, seg, addr, len, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

#define	p_match_pp(pcp, seg, addr, len, pp, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_pp == (pp) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)
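/*
 * Worked example of the hash (illustrative pointer value): with
 * p_hashsize = 64, a segment allocated at 0x30001a40 lands in bucket
 * (0x30001a40 >> 6) & 63 = 0xc00069 & 0x3f = 41.  Shifting by
 * P_BASESHIFT (6) strips the low address bits, which vary little
 * between kmem-allocated segments, before masking with P_HASHMASK.
 */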
/*
 * lookup an address range in pagelock cache. Return shadow list
 * and bump up active count.
 */
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	/*
	 * Skip the pagelock cache while DR is in progress or
	 * the cache is off.
	 */
	if (seg_pdisable || seg_plazy == 0) {
		return (NULL);
	}

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match(pcp, seg, addr, len, rw)) {
			pcp->p_active++;
			mutex_exit(&hp->p_hmutex);

			PPRINT5("seg_plookup hit: seg %p, addr %p, "
			    "len %lx, count %d, pplist %p \n",
			    (void *)seg, (void *)addr, len, pcp->p_active,
			    (void *)pcp->p_pp);

			return (pcp->p_pp);
		}
	}
	mutex_exit(&hp->p_hmutex);

	PPRINT("seg_plookup miss:\n");

	return (NULL);
}
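/*
 * Sketch of the intended fast path in a segment driver's pagelock
 * entry point (illustrative only; segfoo_pagelock is hypothetical):
 *
 *	static int
 *	segfoo_pagelock(struct seg *seg, caddr_t addr, size_t len,
 *	    struct page ***ppp, enum lock_type type, enum seg_rw rw)
 *	{
 *		if (type == L_PAGELOCK &&
 *		    (*ppp = seg_plookup(seg, addr, len, rw)) != NULL)
 *			return (0);	(hit: shadow list reused as-is)
 *		... slow path: lock pages, build list, seg_pinsert() ...
 *	}
 */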
/*
 * mark address range inactive. If the cache is off or the address
 * range is not in the cache we call the segment driver to reclaim
 * the pages. Otherwise just decrement active count and set ref bit.
 */
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		(void) (*callback)(seg, addr, len, pp, rw);
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
			pcp->p_active--;
			ASSERT(pcp->p_active >= 0);
			if (pcp->p_active == 0 && seg_pdisable) {
				int npages;

				ASSERT(callback == pcp->p_callback);
				/* free the entry */
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				mutex_exit(&hp->p_hmutex);
				npages = pcp->p_len >> PAGESHIFT;
				mutex_enter(&seg_pmem);
				seg_plocked -= npages;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					seg_plocked_window -= npages;
				}
				mutex_exit(&seg_pmem);
				kmem_free(pcp, sizeof (struct seg_pcache));
				goto out;
			}
			pcp->p_ref = 1;
			mutex_exit(&hp->p_hmutex);
			return;
		}
	}
	mutex_exit(&hp->p_hmutex);
out:
	(void) (*callback)(seg, addr, len, pp, rw);
}

/*
 * seg_pinsert_check() is used by segment drivers to predict whether
 * a call to seg_pinsert() will fail and thereby avoid wasteful
 * pre-processing.
 */
int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		pgcnt_t npages;

		npages = len >> PAGESHIFT;
		if ((seg_plocked_window + npages) > seg_pwindow) {
			return (SEGP_FAIL);
		}
	}
	return (SEGP_SUCCESS);
}
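/*
 * For example (illustrative), a driver can bail out before doing the
 * expensive part of the insert path:
 *
 *	if (seg_pinsert_check(seg, len, 0) == SEGP_FAIL)
 *		goto slow_path;		(don't bother building a list)
 *	pplist = kmem_alloc(btop(len) * sizeof (page_t *), KM_SLEEP);
 *
 * The check is advisory: conditions may change before the subsequent
 * seg_pinsert(), which repeats the same tests authoritatively.
 */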
/*
 * insert address range with shadow list into pagelock cache. If
 * the cache is off or caching is temporarily disabled or the allowed
 * 'window' is exceeded, return SEGP_FAIL. Otherwise return
 * SEGP_SUCCESS.
 */
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
    size_t, struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	npages = len >> PAGESHIFT;
	mutex_enter(&seg_pmem);
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		seg_plocked_window += npages;
		if (seg_plocked_window > seg_pwindow) {
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem);
			return (SEGP_FAIL);
		}
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem);

	pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
	pcp->p_seg = seg;
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_pp = pp;
	pcp->p_rw = rw;
	pcp->p_callback = callback;
	pcp->p_active = 1;
	pcp->p_flags = flags;

	PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
	    (void *)seg, (void *)addr, len, (void *)pp);

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	hp->p_qlen++;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;
	mutex_exit(&hp->p_hmutex);
	return (SEGP_SUCCESS);
}
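/*
 * Putting the pieces together, an illustrative pagelock/pageunlock pair
 * (segfoo_* names are hypothetical; real drivers such as seg_vn follow
 * this general shape):
 *
 *	L_PAGELOCK:
 *		if ((pl = seg_plookup(seg, addr, len, rw)) != NULL)
 *			return pl;			(hit)
 *		if (seg_pinsert_check(seg, len, 0) == SEGP_FAIL)
 *			goto slow_path;
 *		lock pages and build shadow list pl;
 *		if (seg_pinsert(seg, addr, len, pl, rw, 0,
 *		    segfoo_reclaim) == SEGP_FAIL)
 *			remember to reclaim at unlock time instead;
 *
 *	L_PAGEUNLOCK:
 *		seg_pinactive(seg, addr, len, pl, rw, segfoo_reclaim);
 *
 * segfoo_reclaim() is the driver callback that unlocks the pages and
 * frees the shadow list; the cache invokes it when the entry is purged.
 */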
/*
 * purge all entries from the pagelock cache if not active
 * and not recently used. Drop all locks and call through
 * the address space into the segment driver to reclaim
 * the pages. This makes sure we get the address space
 * and segment driver locking right.
 */
static void
seg_ppurge_all(int force)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	int purge_count = 0;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;

		/*
		 * While 'force' is set, seg_pasync_thread is not
		 * throttled.  This is to speed up flushing of seg_pcache
		 * in preparation for DR.
		 *
		 * In the normal case, when 'force' is not set, we throttle
		 * seg_pasync_thread so that we don't spend all the time
		 * purging the cache.
		 */
		while ((pcp != (struct seg_pcache *)hp) &&
		    (force || (purge_count <= seg_ppcount))) {

			/*
			 * purge entries which are not active and
			 * have not been used recently and
			 * have the SEGP_ASYNC_FLUSH flag.
			 *
			 * In the 'force' case, we ignore the
			 * SEGP_ASYNC_FLUSH flag.
			 */
			if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
				pcp->p_ref = 1;
			if (force)
				pcp->p_ref = 0;
			if (!pcp->p_ref && !pcp->p_active) {
				struct as *as = pcp->p_seg->s_as;

				/*
				 * try to get the readers lock on the address
				 * space before taking out the cache element.
				 * This ensures as_pagereclaim() can actually
				 * call through the address space and free
				 * the pages. If we don't get the lock, just
				 * skip this entry. The pages will be reclaimed
				 * by the segment driver at unmap time.
				 */
				if (AS_LOCK_TRYENTER(as, &as->a_lock,
				    RW_READER)) {
					hp->p_qlen--;
					pcp->p_hprev->p_hnext = pcp->p_hnext;
					pcp->p_hnext->p_hprev = pcp->p_hprev;
					pcp->p_hprev = delcallb_list;
					delcallb_list = pcp;
					purge_count++;
				}
			} else {
				pcp->p_ref = 0;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
		if (!force && purge_count > seg_ppcount)
			break;
	}

	/*
	 * run the delayed callback list. We don't want to hold the
	 * cache lock during a call through the address space.
	 */
	while (delcallb_list != NULL) {
		struct as *as;

		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		as = pcp->p_seg->s_as;

		PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
		    pcp->p_len, pcp->p_rw);
		AS_LOCK_EXIT(as, &as->a_lock);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
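/*
 * Note on the pattern above (descriptive, not normative): victims are
 * unlinked under the bucket mutex and threaded onto delcallb_list by
 * reusing their now-unused p_hprev field; the callbacks then run only
 * after the mutex has been dropped.  In miniature:
 *
 *	mutex_enter(&lock);
 *	unlink(victim);
 *	victim->p_hprev = delcallb_list;	(borrow the link field)
 *	delcallb_list = victim;
 *	mutex_exit(&lock);
 *	while (delcallb_list != NULL) { ... callback, then free ... }
 *
 * This avoids lock-ordering problems between the hash mutexes and the
 * address-space and segment-driver locks taken by the callbacks.
 */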
/*
 * Remove cached pages for segment(s) entries from hashtable.
 * The segments are identified by a given client's callback
 * function.
 * This is useful for multiple segs cached on behalf of a
 * dummy segment (ISM/DISM) with a common callback function.
 * The client's callback function may return status indicating
 * that the last seg's entry has been purged. In such a case
 * seg_ppurge_seg() stops searching the hashtable and exits.
 * Otherwise all hashtable entries are scanned.
 */
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp, *npcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;
	int done = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	mutex_enter(&seg_pcache);
	seg_pdisable++;
	mutex_exit(&seg_pcache);

	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {

		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {

			/*
			 * purge entries which are not active
			 */
			npcp = pcp->p_hnext;
			if (!pcp->p_active && pcp->p_callback == callback) {
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;

				if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
				    pcp->p_len, pcp->p_pp, pcp->p_rw)) {
					done = 1;
				}

				npages += pcp->p_len >> PAGESHIFT;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					npages_window +=
					    pcp->p_len >> PAGESHIFT;
				}
				kmem_free(pcp, sizeof (struct seg_pcache));
			}
			pcp = npcp;
			if (done)
				break;
		}
		mutex_exit(&hp->p_hmutex);
		if (done)
			break;
	}

	mutex_enter(&seg_pcache);
	seg_pdisable--;
	mutex_exit(&seg_pcache);

	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
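/*
 * Illustrative use (hypothetical names): an ISM/DISM implementation
 * that caches many per-process segments against one shared callback
 * can flush them all in a single pass when the shared object goes away:
 *
 *	seg_ppurge_seg(segfoo_ism_reclaim);
 *
 * If segfoo_ism_reclaim() can tell it has just released the last entry
 * for the shared object, returning nonzero ends the scan early.
 */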
/*
 * purge all entries for a given segment. Since we
 * call back into the segment driver directly for page
 * reclaim, the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	if (seg_plazy == 0) {
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	pcp = hp->p_hnext;
	while (pcp != (struct seg_pcache *)hp) {
		if (pcp->p_seg == seg) {
			if (pcp->p_active) {
				break;
			}
			hp->p_qlen--;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
		}
		pcp = pcp->p_hnext;
	}
	mutex_exit(&hp->p_hmutex);
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;

		PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		ASSERT(seg == pcp->p_seg);
		(void) (*pcp->p_callback)(seg, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_rw);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
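/*
 * A typical caller (illustrative; segfoo_unmap is hypothetical) is a
 * segment driver's unmap path, where the address space lock is already
 * write-held (see seg_unmap() below), so it is safe for the reclaim
 * callback to be invoked directly:
 *
 *	segfoo_unmap(struct seg *seg, caddr_t addr, size_t len)
 *	{
 *		seg_ppurge(seg);	(flush cached shadow lists first)
 *		... unload translations, free pages ...
 *	}
 */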
static void seg_pinit_mem_config(void);

/*
 * setup the pagelock cache
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	int i;
	uint_t physmegs;

	sema_init(&seg_pasync_sem, 0, NULL, SEMA_DEFAULT, NULL);

	mutex_enter(&seg_pcache);
	if (p_hashtab == NULL) {
		physmegs = physmem >> (20 - PAGESHIFT);

		/* If p_hashsize was not set in /etc/system ... */
		if (p_hashsize == 0) {
			/*
			 * Choose p_hashsize based on physmem.
			 */
			if (physmegs < 64) {
				p_hashsize = 64;
			} else if (physmegs < 1024) {
				p_hashsize = 1024;
			} else if (physmegs < 10 * 1024) {
				p_hashsize = 8192;
			} else if (physmegs < 20 * 1024) {
				p_hashsize = 2 * 8192;
				seg_pmaxqlen = 16;
			} else {
				p_hashsize = 128 * 1024;
				seg_pmaxqlen = 128;
			}
		}

		p_hashtab = kmem_zalloc(
		    p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
		for (i = 0; i < p_hashsize; i++) {
			hp = (struct seg_phash *)&p_hashtab[i];
			hp->p_hnext = (struct seg_pcache *)hp;
			hp->p_hprev = (struct seg_pcache *)hp;
			mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
		}
		if (seg_pwindow == 0) {
			if (physmegs < 24) {
				/* don't use cache */
				seg_plazy = 0;
			} else if (physmegs < 64) {
				seg_pwindow = physmem >> 5; /* 3% of memory */
			} else if (physmegs < 10 * 1024) {
				seg_pwindow = physmem >> 3; /* 12% of memory */
			} else {
				seg_pwindow = physmem >> 1;
			}
		}
	}
	mutex_exit(&seg_pcache);

	seg_pinit_mem_config();
}
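/*
 * Worked example of the sizing above (illustrative): on a machine with
 * 4 GB of memory and 4K pages, physmem is 0x100000 pages, so physmegs =
 * 0x100000 >> 8 = 4096.  That selects p_hashsize = 8192 buckets and
 * seg_pwindow = physmem >> 3 = 131072 pages (512 MB).  The >> 5 and
 * >> 3 shifts approximate 3% and 12% of memory (1/32 and 1/8).
 */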
/*
 * called by pageout if memory is low
 */
void
seg_preap(void)
{
	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plocked == 0 || seg_plazy == 0) {
		return;
	}
	sema_v(&seg_pasync_sem);
}

static void seg_pupdate(void *);

/*
 * run as a background thread and reclaim pagelock
 * pages which have not been used recently
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;
	kmutex_t pasync_lock;	/* just for CPR stuff */

	mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &pasync_lock,
	    callb_generic_cpr, "seg_pasync");

	if (seg_preap_interval == 0) {
		seg_preap_interval = seg_preap_time * hz;
	} else {
		seg_preap_interval *= hz;
	}
	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, NULL, seg_preap_interval);
	}

	for (;;) {
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		mutex_exit(&pasync_lock);
		sema_p(&seg_pasync_sem);
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
		mutex_exit(&pasync_lock);

		seg_ppurge_all(0);
	}
}
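/*
 * The thread above and the timeout below form a simple producer/consumer
 * pair: seg_pupdate() and seg_preap() post the semaphore, and the thread
 * consumes one post per reclaim pass.  seg_preap_interval may be preset
 * (in seconds) before this thread starts and is converted to ticks here;
 * an illustrative /etc/system entry would be:
 *
 *	set seg_preap_interval = 60
 */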
static void
seg_pupdate(void *dummy)
{
	sema_v(&seg_pasync_sem);

	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, dummy, seg_preap_interval);
	}
}

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
	kstat_t *ksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
	    segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(ksp);
	}

	seg_pinit();
}
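/*
 * segadvstat is exported as a virtual kstat: with KSTAT_FLAG_VIRTUAL the
 * framework points ks_data at the existing segadvstat storage instead of
 * allocating its own.  The counters can then be read from userland with,
 * for example:
 *
 *	$ kstat -m unix -n segadvstat
 */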
/*
 * Allocate a segment to cover [base, base+size]
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}
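/*
 * The rounding above pushes base down and base+size up to page
 * boundaries.  Worked example with hypothetical 4K pages: base 0x10234,
 * size 0x100 gives segbase = 0x10000 and segsize = ((0x10334 + 0xfff)
 * & ~0xfff) - 0x10000 = 0x11000 - 0x10000 = 0x1000, i.e. the single
 * page containing the requested range.
 */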
/*
 * Attach a segment to the address space. Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	/*
	 * as_addseg() will add the segment at the appropriate point
	 * in the list. It will return -1 if there is overlap with
	 * an already existing segment.
	 */
	return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment. (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif /* DEBUG */

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Shouldn't have called seg_unmap if mapping isn't yet established */
	ASSERT(seg->s_data != NULL);

	/* Unmap the whole mapping */
#ifdef DEBUG
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#else
	(void) SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap()
 * should be called instead.
 */
void
seg_free(struct seg *seg)
{
	register struct as *as = seg->s_as;
	struct seg *tseg = as_removeseg(as, seg);

	ASSERT(tseg == seg);

	/*
	 * If the segment private data field is NULL,
	 * then segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		SEGOP_FREE(seg);

	kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
}
/*
 * Attempt to purge seg_pcache. May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisable flag has been set so at least we won't
 *	cache any more locks, and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result; the worst that will happen is that the
 *	DR code will time out and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t old_plocked;
	int stall_count = 0;

	mutex_enter(&seg_pcache);
	seg_pdisable++;
	ASSERT(seg_pdisable != 0);
	mutex_exit(&seg_pcache);

	/*
	 * Attempt to empty the cache. Terminate if seg_plocked does not
	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
	 */
	while (seg_plocked != 0) {
		old_plocked = seg_plocked;
		seg_ppurge_all(1);
		if (seg_plocked == old_plocked) {
			if (stall_count++ > SEGP_STALL_THRESHOLD) {
				cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
				    " pagelock cache - continuing");
				break;
			}
		} else
			stall_count = 0;
		if (seg_plocked != 0)
			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
	}
	return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	mutex_enter(&seg_pcache);
	ASSERT(seg_pdisable != 0);
	seg_pdisable--;
	mutex_exit(&seg_pcache);
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	seg_p_mem_config_post_add,
	seg_p_mem_config_pre_del,
	seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	/*
	 * Want to catch this in the debug kernel. At run time, if the
	 * callbacks don't get run all will be OK as the disable just makes
	 * it more likely that the pages can be collected.
	 */
	ASSERT(ret == 0);
}