/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/param.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/tuneable.h>
#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/as.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <sys/buf.h>
#include <sys/swap.h>
#include <sys/atomic.h>
#include <vm/seg_spt.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/shm.h>
#include <sys/shm_impl.h>
#include <sys/lgrp.h>
#include <sys/vmsystm.h>
#include <sys/policy.h>
#include <sys/project.h>
#include <sys/tnf_probe.h>
#include <sys/zone.h>

#define SEGSPTADDR      (caddr_t)0x0

/*
 * # pages used for spt
 */
size_t  spt_used;

/*
 * segspt_minfree is the memory left for the system after ISM
 * locked its pages; it is set up to 5% of availrmem in
 * sptcreate when ISM is created. ISM should not use more
 * than ~90% of availrmem; if it does, then the performance
 * of the system may decrease. Machines with large memories may
 * be able to use up more memory for ISM, so we set the default
 * segspt_minfree to 5% (which gives ISM a maximum of 95% of availrmem).
 * If somebody wants even more memory for ISM (risking hanging
 * the system) they can patch segspt_minfree to a smaller number.
 */
pgcnt_t segspt_minfree = 0;

static int segspt_create(struct seg *seg, caddr_t argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_free(struct seg *seg);
static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);

static void
segspt_badop()
{
    panic("segspt_badop called");
    /*NOTREACHED*/
}

#define SEGSPT_BADOP(t) (t(*)())segspt_badop

struct seg_ops segspt_ops = {
    SEGSPT_BADOP(int),          /* dup */
    segspt_unmap,
    segspt_free,
    SEGSPT_BADOP(int),          /* fault */
    SEGSPT_BADOP(faultcode_t),  /* faulta */
    SEGSPT_BADOP(int),          /* setprot */
    SEGSPT_BADOP(int),          /* checkprot */
    SEGSPT_BADOP(int),          /* kluster */
    SEGSPT_BADOP(size_t),       /* swapout */
    SEGSPT_BADOP(int),          /* sync */
    SEGSPT_BADOP(size_t),       /* incore */
    SEGSPT_BADOP(int),          /* lockop */
    SEGSPT_BADOP(int),          /* getprot */
    SEGSPT_BADOP(u_offset_t),   /* getoffset */
    SEGSPT_BADOP(int),          /* gettype */
    SEGSPT_BADOP(int),          /* getvp */
    SEGSPT_BADOP(int),          /* advise */
    SEGSPT_BADOP(void),         /* dump */
    SEGSPT_BADOP(int),          /* pagelock */
    SEGSPT_BADOP(int),          /* setpgsz */
    SEGSPT_BADOP(int),          /* getmemid */
    segspt_getpolicy,           /* getpolicy */
    SEGSPT_BADOP(int),          /* capable */
};

static int segspt_shmdup(struct seg *seg, struct seg *newseg);
static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_shmfree(struct seg *seg);
static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
    caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
    register size_t len, register uint_t prot);
static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
    uint_t prot);
static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t segspt_shmswapout(struct seg *seg);
static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
    register char *vec);
static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
    int attr, uint_t flags);
static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
    int attr, int op, ulong_t *lockmap, size_t pos);
static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t *protv);
static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
static int segspt_shmgettype(struct seg *seg, caddr_t addr);
static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
    uint_t behav);
static void segspt_shmdump(struct seg *seg);
static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
    struct page ***, enum lock_type, enum seg_rw);
static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
static int segspt_shmcapable(struct seg *, segcapability_t);

struct seg_ops segspt_shmops = {
    segspt_shmdup,
    segspt_shmunmap,
    segspt_shmfree,
    segspt_shmfault,
    segspt_shmfaulta,
    segspt_shmsetprot,
    segspt_shmcheckprot,
    segspt_shmkluster,
    segspt_shmswapout,
    segspt_shmsync,
    segspt_shmincore,
    segspt_shmlockop,
    segspt_shmgetprot,
    segspt_shmgetoffset,
    segspt_shmgettype,
    segspt_shmgetvp,
    segspt_shmadvise,       /* advise */
    segspt_shmdump,
    segspt_shmpagelock,
    segspt_shmsetpgsz,
    segspt_shmgetmemid,
    segspt_shmgetpolicy,
    segspt_shmcapable,
};

static void segspt_purge(struct seg *seg);
static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
    enum seg_rw, int);
static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
    page_t **ppa);

/*ARGSUSED*/
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
    uint_t prot, uint_t flags, uint_t share_szc)
{
    int err;
    struct as *newas;
    struct segspt_crargs sptcargs;

#ifdef DEBUG
    TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
        tnf_ulong, size, size );
#endif
    if (segspt_minfree == 0)        /* leave min 5% of availrmem for */
        segspt_minfree = availrmem/20;  /* for the system */

    if (!hat_supported(HAT_SHARED_PT, (void *)0))
        return (EINVAL);

    /*
     * get a new as for this shared memory segment
     */
    newas = as_alloc();
    newas->a_proc = NULL;
    sptcargs.amp = amp;
    sptcargs.prot = prot;
    sptcargs.flags = flags;
    sptcargs.szc = share_szc;
    /*
     * create a shared page table (spt) segment
     */

    if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
        as_free(newas);
        return (err);
    }
    *sptseg = sptcargs.seg_spt;
    return (0);
}

void
sptdestroy(struct as *as, struct anon_map *amp)
{

#ifdef DEBUG
    TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
#endif
    (void) as_unmap(as, SEGSPTADDR, amp->size);
    as_free(as);
}

/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 * all the pages in the given seg.
 */
void
segspt_free(struct seg *seg)
{
    struct spt_data *sptd = (struct spt_data *)seg->s_data;

    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    if (sptd != NULL) {
        if (sptd->spt_realsize)
            segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

        if (sptd->spt_ppa_lckcnt)
            kmem_free(sptd->spt_ppa_lckcnt,
                sizeof (*sptd->spt_ppa_lckcnt)
                * btopr(sptd->spt_amp->size));
        kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
        cv_destroy(&sptd->spt_cv);
        mutex_destroy(&sptd->spt_lock);
        kmem_free(sptd, sizeof (*sptd));
    }
}

/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
    uint_t flags)
{
    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

    return (0);
}

/*ARGSUSED*/
static size_t
segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
    caddr_t eo_seg;
    pgcnt_t npages;
    struct shm_data *shmd = (struct shm_data *)seg->s_data;
    struct seg *sptseg;
    struct spt_data *sptd;

    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
#ifdef lint
    seg = seg;
#endif
    sptseg = shmd->shm_sptseg;
    sptd = sptseg->s_data;

    if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
        eo_seg = addr + len;
        while (addr < eo_seg) {
            /* page exists, and it's locked. */
            *vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
                SEG_PAGE_ANON;
            addr += PAGESIZE;
        }
        return (len);
    } else {
        struct anon_map *amp = shmd->shm_amp;
        struct anon *ap;
        page_t *pp;
        pgcnt_t anon_index;
        struct vnode *vp;
        u_offset_t off;
        ulong_t i;
        int ret;
        anon_sync_obj_t cookie;

        addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
        anon_index = seg_page(seg, addr);
        npages = btopr(len);
        if (anon_index + npages > btopr(shmd->shm_amp->size)) {
            return (EINVAL);
        }
        ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
        for (i = 0; i < npages; i++, anon_index++) {
            ret = 0;
            anon_array_enter(amp, anon_index, &cookie);
            ap = anon_get_ptr(amp->ahp, anon_index);
            if (ap != NULL) {
                swap_xlate(ap, &vp, &off);
                anon_array_exit(&cookie);
                pp = page_lookup_nowait(vp, off, SE_SHARED);
                if (pp != NULL) {
                    ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
                    page_unlock(pp);
                }
            } else {
                anon_array_exit(&cookie);
            }
            if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
                ret |= SEG_PAGE_LOCKED;
            }
            *vec++ = (char)ret;
        }
        ANON_LOCK_EXIT(&amp->a_rwlock);
        return (len);
    }
}

static int
segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
    size_t share_size;

    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    /*
     * seg.s_size may have been rounded up to the largest page size
     * in shmat().
     * XXX This should be cleaned up. sptdestroy should take a length
     * argument which should be the same as sptcreate. Then
     * this rounding would not be needed (or is done in shm.c)
     * Only the check for full segment will be needed.
     *
     * XXX -- shouldn't raddr == 0 always? These tests don't seem
     * to be useful at all.
     */
    share_size = page_get_pagesize(seg->s_szc);
    ssize = P2ROUNDUP(ssize, share_size);

    if (raddr == seg->s_base && ssize == seg->s_size) {
        seg_free(seg);
        return (0);
    } else
        return (EINVAL);
}

int
segspt_create(struct seg *seg, caddr_t argsp)
{
    int err;
    caddr_t addr = seg->s_base;
    struct spt_data *sptd;
    struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
    struct anon_map *amp = sptcargs->amp;
    struct kshmid *sp = amp->a_sp;
    struct cred *cred = CRED();
    ulong_t i, j, anon_index = 0;
    pgcnt_t npages = btopr(amp->size);
    struct vnode *vp;
    page_t **ppa;
    uint_t hat_flags;
    size_t pgsz;
    pgcnt_t pgcnt;
    caddr_t a;
    pgcnt_t pidx;
    size_t sz;
    proc_t *procp = curproc;
    rctl_qty_t lockedbytes = 0;
    kproject_t *proj;

    /*
     * We are holding the a_lock on the underlying dummy as,
     * so we can make calls to the HAT layer.
     */
    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    ASSERT(sp != NULL);

#ifdef DEBUG
    TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
        tnf_opaque, addr, addr, tnf_ulong, len, seg->s_size);
#endif
    if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
        if (err = anon_swap_adjust(npages))
            return (err);
    }
    err = ENOMEM;

    if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
        goto out1;

    if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
        if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
            KM_NOSLEEP)) == NULL)
            goto out2;
    }

    mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);

    if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
        goto out3;

    seg->s_ops = &segspt_ops;
    sptd->spt_vp = vp;
    sptd->spt_amp = amp;
    sptd->spt_prot = sptcargs->prot;
    sptd->spt_flags = sptcargs->flags;
    seg->s_data = (caddr_t)sptd;
    sptd->spt_ppa = NULL;
    sptd->spt_ppa_lckcnt = NULL;
    seg->s_szc = sptcargs->szc;
    cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);
    sptd->spt_gen = 0;

    ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
    if (seg->s_szc > amp->a_szc) {
        amp->a_szc = seg->s_szc;
    }
    ANON_LOCK_EXIT(&amp->a_rwlock);

    /*
     * Set policy to affect initial allocation of pages in
     * anon_map_createpages()
     */
    (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
        NULL, 0, ptob(npages));

    if (sptcargs->flags & SHM_PAGEABLE) {
        size_t share_sz;
        pgcnt_t new_npgs, more_pgs;
        struct anon_hdr *nahp;
        zone_t *zone;

        share_sz = page_get_pagesize(seg->s_szc);
        if (!IS_P2ALIGNED(amp->size, share_sz)) {
            /*
             * We are rounding up the size of the anon array
             * on 4 M boundary because we always create 4 M
             * of page(s) when locking, faulting pages and we
             * don't have to check for all corner cases e.g.
             * if there is enough space to allocate 4 M
             * page.
             */
            new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
            more_pgs = new_npgs - npages;

            /*
             * The zone will never be NULL, as a fully created
             * shm always has an owning zone.
             */
            zone = sp->shm_perm.ipc_zone_ref.zref_zone;
            ASSERT(zone != NULL);
            if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
                err = ENOMEM;
                goto out4;
            }

            nahp = anon_create(new_npgs, ANON_SLEEP);
            ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
            (void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
                ANON_SLEEP);
            anon_release(amp->ahp, npages);
            amp->ahp = nahp;
            ASSERT(amp->swresv == ptob(npages));
            amp->swresv = amp->size = ptob(new_npgs);
            ANON_LOCK_EXIT(&amp->a_rwlock);
            npages = new_npgs;
        }

        sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
            sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
        sptd->spt_pcachecnt = 0;
        sptd->spt_realsize = ptob(npages);
        sptcargs->seg_spt = seg;
        return (0);
    }

    /*
     * get array of pages for each anon slot in amp
     */
    if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
        seg, addr, S_CREATE, cred)) != 0)
        goto out4;

    mutex_enter(&sp->shm_mlock);

    /* May be partially locked, so, count bytes to charge for locking */
    for (i = 0; i < npages; i++)
        if (ppa[i]->p_lckcnt == 0)
            lockedbytes += PAGESIZE;

    proj = sp->shm_perm.ipc_proj;

    if (lockedbytes > 0) {
        mutex_enter(&procp->p_lock);
        if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
            mutex_exit(&procp->p_lock);
            mutex_exit(&sp->shm_mlock);
            for (i = 0; i < npages; i++)
                page_unlock(ppa[i]);
            err = ENOMEM;
            goto out4;
        }
        mutex_exit(&procp->p_lock);
    }

    /*
     * addr is initial address corresponding to the first page on ppa list
     */
    for (i = 0; i < npages; i++) {
        /* attempt to lock all pages */
        if (page_pp_lock(ppa[i], 0, 1) == 0) {
            /*
             * if unable to lock any page, unlock all
             * of them and return error
             */
            for (j = 0; j < i; j++)
                page_pp_unlock(ppa[j], 0, 1);
            for (i = 0; i < npages; i++)
                page_unlock(ppa[i]);
            rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
            mutex_exit(&sp->shm_mlock);
            err = ENOMEM;
            goto out4;
        }
    }
    mutex_exit(&sp->shm_mlock);

    /*
     * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
     * for the entire life of the segment. For example platforms
     * that do not support Dynamic Reconfiguration.
     */
    hat_flags = HAT_LOAD_SHARE;
    if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
        hat_flags |= HAT_LOAD_LOCK;

    /*
     * Load translations one large page at a time
     * to make sure we don't create mappings bigger than
     * segment's size code in case underlying pages
     * are shared with segvn's segment that uses bigger
     * size code than we do.
     */
    pgsz = page_get_pagesize(seg->s_szc);
    pgcnt = page_get_pagecnt(seg->s_szc);
    for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
        sz = MIN(pgsz, ptob(npages - pidx));
        hat_memload_array(seg->s_as->a_hat, a, sz,
            &ppa[pidx], sptd->spt_prot, hat_flags);
    }

    /*
     * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
     * we will leave the pages locked SE_SHARED for the life
     * of the ISM segment. This will prevent any calls to
     * hat_pageunload() on this ISM segment for those platforms.
     */
    if (!(hat_flags & HAT_LOAD_LOCK)) {
        /*
         * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
         * we no longer need to hold the SE_SHARED lock on the pages,
         * since L_PAGELOCK and F_SOFTLOCK calls will grab the
         * SE_SHARED lock on the pages as necessary.
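         * The pages themselves remain pinned in memory by the
         * page_pp_lock() calls made above; only the shared page
         * lock is dropped here.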
         */
        for (i = 0; i < npages; i++)
            page_unlock(ppa[i]);
    }
    sptd->spt_pcachecnt = 0;
    kmem_free(ppa, ((sizeof (page_t *)) * npages));
    sptd->spt_realsize = ptob(npages);
    atomic_add_long(&spt_used, npages);
    sptcargs->seg_spt = seg;
    return (0);

out4:
    seg->s_data = NULL;
    kmem_free(vp, sizeof (*vp));
    cv_destroy(&sptd->spt_cv);
out3:
    mutex_destroy(&sptd->spt_lock);
    if ((sptcargs->flags & SHM_PAGEABLE) == 0)
        kmem_free(ppa, (sizeof (*ppa) * npages));
out2:
    kmem_free(sptd, sizeof (*sptd));
out1:
    if ((sptcargs->flags & SHM_PAGEABLE) == 0)
        anon_swap_restore(npages);
    return (err);
}

/*ARGSUSED*/
void
segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
{
    struct page *pp;
    struct spt_data *sptd = (struct spt_data *)seg->s_data;
    pgcnt_t npages;
    ulong_t anon_idx;
    struct anon_map *amp;
    struct anon *ap;
    struct vnode *vp;
    u_offset_t off;
    uint_t hat_flags;
    int root = 0;
    pgcnt_t pgs, curnpgs = 0;
    page_t *rootpp;
    rctl_qty_t unlocked_bytes = 0;
    kproject_t *proj;
    kshmid_t *sp;

    ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

    len = P2ROUNDUP(len, PAGESIZE);

    npages = btop(len);

    hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
    if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
        (sptd->spt_flags & SHM_PAGEABLE)) {
        hat_flags = HAT_UNLOAD_UNMAP;
    }

    hat_unload(seg->s_as->a_hat, addr, len, hat_flags);

    amp = sptd->spt_amp;
    if (sptd->spt_flags & SHM_PAGEABLE)
        npages = btop(amp->size);

    ASSERT(amp != NULL);

    if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
        sp = amp->a_sp;
        proj = sp->shm_perm.ipc_proj;
        mutex_enter(&sp->shm_mlock);
    }
    for (anon_idx = 0; anon_idx < npages; anon_idx++) {
        if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
            if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
                panic("segspt_free_pages: null app");
                /*NOTREACHED*/
            }
        } else {
            if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
                == NULL)
                continue;
        }
        ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
        swap_xlate(ap, &vp, &off);

        /*
         * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
         * the pages won't be having SE_SHARED lock at this
         * point.
         *
         * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
         * the pages are still held SE_SHARED locked from the
         * original segspt_create()
         *
         * Our goal is to get SE_EXCL lock on each page, remove
         * permanent lock on it and invalidate the page.
         */
        if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
            if (hat_flags == HAT_UNLOAD_UNMAP)
                pp = page_lookup(vp, off, SE_EXCL);
            else {
                if ((pp = page_find(vp, off)) == NULL) {
                    panic("segspt_free_pages: "
                        "page not locked");
                    /*NOTREACHED*/
                }
                if (!page_tryupgrade(pp)) {
                    page_unlock(pp);
                    pp = page_lookup(vp, off, SE_EXCL);
                }
            }
            if (pp == NULL) {
                panic("segspt_free_pages: "
                    "page not in the system");
                /*NOTREACHED*/
            }
            ASSERT(pp->p_lckcnt > 0);
            page_pp_unlock(pp, 0, 1);
            if (pp->p_lckcnt == 0)
                unlocked_bytes += PAGESIZE;
        } else {
            if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
                continue;
        }
        /*
         * It's logical to invalidate the pages here as in most cases
         * these were created by segspt.
         */
        if (pp->p_szc != 0) {
            if (root == 0) {
                ASSERT(curnpgs == 0);
                root = 1;
                rootpp = pp;
                pgs = curnpgs = page_get_pagecnt(pp->p_szc);
                ASSERT(pgs > 1);
                ASSERT(IS_P2ALIGNED(pgs, pgs));
                ASSERT(!(page_pptonum(pp) & (pgs - 1)));
                curnpgs--;
            } else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
                ASSERT(curnpgs == 1);
                ASSERT(page_pptonum(pp) ==
                    page_pptonum(rootpp) + (pgs - 1));
                page_destroy_pages(rootpp);
                root = 0;
                curnpgs = 0;
            } else {
                ASSERT(curnpgs > 1);
                ASSERT(page_pptonum(pp) ==
                    page_pptonum(rootpp) + (pgs - curnpgs));
                curnpgs--;
            }
        } else {
            if (root != 0 || curnpgs != 0) {
                panic("segspt_free_pages: bad large page");
                /*NOTREACHED*/
            }
            /*
             * Before destroying the pages, we need to take care
             * of the rctl locked memory accounting. For that
             * we need to calculate the unlocked_bytes.
             */
            if (pp->p_lckcnt > 0)
                unlocked_bytes += PAGESIZE;
            /*LINTED: constant in conditional context */
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        }
    }
    if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
        if (unlocked_bytes > 0)
            rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
        mutex_exit(&sp->shm_mlock);
    }
    if (root != 0 || curnpgs != 0) {
        panic("segspt_free_pages: bad large page");
        /*NOTREACHED*/
    }

    /*
     * mark that pages have been released
     */
    sptd->spt_realsize = 0;

    if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
        atomic_add_long(&spt_used, -npages);
        anon_swap_restore(npages);
    }
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_getpolicy(struct seg *seg, caddr_t addr)
{
    struct anon_map *amp;
    ulong_t anon_index;
    lgrp_mem_policy_info_t *policy_info;
    struct spt_data *spt_data;

    ASSERT(seg != NULL);

    /*
     * Get anon_map from segspt
     *
     * Assume that no lock needs to be held on anon_map, since
     * it should be protected by its reference count which must be
     * nonzero for an existing segment
     * Need to grab readers lock on policy tree though
     */
    spt_data = (struct spt_data *)seg->s_data;
    if (spt_data == NULL)
        return (NULL);
    amp = spt_data->spt_amp;
    ASSERT(amp->refcnt != 0);

    /*
     * Get policy info
     *
     * Assume starting anon index of 0
     */
    anon_index = seg_page(seg, addr);
    policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

    return (policy_info);
}

/*
 * DISM only.
 * Return locked pages over a given range.
 *
 * We will cache all DISM locked pages and save the pplist for the
 * entire segment in the ppa field of the underlying DISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
/*ARGSUSED*/
static int
segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
    struct shm_data *shmd = (struct shm_data *)seg->s_data;
    struct seg *sptseg = shmd->shm_sptseg;
    struct spt_data *sptd = sptseg->s_data;
    pgcnt_t pg_idx, npages, tot_npages, npgs;
    struct page **pplist, **pl, **ppa, *pp;
    struct anon_map *amp;
    spgcnt_t an_idx;
    int ret = ENOTSUP;
    uint_t pl_built = 0;
    struct anon *ap;
    struct vnode *vp;
    u_offset_t off;
    pgcnt_t claim_availrmem = 0;
    uint_t szc;

    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
    ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);

    /*
     * We want to lock/unlock the entire ISM segment. Therefore,
     * we will be using the underlying sptseg and its base address
     * and length for the caching arguments.
     */
    ASSERT(sptseg);
    ASSERT(sptd);

    pg_idx = seg_page(seg, addr);
    npages = btopr(len);

    /*
     * check if the request is larger than number of pages covered
     * by amp
     */
    if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
        *ppp = NULL;
        return (ENOTSUP);
    }

    if (type == L_PAGEUNLOCK) {
        ASSERT(sptd->spt_ppa != NULL);

        seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
            sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);

        /*
         * If someone is blocked while unmapping, we purge
         * segment page cache and thus reclaim pplist synchronously
         * without waiting for seg_pasync_thread. This speeds up
         * unmapping in cases where munmap(2) is called, while
         * raw async i/o is still in progress or where a thread
         * exits on data fault in a multithreaded application.
         */
        if ((sptd->spt_flags & DISM_PPA_CHANGED) ||
            (AS_ISUNMAPWAIT(seg->s_as) &&
            shmd->shm_softlockcnt > 0)) {
            segspt_purge(seg);
        }
        return (0);
    }

    /* The L_PAGELOCK case ... */

    if (sptd->spt_flags & DISM_PPA_CHANGED) {
        segspt_purge(seg);
        /*
         * for DISM the ppa needs to be rebuilt since
         * the number of locked pages could have changed
         */
        *ppp = NULL;
        return (ENOTSUP);
    }

    /*
     * First try to find pages in segment page cache, without
     * holding the segment lock.
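     * If this lock-free lookup misses, the lookup is retried below
     * while holding sptd->spt_lock before a new ppa list is built.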
     */
    pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
        S_WRITE, SEGP_FORCE_WIRED);
    if (pplist != NULL) {
        ASSERT(sptd->spt_ppa != NULL);
        ASSERT(sptd->spt_ppa == pplist);
        ppa = sptd->spt_ppa;
        for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
            if (ppa[an_idx] == NULL) {
                seg_pinactive(seg, NULL, seg->s_base,
                    sptd->spt_amp->size, ppa,
                    S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
                *ppp = NULL;
                return (ENOTSUP);
            }
            if ((szc = ppa[an_idx]->p_szc) != 0) {
                npgs = page_get_pagecnt(szc);
                an_idx = P2ROUNDUP(an_idx + 1, npgs);
            } else {
                an_idx++;
            }
        }
        /*
         * Since we cache the entire DISM segment, we want to
         * set ppp to point to the first slot that corresponds
         * to the requested addr, i.e. pg_idx.
         */
        *ppp = &(sptd->spt_ppa[pg_idx]);
        return (0);
    }

    mutex_enter(&sptd->spt_lock);
    /*
     * try to find pages in segment page cache with mutex
     */
    pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
        S_WRITE, SEGP_FORCE_WIRED);
    if (pplist != NULL) {
        ASSERT(sptd->spt_ppa != NULL);
        ASSERT(sptd->spt_ppa == pplist);
        ppa = sptd->spt_ppa;
        for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
            if (ppa[an_idx] == NULL) {
                mutex_exit(&sptd->spt_lock);
                seg_pinactive(seg, NULL, seg->s_base,
                    sptd->spt_amp->size, ppa,
                    S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
                *ppp = NULL;
                return (ENOTSUP);
            }
            if ((szc = ppa[an_idx]->p_szc) != 0) {
                npgs = page_get_pagecnt(szc);
                an_idx = P2ROUNDUP(an_idx + 1, npgs);
            } else {
                an_idx++;
            }
        }
        /*
         * Since we cache the entire DISM segment, we want to
         * set ppp to point to the first slot that corresponds
         * to the requested addr, i.e. pg_idx.
         */
        mutex_exit(&sptd->spt_lock);
        *ppp = &(sptd->spt_ppa[pg_idx]);
        return (0);
    }
    if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
        SEGP_FORCE_WIRED) == SEGP_FAIL) {
        mutex_exit(&sptd->spt_lock);
        *ppp = NULL;
        return (ENOTSUP);
    }

    /*
     * No need to worry about protections because DISM pages are always rw.
     */
    pl = pplist = NULL;
    amp = sptd->spt_amp;

    /*
     * Do we need to build the ppa array?
     */
    if (sptd->spt_ppa == NULL) {
        pgcnt_t lpg_cnt = 0;

        pl_built = 1;
        tot_npages = btopr(sptd->spt_amp->size);

        ASSERT(sptd->spt_pcachecnt == 0);
        pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
        pl = pplist;

        ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
        for (an_idx = 0; an_idx < tot_npages; ) {
            ap = anon_get_ptr(amp->ahp, an_idx);
            /*
             * Cache only mlocked pages. For large pages
             * if one (constituent) page is mlocked
             * all pages for that large page
             * are cached also. This is for quick
             * lookups of ppa array;
             */
            if ((ap != NULL) && (lpg_cnt != 0 ||
                (sptd->spt_ppa_lckcnt[an_idx] != 0))) {

                swap_xlate(ap, &vp, &off);
                pp = page_lookup(vp, off, SE_SHARED);
                ASSERT(pp != NULL);
                if (lpg_cnt == 0) {
                    lpg_cnt++;
                    /*
                     * For a small page, we are done --
                     * lpg_count is reset to 0 below.
                     *
                     * For a large page, we are guaranteed
                     * to find the anon structures of all
                     * constituent pages and a non-zero
                     * lpg_cnt ensures that we don't test
                     * for mlock for these. We are done
                     * when lpg_count reaches (npgs + 1).
                     * If we are not the first constituent
                     * page, restart at the first one.
                     */
                    npgs = page_get_pagecnt(pp->p_szc);
                    if (!IS_P2ALIGNED(an_idx, npgs)) {
                        an_idx = P2ALIGN(an_idx, npgs);
                        page_unlock(pp);
                        continue;
                    }
                }
                if (++lpg_cnt > npgs)
                    lpg_cnt = 0;

                /*
                 * availrmem is decremented only
                 * for unlocked pages
                 */
                if (sptd->spt_ppa_lckcnt[an_idx] == 0)
                    claim_availrmem++;
                pplist[an_idx] = pp;
            }
            an_idx++;
        }
        ANON_LOCK_EXIT(&amp->a_rwlock);

        if (claim_availrmem) {
            mutex_enter(&freemem_lock);
            if (availrmem < tune.t_minarmem + claim_availrmem) {
                mutex_exit(&freemem_lock);
                ret = ENOTSUP;
                claim_availrmem = 0;
                goto insert_fail;
            } else {
                availrmem -= claim_availrmem;
            }
            mutex_exit(&freemem_lock);
        }

        sptd->spt_ppa = pl;
    } else {
        /*
         * We already have a valid ppa[].
         */
        pl = sptd->spt_ppa;
    }

    ASSERT(pl != NULL);

    ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
        sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
        segspt_reclaim);
    if (ret == SEGP_FAIL) {
        /*
         * seg_pinsert failed. We return
         * ENOTSUP, so that the as_pagelock() code will
         * then try the slower F_SOFTLOCK path.
         */
        if (pl_built) {
            /*
             * No one else has referenced the ppa[].
             * We created it and we need to destroy it.
             */
            sptd->spt_ppa = NULL;
        }
        ret = ENOTSUP;
        goto insert_fail;
    }

    /*
     * In either case, we increment softlockcnt on the 'real' segment.
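     * The matching decrement is performed when the cached ppa list is
     * eventually reclaimed through segspt_reclaim().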
     */
    sptd->spt_pcachecnt++;
    atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);

    ppa = sptd->spt_ppa;
    for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
        if (ppa[an_idx] == NULL) {
            mutex_exit(&sptd->spt_lock);
            seg_pinactive(seg, NULL, seg->s_base,
                sptd->spt_amp->size,
                pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
            *ppp = NULL;
            return (ENOTSUP);
        }
        if ((szc = ppa[an_idx]->p_szc) != 0) {
            npgs = page_get_pagecnt(szc);
            an_idx = P2ROUNDUP(an_idx + 1, npgs);
        } else {
            an_idx++;
        }
    }
    /*
     * We can now drop the sptd->spt_lock since the ppa[]
     * exists and we have incremented pcachecnt.
     */
    mutex_exit(&sptd->spt_lock);

    /*
     * Since we cache the entire segment, we want to
     * set ppp to point to the first slot that corresponds
     * to the requested addr, i.e. pg_idx.
     */
    *ppp = &(sptd->spt_ppa[pg_idx]);
    return (0);

insert_fail:
    /*
     * We will only reach this code if we tried and failed.
     *
     * And we can drop the lock on the dummy seg, once we've failed
     * to set up a new ppa[].
     */
    mutex_exit(&sptd->spt_lock);

    if (pl_built) {
        if (claim_availrmem) {
            mutex_enter(&freemem_lock);
            availrmem += claim_availrmem;
            mutex_exit(&freemem_lock);
        }

        /*
         * We created pl and we need to destroy it.
         */
        pplist = pl;
        for (an_idx = 0; an_idx < tot_npages; an_idx++) {
            if (pplist[an_idx] != NULL)
                page_unlock(pplist[an_idx]);
        }
        kmem_free(pl, sizeof (page_t *) * tot_npages);
    }

    if (shmd->shm_softlockcnt <= 0) {
        if (AS_ISUNMAPWAIT(seg->s_as)) {
            mutex_enter(&seg->s_as->a_contents);
            if (AS_ISUNMAPWAIT(seg->s_as)) {
                AS_CLRUNMAPWAIT(seg->s_as);
                cv_broadcast(&seg->s_as->a_cv);
            }
            mutex_exit(&seg->s_as->a_contents);
        }
    }
    *ppp = NULL;
    return (ret);
}

/*
 * return locked pages over a given range.
 *
 * We will cache the entire ISM segment and save the pplist for the
 * entire segment in the ppa field of the underlying ISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
/*ARGSUSED*/
static int
segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
    struct shm_data *shmd = (struct shm_data *)seg->s_data;
    struct seg *sptseg = shmd->shm_sptseg;
    struct spt_data *sptd = sptseg->s_data;
    pgcnt_t np, page_index, npages;
    caddr_t a, spt_base;
    struct page **pplist, **pl, *pp;
    struct anon_map *amp;
    ulong_t anon_index;
    int ret = ENOTSUP;
    uint_t pl_built = 0;
    struct anon *ap;
    struct vnode *vp;
    u_offset_t off;

    ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
    ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);

    /*
     * We want to lock/unlock the entire ISM segment. Therefore,
     * we will be using the underlying sptseg and its base address
     * and length for the caching arguments.
     */
    ASSERT(sptseg);
    ASSERT(sptd);

    if (sptd->spt_flags & SHM_PAGEABLE) {
        return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
    }

    page_index = seg_page(seg, addr);
    npages = btopr(len);

    /*
     * check if the request is larger than number of pages covered
     * by amp
     */
    if (page_index + npages > btopr(sptd->spt_amp->size)) {
        *ppp = NULL;
        return (ENOTSUP);
    }

    if (type == L_PAGEUNLOCK) {

        ASSERT(sptd->spt_ppa != NULL);

        seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
            sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);

        /*
         * If someone is blocked while unmapping, we purge
         * segment page cache and thus reclaim pplist synchronously
         * without waiting for seg_pasync_thread. This speeds up
         * unmapping in cases where munmap(2) is called, while
         * raw async i/o is still in progress or where a thread
         * exits on data fault in a multithreaded application.
12360Sstevel@tonic-gate */ 12370Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) { 12380Sstevel@tonic-gate segspt_purge(seg); 12390Sstevel@tonic-gate } 12400Sstevel@tonic-gate return (0); 12416695Saguzovsk } 12420Sstevel@tonic-gate 12436695Saguzovsk /* The L_PAGELOCK case... */ 12440Sstevel@tonic-gate 12450Sstevel@tonic-gate /* 12460Sstevel@tonic-gate * First try to find pages in segment page cache, without 12470Sstevel@tonic-gate * holding the segment lock. 12480Sstevel@tonic-gate */ 12496695Saguzovsk pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 12506695Saguzovsk S_WRITE, SEGP_FORCE_WIRED); 12510Sstevel@tonic-gate if (pplist != NULL) { 12520Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 12530Sstevel@tonic-gate ASSERT(sptd->spt_ppa[page_index]); 12540Sstevel@tonic-gate /* 12550Sstevel@tonic-gate * Since we cache the entire ISM segment, we want to 12560Sstevel@tonic-gate * set ppp to point to the first slot that corresponds 12570Sstevel@tonic-gate * to the requested addr, i.e. page_index. 12580Sstevel@tonic-gate */ 12590Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[page_index]); 12600Sstevel@tonic-gate return (0); 12610Sstevel@tonic-gate } 12620Sstevel@tonic-gate 12630Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 12640Sstevel@tonic-gate 12650Sstevel@tonic-gate /* 12660Sstevel@tonic-gate * try to find pages in segment page cache 12670Sstevel@tonic-gate */ 12686695Saguzovsk pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size, 12696695Saguzovsk S_WRITE, SEGP_FORCE_WIRED); 12700Sstevel@tonic-gate if (pplist != NULL) { 12710Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 12720Sstevel@tonic-gate /* 12730Sstevel@tonic-gate * Since we cache the entire segment, we want to 12740Sstevel@tonic-gate * set ppp to point to the first slot that corresponds 12750Sstevel@tonic-gate * to the requested addr, i.e. page_index. 12760Sstevel@tonic-gate */ 12770Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 12780Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[page_index]); 12790Sstevel@tonic-gate return (0); 12800Sstevel@tonic-gate } 12810Sstevel@tonic-gate 12826695Saguzovsk if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size, 12836695Saguzovsk SEGP_FORCE_WIRED) == SEGP_FAIL) { 12840Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 12850Sstevel@tonic-gate *ppp = NULL; 12860Sstevel@tonic-gate return (ENOTSUP); 12870Sstevel@tonic-gate } 12880Sstevel@tonic-gate 12890Sstevel@tonic-gate /* 12900Sstevel@tonic-gate * No need to worry about protections because ISM pages 12910Sstevel@tonic-gate * are always rw. 12920Sstevel@tonic-gate */ 12930Sstevel@tonic-gate pl = pplist = NULL; 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate /* 12960Sstevel@tonic-gate * Do we need to build the ppa array? 12970Sstevel@tonic-gate */ 12980Sstevel@tonic-gate if (sptd->spt_ppa == NULL) { 12990Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 13000Sstevel@tonic-gate 13010Sstevel@tonic-gate spt_base = sptseg->s_base; 13020Sstevel@tonic-gate pl_built = 1; 13030Sstevel@tonic-gate 13040Sstevel@tonic-gate /* 13050Sstevel@tonic-gate * availrmem is decremented once during anon_swap_adjust() 13060Sstevel@tonic-gate * and is incremented during the anon_unresv(), which is 13070Sstevel@tonic-gate * called from shm_rm_amp() when the segment is destroyed. 
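* Hence, unlike the DISM pagelock path above, no availrmem is claimed
* here while the ppa[] list is built.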
13080Sstevel@tonic-gate */
13090Sstevel@tonic-gate amp = sptd->spt_amp;
13100Sstevel@tonic-gate ASSERT(amp != NULL);
13110Sstevel@tonic-gate
13120Sstevel@tonic-gate /* pcachecnt is protected by sptd->spt_lock */
13130Sstevel@tonic-gate ASSERT(sptd->spt_pcachecnt == 0);
13140Sstevel@tonic-gate pplist = kmem_zalloc(sizeof (page_t *)
13150Sstevel@tonic-gate * btopr(sptd->spt_amp->size), KM_SLEEP);
13160Sstevel@tonic-gate pl = pplist;
13170Sstevel@tonic-gate
13180Sstevel@tonic-gate anon_index = seg_page(sptseg, spt_base);
13190Sstevel@tonic-gate
13200Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
13210Sstevel@tonic-gate for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
13220Sstevel@tonic-gate a += PAGESIZE, anon_index++, pplist++) {
13230Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index);
13240Sstevel@tonic-gate ASSERT(ap != NULL);
13250Sstevel@tonic-gate swap_xlate(ap, &vp, &off);
13260Sstevel@tonic-gate pp = page_lookup(vp, off, SE_SHARED);
13270Sstevel@tonic-gate ASSERT(pp != NULL);
13280Sstevel@tonic-gate *pplist = pp;
13290Sstevel@tonic-gate }
13300Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock);
13310Sstevel@tonic-gate
13320Sstevel@tonic-gate if (a < (spt_base + sptd->spt_amp->size)) {
13330Sstevel@tonic-gate ret = ENOTSUP;
13340Sstevel@tonic-gate goto insert_fail;
13350Sstevel@tonic-gate }
13360Sstevel@tonic-gate sptd->spt_ppa = pl;
13370Sstevel@tonic-gate } else {
13380Sstevel@tonic-gate /*
13390Sstevel@tonic-gate * We already have a valid ppa[].
13400Sstevel@tonic-gate */
13410Sstevel@tonic-gate pl = sptd->spt_ppa;
13420Sstevel@tonic-gate }
13430Sstevel@tonic-gate
13440Sstevel@tonic-gate ASSERT(pl != NULL);
13450Sstevel@tonic-gate
13466695Saguzovsk ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
13476695Saguzovsk sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
13486695Saguzovsk segspt_reclaim);
13490Sstevel@tonic-gate if (ret == SEGP_FAIL) {
13500Sstevel@tonic-gate /*
13510Sstevel@tonic-gate * seg_pinsert failed. We return
13520Sstevel@tonic-gate * ENOTSUP, so that the as_pagelock() code will
13530Sstevel@tonic-gate * then try the slower F_SOFTLOCK path.
13540Sstevel@tonic-gate */
13550Sstevel@tonic-gate if (pl_built) {
13560Sstevel@tonic-gate /*
13570Sstevel@tonic-gate * No one else has referenced the ppa[].
13580Sstevel@tonic-gate * We created it and we need to destroy it.
13590Sstevel@tonic-gate */
13600Sstevel@tonic-gate sptd->spt_ppa = NULL;
13610Sstevel@tonic-gate }
13620Sstevel@tonic-gate ret = ENOTSUP;
13630Sstevel@tonic-gate goto insert_fail;
13640Sstevel@tonic-gate }
13650Sstevel@tonic-gate
13660Sstevel@tonic-gate /*
13670Sstevel@tonic-gate * In either case, we increment softlockcnt on the 'real' segment.
13680Sstevel@tonic-gate */
13690Sstevel@tonic-gate sptd->spt_pcachecnt++;
13700Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);
13710Sstevel@tonic-gate
13720Sstevel@tonic-gate /*
13730Sstevel@tonic-gate * We can now drop the sptd->spt_lock since the ppa[]
13740Sstevel@tonic-gate * exists and we have incremented pcachecnt.
13750Sstevel@tonic-gate */
13760Sstevel@tonic-gate mutex_exit(&sptd->spt_lock);
13770Sstevel@tonic-gate
13780Sstevel@tonic-gate /*
13790Sstevel@tonic-gate * Since we cache the entire segment, we want to
13800Sstevel@tonic-gate * set ppp to point to the first slot that corresponds
13810Sstevel@tonic-gate * to the requested addr, i.e. page_index.
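* (The matching L_PAGEUNLOCK call later releases this reference through
* seg_pinactive() rather than by unlocking the pages one by one.)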
13820Sstevel@tonic-gate */ 13830Sstevel@tonic-gate *ppp = &(sptd->spt_ppa[page_index]); 13846695Saguzovsk return (0); 13850Sstevel@tonic-gate 13860Sstevel@tonic-gate insert_fail: 13870Sstevel@tonic-gate /* 13880Sstevel@tonic-gate * We will only reach this code if we tried and failed. 13890Sstevel@tonic-gate * 13900Sstevel@tonic-gate * And we can drop the lock on the dummy seg, once we've failed 13910Sstevel@tonic-gate * to set up a new ppa[]. 13920Sstevel@tonic-gate */ 13930Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate if (pl_built) { 13960Sstevel@tonic-gate /* 13970Sstevel@tonic-gate * We created pl and we need to destroy it. 13980Sstevel@tonic-gate */ 13990Sstevel@tonic-gate pplist = pl; 14000Sstevel@tonic-gate np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT); 14010Sstevel@tonic-gate while (np) { 14020Sstevel@tonic-gate page_unlock(*pplist); 14030Sstevel@tonic-gate np--; 14040Sstevel@tonic-gate pplist++; 14050Sstevel@tonic-gate } 14065224Smec kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size)); 14070Sstevel@tonic-gate } 14080Sstevel@tonic-gate if (shmd->shm_softlockcnt <= 0) { 14090Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 14100Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents); 14110Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 14120Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as); 14130Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv); 14140Sstevel@tonic-gate } 14150Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents); 14160Sstevel@tonic-gate } 14170Sstevel@tonic-gate } 14180Sstevel@tonic-gate *ppp = NULL; 14190Sstevel@tonic-gate return (ret); 14200Sstevel@tonic-gate } 14210Sstevel@tonic-gate 14220Sstevel@tonic-gate /* 14230Sstevel@tonic-gate * purge any cached pages in the I/O page cache 14240Sstevel@tonic-gate */ 14250Sstevel@tonic-gate static void 14260Sstevel@tonic-gate segspt_purge(struct seg *seg) 14270Sstevel@tonic-gate { 14286695Saguzovsk seg_ppurge(seg, NULL, SEGP_FORCE_WIRED); 14290Sstevel@tonic-gate } 14300Sstevel@tonic-gate 14310Sstevel@tonic-gate static int 14326695Saguzovsk segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist, 14336695Saguzovsk enum seg_rw rw, int async) 14340Sstevel@tonic-gate { 14356695Saguzovsk struct seg *seg = (struct seg *)ptag; 14360Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 14370Sstevel@tonic-gate struct seg *sptseg; 14380Sstevel@tonic-gate struct spt_data *sptd; 14390Sstevel@tonic-gate pgcnt_t npages, i, free_availrmem = 0; 14400Sstevel@tonic-gate int done = 0; 14410Sstevel@tonic-gate 14420Sstevel@tonic-gate #ifdef lint 14430Sstevel@tonic-gate addr = addr; 14440Sstevel@tonic-gate #endif 14450Sstevel@tonic-gate sptseg = shmd->shm_sptseg; 14460Sstevel@tonic-gate sptd = sptseg->s_data; 14470Sstevel@tonic-gate npages = (len >> PAGESHIFT); 14480Sstevel@tonic-gate ASSERT(npages); 14490Sstevel@tonic-gate ASSERT(sptd->spt_pcachecnt != 0); 14500Sstevel@tonic-gate ASSERT(sptd->spt_ppa == pplist); 14510Sstevel@tonic-gate ASSERT(npages == btopr(sptd->spt_amp->size)); 14526695Saguzovsk ASSERT(async || AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 14536695Saguzovsk 14540Sstevel@tonic-gate /* 14550Sstevel@tonic-gate * Acquire the lock on the dummy seg and destroy the 14560Sstevel@tonic-gate * ppa array IF this is the last pcachecnt. 
14570Sstevel@tonic-gate */
14580Sstevel@tonic-gate mutex_enter(&sptd->spt_lock);
14590Sstevel@tonic-gate if (--sptd->spt_pcachecnt == 0) {
14600Sstevel@tonic-gate for (i = 0; i < npages; i++) {
14610Sstevel@tonic-gate if (pplist[i] == NULL) {
14620Sstevel@tonic-gate continue;
14630Sstevel@tonic-gate }
14640Sstevel@tonic-gate if (rw == S_WRITE) {
14650Sstevel@tonic-gate hat_setrefmod(pplist[i]);
14660Sstevel@tonic-gate } else {
14670Sstevel@tonic-gate hat_setref(pplist[i]);
14680Sstevel@tonic-gate }
14690Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) &&
14702768Ssl108498 (sptd->spt_ppa_lckcnt[i] == 0))
14710Sstevel@tonic-gate free_availrmem++;
14720Sstevel@tonic-gate page_unlock(pplist[i]);
14730Sstevel@tonic-gate }
14746695Saguzovsk if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
14750Sstevel@tonic-gate mutex_enter(&freemem_lock);
14760Sstevel@tonic-gate availrmem += free_availrmem;
14770Sstevel@tonic-gate mutex_exit(&freemem_lock);
14780Sstevel@tonic-gate }
14790Sstevel@tonic-gate /*
14800Sstevel@tonic-gate * Since we want to cache/uncache the entire ISM segment,
14810Sstevel@tonic-gate * we will track the pplist in a segspt specific field
14820Sstevel@tonic-gate * ppa, that is initialized at the time we add an entry to
14830Sstevel@tonic-gate * the cache.
14840Sstevel@tonic-gate */
14850Sstevel@tonic-gate ASSERT(sptd->spt_pcachecnt == 0);
14860Sstevel@tonic-gate kmem_free(pplist, sizeof (page_t *) * npages);
14870Sstevel@tonic-gate sptd->spt_ppa = NULL;
14880Sstevel@tonic-gate sptd->spt_flags &= ~DISM_PPA_CHANGED;
14895224Smec sptd->spt_gen++;
14905224Smec cv_broadcast(&sptd->spt_cv);
14910Sstevel@tonic-gate done = 1;
14920Sstevel@tonic-gate }
14930Sstevel@tonic-gate mutex_exit(&sptd->spt_lock);
14946695Saguzovsk
14956695Saguzovsk /*
14966695Saguzovsk * If we are the pcache async thread or called via seg_ppurge_wiredpp() we
14976695Saguzovsk * may not hold AS lock (in this case async argument is not 0). This
14986695Saguzovsk * means if softlockcnt drops to 0 after the decrement below address
14996695Saguzovsk * space may get freed. We can't allow it since after softlock
15006695Saguzovsk * decrement to 0 we still need to access as structure for possible
15016695Saguzovsk * wakeup of unmap waiters. To prevent the disappearance of as we take
15026695Saguzovsk * this segment's shm_segfree_syncmtx. segspt_shmfree() also takes
15036695Saguzovsk * this mutex as a barrier to make sure this routine completes before
15046695Saguzovsk * segment is freed.
15056695Saguzovsk *
15066695Saguzovsk * The second complication we have to deal with in async case is a
15076695Saguzovsk * possibility of missed wake up of unmap wait thread. When we don't
15086695Saguzovsk * hold as lock here we may take a_contents lock before unmap wait
15096695Saguzovsk * thread that was first to see softlockcnt was still not 0. As a
15106695Saguzovsk * result we'll fail to wake up an unmap wait thread. To avoid this
15116695Saguzovsk * race we set nounmapwait flag in as structure if we drop softlockcnt
15126695Saguzovsk * to 0 and async is not 0. The unmapwait thread
15136695Saguzovsk * will not block if this flag is set.
15146695Saguzovsk */
15156695Saguzovsk if (async)
15166695Saguzovsk mutex_enter(&shmd->shm_segfree_syncmtx);
15176695Saguzovsk
15180Sstevel@tonic-gate /*
15190Sstevel@tonic-gate * Now decrement softlockcnt.
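* If it drops to zero, any thread blocked in as_unmap() is woken up below.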
15200Sstevel@tonic-gate */
15216695Saguzovsk ASSERT(shmd->shm_softlockcnt > 0);
15220Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -1);
15230Sstevel@tonic-gate
15240Sstevel@tonic-gate if (shmd->shm_softlockcnt <= 0) {
15256695Saguzovsk if (async || AS_ISUNMAPWAIT(seg->s_as)) {
15260Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents);
15276695Saguzovsk if (async)
15286695Saguzovsk AS_SETNOUNMAPWAIT(seg->s_as);
15290Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) {
15300Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as);
15310Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv);
15320Sstevel@tonic-gate }
15330Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents);
15340Sstevel@tonic-gate }
15350Sstevel@tonic-gate }
15366695Saguzovsk
15376695Saguzovsk if (async)
15386695Saguzovsk mutex_exit(&shmd->shm_segfree_syncmtx);
15396695Saguzovsk
15400Sstevel@tonic-gate return (done);
15410Sstevel@tonic-gate }
15420Sstevel@tonic-gate
15430Sstevel@tonic-gate /*
15440Sstevel@tonic-gate * Do an F_SOFTUNLOCK call over the range requested.
15450Sstevel@tonic-gate * The range must have already been F_SOFTLOCK'ed.
15460Sstevel@tonic-gate *
15470Sstevel@tonic-gate * The calls to acquire and release the anon map lock mutex were
15480Sstevel@tonic-gate * removed in order to avoid a deadly embrace during a DR
15490Sstevel@tonic-gate * memory delete operation. (E.g. DR blocks while waiting for an
15500Sstevel@tonic-gate * exclusive lock on a page that is being used for kaio; the
15510Sstevel@tonic-gate * thread that will complete the kaio and call segspt_softunlock
15520Sstevel@tonic-gate * blocks on the anon map lock; another thread holding the anon
15530Sstevel@tonic-gate * map lock blocks on another page lock via the segspt_shmfault
15540Sstevel@tonic-gate * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
15550Sstevel@tonic-gate *
15560Sstevel@tonic-gate * The appropriateness of the removal is based upon the following:
15570Sstevel@tonic-gate * 1. If we are holding a segment's reader lock and the page is held
15580Sstevel@tonic-gate * shared, then the corresponding element in anonmap which points to
15590Sstevel@tonic-gate * anon struct cannot change and there is no need to acquire the
15600Sstevel@tonic-gate * anonymous map lock.
15610Sstevel@tonic-gate * 2. Threads in segspt_softunlock have a reader lock on the segment
15620Sstevel@tonic-gate * and already have the shared page lock, so we are guaranteed that
15630Sstevel@tonic-gate * the anon map slot cannot change and therefore can call anon_get_ptr()
15640Sstevel@tonic-gate * without grabbing the anonymous map lock.
15650Sstevel@tonic-gate * 3. Threads that softlock a shared page break copy-on-write, even if
15660Sstevel@tonic-gate * it's a read. Thus cow faults can be ignored with respect to soft
15670Sstevel@tonic-gate * unlocking, since the breaking of cow means that the anon slot(s) will
15680Sstevel@tonic-gate * not be shared.
15690Sstevel@tonic-gate */ 15700Sstevel@tonic-gate static void 15710Sstevel@tonic-gate segspt_softunlock(struct seg *seg, caddr_t sptseg_addr, 15720Sstevel@tonic-gate size_t len, enum seg_rw rw) 15730Sstevel@tonic-gate { 15740Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 15750Sstevel@tonic-gate struct seg *sptseg; 15760Sstevel@tonic-gate struct spt_data *sptd; 15770Sstevel@tonic-gate page_t *pp; 15780Sstevel@tonic-gate caddr_t adr; 15790Sstevel@tonic-gate struct vnode *vp; 15800Sstevel@tonic-gate u_offset_t offset; 15810Sstevel@tonic-gate ulong_t anon_index; 15820Sstevel@tonic-gate struct anon_map *amp; /* XXX - for locknest */ 15830Sstevel@tonic-gate struct anon *ap = NULL; 15840Sstevel@tonic-gate pgcnt_t npages; 15850Sstevel@tonic-gate 15860Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 15870Sstevel@tonic-gate 15880Sstevel@tonic-gate sptseg = shmd->shm_sptseg; 15890Sstevel@tonic-gate sptd = sptseg->s_data; 15900Sstevel@tonic-gate 15910Sstevel@tonic-gate /* 15920Sstevel@tonic-gate * Some platforms assume that ISM mappings are HAT_LOAD_LOCK 15930Sstevel@tonic-gate * and therefore their pages are SE_SHARED locked 15940Sstevel@tonic-gate * for the entire life of the segment. 15950Sstevel@tonic-gate */ 15960Sstevel@tonic-gate if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) && 15975224Smec ((sptd->spt_flags & SHM_PAGEABLE) == 0)) { 15980Sstevel@tonic-gate goto softlock_decrement; 15990Sstevel@tonic-gate } 16000Sstevel@tonic-gate 16010Sstevel@tonic-gate /* 16020Sstevel@tonic-gate * Any thread is free to do a page_find and 16030Sstevel@tonic-gate * page_unlock() on the pages within this seg. 16040Sstevel@tonic-gate * 16050Sstevel@tonic-gate * We are already holding the as->a_lock on the user's 16060Sstevel@tonic-gate * real segment, but we need to hold the a_lock on the 16070Sstevel@tonic-gate * underlying dummy as. This is mostly to satisfy the 16080Sstevel@tonic-gate * underlying HAT layer. 16090Sstevel@tonic-gate */ 16100Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 16110Sstevel@tonic-gate hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len); 16120Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 16130Sstevel@tonic-gate 16140Sstevel@tonic-gate amp = sptd->spt_amp; 16150Sstevel@tonic-gate ASSERT(amp != NULL); 16160Sstevel@tonic-gate anon_index = seg_page(sptseg, sptseg_addr); 16170Sstevel@tonic-gate 16180Sstevel@tonic-gate for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) { 16190Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index++); 16200Sstevel@tonic-gate ASSERT(ap != NULL); 16210Sstevel@tonic-gate swap_xlate(ap, &vp, &offset); 16220Sstevel@tonic-gate 16230Sstevel@tonic-gate /* 16240Sstevel@tonic-gate * Use page_find() instead of page_lookup() to 16250Sstevel@tonic-gate * find the page since we know that it has a 16260Sstevel@tonic-gate * "shared" lock. 
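* page_find() does not acquire a page lock; it merely locates the page
* we are already holding SE_SHARED.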
16270Sstevel@tonic-gate */ 16280Sstevel@tonic-gate pp = page_find(vp, offset); 16290Sstevel@tonic-gate ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1)); 16300Sstevel@tonic-gate if (pp == NULL) { 16310Sstevel@tonic-gate panic("segspt_softunlock: " 16320Sstevel@tonic-gate "addr %p, ap %p, vp %p, off %llx", 16330Sstevel@tonic-gate (void *)adr, (void *)ap, (void *)vp, offset); 16340Sstevel@tonic-gate /*NOTREACHED*/ 16350Sstevel@tonic-gate } 16360Sstevel@tonic-gate 16370Sstevel@tonic-gate if (rw == S_WRITE) { 16380Sstevel@tonic-gate hat_setrefmod(pp); 16390Sstevel@tonic-gate } else if (rw != S_OTHER) { 16400Sstevel@tonic-gate hat_setref(pp); 16410Sstevel@tonic-gate } 16420Sstevel@tonic-gate page_unlock(pp); 16430Sstevel@tonic-gate } 16440Sstevel@tonic-gate 16450Sstevel@tonic-gate softlock_decrement: 16460Sstevel@tonic-gate npages = btopr(len); 16476695Saguzovsk ASSERT(shmd->shm_softlockcnt >= npages); 16480Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages); 16490Sstevel@tonic-gate if (shmd->shm_softlockcnt == 0) { 16500Sstevel@tonic-gate /* 16510Sstevel@tonic-gate * All SOFTLOCKS are gone. Wakeup any waiting 16520Sstevel@tonic-gate * unmappers so they can try again to unmap. 16530Sstevel@tonic-gate * Check for waiters first without the mutex 16540Sstevel@tonic-gate * held so we don't always grab the mutex on 16550Sstevel@tonic-gate * softunlocks. 16560Sstevel@tonic-gate */ 16570Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 16580Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents); 16590Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 16600Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as); 16610Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv); 16620Sstevel@tonic-gate } 16630Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents); 16640Sstevel@tonic-gate } 16650Sstevel@tonic-gate } 16660Sstevel@tonic-gate } 16670Sstevel@tonic-gate 16680Sstevel@tonic-gate int 16690Sstevel@tonic-gate segspt_shmattach(struct seg *seg, caddr_t *argsp) 16700Sstevel@tonic-gate { 16710Sstevel@tonic-gate struct shm_data *shmd_arg = (struct shm_data *)argsp; 16720Sstevel@tonic-gate struct shm_data *shmd; 16730Sstevel@tonic-gate struct anon_map *shm_amp = shmd_arg->shm_amp; 16740Sstevel@tonic-gate struct spt_data *sptd; 16750Sstevel@tonic-gate int error = 0; 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 16780Sstevel@tonic-gate 16790Sstevel@tonic-gate shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP); 16800Sstevel@tonic-gate if (shmd == NULL) 16810Sstevel@tonic-gate return (ENOMEM); 16820Sstevel@tonic-gate 16830Sstevel@tonic-gate shmd->shm_sptas = shmd_arg->shm_sptas; 16840Sstevel@tonic-gate shmd->shm_amp = shm_amp; 16850Sstevel@tonic-gate shmd->shm_sptseg = shmd_arg->shm_sptseg; 16860Sstevel@tonic-gate 16870Sstevel@tonic-gate (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0, 16880Sstevel@tonic-gate NULL, 0, seg->s_size); 16890Sstevel@tonic-gate 16906695Saguzovsk mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL); 16916695Saguzovsk 16920Sstevel@tonic-gate seg->s_data = (void *)shmd; 16930Sstevel@tonic-gate seg->s_ops = &segspt_shmops; 16940Sstevel@tonic-gate seg->s_szc = shmd->shm_sptseg->s_szc; 16950Sstevel@tonic-gate sptd = shmd->shm_sptseg->s_data; 16960Sstevel@tonic-gate 16970Sstevel@tonic-gate if (sptd->spt_flags & SHM_PAGEABLE) { 16980Sstevel@tonic-gate if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size), 16990Sstevel@tonic-gate KM_NOSLEEP)) == NULL) { 17000Sstevel@tonic-gate 
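/* Allocation of the DISM vpage array failed; undo the partial attach. */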
seg->s_data = (void *)NULL; 17010Sstevel@tonic-gate kmem_free(shmd, (sizeof (*shmd))); 17020Sstevel@tonic-gate return (ENOMEM); 17030Sstevel@tonic-gate } 17040Sstevel@tonic-gate shmd->shm_lckpgs = 0; 17050Sstevel@tonic-gate if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 17060Sstevel@tonic-gate if ((error = hat_share(seg->s_as->a_hat, seg->s_base, 17070Sstevel@tonic-gate shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 17080Sstevel@tonic-gate seg->s_size, seg->s_szc)) != 0) { 17090Sstevel@tonic-gate kmem_free(shmd->shm_vpage, 17105224Smec btopr(shm_amp->size)); 17110Sstevel@tonic-gate } 17120Sstevel@tonic-gate } 17130Sstevel@tonic-gate } else { 17140Sstevel@tonic-gate error = hat_share(seg->s_as->a_hat, seg->s_base, 17155224Smec shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 17165224Smec seg->s_size, seg->s_szc); 17170Sstevel@tonic-gate } 17180Sstevel@tonic-gate if (error) { 17190Sstevel@tonic-gate seg->s_szc = 0; 17200Sstevel@tonic-gate seg->s_data = (void *)NULL; 17210Sstevel@tonic-gate kmem_free(shmd, (sizeof (*shmd))); 17220Sstevel@tonic-gate } else { 17230Sstevel@tonic-gate ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 17240Sstevel@tonic-gate shm_amp->refcnt++; 17250Sstevel@tonic-gate ANON_LOCK_EXIT(&shm_amp->a_rwlock); 17260Sstevel@tonic-gate } 17270Sstevel@tonic-gate return (error); 17280Sstevel@tonic-gate } 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate int 17310Sstevel@tonic-gate segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize) 17320Sstevel@tonic-gate { 17330Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 17340Sstevel@tonic-gate int reclaim = 1; 17350Sstevel@tonic-gate 17360Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 17370Sstevel@tonic-gate retry: 17380Sstevel@tonic-gate if (shmd->shm_softlockcnt > 0) { 17390Sstevel@tonic-gate if (reclaim == 1) { 17400Sstevel@tonic-gate segspt_purge(seg); 17410Sstevel@tonic-gate reclaim = 0; 17420Sstevel@tonic-gate goto retry; 17430Sstevel@tonic-gate } 17440Sstevel@tonic-gate return (EAGAIN); 17450Sstevel@tonic-gate } 17460Sstevel@tonic-gate 17470Sstevel@tonic-gate if (ssize != seg->s_size) { 17480Sstevel@tonic-gate #ifdef DEBUG 17490Sstevel@tonic-gate cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n", 17500Sstevel@tonic-gate ssize, seg->s_size); 17510Sstevel@tonic-gate #endif 17520Sstevel@tonic-gate return (EINVAL); 17530Sstevel@tonic-gate } 17540Sstevel@tonic-gate 17550Sstevel@tonic-gate (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK, 17560Sstevel@tonic-gate NULL, 0); 17570Sstevel@tonic-gate hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc); 17580Sstevel@tonic-gate 17590Sstevel@tonic-gate seg_free(seg); 17600Sstevel@tonic-gate 17610Sstevel@tonic-gate return (0); 17620Sstevel@tonic-gate } 17630Sstevel@tonic-gate 17640Sstevel@tonic-gate void 17650Sstevel@tonic-gate segspt_shmfree(struct seg *seg) 17660Sstevel@tonic-gate { 17670Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 17680Sstevel@tonic-gate struct anon_map *shm_amp = shmd->shm_amp; 17690Sstevel@tonic-gate 17700Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0, 17735224Smec MC_UNLOCK, NULL, 0); 17740Sstevel@tonic-gate 17750Sstevel@tonic-gate /* 17760Sstevel@tonic-gate * Need to increment refcnt when attaching 17770Sstevel@tonic-gate * and decrement when detaching because of dup(). 
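* Only the reference count is dropped here; the amp itself is torn down
* elsewhere when the SysV shared memory segment is removed.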
17780Sstevel@tonic-gate */ 17790Sstevel@tonic-gate ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 17800Sstevel@tonic-gate shm_amp->refcnt--; 17810Sstevel@tonic-gate ANON_LOCK_EXIT(&shm_amp->a_rwlock); 17820Sstevel@tonic-gate 17830Sstevel@tonic-gate if (shmd->shm_vpage) { /* only for DISM */ 17840Sstevel@tonic-gate kmem_free(shmd->shm_vpage, btopr(shm_amp->size)); 17850Sstevel@tonic-gate shmd->shm_vpage = NULL; 17860Sstevel@tonic-gate } 17876695Saguzovsk 17886695Saguzovsk /* 17896695Saguzovsk * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's 17906695Saguzovsk * still working with this segment without holding as lock. 17916695Saguzovsk */ 17926695Saguzovsk ASSERT(shmd->shm_softlockcnt == 0); 17936695Saguzovsk mutex_enter(&shmd->shm_segfree_syncmtx); 17946695Saguzovsk mutex_destroy(&shmd->shm_segfree_syncmtx); 17956695Saguzovsk 17960Sstevel@tonic-gate kmem_free(shmd, sizeof (*shmd)); 17970Sstevel@tonic-gate } 17980Sstevel@tonic-gate 17990Sstevel@tonic-gate /*ARGSUSED*/ 18000Sstevel@tonic-gate int 18010Sstevel@tonic-gate segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 18020Sstevel@tonic-gate { 18030Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 18040Sstevel@tonic-gate 18050Sstevel@tonic-gate /* 18060Sstevel@tonic-gate * Shared page table is more than shared mapping. 18070Sstevel@tonic-gate * Individual process sharing page tables can't change prot 18080Sstevel@tonic-gate * because there is only one set of page tables. 18090Sstevel@tonic-gate * This will be allowed after private page table is 18100Sstevel@tonic-gate * supported. 18110Sstevel@tonic-gate */ 18120Sstevel@tonic-gate /* need to return correct status error? */ 18130Sstevel@tonic-gate return (0); 18140Sstevel@tonic-gate } 18150Sstevel@tonic-gate 18160Sstevel@tonic-gate 18170Sstevel@tonic-gate faultcode_t 18180Sstevel@tonic-gate segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, 18190Sstevel@tonic-gate size_t len, enum fault_type type, enum seg_rw rw) 18200Sstevel@tonic-gate { 18210Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 18220Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 18230Sstevel@tonic-gate struct as *curspt = shmd->shm_sptas; 18240Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 18250Sstevel@tonic-gate pgcnt_t npages; 18262414Saguzovsk size_t size; 18270Sstevel@tonic-gate caddr_t segspt_addr, shm_addr; 18280Sstevel@tonic-gate page_t **ppa; 18290Sstevel@tonic-gate int i; 18300Sstevel@tonic-gate ulong_t an_idx = 0; 18310Sstevel@tonic-gate int err = 0; 1832721Smec int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0); 18332414Saguzovsk size_t pgsz; 18342414Saguzovsk pgcnt_t pgcnt; 18352414Saguzovsk caddr_t a; 18362414Saguzovsk pgcnt_t pidx; 18370Sstevel@tonic-gate 18380Sstevel@tonic-gate #ifdef lint 18390Sstevel@tonic-gate hat = hat; 18400Sstevel@tonic-gate #endif 18410Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 18420Sstevel@tonic-gate 18430Sstevel@tonic-gate /* 18440Sstevel@tonic-gate * Because of the way spt is implemented 18450Sstevel@tonic-gate * the realsize of the segment does not have to be 18460Sstevel@tonic-gate * equal to the segment size itself. The segment size is 18470Sstevel@tonic-gate * often in multiples of a page size larger than PAGESIZE. 18480Sstevel@tonic-gate * The realsize is rounded up to the nearest PAGESIZE 18490Sstevel@tonic-gate * based on what the user requested. 
This is a bit of
18500Sstevel@tonic-gate * ugliness that is historical but not easily fixed
18510Sstevel@tonic-gate * without re-designing the higher levels of ISM.
18520Sstevel@tonic-gate */
18530Sstevel@tonic-gate ASSERT(addr >= seg->s_base);
18540Sstevel@tonic-gate if (((addr + len) - seg->s_base) > sptd->spt_realsize)
18550Sstevel@tonic-gate return (FC_NOMAP);
18560Sstevel@tonic-gate /*
18570Sstevel@tonic-gate * For all of the following cases except F_PROT, we need to
18580Sstevel@tonic-gate * make any necessary adjustments to addr and len
18590Sstevel@tonic-gate * and get all of the necessary page_t's into an array called ppa[].
18600Sstevel@tonic-gate *
18610Sstevel@tonic-gate * The code in shmat() forces base addr and len of ISM segment
18620Sstevel@tonic-gate * to be aligned to largest page size supported. Therefore,
18630Sstevel@tonic-gate * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
18640Sstevel@tonic-gate * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
18650Sstevel@tonic-gate * in large pagesize chunks, or else we will screw up the HAT
18660Sstevel@tonic-gate * layer by calling hat_memload_array() with differing page sizes
18670Sstevel@tonic-gate * over a given virtual range.
18680Sstevel@tonic-gate */
18692414Saguzovsk pgsz = page_get_pagesize(sptseg->s_szc);
18702414Saguzovsk pgcnt = page_get_pagecnt(sptseg->s_szc);
18712414Saguzovsk shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
18722414Saguzovsk size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
18730Sstevel@tonic-gate npages = btopr(size);
18740Sstevel@tonic-gate
18750Sstevel@tonic-gate /*
18760Sstevel@tonic-gate * Now we need to convert from addr in segshm to addr in segspt.
18770Sstevel@tonic-gate */
18780Sstevel@tonic-gate an_idx = seg_page(seg, shm_addr);
18790Sstevel@tonic-gate segspt_addr = sptseg->s_base + ptob(an_idx);
18800Sstevel@tonic-gate
18810Sstevel@tonic-gate ASSERT((segspt_addr + ptob(npages)) <=
18825224Smec (sptseg->s_base + sptd->spt_realsize));
18830Sstevel@tonic-gate ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate switch (type) {
18860Sstevel@tonic-gate
18870Sstevel@tonic-gate case F_SOFTLOCK:
18880Sstevel@tonic-gate
18890Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
18900Sstevel@tonic-gate /*
18910Sstevel@tonic-gate * Fall through to the F_INVAL case to load up the hat layer
18920Sstevel@tonic-gate * entries with the HAT_LOAD_LOCK flag.
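* (If spt_anon_getpages() fails below, this softlock count is backed out
* before returning.)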
18930Sstevel@tonic-gate */ 18940Sstevel@tonic-gate /* FALLTHRU */ 18950Sstevel@tonic-gate case F_INVAL: 18960Sstevel@tonic-gate 18970Sstevel@tonic-gate if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 18980Sstevel@tonic-gate return (FC_NOMAP); 18990Sstevel@tonic-gate 19000Sstevel@tonic-gate ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 19010Sstevel@tonic-gate 19020Sstevel@tonic-gate err = spt_anon_getpages(sptseg, segspt_addr, size, ppa); 19030Sstevel@tonic-gate if (err != 0) { 19040Sstevel@tonic-gate if (type == F_SOFTLOCK) { 19050Sstevel@tonic-gate atomic_add_long((ulong_t *)( 19060Sstevel@tonic-gate &(shmd->shm_softlockcnt)), -npages); 19070Sstevel@tonic-gate } 19080Sstevel@tonic-gate goto dism_err; 19090Sstevel@tonic-gate } 19100Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 19112414Saguzovsk a = segspt_addr; 19122414Saguzovsk pidx = 0; 19130Sstevel@tonic-gate if (type == F_SOFTLOCK) { 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate /* 19160Sstevel@tonic-gate * Load up the translation keeping it 19170Sstevel@tonic-gate * locked and don't unlock the page. 19180Sstevel@tonic-gate */ 19192414Saguzovsk for (; pidx < npages; a += pgsz, pidx += pgcnt) { 19202414Saguzovsk hat_memload_array(sptseg->s_as->a_hat, 19212414Saguzovsk a, pgsz, &ppa[pidx], sptd->spt_prot, 19222414Saguzovsk HAT_LOAD_LOCK | HAT_LOAD_SHARE); 19232414Saguzovsk } 19240Sstevel@tonic-gate } else { 19250Sstevel@tonic-gate if (hat == seg->s_as->a_hat) { 19260Sstevel@tonic-gate 19270Sstevel@tonic-gate /* 19280Sstevel@tonic-gate * Migrate pages marked for migration 19290Sstevel@tonic-gate */ 19300Sstevel@tonic-gate if (lgrp_optimizations()) 19310Sstevel@tonic-gate page_migrate(seg, shm_addr, ppa, 19320Sstevel@tonic-gate npages); 19330Sstevel@tonic-gate 19340Sstevel@tonic-gate /* CPU HAT */ 19352414Saguzovsk for (; pidx < npages; 19362414Saguzovsk a += pgsz, pidx += pgcnt) { 19372414Saguzovsk hat_memload_array(sptseg->s_as->a_hat, 19382414Saguzovsk a, pgsz, &ppa[pidx], 19392414Saguzovsk sptd->spt_prot, 19402414Saguzovsk HAT_LOAD_SHARE); 19412414Saguzovsk } 19420Sstevel@tonic-gate } else { 19430Sstevel@tonic-gate /* XHAT. Pass real address */ 19440Sstevel@tonic-gate hat_memload_array(hat, shm_addr, 19450Sstevel@tonic-gate size, ppa, sptd->spt_prot, HAT_LOAD_SHARE); 19460Sstevel@tonic-gate } 19470Sstevel@tonic-gate 19480Sstevel@tonic-gate /* 19490Sstevel@tonic-gate * And now drop the SE_SHARED lock(s). 
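* Pages may only be unlocked here when the HAT supports
* HAT_DYNAMIC_ISM_UNMAP; otherwise they stay locked, and for F_INVAL they
* are unlocked after the hat_share() call below.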
19500Sstevel@tonic-gate */ 1951721Smec if (dyn_ism_unmap) { 1952721Smec for (i = 0; i < npages; i++) { 1953721Smec page_unlock(ppa[i]); 1954721Smec } 1955721Smec } 19560Sstevel@tonic-gate } 19570Sstevel@tonic-gate 1958721Smec if (!dyn_ism_unmap) { 19590Sstevel@tonic-gate if (hat_share(seg->s_as->a_hat, shm_addr, 19600Sstevel@tonic-gate curspt->a_hat, segspt_addr, ptob(npages), 19610Sstevel@tonic-gate seg->s_szc) != 0) { 19620Sstevel@tonic-gate panic("hat_share err in DISM fault"); 19630Sstevel@tonic-gate /* NOTREACHED */ 19640Sstevel@tonic-gate } 1965721Smec if (type == F_INVAL) { 1966721Smec for (i = 0; i < npages; i++) { 1967721Smec page_unlock(ppa[i]); 1968721Smec } 1969721Smec } 19700Sstevel@tonic-gate } 19710Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 19720Sstevel@tonic-gate dism_err: 19730Sstevel@tonic-gate kmem_free(ppa, npages * sizeof (page_t *)); 19740Sstevel@tonic-gate return (err); 19750Sstevel@tonic-gate 19760Sstevel@tonic-gate case F_SOFTUNLOCK: 19770Sstevel@tonic-gate 19780Sstevel@tonic-gate /* 19790Sstevel@tonic-gate * This is a bit ugly, we pass in the real seg pointer, 19800Sstevel@tonic-gate * but the segspt_addr is the virtual address within the 19810Sstevel@tonic-gate * dummy seg. 19820Sstevel@tonic-gate */ 19830Sstevel@tonic-gate segspt_softunlock(seg, segspt_addr, size, rw); 19840Sstevel@tonic-gate return (0); 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate case F_PROT: 19870Sstevel@tonic-gate 19880Sstevel@tonic-gate /* 19890Sstevel@tonic-gate * This takes care of the unusual case where a user 19900Sstevel@tonic-gate * allocates a stack in shared memory and a register 19910Sstevel@tonic-gate * window overflow is written to that stack page before 19920Sstevel@tonic-gate * it is otherwise modified. 19930Sstevel@tonic-gate * 19940Sstevel@tonic-gate * We can get away with this because ISM segments are 19950Sstevel@tonic-gate * always rw. Other than this unusual case, there 19960Sstevel@tonic-gate * should be no instances of protection violations. 
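* F_PROT is therefore treated as a successful no-op.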
19970Sstevel@tonic-gate */
19980Sstevel@tonic-gate return (0);
19990Sstevel@tonic-gate
20000Sstevel@tonic-gate default:
20010Sstevel@tonic-gate #ifdef DEBUG
20020Sstevel@tonic-gate panic("segspt_dismfault default type?");
20030Sstevel@tonic-gate #else
20040Sstevel@tonic-gate return (FC_NOMAP);
20050Sstevel@tonic-gate #endif
20060Sstevel@tonic-gate }
20070Sstevel@tonic-gate }
20080Sstevel@tonic-gate
20090Sstevel@tonic-gate
20100Sstevel@tonic-gate faultcode_t
20110Sstevel@tonic-gate segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
20120Sstevel@tonic-gate size_t len, enum fault_type type, enum seg_rw rw)
20130Sstevel@tonic-gate {
20140Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data;
20150Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg;
20160Sstevel@tonic-gate struct as *curspt = shmd->shm_sptas;
20170Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data;
20180Sstevel@tonic-gate pgcnt_t npages;
20192414Saguzovsk size_t size;
20200Sstevel@tonic-gate caddr_t sptseg_addr, shm_addr;
20210Sstevel@tonic-gate page_t *pp, **ppa;
20220Sstevel@tonic-gate int i;
20230Sstevel@tonic-gate u_offset_t offset;
20240Sstevel@tonic-gate ulong_t anon_index = 0;
20250Sstevel@tonic-gate struct vnode *vp;
20260Sstevel@tonic-gate struct anon_map *amp; /* XXX - for locknest */
20270Sstevel@tonic-gate struct anon *ap = NULL;
20282414Saguzovsk size_t pgsz;
20292414Saguzovsk pgcnt_t pgcnt;
20302414Saguzovsk caddr_t a;
20312414Saguzovsk pgcnt_t pidx;
20322414Saguzovsk size_t sz;
20330Sstevel@tonic-gate
20340Sstevel@tonic-gate #ifdef lint
20350Sstevel@tonic-gate hat = hat;
20360Sstevel@tonic-gate #endif
20370Sstevel@tonic-gate
20380Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
20390Sstevel@tonic-gate
20400Sstevel@tonic-gate if (sptd->spt_flags & SHM_PAGEABLE) {
20410Sstevel@tonic-gate return (segspt_dismfault(hat, seg, addr, len, type, rw));
20420Sstevel@tonic-gate }
20430Sstevel@tonic-gate
20440Sstevel@tonic-gate /*
20450Sstevel@tonic-gate * Because of the way spt is implemented
20460Sstevel@tonic-gate * the realsize of the segment does not have to be
20470Sstevel@tonic-gate * equal to the segment size itself. The segment size is
20480Sstevel@tonic-gate * often in multiples of a page size larger than PAGESIZE.
20490Sstevel@tonic-gate * The realsize is rounded up to the nearest PAGESIZE
20500Sstevel@tonic-gate * based on what the user requested. This is a bit of
20510Sstevel@tonic-gate * ugliness that is historical but not easily fixed
20520Sstevel@tonic-gate * without re-designing the higher levels of ISM.
20530Sstevel@tonic-gate */
20540Sstevel@tonic-gate ASSERT(addr >= seg->s_base);
20550Sstevel@tonic-gate if (((addr + len) - seg->s_base) > sptd->spt_realsize)
20560Sstevel@tonic-gate return (FC_NOMAP);
20570Sstevel@tonic-gate /*
20580Sstevel@tonic-gate * For all of the following cases except F_PROT, we need to
20590Sstevel@tonic-gate * make any necessary adjustments to addr and len
20600Sstevel@tonic-gate * and get all of the necessary page_t's into an array called ppa[].
20610Sstevel@tonic-gate *
20620Sstevel@tonic-gate * The code in shmat() forces base addr and len of ISM segment
20630Sstevel@tonic-gate * to be aligned to largest page size supported. Therefore,
20640Sstevel@tonic-gate * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
20650Sstevel@tonic-gate * pagesize" chunks.
We want to make sure that we HAT_LOAD_LOCK 20660Sstevel@tonic-gate * in large pagesize chunks, or else we will screw up the HAT 20670Sstevel@tonic-gate * layer by calling hat_memload_array() with differing page sizes 20680Sstevel@tonic-gate * over a given virtual range. 20690Sstevel@tonic-gate */ 20702414Saguzovsk pgsz = page_get_pagesize(sptseg->s_szc); 20712414Saguzovsk pgcnt = page_get_pagecnt(sptseg->s_szc); 20722414Saguzovsk shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); 20732414Saguzovsk size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz); 20740Sstevel@tonic-gate npages = btopr(size); 20750Sstevel@tonic-gate 20760Sstevel@tonic-gate /* 20770Sstevel@tonic-gate * Now we need to convert from addr in segshm to addr in segspt. 20780Sstevel@tonic-gate */ 20790Sstevel@tonic-gate anon_index = seg_page(seg, shm_addr); 20800Sstevel@tonic-gate sptseg_addr = sptseg->s_base + ptob(anon_index); 20810Sstevel@tonic-gate 20820Sstevel@tonic-gate /* 20830Sstevel@tonic-gate * And now we may have to adjust npages downward if we have 20840Sstevel@tonic-gate * exceeded the realsize of the segment or initial anon 20850Sstevel@tonic-gate * allocations. 20860Sstevel@tonic-gate */ 20870Sstevel@tonic-gate if ((sptseg_addr + ptob(npages)) > 20880Sstevel@tonic-gate (sptseg->s_base + sptd->spt_realsize)) 20890Sstevel@tonic-gate size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr; 20900Sstevel@tonic-gate 20910Sstevel@tonic-gate npages = btopr(size); 20920Sstevel@tonic-gate 20930Sstevel@tonic-gate ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size)); 20940Sstevel@tonic-gate ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0); 20950Sstevel@tonic-gate 20960Sstevel@tonic-gate switch (type) { 20970Sstevel@tonic-gate 20980Sstevel@tonic-gate case F_SOFTLOCK: 20990Sstevel@tonic-gate 21000Sstevel@tonic-gate /* 21010Sstevel@tonic-gate * availrmem is decremented once during anon_swap_adjust() 21020Sstevel@tonic-gate * and is incremented during the anon_unresv(), which is 21030Sstevel@tonic-gate * called from shm_rm_amp() when the segment is destroyed. 21040Sstevel@tonic-gate */ 21050Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages); 21060Sstevel@tonic-gate /* 21070Sstevel@tonic-gate * Some platforms assume that ISM pages are SE_SHARED 21080Sstevel@tonic-gate * locked for the entire life of the segment. 21090Sstevel@tonic-gate */ 21100Sstevel@tonic-gate if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) 21110Sstevel@tonic-gate return (0); 21120Sstevel@tonic-gate /* 21130Sstevel@tonic-gate * Fall through to the F_INVAL case to load up the hat layer 21140Sstevel@tonic-gate * entries with the HAT_LOAD_LOCK flag. 21150Sstevel@tonic-gate */ 21160Sstevel@tonic-gate 21170Sstevel@tonic-gate /* FALLTHRU */ 21180Sstevel@tonic-gate case F_INVAL: 21190Sstevel@tonic-gate 21200Sstevel@tonic-gate if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 21210Sstevel@tonic-gate return (FC_NOMAP); 21220Sstevel@tonic-gate 21230Sstevel@tonic-gate /* 21240Sstevel@tonic-gate * Some platforms that do NOT support DYNAMIC_ISM_UNMAP 21250Sstevel@tonic-gate * may still rely on this call to hat_share(). That 21260Sstevel@tonic-gate * would imply that those hat's can fault on a 21270Sstevel@tonic-gate * HAT_LOAD_LOCK translation, which would seem 21280Sstevel@tonic-gate * contradictory. 
21290Sstevel@tonic-gate */ 21300Sstevel@tonic-gate if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 21310Sstevel@tonic-gate if (hat_share(seg->s_as->a_hat, seg->s_base, 21320Sstevel@tonic-gate curspt->a_hat, sptseg->s_base, 21330Sstevel@tonic-gate sptseg->s_size, sptseg->s_szc) != 0) { 21340Sstevel@tonic-gate panic("hat_share error in ISM fault"); 21350Sstevel@tonic-gate /*NOTREACHED*/ 21360Sstevel@tonic-gate } 21370Sstevel@tonic-gate return (0); 21380Sstevel@tonic-gate } 21390Sstevel@tonic-gate ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP); 21400Sstevel@tonic-gate 21410Sstevel@tonic-gate /* 21420Sstevel@tonic-gate * I see no need to lock the real seg, 21430Sstevel@tonic-gate * here, because all of our work will be on the underlying 21440Sstevel@tonic-gate * dummy seg. 21450Sstevel@tonic-gate * 21460Sstevel@tonic-gate * sptseg_addr and npages now account for large pages. 21470Sstevel@tonic-gate */ 21480Sstevel@tonic-gate amp = sptd->spt_amp; 21490Sstevel@tonic-gate ASSERT(amp != NULL); 21500Sstevel@tonic-gate anon_index = seg_page(sptseg, sptseg_addr); 21510Sstevel@tonic-gate 21520Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 21530Sstevel@tonic-gate for (i = 0; i < npages; i++) { 21540Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index++); 21550Sstevel@tonic-gate ASSERT(ap != NULL); 21560Sstevel@tonic-gate swap_xlate(ap, &vp, &offset); 21570Sstevel@tonic-gate pp = page_lookup(vp, offset, SE_SHARED); 21580Sstevel@tonic-gate ASSERT(pp != NULL); 21590Sstevel@tonic-gate ppa[i] = pp; 21600Sstevel@tonic-gate } 21610Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 21620Sstevel@tonic-gate ASSERT(i == npages); 21630Sstevel@tonic-gate 21640Sstevel@tonic-gate /* 21650Sstevel@tonic-gate * We are already holding the as->a_lock on the user's 21660Sstevel@tonic-gate * real segment, but we need to hold the a_lock on the 21670Sstevel@tonic-gate * underlying dummy as. This is mostly to satisfy the 21680Sstevel@tonic-gate * underlying HAT layer. 21690Sstevel@tonic-gate */ 21700Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 21712414Saguzovsk a = sptseg_addr; 21722414Saguzovsk pidx = 0; 21730Sstevel@tonic-gate if (type == F_SOFTLOCK) { 21740Sstevel@tonic-gate /* 21750Sstevel@tonic-gate * Load up the translation keeping it 21760Sstevel@tonic-gate * locked and don't unlock the page. 21770Sstevel@tonic-gate */ 21782414Saguzovsk for (; pidx < npages; a += pgsz, pidx += pgcnt) { 21792414Saguzovsk sz = MIN(pgsz, ptob(npages - pidx)); 21802414Saguzovsk hat_memload_array(sptseg->s_as->a_hat, a, 21812414Saguzovsk sz, &ppa[pidx], sptd->spt_prot, 21822414Saguzovsk HAT_LOAD_LOCK | HAT_LOAD_SHARE); 21832414Saguzovsk } 21840Sstevel@tonic-gate } else { 21850Sstevel@tonic-gate if (hat == seg->s_as->a_hat) { 21860Sstevel@tonic-gate 21870Sstevel@tonic-gate /* 21880Sstevel@tonic-gate * Migrate pages marked for migration. 21890Sstevel@tonic-gate */ 21900Sstevel@tonic-gate if (lgrp_optimizations()) 21910Sstevel@tonic-gate page_migrate(seg, shm_addr, ppa, 21920Sstevel@tonic-gate npages); 21930Sstevel@tonic-gate 21940Sstevel@tonic-gate /* CPU HAT */ 21952414Saguzovsk for (; pidx < npages; 21962414Saguzovsk a += pgsz, pidx += pgcnt) { 21972414Saguzovsk sz = MIN(pgsz, ptob(npages - pidx)); 21982414Saguzovsk hat_memload_array(sptseg->s_as->a_hat, 21992414Saguzovsk a, sz, &ppa[pidx], 22002414Saguzovsk sptd->spt_prot, HAT_LOAD_SHARE); 22012414Saguzovsk } 22020Sstevel@tonic-gate } else { 22030Sstevel@tonic-gate /* XHAT. 
Pass real address */ 22040Sstevel@tonic-gate hat_memload_array(hat, shm_addr, 22050Sstevel@tonic-gate ptob(npages), ppa, sptd->spt_prot, 22060Sstevel@tonic-gate HAT_LOAD_SHARE); 22070Sstevel@tonic-gate } 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* 22100Sstevel@tonic-gate * And now drop the SE_SHARED lock(s). 22110Sstevel@tonic-gate */ 22120Sstevel@tonic-gate for (i = 0; i < npages; i++) 22130Sstevel@tonic-gate page_unlock(ppa[i]); 22140Sstevel@tonic-gate } 22150Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 22160Sstevel@tonic-gate 22170Sstevel@tonic-gate kmem_free(ppa, sizeof (page_t *) * npages); 22180Sstevel@tonic-gate return (0); 22190Sstevel@tonic-gate case F_SOFTUNLOCK: 22200Sstevel@tonic-gate 22210Sstevel@tonic-gate /* 22220Sstevel@tonic-gate * This is a bit ugly, we pass in the real seg pointer, 22230Sstevel@tonic-gate * but the sptseg_addr is the virtual address within the 22240Sstevel@tonic-gate * dummy seg. 22250Sstevel@tonic-gate */ 22260Sstevel@tonic-gate segspt_softunlock(seg, sptseg_addr, ptob(npages), rw); 22270Sstevel@tonic-gate return (0); 22280Sstevel@tonic-gate 22290Sstevel@tonic-gate case F_PROT: 22300Sstevel@tonic-gate 22310Sstevel@tonic-gate /* 22320Sstevel@tonic-gate * This takes care of the unusual case where a user 22330Sstevel@tonic-gate * allocates a stack in shared memory and a register 22340Sstevel@tonic-gate * window overflow is written to that stack page before 22350Sstevel@tonic-gate * it is otherwise modified. 22360Sstevel@tonic-gate * 22370Sstevel@tonic-gate * We can get away with this because ISM segments are 22380Sstevel@tonic-gate * always rw. Other than this unusual case, there 22390Sstevel@tonic-gate * should be no instances of protection violations. 22400Sstevel@tonic-gate */ 22410Sstevel@tonic-gate return (0); 22420Sstevel@tonic-gate 22430Sstevel@tonic-gate default: 22440Sstevel@tonic-gate #ifdef DEBUG 22450Sstevel@tonic-gate cmn_err(CE_WARN, "segspt_shmfault default type?"); 22460Sstevel@tonic-gate #endif 22470Sstevel@tonic-gate return (FC_NOMAP); 22480Sstevel@tonic-gate } 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate 22510Sstevel@tonic-gate /*ARGSUSED*/ 22520Sstevel@tonic-gate static faultcode_t 22530Sstevel@tonic-gate segspt_shmfaulta(struct seg *seg, caddr_t addr) 22540Sstevel@tonic-gate { 22550Sstevel@tonic-gate return (0); 22560Sstevel@tonic-gate } 22570Sstevel@tonic-gate 22580Sstevel@tonic-gate /*ARGSUSED*/ 22590Sstevel@tonic-gate static int 22600Sstevel@tonic-gate segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta) 22610Sstevel@tonic-gate { 22620Sstevel@tonic-gate return (0); 22630Sstevel@tonic-gate } 22640Sstevel@tonic-gate 22650Sstevel@tonic-gate /*ARGSUSED*/ 22660Sstevel@tonic-gate static size_t 22670Sstevel@tonic-gate segspt_shmswapout(struct seg *seg) 22680Sstevel@tonic-gate { 22690Sstevel@tonic-gate return (0); 22700Sstevel@tonic-gate } 22710Sstevel@tonic-gate 22720Sstevel@tonic-gate /* 22730Sstevel@tonic-gate * duplicate the shared page tables 22740Sstevel@tonic-gate */ 22750Sstevel@tonic-gate int 22760Sstevel@tonic-gate segspt_shmdup(struct seg *seg, struct seg *newseg) 22770Sstevel@tonic-gate { 22780Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 22790Sstevel@tonic-gate struct anon_map *amp = shmd->shm_amp; 22800Sstevel@tonic-gate struct shm_data *shmd_new; 22810Sstevel@tonic-gate struct seg *spt_seg = shmd->shm_sptseg; 22820Sstevel@tonic-gate struct spt_data *sptd = spt_seg->s_data; 2283721Smec int error = 0; 22840Sstevel@tonic-gate 22850Sstevel@tonic-gate 
ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
22860Sstevel@tonic-gate
22870Sstevel@tonic-gate shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
22880Sstevel@tonic-gate newseg->s_data = (void *)shmd_new;
22890Sstevel@tonic-gate shmd_new->shm_sptas = shmd->shm_sptas;
22900Sstevel@tonic-gate shmd_new->shm_amp = amp;
22910Sstevel@tonic-gate shmd_new->shm_sptseg = shmd->shm_sptseg;
22920Sstevel@tonic-gate newseg->s_ops = &segspt_shmops;
22930Sstevel@tonic-gate newseg->s_szc = seg->s_szc;
22940Sstevel@tonic-gate ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
22950Sstevel@tonic-gate
22960Sstevel@tonic-gate ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
22970Sstevel@tonic-gate amp->refcnt++;
22980Sstevel@tonic-gate ANON_LOCK_EXIT(&amp->a_rwlock);
22990Sstevel@tonic-gate
23000Sstevel@tonic-gate if (sptd->spt_flags & SHM_PAGEABLE) {
23010Sstevel@tonic-gate shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
23020Sstevel@tonic-gate shmd_new->shm_lckpgs = 0;
2303721Smec if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
2304721Smec if ((error = hat_share(newseg->s_as->a_hat,
2305721Smec newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
2306721Smec seg->s_size, seg->s_szc)) != 0) {
2307721Smec kmem_free(shmd_new->shm_vpage,
23085224Smec btopr(amp->size));
2309721Smec }
2310721Smec }
2311721Smec return (error);
2312721Smec } else {
2313721Smec return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2314721Smec shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
2315721Smec seg->s_szc));
2316721Smec
23170Sstevel@tonic-gate }
23180Sstevel@tonic-gate }
23190Sstevel@tonic-gate
23200Sstevel@tonic-gate /*ARGSUSED*/
23210Sstevel@tonic-gate int
23220Sstevel@tonic-gate segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
23230Sstevel@tonic-gate {
23240Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data;
23250Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
23260Sstevel@tonic-gate
23270Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
23280Sstevel@tonic-gate
23290Sstevel@tonic-gate /*
23300Sstevel@tonic-gate * ISM segment is always rw.
23310Sstevel@tonic-gate */
23320Sstevel@tonic-gate return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
23330Sstevel@tonic-gate }
23340Sstevel@tonic-gate
23350Sstevel@tonic-gate /*
23360Sstevel@tonic-gate * Return an array of locked large pages, for empty slots allocate
23370Sstevel@tonic-gate * private zero-filled anon pages.
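* On failure, any pages already locked are unlocked again before the
* error is returned.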
23380Sstevel@tonic-gate */ 23390Sstevel@tonic-gate static int 23400Sstevel@tonic-gate spt_anon_getpages( 23410Sstevel@tonic-gate struct seg *sptseg, 23420Sstevel@tonic-gate caddr_t sptaddr, 23430Sstevel@tonic-gate size_t len, 23440Sstevel@tonic-gate page_t *ppa[]) 23450Sstevel@tonic-gate { 23460Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 23470Sstevel@tonic-gate struct anon_map *amp = sptd->spt_amp; 23480Sstevel@tonic-gate enum seg_rw rw = sptd->spt_prot; 23490Sstevel@tonic-gate uint_t szc = sptseg->s_szc; 23500Sstevel@tonic-gate size_t pg_sz, share_sz = page_get_pagesize(szc); 23510Sstevel@tonic-gate pgcnt_t lp_npgs; 23520Sstevel@tonic-gate caddr_t lp_addr, e_sptaddr; 23530Sstevel@tonic-gate uint_t vpprot, ppa_szc = 0; 23540Sstevel@tonic-gate struct vpage *vpage = NULL; 23550Sstevel@tonic-gate ulong_t j, ppa_idx; 23560Sstevel@tonic-gate int err, ierr = 0; 23570Sstevel@tonic-gate pgcnt_t an_idx; 23580Sstevel@tonic-gate anon_sync_obj_t cookie; 23595224Smec int anon_locked = 0; 23605224Smec pgcnt_t amp_pgs; 23615224Smec 23620Sstevel@tonic-gate 23630Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz)); 23640Sstevel@tonic-gate ASSERT(len != 0); 23650Sstevel@tonic-gate 23660Sstevel@tonic-gate pg_sz = share_sz; 23670Sstevel@tonic-gate lp_npgs = btop(pg_sz); 23680Sstevel@tonic-gate lp_addr = sptaddr; 23690Sstevel@tonic-gate e_sptaddr = sptaddr + len; 23700Sstevel@tonic-gate an_idx = seg_page(sptseg, sptaddr); 23710Sstevel@tonic-gate ppa_idx = 0; 23720Sstevel@tonic-gate 23730Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 23745224Smec 23755224Smec amp_pgs = page_get_pagecnt(amp->a_szc); 23765224Smec 23770Sstevel@tonic-gate /*CONSTCOND*/ 23780Sstevel@tonic-gate while (1) { 23790Sstevel@tonic-gate for (; lp_addr < e_sptaddr; 23805224Smec an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) { 23810Sstevel@tonic-gate 23825224Smec /* 23835224Smec * If we're currently locked, and we get to a new 23845224Smec * page, unlock our current anon chunk. 23855224Smec */ 23865224Smec if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) { 23875224Smec anon_array_exit(&cookie); 23885224Smec anon_locked = 0; 23895224Smec } 23905224Smec if (!anon_locked) { 23915224Smec anon_array_enter(amp, an_idx, &cookie); 23925224Smec anon_locked = 1; 23935224Smec } 23940Sstevel@tonic-gate ppa_szc = (uint_t)-1; 23950Sstevel@tonic-gate ierr = anon_map_getpages(amp, an_idx, szc, sptseg, 23960Sstevel@tonic-gate lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx], 23974426Saguzovsk &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred); 23980Sstevel@tonic-gate 23990Sstevel@tonic-gate if (ierr != 0) { 24000Sstevel@tonic-gate if (ierr > 0) { 24010Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 24020Sstevel@tonic-gate goto lpgs_err; 24030Sstevel@tonic-gate } 24040Sstevel@tonic-gate break; 24050Sstevel@tonic-gate } 24060Sstevel@tonic-gate } 24070Sstevel@tonic-gate if (lp_addr == e_sptaddr) { 24080Sstevel@tonic-gate break; 24090Sstevel@tonic-gate } 24100Sstevel@tonic-gate ASSERT(lp_addr < e_sptaddr); 24110Sstevel@tonic-gate 24120Sstevel@tonic-gate /* 24130Sstevel@tonic-gate * ierr == -1 means we failed to allocate a large page. 24140Sstevel@tonic-gate * so do a size down operation. 24150Sstevel@tonic-gate * 24160Sstevel@tonic-gate * ierr == -2 means some other process that privately shares 24170Sstevel@tonic-gate * pages with this process has allocated a larger page and we 24180Sstevel@tonic-gate * need to retry with larger pages. So do a size up 24190Sstevel@tonic-gate * operation. 
This relies on the fact that large pages are 24200Sstevel@tonic-gate * never partially shared i.e. if we share any constituent 24210Sstevel@tonic-gate * page of a large page with another process we must share the 24220Sstevel@tonic-gate * entire large page. Note this cannot happen for SOFTLOCK 24230Sstevel@tonic-gate * case, unless current address (lpaddr) is at the beginning 24240Sstevel@tonic-gate * of the next page size boundary because the other process 24250Sstevel@tonic-gate * couldn't have relocated locked pages. 24260Sstevel@tonic-gate */ 24270Sstevel@tonic-gate ASSERT(ierr == -1 || ierr == -2); 24280Sstevel@tonic-gate if (segvn_anypgsz) { 24290Sstevel@tonic-gate ASSERT(ierr == -2 || szc != 0); 24300Sstevel@tonic-gate ASSERT(ierr == -1 || szc < sptseg->s_szc); 24310Sstevel@tonic-gate szc = (ierr == -1) ? szc - 1 : szc + 1; 24320Sstevel@tonic-gate } else { 24330Sstevel@tonic-gate /* 24340Sstevel@tonic-gate * For faults and segvn_anypgsz == 0 24350Sstevel@tonic-gate * we need to be careful not to loop forever 24360Sstevel@tonic-gate * if existing page is found with szc other 24370Sstevel@tonic-gate * than 0 or seg->s_szc. This could be due 24380Sstevel@tonic-gate * to page relocations on behalf of DR or 24390Sstevel@tonic-gate * more likely large page creation. For this 24400Sstevel@tonic-gate * case simply re-size to existing page's szc 24410Sstevel@tonic-gate * if returned by anon_map_getpages(). 24420Sstevel@tonic-gate */ 24430Sstevel@tonic-gate if (ppa_szc == (uint_t)-1) { 24440Sstevel@tonic-gate szc = (ierr == -1) ? 0 : sptseg->s_szc; 24450Sstevel@tonic-gate } else { 24460Sstevel@tonic-gate ASSERT(ppa_szc <= sptseg->s_szc); 24470Sstevel@tonic-gate ASSERT(ierr == -2 || ppa_szc < szc); 24480Sstevel@tonic-gate ASSERT(ierr == -1 || ppa_szc > szc); 24490Sstevel@tonic-gate szc = ppa_szc; 24500Sstevel@tonic-gate } 24510Sstevel@tonic-gate } 24520Sstevel@tonic-gate pg_sz = page_get_pagesize(szc); 24530Sstevel@tonic-gate lp_npgs = btop(pg_sz); 24540Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(lp_addr, pg_sz)); 24550Sstevel@tonic-gate } 24565224Smec if (anon_locked) { 24575224Smec anon_array_exit(&cookie); 24585224Smec } 24590Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 24600Sstevel@tonic-gate return (0); 24610Sstevel@tonic-gate 24620Sstevel@tonic-gate lpgs_err: 24635224Smec if (anon_locked) { 24645224Smec anon_array_exit(&cookie); 24655224Smec } 24660Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 24670Sstevel@tonic-gate for (j = 0; j < ppa_idx; j++) 24680Sstevel@tonic-gate page_unlock(ppa[j]); 24690Sstevel@tonic-gate return (err); 24700Sstevel@tonic-gate } 24710Sstevel@tonic-gate 24722768Ssl108498 /* 24732768Ssl108498 * count the number of bytes in a set of spt pages that are currently not 24742768Ssl108498 * locked 24752768Ssl108498 */ 24762768Ssl108498 static rctl_qty_t 24772768Ssl108498 spt_unlockedbytes(pgcnt_t npages, page_t **ppa) 24782768Ssl108498 { 24792768Ssl108498 ulong_t i; 24802768Ssl108498 rctl_qty_t unlocked = 0; 24812768Ssl108498 24822768Ssl108498 for (i = 0; i < npages; i++) { 24832768Ssl108498 if (ppa[i]->p_lckcnt == 0) 24842768Ssl108498 unlocked += PAGESIZE; 24852768Ssl108498 } 24862768Ssl108498 return (unlocked); 24872768Ssl108498 } 24882768Ssl108498 248912908SPavel.Tatashin@Sun.COM extern u_longlong_t randtick(void); 249012908SPavel.Tatashin@Sun.COM /* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */ 249112908SPavel.Tatashin@Sun.COM #define NLCK (NCPU_P2) 249212908SPavel.Tatashin@Sun.COM /* Random number with a range [0, n-1], n must be power 
of two */ 249312908SPavel.Tatashin@Sun.COM #define RAND_P2(n) \ 249412908SPavel.Tatashin@Sun.COM ((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1)) 249512908SPavel.Tatashin@Sun.COM 24960Sstevel@tonic-gate int 24970Sstevel@tonic-gate spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 24982768Ssl108498 page_t **ppa, ulong_t *lockmap, size_t pos, 24992768Ssl108498 rctl_qty_t *locked) 25000Sstevel@tonic-gate { 250112908SPavel.Tatashin@Sun.COM struct shm_data *shmd = seg->s_data; 250212908SPavel.Tatashin@Sun.COM struct spt_data *sptd = shmd->shm_sptseg->s_data; 25030Sstevel@tonic-gate ulong_t i; 25040Sstevel@tonic-gate int kernel; 250512908SPavel.Tatashin@Sun.COM pgcnt_t nlck = 0; 250612908SPavel.Tatashin@Sun.COM int rv = 0; 250712908SPavel.Tatashin@Sun.COM int use_reserved = 1; 25080Sstevel@tonic-gate 25092768Ssl108498 /* return the number of bytes actually locked */ 25102768Ssl108498 *locked = 0; 251112908SPavel.Tatashin@Sun.COM 251212908SPavel.Tatashin@Sun.COM /* 251312908SPavel.Tatashin@Sun.COM * To avoid contention on freemem_lock, availrmem and pages_locked 251412908SPavel.Tatashin@Sun.COM * global counters are updated only every nlck locked pages instead of 251512908SPavel.Tatashin@Sun.COM * every time. Reserve nlck locks up front and deduct from this 251612908SPavel.Tatashin@Sun.COM * reservation for each page that requires a lock. When the reservation 251712908SPavel.Tatashin@Sun.COM * is consumed, reserve again. nlck is randomized, so the competing 251812908SPavel.Tatashin@Sun.COM * threads do not fall into a cyclic lock contention pattern. When 251912908SPavel.Tatashin@Sun.COM * memory is low, the lock ahead is disabled, and instead page_pp_lock() 252012908SPavel.Tatashin@Sun.COM * is used to lock pages. 252112908SPavel.Tatashin@Sun.COM */ 25220Sstevel@tonic-gate for (i = 0; i < npages; anon_index++, pos++, i++) { 252312908SPavel.Tatashin@Sun.COM if (nlck == 0 && use_reserved == 1) { 252412908SPavel.Tatashin@Sun.COM nlck = NLCK + RAND_P2(NLCK); 252512908SPavel.Tatashin@Sun.COM /* if fewer loops left, decrease nlck */ 252612908SPavel.Tatashin@Sun.COM nlck = MIN(nlck, npages - i); 252712908SPavel.Tatashin@Sun.COM /* 252812908SPavel.Tatashin@Sun.COM * Reserve nlck locks up front and deduct from this 252912908SPavel.Tatashin@Sun.COM * reservation for each page that requires a lock. When 253012908SPavel.Tatashin@Sun.COM * the reservation is consumed, reserve again. 
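* Since nlck is NLCK + RAND_P2(NLCK), each reservation covers between
* NLCK and 2 * NLCK - 1 pages, capped at the number of pages left to
* process. With NLCK of 8, for instance, a thread would reserve 8 to 15
* page locks per batch; the figure is only illustrative, as NLCK
* follows NCPU_P2.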
253112908SPavel.Tatashin@Sun.COM */ 253212908SPavel.Tatashin@Sun.COM mutex_enter(&freemem_lock); 253312908SPavel.Tatashin@Sun.COM if ((availrmem - nlck) < pages_pp_maximum) { 253412908SPavel.Tatashin@Sun.COM /* Do not do advance memory reserves */ 253512908SPavel.Tatashin@Sun.COM use_reserved = 0; 253612908SPavel.Tatashin@Sun.COM } else { 253712908SPavel.Tatashin@Sun.COM availrmem -= nlck; 253812908SPavel.Tatashin@Sun.COM pages_locked += nlck; 253912908SPavel.Tatashin@Sun.COM } 254012908SPavel.Tatashin@Sun.COM mutex_exit(&freemem_lock); 254112908SPavel.Tatashin@Sun.COM } 25420Sstevel@tonic-gate if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { 25430Sstevel@tonic-gate if (sptd->spt_ppa_lckcnt[anon_index] < 25440Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 25450Sstevel@tonic-gate if (++sptd->spt_ppa_lckcnt[anon_index] == 25460Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 25470Sstevel@tonic-gate cmn_err(CE_WARN, 25480Sstevel@tonic-gate "DISM page lock limit " 25490Sstevel@tonic-gate "reached on DISM offset 0x%lx\n", 25500Sstevel@tonic-gate anon_index << PAGESHIFT); 25510Sstevel@tonic-gate } 25520Sstevel@tonic-gate kernel = (sptd->spt_ppa && 255312908SPavel.Tatashin@Sun.COM sptd->spt_ppa[anon_index]); 255412908SPavel.Tatashin@Sun.COM if (!page_pp_lock(ppa[i], 0, kernel || 255512908SPavel.Tatashin@Sun.COM use_reserved)) { 25560Sstevel@tonic-gate sptd->spt_ppa_lckcnt[anon_index]--; 255712908SPavel.Tatashin@Sun.COM rv = EAGAIN; 255812908SPavel.Tatashin@Sun.COM break; 25590Sstevel@tonic-gate } 25602768Ssl108498 /* if this is a newly locked page, count it */ 25612768Ssl108498 if (ppa[i]->p_lckcnt == 1) { 256212908SPavel.Tatashin@Sun.COM if (kernel == 0 && use_reserved == 1) 256312908SPavel.Tatashin@Sun.COM nlck--; 25642768Ssl108498 *locked += PAGESIZE; 25652768Ssl108498 } 25660Sstevel@tonic-gate shmd->shm_lckpgs++; 25670Sstevel@tonic-gate shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; 25680Sstevel@tonic-gate if (lockmap != NULL) 25690Sstevel@tonic-gate BT_SET(lockmap, pos); 25700Sstevel@tonic-gate } 25710Sstevel@tonic-gate } 25720Sstevel@tonic-gate } 257312908SPavel.Tatashin@Sun.COM /* Return unused lock reservation */ 257412908SPavel.Tatashin@Sun.COM if (nlck != 0 && use_reserved == 1) { 257512908SPavel.Tatashin@Sun.COM mutex_enter(&freemem_lock); 257612908SPavel.Tatashin@Sun.COM availrmem += nlck; 257712908SPavel.Tatashin@Sun.COM pages_locked -= nlck; 257812908SPavel.Tatashin@Sun.COM mutex_exit(&freemem_lock); 257912908SPavel.Tatashin@Sun.COM } 258012908SPavel.Tatashin@Sun.COM 258112908SPavel.Tatashin@Sun.COM return (rv); 258212908SPavel.Tatashin@Sun.COM } 258312908SPavel.Tatashin@Sun.COM 258412908SPavel.Tatashin@Sun.COM int 258512908SPavel.Tatashin@Sun.COM spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 258612908SPavel.Tatashin@Sun.COM rctl_qty_t *unlocked) 258712908SPavel.Tatashin@Sun.COM { 258812908SPavel.Tatashin@Sun.COM struct shm_data *shmd = seg->s_data; 258912908SPavel.Tatashin@Sun.COM struct spt_data *sptd = shmd->shm_sptseg->s_data; 259012908SPavel.Tatashin@Sun.COM struct anon_map *amp = sptd->spt_amp; 259112908SPavel.Tatashin@Sun.COM struct anon *ap; 259212908SPavel.Tatashin@Sun.COM struct vnode *vp; 259312908SPavel.Tatashin@Sun.COM u_offset_t off; 259412908SPavel.Tatashin@Sun.COM struct page *pp; 259512908SPavel.Tatashin@Sun.COM int kernel; 259612908SPavel.Tatashin@Sun.COM anon_sync_obj_t cookie; 259712908SPavel.Tatashin@Sun.COM ulong_t i; 259812908SPavel.Tatashin@Sun.COM pgcnt_t nlck = 0; 259912908SPavel.Tatashin@Sun.COM pgcnt_t nlck_limit = NLCK; 
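/*
 * A note on the accounting below: *unlocked is only added to here; the
 * caller (segspt_shmlockop()) passes in a counter it has already set to
 * zero. As in spt_lockpages(), availrmem and pages_locked are updated
 * in batches rather than per page (roughly every NLCK unlocked pages,
 * with the remainder flushed on the final iteration), to keep
 * freemem_lock contention down.
 */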
260012908SPavel.Tatashin@Sun.COM 260112908SPavel.Tatashin@Sun.COM ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER); 260212908SPavel.Tatashin@Sun.COM for (i = 0; i < npages; i++, anon_index++) { 260312908SPavel.Tatashin@Sun.COM if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) { 260412908SPavel.Tatashin@Sun.COM anon_array_enter(amp, anon_index, &cookie); 260512908SPavel.Tatashin@Sun.COM ap = anon_get_ptr(amp->ahp, anon_index); 260612908SPavel.Tatashin@Sun.COM ASSERT(ap); 260712908SPavel.Tatashin@Sun.COM 260812908SPavel.Tatashin@Sun.COM swap_xlate(ap, &vp, &off); 260912908SPavel.Tatashin@Sun.COM anon_array_exit(&cookie); 261012908SPavel.Tatashin@Sun.COM pp = page_lookup(vp, off, SE_SHARED); 261112908SPavel.Tatashin@Sun.COM ASSERT(pp); 261212908SPavel.Tatashin@Sun.COM /* 261312908SPavel.Tatashin@Sun.COM * availrmem is decremented only for pages which are not 261412908SPavel.Tatashin@Sun.COM * in seg pcache; for pages in seg pcache, availrmem was 261512908SPavel.Tatashin@Sun.COM * decremented in _dismpagelock() 261612908SPavel.Tatashin@Sun.COM */ 261712908SPavel.Tatashin@Sun.COM kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]); 261812908SPavel.Tatashin@Sun.COM ASSERT(pp->p_lckcnt > 0); 261912908SPavel.Tatashin@Sun.COM 262012908SPavel.Tatashin@Sun.COM /* 262112908SPavel.Tatashin@Sun.COM * Unlock the page but do not change availrmem; we do it 262212908SPavel.Tatashin@Sun.COM * ourselves every nlck loops. 262312908SPavel.Tatashin@Sun.COM */ 262412908SPavel.Tatashin@Sun.COM page_pp_unlock(pp, 0, 1); 262512908SPavel.Tatashin@Sun.COM if (pp->p_lckcnt == 0) { 262612908SPavel.Tatashin@Sun.COM if (kernel == 0) 262712908SPavel.Tatashin@Sun.COM nlck++; 262812908SPavel.Tatashin@Sun.COM *unlocked += PAGESIZE; 262912908SPavel.Tatashin@Sun.COM } 263012908SPavel.Tatashin@Sun.COM page_unlock(pp); 263112908SPavel.Tatashin@Sun.COM shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED; 263212908SPavel.Tatashin@Sun.COM sptd->spt_ppa_lckcnt[anon_index]--; 263312908SPavel.Tatashin@Sun.COM shmd->shm_lckpgs--; 263412908SPavel.Tatashin@Sun.COM } 263512908SPavel.Tatashin@Sun.COM 263612908SPavel.Tatashin@Sun.COM /* 263712908SPavel.Tatashin@Sun.COM * To reduce freemem_lock contention, do not update availrmem 263812908SPavel.Tatashin@Sun.COM * until at least NLCK pages have been unlocked. 263912908SPavel.Tatashin@Sun.COM * 1. No need to update if nlck is zero 264012908SPavel.Tatashin@Sun.COM * 2. 
Always update if the last iteration 264112908SPavel.Tatashin@Sun.COM */ 264212908SPavel.Tatashin@Sun.COM if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) { 264312908SPavel.Tatashin@Sun.COM mutex_enter(&freemem_lock); 264412908SPavel.Tatashin@Sun.COM availrmem += nlck; 264512908SPavel.Tatashin@Sun.COM pages_locked -= nlck; 264612908SPavel.Tatashin@Sun.COM mutex_exit(&freemem_lock); 264712908SPavel.Tatashin@Sun.COM nlck = 0; 264812908SPavel.Tatashin@Sun.COM nlck_limit = NLCK + RAND_P2(NLCK); 264912908SPavel.Tatashin@Sun.COM } 265012908SPavel.Tatashin@Sun.COM } 265112908SPavel.Tatashin@Sun.COM ANON_LOCK_EXIT(&->a_rwlock); 265212908SPavel.Tatashin@Sun.COM 26530Sstevel@tonic-gate return (0); 26540Sstevel@tonic-gate } 26550Sstevel@tonic-gate 26560Sstevel@tonic-gate /*ARGSUSED*/ 26570Sstevel@tonic-gate static int 26580Sstevel@tonic-gate segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, 26590Sstevel@tonic-gate int attr, int op, ulong_t *lockmap, size_t pos) 26600Sstevel@tonic-gate { 26610Sstevel@tonic-gate struct shm_data *shmd = seg->s_data; 26620Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 26630Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 26642768Ssl108498 struct kshmid *sp = sptd->spt_amp->a_sp; 26650Sstevel@tonic-gate pgcnt_t npages, a_npages; 26660Sstevel@tonic-gate page_t **ppa; 26670Sstevel@tonic-gate pgcnt_t an_idx, a_an_idx, ppa_idx; 26680Sstevel@tonic-gate caddr_t spt_addr, a_addr; /* spt and aligned address */ 26690Sstevel@tonic-gate size_t a_len; /* aligned len */ 26700Sstevel@tonic-gate size_t share_sz; 26710Sstevel@tonic-gate ulong_t i; 26720Sstevel@tonic-gate int sts = 0; 26732768Ssl108498 rctl_qty_t unlocked = 0; 26742768Ssl108498 rctl_qty_t locked = 0; 26752768Ssl108498 struct proc *p = curproc; 26762768Ssl108498 kproject_t *proj; 26770Sstevel@tonic-gate 26780Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 26792768Ssl108498 ASSERT(sp != NULL); 26800Sstevel@tonic-gate 26810Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 26820Sstevel@tonic-gate return (0); 26830Sstevel@tonic-gate } 26840Sstevel@tonic-gate 26850Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 26860Sstevel@tonic-gate an_idx = seg_page(seg, addr); 26870Sstevel@tonic-gate npages = btopr(len); 26880Sstevel@tonic-gate 26890Sstevel@tonic-gate if (an_idx + npages > btopr(shmd->shm_amp->size)) { 26900Sstevel@tonic-gate return (ENOMEM); 26910Sstevel@tonic-gate } 26920Sstevel@tonic-gate 26932768Ssl108498 /* 26942768Ssl108498 * A shm's project never changes, so no lock needed. 26952768Ssl108498 * The shm has a hold on the project, so it will not go away. 26962768Ssl108498 * Since we have a mapping to shm within this zone, we know 26972768Ssl108498 * that the zone will not go away. 26982768Ssl108498 */ 26992768Ssl108498 proj = sp->shm_perm.ipc_proj; 27002768Ssl108498 27010Sstevel@tonic-gate if (op == MC_LOCK) { 27022768Ssl108498 27030Sstevel@tonic-gate /* 27040Sstevel@tonic-gate * Need to align addr and size request if they are not 27050Sstevel@tonic-gate * aligned so we can always allocate large page(s) however 27060Sstevel@tonic-gate * we only lock what was requested in initial request. 
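* For example, a request that begins in the middle of a large page is
* aligned down to a_addr and rounded up to a_len so that full large
* pages can be allocated, while spt_lockpages() is still called with
* the original an_idx and npages, so only the pages the caller asked
* for are locked.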
27070Sstevel@tonic-gate */ 27080Sstevel@tonic-gate share_sz = page_get_pagesize(sptseg->s_szc); 27090Sstevel@tonic-gate a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz); 27100Sstevel@tonic-gate a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)), 27115224Smec share_sz); 27120Sstevel@tonic-gate a_npages = btop(a_len); 27130Sstevel@tonic-gate a_an_idx = seg_page(seg, a_addr); 27140Sstevel@tonic-gate spt_addr = sptseg->s_base + ptob(a_an_idx); 27150Sstevel@tonic-gate ppa_idx = an_idx - a_an_idx; 27160Sstevel@tonic-gate 27170Sstevel@tonic-gate if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages), 27185224Smec KM_NOSLEEP)) == NULL) { 27190Sstevel@tonic-gate return (ENOMEM); 27200Sstevel@tonic-gate } 27210Sstevel@tonic-gate 27220Sstevel@tonic-gate /* 27230Sstevel@tonic-gate * Don't cache any new pages for IO and 27240Sstevel@tonic-gate * flush any cached pages. 27250Sstevel@tonic-gate */ 27260Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 27270Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27280Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27290Sstevel@tonic-gate 27300Sstevel@tonic-gate sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa); 27310Sstevel@tonic-gate if (sts != 0) { 27320Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27330Sstevel@tonic-gate kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 27340Sstevel@tonic-gate return (sts); 27350Sstevel@tonic-gate } 27360Sstevel@tonic-gate 27372768Ssl108498 mutex_enter(&sp->shm_mlock); 27382768Ssl108498 /* enforce locked memory rctl */ 27392768Ssl108498 unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]); 27402768Ssl108498 27412768Ssl108498 mutex_enter(&p->p_lock); 27422768Ssl108498 if (rctl_incr_locked_mem(p, proj, unlocked, 0)) { 27432768Ssl108498 mutex_exit(&p->p_lock); 27442768Ssl108498 sts = EAGAIN; 27452768Ssl108498 } else { 27462768Ssl108498 mutex_exit(&p->p_lock); 27472768Ssl108498 sts = spt_lockpages(seg, an_idx, npages, 27482768Ssl108498 &ppa[ppa_idx], lockmap, pos, &locked); 27492768Ssl108498 27502768Ssl108498 /* 27512768Ssl108498 * correct locked count if not all pages could be 27522768Ssl108498 * locked 27532768Ssl108498 */ 27542768Ssl108498 if ((unlocked - locked) > 0) { 27552768Ssl108498 rctl_decr_locked_mem(NULL, proj, 27562768Ssl108498 (unlocked - locked), 0); 27572768Ssl108498 } 27582768Ssl108498 } 27590Sstevel@tonic-gate /* 27602768Ssl108498 * unlock pages 27610Sstevel@tonic-gate */ 27622768Ssl108498 for (i = 0; i < a_npages; i++) 27630Sstevel@tonic-gate page_unlock(ppa[i]); 27640Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27650Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27662768Ssl108498 mutex_exit(&sp->shm_mlock); 27670Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27680Sstevel@tonic-gate 27690Sstevel@tonic-gate kmem_free(ppa, ((sizeof (page_t *)) * a_npages)); 27700Sstevel@tonic-gate 27710Sstevel@tonic-gate } else if (op == MC_UNLOCK) { /* unlock */ 27726695Saguzovsk page_t **ppa; 27730Sstevel@tonic-gate 27740Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 27750Sstevel@tonic-gate if (shmd->shm_lckpgs == 0) { 27760Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27770Sstevel@tonic-gate return (0); 27780Sstevel@tonic-gate } 27790Sstevel@tonic-gate /* 27800Sstevel@tonic-gate * Don't cache new IO pages. 
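* If a cached page array exists, DISM_PPA_CHANGED is set so that the
* now-stale array is not handed out again, and the array is purged
* from the pagelock cache with seg_ppurge_wiredpp() once the locks are
* dropped below.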
27810Sstevel@tonic-gate */ 27820Sstevel@tonic-gate if (sptd->spt_ppa != NULL) 27830Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27840Sstevel@tonic-gate 27852768Ssl108498 mutex_enter(&sp->shm_mlock); 278612908SPavel.Tatashin@Sun.COM sts = spt_unlockpages(seg, an_idx, npages, &unlocked); 27876695Saguzovsk if ((ppa = sptd->spt_ppa) != NULL) 27880Sstevel@tonic-gate sptd->spt_flags |= DISM_PPA_CHANGED; 27890Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 27902768Ssl108498 27912768Ssl108498 rctl_decr_locked_mem(NULL, proj, unlocked, 0); 27922768Ssl108498 mutex_exit(&sp->shm_mlock); 27936695Saguzovsk 27946695Saguzovsk if (ppa != NULL) 27956695Saguzovsk seg_ppurge_wiredpp(ppa); 27960Sstevel@tonic-gate } 27970Sstevel@tonic-gate return (sts); 27980Sstevel@tonic-gate } 27990Sstevel@tonic-gate 28000Sstevel@tonic-gate /*ARGSUSED*/ 28010Sstevel@tonic-gate int 28020Sstevel@tonic-gate segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 28030Sstevel@tonic-gate { 28040Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28050Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28060Sstevel@tonic-gate spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1; 28070Sstevel@tonic-gate 28080Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 28090Sstevel@tonic-gate 28100Sstevel@tonic-gate /* 28110Sstevel@tonic-gate * ISM segment is always rw. 28120Sstevel@tonic-gate */ 28130Sstevel@tonic-gate while (--pgno >= 0) 28140Sstevel@tonic-gate *protv++ = sptd->spt_prot; 28150Sstevel@tonic-gate return (0); 28160Sstevel@tonic-gate } 28170Sstevel@tonic-gate 28180Sstevel@tonic-gate /*ARGSUSED*/ 28190Sstevel@tonic-gate u_offset_t 28200Sstevel@tonic-gate segspt_shmgetoffset(struct seg *seg, caddr_t addr) 28210Sstevel@tonic-gate { 28220Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 28230Sstevel@tonic-gate 28240Sstevel@tonic-gate /* Offset does not matter in ISM memory */ 28250Sstevel@tonic-gate 28260Sstevel@tonic-gate return ((u_offset_t)0); 28270Sstevel@tonic-gate } 28280Sstevel@tonic-gate 28290Sstevel@tonic-gate /* ARGSUSED */ 28300Sstevel@tonic-gate int 28310Sstevel@tonic-gate segspt_shmgettype(struct seg *seg, caddr_t addr) 28320Sstevel@tonic-gate { 28330Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28340Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28350Sstevel@tonic-gate 28360Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 28370Sstevel@tonic-gate 28380Sstevel@tonic-gate /* 28390Sstevel@tonic-gate * The shared memory mapping is always MAP_SHARED, SWAP is only 28400Sstevel@tonic-gate * reserved for DISM 28410Sstevel@tonic-gate */ 28420Sstevel@tonic-gate return (MAP_SHARED | 28435224Smec ((sptd->spt_flags & SHM_PAGEABLE) ? 
0 : MAP_NORESERVE)); 28440Sstevel@tonic-gate } 28450Sstevel@tonic-gate 28460Sstevel@tonic-gate /*ARGSUSED*/ 28470Sstevel@tonic-gate int 28480Sstevel@tonic-gate segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 28490Sstevel@tonic-gate { 28500Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 28510Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28520Sstevel@tonic-gate 28530Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 28540Sstevel@tonic-gate 28550Sstevel@tonic-gate *vpp = sptd->spt_vp; 28560Sstevel@tonic-gate return (0); 28570Sstevel@tonic-gate } 28580Sstevel@tonic-gate 28595224Smec /* 28605224Smec * We need to wait for pending IO to complete to a DISM segment in order for 28615224Smec * pages to get kicked out of the seg_pcache. 120 seconds should be more 28625224Smec * than enough time to wait. 28635224Smec */ 28645224Smec static clock_t spt_pcache_wait = 120; 28655224Smec 28660Sstevel@tonic-gate /*ARGSUSED*/ 28670Sstevel@tonic-gate static int 28680Sstevel@tonic-gate segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 28690Sstevel@tonic-gate { 28705224Smec struct shm_data *shmd = (struct shm_data *)seg->s_data; 28710Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 28720Sstevel@tonic-gate struct anon_map *amp; 28735224Smec pgcnt_t pg_idx; 28745224Smec ushort_t gen; 28755224Smec clock_t end_lbolt; 28765224Smec int writer; 28776695Saguzovsk page_t **ppa; 28780Sstevel@tonic-gate 28790Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 28800Sstevel@tonic-gate 28810Sstevel@tonic-gate if (behav == MADV_FREE) { 28820Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) == 0) 28830Sstevel@tonic-gate return (0); 28840Sstevel@tonic-gate 28850Sstevel@tonic-gate amp = sptd->spt_amp; 28860Sstevel@tonic-gate pg_idx = seg_page(seg, addr); 28870Sstevel@tonic-gate 28880Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 28896695Saguzovsk if ((ppa = sptd->spt_ppa) == NULL) { 28905224Smec mutex_exit(&sptd->spt_lock); 28915224Smec ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 28925224Smec anon_disclaim(amp, pg_idx, len); 28935224Smec ANON_LOCK_EXIT(&->a_rwlock); 28945224Smec return (0); 28955224Smec } 28965224Smec 28975224Smec sptd->spt_flags |= DISM_PPA_CHANGED; 28985224Smec gen = sptd->spt_gen; 28995224Smec 29000Sstevel@tonic-gate mutex_exit(&sptd->spt_lock); 29010Sstevel@tonic-gate 29020Sstevel@tonic-gate /* 29030Sstevel@tonic-gate * Purge all DISM cached pages 29040Sstevel@tonic-gate */ 29056695Saguzovsk seg_ppurge_wiredpp(ppa); 29060Sstevel@tonic-gate 29075224Smec /* 29085224Smec * Drop the AS_LOCK so that other threads can grab it 29095224Smec * in the as_pageunlock path and hopefully get the segment 29105224Smec * kicked out of the seg_pcache. We bump the shm_softlockcnt 29115224Smec * to keep this segment resident. 29125224Smec */ 29135224Smec writer = AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock); 29145224Smec atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1); 29155224Smec AS_LOCK_EXIT(seg->s_as, &seg->s_as->a_lock); 29165224Smec 29170Sstevel@tonic-gate mutex_enter(&sptd->spt_lock); 29185224Smec 291911066Srafael.vanoni@sun.com end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait); 29205224Smec 29215224Smec /* 29225224Smec * Try to wait for pages to get kicked out of the seg_pcache. 
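* The wait ends when the segment generation count changes, when the
* DISM_PPA_CHANGED flag is cleared, when the timeout derived from
* spt_pcache_wait expires, or when cv_timedwait_sig() is interrupted
* by a signal.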
29235224Smec */ 29245224Smec while (sptd->spt_gen == gen && 29255224Smec (sptd->spt_flags & DISM_PPA_CHANGED) && 292611066Srafael.vanoni@sun.com ddi_get_lbolt() < end_lbolt) { 29275224Smec if (!cv_timedwait_sig(&sptd->spt_cv, 29285224Smec &sptd->spt_lock, end_lbolt)) { 29295224Smec break; 29305224Smec } 29315224Smec } 29325224Smec 29335224Smec mutex_exit(&sptd->spt_lock); 29345224Smec 29355224Smec /* Regrab the AS_LOCK and release our hold on the segment */ 29365224Smec AS_LOCK_ENTER(seg->s_as, &seg->s_as->a_lock, 29375224Smec writer ? RW_WRITER : RW_READER); 29385224Smec atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -1); 29395224Smec if (shmd->shm_softlockcnt <= 0) { 29405224Smec if (AS_ISUNMAPWAIT(seg->s_as)) { 29415224Smec mutex_enter(&seg->s_as->a_contents); 29425224Smec if (AS_ISUNMAPWAIT(seg->s_as)) { 29435224Smec AS_CLRUNMAPWAIT(seg->s_as); 29445224Smec cv_broadcast(&seg->s_as->a_cv); 29455224Smec } 29465224Smec mutex_exit(&seg->s_as->a_contents); 29475224Smec } 29485224Smec } 29495224Smec 29500Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 29515224Smec anon_disclaim(amp, pg_idx, len); 29520Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 29530Sstevel@tonic-gate } else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP || 29540Sstevel@tonic-gate behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) { 29550Sstevel@tonic-gate int already_set; 29560Sstevel@tonic-gate ulong_t anon_index; 29570Sstevel@tonic-gate lgrp_mem_policy_t policy; 29580Sstevel@tonic-gate caddr_t shm_addr; 29590Sstevel@tonic-gate size_t share_size; 29600Sstevel@tonic-gate size_t size; 29610Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 29620Sstevel@tonic-gate caddr_t sptseg_addr; 29630Sstevel@tonic-gate 29640Sstevel@tonic-gate /* 29650Sstevel@tonic-gate * Align address and length to page size of underlying segment 29660Sstevel@tonic-gate */ 29670Sstevel@tonic-gate share_size = page_get_pagesize(shmd->shm_sptseg->s_szc); 29680Sstevel@tonic-gate shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size); 29690Sstevel@tonic-gate size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), 29700Sstevel@tonic-gate share_size); 29710Sstevel@tonic-gate 29720Sstevel@tonic-gate amp = shmd->shm_amp; 29730Sstevel@tonic-gate anon_index = seg_page(seg, shm_addr); 29740Sstevel@tonic-gate 29750Sstevel@tonic-gate /* 29760Sstevel@tonic-gate * And now we may have to adjust size downward if we have 29770Sstevel@tonic-gate * exceeded the realsize of the segment or initial anon 29780Sstevel@tonic-gate * allocations. 29790Sstevel@tonic-gate */ 29800Sstevel@tonic-gate sptseg_addr = sptseg->s_base + ptob(anon_index); 29810Sstevel@tonic-gate if ((sptseg_addr + size) > 29820Sstevel@tonic-gate (sptseg->s_base + sptd->spt_realsize)) 29830Sstevel@tonic-gate size = (sptseg->s_base + sptd->spt_realsize) - 29840Sstevel@tonic-gate sptseg_addr; 29850Sstevel@tonic-gate 29860Sstevel@tonic-gate /* 29870Sstevel@tonic-gate * Set memory allocation policy for this segment 29880Sstevel@tonic-gate */ 29890Sstevel@tonic-gate policy = lgrp_madv_to_policy(behav, len, MAP_SHARED); 29900Sstevel@tonic-gate already_set = lgrp_shm_policy_set(policy, amp, anon_index, 29910Sstevel@tonic-gate NULL, 0, len); 29920Sstevel@tonic-gate 29930Sstevel@tonic-gate /* 29940Sstevel@tonic-gate * If random memory allocation policy set already, 29950Sstevel@tonic-gate * don't bother reapplying it. 
29960Sstevel@tonic-gate */ 29970Sstevel@tonic-gate if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy)) 29980Sstevel@tonic-gate return (0); 29990Sstevel@tonic-gate 30000Sstevel@tonic-gate /* 30010Sstevel@tonic-gate * Mark any existing pages in the given range for 30020Sstevel@tonic-gate * migration, flushing the I/O page cache, and using 30030Sstevel@tonic-gate * underlying segment to calculate anon index and get 30040Sstevel@tonic-gate * anonmap and vnode pointer from 30050Sstevel@tonic-gate */ 30060Sstevel@tonic-gate if (shmd->shm_softlockcnt > 0) 30070Sstevel@tonic-gate segspt_purge(seg); 30080Sstevel@tonic-gate 30090Sstevel@tonic-gate page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0); 30100Sstevel@tonic-gate } 30110Sstevel@tonic-gate 30120Sstevel@tonic-gate return (0); 30130Sstevel@tonic-gate } 30140Sstevel@tonic-gate 30150Sstevel@tonic-gate /*ARGSUSED*/ 30160Sstevel@tonic-gate void 30170Sstevel@tonic-gate segspt_shmdump(struct seg *seg) 30180Sstevel@tonic-gate { 30190Sstevel@tonic-gate /* no-op for ISM segment */ 30200Sstevel@tonic-gate } 30210Sstevel@tonic-gate 30220Sstevel@tonic-gate /*ARGSUSED*/ 30230Sstevel@tonic-gate static faultcode_t 30240Sstevel@tonic-gate segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 30250Sstevel@tonic-gate { 30260Sstevel@tonic-gate return (ENOTSUP); 30270Sstevel@tonic-gate } 30280Sstevel@tonic-gate 30290Sstevel@tonic-gate /* 30300Sstevel@tonic-gate * get a memory ID for an addr in a given segment 30310Sstevel@tonic-gate */ 30320Sstevel@tonic-gate static int 30330Sstevel@tonic-gate segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 30340Sstevel@tonic-gate { 30350Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 30360Sstevel@tonic-gate struct anon *ap; 30370Sstevel@tonic-gate size_t anon_index; 30380Sstevel@tonic-gate struct anon_map *amp = shmd->shm_amp; 30390Sstevel@tonic-gate struct spt_data *sptd = shmd->shm_sptseg->s_data; 30400Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 30410Sstevel@tonic-gate anon_sync_obj_t cookie; 30420Sstevel@tonic-gate 30430Sstevel@tonic-gate anon_index = seg_page(seg, addr); 30440Sstevel@tonic-gate 30450Sstevel@tonic-gate if (addr > (seg->s_base + sptd->spt_realsize)) { 30460Sstevel@tonic-gate return (EFAULT); 30470Sstevel@tonic-gate } 30480Sstevel@tonic-gate 30490Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 30500Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 30510Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index); 30520Sstevel@tonic-gate if (ap == NULL) { 30530Sstevel@tonic-gate struct page *pp; 30540Sstevel@tonic-gate caddr_t spt_addr = sptseg->s_base + ptob(anon_index); 30550Sstevel@tonic-gate 30560Sstevel@tonic-gate pp = anon_zero(sptseg, spt_addr, &ap, kcred); 30570Sstevel@tonic-gate if (pp == NULL) { 30580Sstevel@tonic-gate anon_array_exit(&cookie); 30590Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 30600Sstevel@tonic-gate return (ENOMEM); 30610Sstevel@tonic-gate } 30620Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); 30630Sstevel@tonic-gate page_unlock(pp); 30640Sstevel@tonic-gate } 30650Sstevel@tonic-gate anon_array_exit(&cookie); 30660Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 30670Sstevel@tonic-gate memidp->val[0] = (uintptr_t)ap; 30680Sstevel@tonic-gate memidp->val[1] = (uintptr_t)addr & PAGEOFFSET; 30690Sstevel@tonic-gate return (0); 30700Sstevel@tonic-gate } 30710Sstevel@tonic-gate 30720Sstevel@tonic-gate /* 30730Sstevel@tonic-gate * Get memory 
allocation policy info for specified address in given segment 30740Sstevel@tonic-gate */ 30750Sstevel@tonic-gate static lgrp_mem_policy_info_t * 30760Sstevel@tonic-gate segspt_shmgetpolicy(struct seg *seg, caddr_t addr) 30770Sstevel@tonic-gate { 30780Sstevel@tonic-gate struct anon_map *amp; 30790Sstevel@tonic-gate ulong_t anon_index; 30800Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info; 30810Sstevel@tonic-gate struct shm_data *shm_data; 30820Sstevel@tonic-gate 30830Sstevel@tonic-gate ASSERT(seg != NULL); 30840Sstevel@tonic-gate 30850Sstevel@tonic-gate /* 30860Sstevel@tonic-gate * Get anon_map from segshm 30870Sstevel@tonic-gate * 30880Sstevel@tonic-gate * Assume that no lock needs to be held on anon_map, since 30890Sstevel@tonic-gate * it should be protected by its reference count which must be 30900Sstevel@tonic-gate * nonzero for an existing segment 30910Sstevel@tonic-gate * Need to grab readers lock on policy tree though 30920Sstevel@tonic-gate */ 30930Sstevel@tonic-gate shm_data = (struct shm_data *)seg->s_data; 30940Sstevel@tonic-gate if (shm_data == NULL) 30950Sstevel@tonic-gate return (NULL); 30960Sstevel@tonic-gate amp = shm_data->shm_amp; 30970Sstevel@tonic-gate ASSERT(amp->refcnt != 0); 30980Sstevel@tonic-gate 30990Sstevel@tonic-gate /* 31000Sstevel@tonic-gate * Get policy info 31010Sstevel@tonic-gate * 31020Sstevel@tonic-gate * Assume starting anon index of 0 31030Sstevel@tonic-gate */ 31040Sstevel@tonic-gate anon_index = seg_page(seg, addr); 31050Sstevel@tonic-gate policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0); 31060Sstevel@tonic-gate 31070Sstevel@tonic-gate return (policy_info); 31080Sstevel@tonic-gate } 3109670Selowe 3110670Selowe /*ARGSUSED*/ 3111670Selowe static int 3112670Selowe segspt_shmcapable(struct seg *seg, segcapability_t capability) 3113670Selowe { 3114670Selowe return (0); 3115670Selowe } 3116